Mesh Oriented datABase  (version 5.5.1)
An array-based unstructured mesh library
gs.cpp
1 /* compile-time settings:
2 
3  FORTRAN naming convention
4  default cpgs_setup, etc.
5  -DUPCASE CPGS_SETUP, etc.
6  -DUNDERSCORE cpgs_setup_, etc.
7 
8  -DMPI parallel version (sequential otherwise)
9  -DCRYSTAL_STATIC avoids some message exchange, at the risk of
10  crashing because of an insufficient buffer size
11 
12  -DINITIAL_BUFFER_SIZE=expression
13  ignored unless CRYSTAL_STATIC is defined.
14  arithmetic expression controlling the initial buffer size for the crystal
15  router; this needs to be large enough to hold the intermediate messages
16  during all stages of the crystal router
17 
18  variables that can be used in expression include
19  num - the number of processors
20  n - the length of the global index array
21 
22  */
23 
24 /* default for INITIAL_BUFFER_SIZE */
25 #ifdef CRYSTAL_STATIC
26 #ifndef INITIAL_BUFFER_SIZE
27 #define INITIAL_BUFFER_SIZE 2 * ( 3 * num + n * 9 )
28 #endif
29 #endif
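/* Worked example (illustrative only): with num = 64 processors and a global
   index array of length n = 10000, the default expression evaluates to
   2 * ( 3*64 + 10000*9 ) = 180384 buffer entries. */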
30 
31 /* FORTRAN usage:
32 
33  integer hc, np
34  call crystal_new(hc,comm,np) ! get a crystal router handle (see fcrystal.c)
35 
36  integer hgs
37  integer n, max_vec_dim
38  integer*? global_index_array(1:n) ! type corresponding to slong in "types.h"
39 
40  call cpgs_setup(hgs,hc,global_index_array,n,max_vec_dim)
41  sets hgs to new handle
42 
43  !ok to call crystal_done(hc) here, or any later time
44 
45  call cpgs_op(hgs, u, op)
46  integer handle, op : 1-add, 2-multiply, 3-min, 4-max
47  real u(1:n) - same layout as global_index_array provided to cpgs_setup
48 
49  call cpgs_op_vec(hgs, u, d, op)
50  integer op : 1-add, 2-multiply, 3-min, 4-max
51  integer d <= max_vec_dim
52  real u(1:d, 1:n) - vector components for each node stored together
53 
54  call cpgs_op_many(hgs, u1, u2, u3, u4, u5, u6, d, op)
55  integer op : 1-add, 2-multiply, 3-min, 4-max
56  integer d : in {1,2,3,4,5,6}, <= max_vec_dim
57  real u1(1:n), u2(1:n), u3(1:n), etc.
58 
59  same effect as: call cpgs_op(hgs, u1, op)
60  if(d.gt.1) call cpgs_op(hgs, u2, op)
61  if(d.gt.2) call cpgs_op(hgs, u3, op)
62  etc.
63  with possibly some savings as fewer messages are exchanged
64 
65  call cpgs_free(hgs)
66  */
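/* C++ usage (a minimal sketch, not part of this file: it assumes an MPI build,
   the declarations in "moab/gs.hpp", and caller-owned arrays; the function and
   variable names below are hypothetical):

   #include "moab/gs.hpp"
   using namespace moab;

   void gather_scatter_sum( MPI_Comm comm, long* gid, Ulong* ugid, realType* u, uint n )
   {
       gs_data::crystal_data crystal;
       crystal.initialize( comm );                        // crystal router state

       gs_data gs;
       ErrorCode rval = gs.initialize( n, gid, ugid, 1, 1, 1, &crystal );
       if( rval != MB_SUCCESS ) return;                   // 1 label, 1 ulabel, maxv = 1

       gs.gs_data_op( u, GS_OP_ADD );                     // sum shared entries of u

       gs.reset();                                        // release gather/scatter state
       crystal.reset();
   }
*/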
67 
68 #include <cstdio>
69 #include <cstdlib>
70 #include <cstdarg>
71 #include <cstring>
72 #include <cmath>
73 #include "moab/gs.hpp"
74 #ifdef MOAB_HAVE_MPI
75 #include "moab_mpi.h"
76 #endif
77 
78 #ifdef MOAB_HAVE_MPI
79 
80 #ifdef MOAB_HAVE_VALGRIND
81 #include <valgrind/memcheck.h>
82 #elif !defined( VALGRIND_CHECK_MEM_IS_DEFINED )
83 #define VALGRIND_CHECK_MEM_IS_DEFINED( a, b ) ( (void)0 )
84 #endif
85 
86 #endif
87 
88 namespace moab
89 {
90 
91 /*--------------------------------------------------------------------------
92  Local Execution Phases
93  --------------------------------------------------------------------------*/
94 
95 #define DO_SET( a, b ) b = a
96 #define DO_ADD( a, b ) a += b
97 #define DO_MUL( a, b ) a *= b
98 #define DO_MIN( a, b ) \
99  if( ( b ) < ( a ) ) ( a ) = b
100 #define DO_MAX( a, b ) \
101  if( ( b ) > ( a ) ) ( a ) = b
102 #define DO_BPR( a, b ) \
103  do \
104  { \
105  uint a_ = a; \
106  uint b_ = b; \
107  for( ;; ) \
108  { \
109  if( a_ < b_ ) \
110  b_ >>= 1; \
111  else if( b_ < a_ ) \
112  a_ >>= 1; \
113  else \
114  break; \
115  } \
116  ( a ) = a_; \
117  } while( 0 )
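/* DO_BPR repeatedly halves the larger of a and b until the two values agree,
   i.e. it reduces a to the common leading binary digits of a and b.  Worked
   example: a = 12 (binary 1100) and b = 13 (binary 1101) both reduce to
   6 (binary 110), so a ends up as 6. */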
118 
119 #define LOOP( op ) \
120  do \
121  { \
122  sint i, j; \
123  while( ( i = *cm++ ) != -1 ) \
124  while( ( j = *cm++ ) != -1 ) \
125  op( u[i], u[j] ); \
126  } while( 0 )
127 
128 static void local_condense( realType* u, int op, const sint* cm )
129 {
130  switch( op )
131  {
132  case GS_OP_ADD:
133  LOOP( DO_ADD );
134  break;
135  case GS_OP_MUL:
136  LOOP( DO_MUL );
137  break;
138  case GS_OP_MIN:
139  LOOP( DO_MIN );
140  break;
141  case GS_OP_MAX:
142  LOOP( DO_MAX );
143  break;
144  case GS_OP_BPR:
145  LOOP( DO_BPR );
146  break;
147  }
148 }
149 
150 static void local_uncondense( realType* u, const sint* cm )
151 {
152  LOOP( DO_SET );
153 }
154 
155 #undef LOOP
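/* The condense map cm is a flat list of index groups: each group is a run of
   indices terminated by -1, and an extra -1 ends the whole list (see the map
   construction in the setup code below).  For a group { i, j, k, -1 },
   local_condense folds u[j] and u[k] into u[i], and local_uncondense copies
   u[i] back out to u[j] and u[k].  Illustrative example with hypothetical
   data: cm = { 0, 3, -1, 1, 4, 5, -1, -1 } and op = GS_OP_ADD gives
   u[0] += u[3], then u[1] += u[4] and u[1] += u[5]. */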
156 
157 #define LOOP( op ) \
158  do \
159  { \
160  sint i, j, k; \
161  while( ( i = *cm++ ) != -1 ) \
162  { \
163  realType* pi = u + n * i; \
164  while( ( j = *cm++ ) != -1 ) \
165  { \
166  realType* pj = u + n * j; \
167  for( k = n; k; --k ) \
168  { \
169  op( *pi, *pj ); \
170  ++pi; \
171  ++pj; \
172  } \
173  } \
174  } \
175  } while( 0 )
176 
177 static void local_condense_vec( realType* u, uint n, int op, const sint* cm )
178 {
179  switch( op )
180  {
181  case GS_OP_ADD:
182  LOOP( DO_ADD );
183  break;
184  case GS_OP_MUL:
185  LOOP( DO_MUL );
186  break;
187  case GS_OP_MIN:
188  LOOP( DO_MIN );
189  break;
190  case GS_OP_MAX:
191  LOOP( DO_MAX );
192  break;
193  case GS_OP_BPR:
194  LOOP( DO_BPR );
195  break;
196  }
197 }
198 
199 static void local_uncondense_vec( realType* u, uint n, const sint* cm )
200 {
201  LOOP( DO_SET );
202 }
203 
204 #undef LOOP
205 
206 /*--------------------------------------------------------------------------
207  Non-local Execution Phases
208  --------------------------------------------------------------------------*/
209 
210 #ifdef MOAB_HAVE_MPI
211 
212 void gs_data::nonlocal_info::initialize( uint np, uint count, uint nlabels, uint nulabels, uint maxv )
213 {
214  _target = NULL;
215  _nshared = NULL;
216  _sh_ind = NULL;
217  _slabels = NULL;
218  _ulabels = NULL;
219  _reqs = NULL;
220  _buf = NULL;
221  _np = np;
222  _target = (uint*)malloc( ( 2 * np + count ) * sizeof( uint ) );
223  _nshared = _target + np;
224  _sh_ind = _nshared + np;
225  if( 1 < nlabels )
226  _slabels = (slong*)malloc( ( ( nlabels - 1 ) * count ) * sizeof( slong ) );
227  else
228  _slabels = NULL;
229  _ulabels = (Ulong*)malloc( ( nulabels * count ) * sizeof( Ulong ) );
230  _reqs = (MPI_Request*)malloc( 2 * np * sizeof( MPI_Request ) );
231  _buf = (realType*)malloc( ( 2 * count * maxv ) * sizeof( realType ) );
232  _maxv = maxv;
233 }
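/* Note on ownership: _target, _nshared and _sh_ind all live inside the single
   allocation made above (_target takes np entries, _nshared the next np, and
   _sh_ind the remaining count), which is why nlinfo_free() releases only
   _target.  _slabels stores nlabels-1 entries per shared node because the
   first label is not kept (see the "don't store 1st slabel" comment in the
   setup code). */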
234 
235 void gs_data::nonlocal_info::nlinfo_free()
236 {
237  // Free the ptrs
238  free( _buf );
239  free( _reqs );
240  free( _target );
241  free( _slabels );
242  free( _ulabels );
243  // Set to null
244  _ulabels = NULL;
245  _buf = NULL;
246  _reqs = NULL;
247  _target = NULL;
248  _slabels = NULL;
249  _nshared = NULL;
250  _sh_ind = NULL;
251 }
252 
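/* nonlocal() performs one gather/scatter exchange for a scalar field u: for
   each of the np partner processors the shared entries of u (selected through
   _sh_ind) are packed into _buf and posted with MPI_Isend, matching MPI_Irecv
   calls collect the partners' copies into the upper part of _buf, and once
   all requests complete the received values are folded into u with the
   requested reduction (add, multiply, min, max, or the BPR reduction above). */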
253 void gs_data::nonlocal_info::nonlocal( realType* u, int op, MPI_Comm comm )
254 {
255  MPI_Status status;
256  uint np = this->_np;
257  MPI_Request* reqs = this->_reqs;
258  uint* targ = this->_target;
259  uint* nshared = this->_nshared;
260  uint* sh_ind = this->_sh_ind;
261  uint id;
262  realType *buf = this->_buf, *start;
263  unsigned int i;
264  {
265  MPI_Comm_rank( comm, (int*)&i );
266  id = i;
267  }
268  for( i = 0; i < np; ++i )
269  {
270  uint c = nshared[i];
271  start = buf;
272  for( ; c; --c )
273  *buf++ = u[*sh_ind++];
274  MPI_Isend( (void*)start, nshared[i] * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], id, comm, reqs++ );
275  }
276  start = buf;
277  for( i = 0; i < np; ++i )
278  {
279  MPI_Irecv( (void*)start, nshared[i] * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], targ[i], comm, reqs++ );
280  start += nshared[i];
281  }
282  for( reqs = this->_reqs, i = np * 2; i; --i )
283  MPI_Wait( reqs++, &status );
284  sh_ind = this->_sh_ind;
285 #define LOOP( OP ) \
286  do \
287  { \
288  for( i = 0; i < np; ++i ) \
289  { \
290  uint c; \
291  for( c = nshared[i]; c; --c ) \
292  { \
293  OP( u[*sh_ind], *buf ); \
294  ++sh_ind, ++buf; \
295  } \
296  } \
297  } while( 0 )
298  switch( op )
299  {
300  case GS_OP_ADD:
301  LOOP( DO_ADD );
302  break;
303  case GS_OP_MUL:
304  LOOP( DO_MUL );
305  break;
306  case GS_OP_MIN:
307  LOOP( DO_MIN );
308  break;
309  case GS_OP_MAX:
310  LOOP( DO_MAX );
311  break;
312  case GS_OP_BPR:
313  LOOP( DO_BPR );
314  break;
315  }
316 #undef LOOP
317 }
318 
319 void gs_data::nonlocal_info::nonlocal_vec( realType* u, uint n, int op, MPI_Comm comm )
320 {
321  MPI_Status status;
322  uint np = this->_np;
323  MPI_Request* reqs = this->_reqs;
324  uint* targ = this->_target;
325  uint* nshared = this->_nshared;
326  uint* sh_ind = this->_sh_ind;
327  uint id;
328  realType *buf = this->_buf, *start;
329  uint size = n * sizeof( realType );
330  unsigned int i;
331  {
332  MPI_Comm_rank( comm, (int*)&i );
333  id = i;
334  }
335  for( i = 0; i < np; ++i )
336  {
337  uint ns = nshared[i], c = ns;
338  start = buf;
339  for( ; c; --c )
340  {
341  memcpy( buf, u + n * ( *sh_ind++ ), size );
342  buf += n;
343  }
344  MPI_Isend( (void*)start, ns * size, MPI_UNSIGNED_CHAR, targ[i], id, comm, reqs++ );
345  }
346  start = buf;
347  for( i = 0; i < np; ++i )
348  {
349  int nsn = n * nshared[i];
350  MPI_Irecv( (void*)start, nsn * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], targ[i], comm, reqs++ );
351  start += nsn;
352  }
353  for( reqs = this->_reqs, i = np * 2; i; --i )
354  MPI_Wait( reqs++, &status );
355  sh_ind = this->_sh_ind;
356 #define LOOP( OP ) \
357  do \
358  { \
359  for( i = 0; i < np; ++i ) \
360  { \
361  uint c, j; \
362  for( c = nshared[i]; c; --c ) \
363  { \
364  realType* uu = u + n * ( *sh_ind++ ); \
365  for( j = n; j; --j ) \
366  { \
367  OP( *uu, *buf ); \
368  ++uu; \
369  ++buf; \
370  } \
371  } \
372  } \
373  } while( 0 )
374  switch( op )
375  {
376  case GS_OP_ADD:
377  LOOP( DO_ADD );
378  break;
379  case GS_OP_MUL:
380  LOOP( DO_MUL );
381  break;
382  case GS_OP_MIN:
383  LOOP( DO_MIN );
384  break;
385  case GS_OP_MAX:
386  LOOP( DO_MAX );
387  break;
388  case GS_OP_BPR:
389  LOOP( DO_BPR );
390  break;
391  }
392 #undef LOOP
393 }
394 
395 void gs_data::nonlocal_info::nonlocal_many( realType** u, uint n, int op, MPI_Comm comm )
396 {
397  MPI_Status status;
398  uint np = this->_np;
399  MPI_Request* reqs = this->_reqs;
400  uint* targ = this->_target;
401  uint* nshared = this->_nshared;
402  uint* sh_ind = this->_sh_ind;
403  uint id;
404  realType *buf = this->_buf, *start;
405  unsigned int i;
406  {
407  MPI_Comm_rank( comm, (int*)&i );
408  id = i;
409  }
410  for( i = 0; i < np; ++i )
411  {
412  uint c, j, ns = nshared[i];
413  start = buf;
414  for( j = 0; j < n; ++j )
415  {
416  realType* uu = u[j];
417  for( c = 0; c < ns; ++c )
418  *buf++ = uu[sh_ind[c]];
419  }
420  sh_ind += ns;
421  MPI_Isend( (void*)start, n * ns * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], id, comm, reqs++ );
422  }
423  start = buf;
424  for( i = 0; i < np; ++i )
425  {
426  int nsn = n * nshared[i];
427  MPI_Irecv( (void*)start, nsn * sizeof( realType ), MPI_UNSIGNED_CHAR, targ[i], targ[i], comm, reqs++ );
428  start += nsn;
429  }
430  for( reqs = this->_reqs, i = np * 2; i; --i )
431  MPI_Wait( reqs++, &status );
432  sh_ind = this->_sh_ind;
433 #define LOOP( OP ) \
434  do \
435  { \
436  for( i = 0; i < np; ++i ) \
437  { \
438  uint c, j, ns = nshared[i]; \
439  for( j = 0; j < n; ++j ) \
440  { \
441  realType* uu = u[j]; \
442  for( c = 0; c < ns; ++c ) \
443  { \
444  OP( uu[sh_ind[c]], *buf ); \
445  ++buf; \
446  } \
447  } \
448  sh_ind += ns; \
449  } \
450  } while( 0 )
451  switch( op )
452  {
453  case GS_OP_ADD:
454  LOOP( DO_ADD );
455  break;
456  case GS_OP_MUL:
457  LOOP( DO_MUL );
458  break;
459  case GS_OP_MIN:
460  LOOP( DO_MIN );
461  break;
462  case GS_OP_MAX:
463  LOOP( DO_MAX );
464  break;
465  case GS_OP_BPR:
466  LOOP( DO_BPR );
467  break;
468  }
469 #undef LOOP
470 }
471 
472 /*---------------------------------------------------------------------------
473  MOAB Crystal Router
474  ---------------------------------------------------------------------------*/
475 gs_data::crystal_data::crystal_data() {}
476 
477 void gs_data::crystal_data::initialize( MPI_Comm comm )
478 {
479  int num, id;
480  buffers[0].buf.buffer_init( 1024 );
481  buffers[1].buf.buffer_init( 1024 );
482  buffers[2].buf.buffer_init( 1024 );
483  all = &buffers[0];
484  keep = &buffers[1];
485  send = &buffers[2];
486  memcpy( &( this->_comm ), &comm, sizeof( MPI_Comm ) );
487  MPI_Comm_rank( comm, &id );
488  this->_id = id;
489  MPI_Comm_size( comm, &num );
490  this->_num = num;
491 }
492 
493 void gs_data::crystal_data::reset()
494 {
495  buffers[0].buf.reset();
496  buffers[1].buf.reset();
497  buffers[2].buf.reset();
498  keep = NULL;
499  all = NULL;
500  send = NULL;
501 }
502 
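/* Each record in a crystal buffer is a run of uints laid out as
   ( destination proc, source proc, payload length, payload... ), so a record
   occupies 3 + src[2] uints.  partition() copies every record of *all whose
   destination lies below cutoff into lo and the rest into hi; the packing
   loop in gs_transfer() below produces this layout. */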
503 // Partition data before sending
504 void gs_data::crystal_data::partition( uint cutoff, crystal_buf* lo, crystal_buf* hi )
505 {
506  const uint* src = (uint*)all->buf.ptr;
507  const uint* end = (uint*)src + all->n;
508  uint *target, *lop, *hip;
509  lo->n = hi->n = 0;
510  lo->buf.buffer_reserve( all->n * sizeof( uint ) );
511  hi->buf.buffer_reserve( all->n * sizeof( uint ) );
512  lop = (uint*)lo->buf.ptr;
513  hip = (uint*)hi->buf.ptr;
514  while( src != end )
515  {
516  uint chunk_len = 3 + src[2];
517  if( src[0] < cutoff )
518  {
519  target = lop;
520  lo->n += chunk_len;
521  lop += chunk_len;
522  }
523  else
524  {
525  target = hip;
526  hi->n += chunk_len;
527  hip += chunk_len;
528  }
529  memcpy( target, src, chunk_len * sizeof( uint ) );
530  src += chunk_len;
531  }
532 }
533 
534 // Send message to target process
535 void gs_data::crystal_data::send_( uint target, int recvn )
536 {
537  MPI_Request req[3] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL };
538  MPI_Status status[3];
539  uint count[2] = { 0, 0 }, sum, *recv[2];
540  crystal_buf* t;
541  int i;
542 
543  (void)VALGRIND_CHECK_MEM_IS_DEFINED( &send->n, sizeof( uint ) );
544  MPI_Isend( (void*)&send->n, sizeof( uint ), MPI_UNSIGNED_CHAR, target, _id, _comm, &req[0] );
545  for( i = 0; i < recvn; ++i )
546  MPI_Irecv( (void*)&count[i], sizeof( uint ), MPI_UNSIGNED_CHAR, target + i, target + i, _comm, &req[i + 1] );
547  MPI_Waitall( recvn + 1, req, status );
548  sum = keep->n;
549  for( i = 0; i < recvn; ++i )
550  sum += count[i];
551  keep->buf.buffer_reserve( sum * sizeof( uint ) );
552  recv[0] = (uint*)keep->buf.ptr;
553  recv[0] += keep->n;
554  recv[1] = recv[0] + count[0];
555  keep->n = sum;
556 
557  (void)VALGRIND_CHECK_MEM_IS_DEFINED( send->buf.ptr, send->n * sizeof( uint ) );
558  MPI_Isend( (void*)send->buf.ptr, send->n * sizeof( uint ), MPI_UNSIGNED_CHAR, target, _id, _comm, &req[0] );
559  if( recvn )
560  {
561  MPI_Irecv( (void*)recv[0], count[0] * sizeof( uint ), MPI_UNSIGNED_CHAR, target, target, _comm, &req[1] );
562  if( recvn == 2 )
563  MPI_Irecv( (void*)recv[1], count[1] * sizeof( uint ), MPI_UNSIGNED_CHAR, target + 1, target + 1, _comm,
564  &req[2] );
565  }
566  MPI_Waitall( recvn + 1, req, status );
567 
568  t = all;
569  all = keep;
570  keep = t;
571 }
572 
573 void gs_data::crystal_data::crystal_router()
574 {
575  uint bl = 0, bh, n = _num, nl, target;
576  int recvn;
577  crystal_buf *lo, *hi;
578  while( n > 1 )
579  {
580  nl = n / 2, bh = bl + nl;
581  if( _id < bh )
582  {
583  target = _id + nl;
584  recvn = ( n & 1 && _id == bh - 1 ) ? 2 : 1;
585  lo = keep;
586  hi = send;
587  }
588  else
589  {
590  target = _id - nl;
591  recvn = ( target == bh ) ? ( --target, 0 ) : 1;
592  hi = keep;
593  lo = send;
594  }
595  partition( bh, lo, hi );
596  send_( target, recvn );
597  if( _id < bh )
598  n = nl;
599  else
600  {
601  n -= nl;
602  bl = bh;
603  }
604  }
605 }
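/* The router halves the active rank range [bl, bl+n) each round: ranks in the
   lower half exchange buffers with partners nl ranks above them, and when n
   is odd the last lower rank receives from two partners while the unpaired
   top rank only sends.  Illustrative trace for _num = 5: the first round
   pairs 0<->2 and 1<->3 while rank 4 sends to rank 1 and receives nothing;
   the halves {0,1} and {2,3,4} then recurse independently, so all traffic is
   delivered after ceil(log2(5)) = 3 rounds. */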
606 
607 #define UINT_PER_X( X ) ( ( sizeof( X ) + sizeof( uint ) - 1 ) / sizeof( uint ) )
608 #define UINT_PER_REAL UINT_PER_X( realType )
609 #define UINT_PER_LONG UINT_PER_X( slong )
610 #define UINT_PER_ULONG UINT_PER_X( Ulong )
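/* For example, on a typical build where uint is 4 bytes and realType is an
   8-byte double, UINT_PER_REAL evaluates to ( 8 + 4 - 1 ) / 4 = 2, i.e. each
   real occupies two uint slots in the packed payload. */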
611 
612 /*-------------------------------------------------------------------------
613  Transfer
614  -------------------------------------------------------------------------*/
615 ErrorCode gs_data::crystal_data::gs_transfer( int dynamic, moab::TupleList& tl, unsigned pf )
616 {
617 
618  unsigned mi, ml, mul, mr;
619  tl.getTupleSize( mi, ml, mul, mr );
620 
621  const unsigned tsize = ( mi - 1 ) + ml * UINT_PER_LONG + mul * UINT_PER_ULONG + mr * UINT_PER_REAL;
622  sint p, lp = -1;
623  sint* ri;
624  slong* rl;
625  Ulong* rul;
626  realType* rr;
627  uint i, j, *buf, *len = 0, *buf_end;
628 
629  /* sort to group by target proc */
630  if( pf >= mi ) return moab::MB_MEMORY_ALLOCATION_FAILED;
631  // fail("pf expected to be in vi array (%d, %d)", pf, mi);
632  tl.sort( pf, &all->buf );
633 
634  /* pack into buffer for crystal router */
635  all->buf.buffer_reserve( ( tl.get_n() * ( 3 + tsize ) ) * sizeof( uint ) );
636  all->n = 0;
637  buf = (uint*)all->buf.ptr;
638 
639  bool canWrite = tl.get_writeEnabled();
640  if( !canWrite ) tl.enableWriteAccess();
641 
642  ri = tl.vi_wr;
643  rl = tl.vl_wr;
644  rul = tl.vul_wr;
645  rr = tl.vr_wr;
646 
647  for( i = tl.get_n(); i; --i )
648  {
649  p = ri[pf];
650  if( p != lp )
651  {
652  lp = p;
653  *buf++ = p; /* target */
654  *buf++ = _id; /* source */
655  len = buf++;
656  *len = 0; /* length */
657  all->n += 3;
658  }
659  for( j = 0; j < mi; ++j, ++ri )
660  if( j != pf ) *buf++ = *ri;
661  for( j = ml; j; --j, ++rl )
662  {
663  memcpy( buf, rl, sizeof( slong ) );
664  buf += UINT_PER_LONG;
665  }
666  for( j = mul; j; --j, ++rul )
667  {
668  memcpy( buf, rul, sizeof( Ulong ) );
669  buf += UINT_PER_ULONG;
670  }
671  for( j = mr; j; --j, ++rr )
672  {
673  memcpy( buf, rr, sizeof( realType ) );
674  buf += UINT_PER_REAL;
675  }
676  *len += tsize, all->n += tsize;
677  }
678 
679  crystal_router();
680 
681  /* unpack */
682  buf = (uint*)all->buf.ptr;
683  buf_end = buf + all->n;
684  tl.set_n( 0 );
685  ri = tl.vi_wr;
686  rl = tl.vl_wr;
687  rul = tl.vul_wr;
688  rr = tl.vr_wr;
689 
690  while( buf != buf_end )
691  {
692  sint llen;
693  buf++; /* target ( == this proc ) */
694  p = *buf++; /* source */
695  llen = *buf++; /* length */
696  while( llen > 0 )
697  {
698  if( tl.get_n() == tl.get_max() )
699  {
700  if( !dynamic )
701  {
702  tl.set_n( tl.get_max() + 1 );
703  if( !canWrite ) tl.disableWriteAccess();
704  return moab::MB_SUCCESS;
705  }
706  ErrorCode rval = tl.resize( tl.get_max() + ( 1 + tl.get_max() ) / 2 + 1 );
707  if( rval != moab::MB_SUCCESS )
708  {
709  if( !canWrite ) tl.disableWriteAccess();
710  return rval;
711  }
712  ri = tl.vi_wr + mi * tl.get_n();
713  rl = tl.vl_wr + ml * tl.get_n();
714  rul = tl.vul_wr + mul * tl.get_n();
715  rr = tl.vr_wr + mr * tl.get_n();
716  }
717  tl.inc_n();
718  for( j = 0; j < mi; ++j )
719  if( j != pf )
720  *ri++ = *buf++;
721  else
722  *ri++ = p;
723  for( j = ml; j; --j )
724  {
725  memcpy( rl++, buf, sizeof( slong ) );
726  buf += UINT_PER_LONG;
727  }
728  for( j = mul; j; --j )
729  {
730  memcpy( rul++, buf, sizeof( Ulong ) );
731  buf += UINT_PER_ULONG;
732  }
733  for( j = mr; j; --j )
734  {
735  memcpy( rr++, buf, sizeof( realType ) );
736  buf += UINT_PER_REAL;
737  }
738  llen -= tsize;
739  }
740  }
741 
742  if( !canWrite ) tl.disableWriteAccess();
743  return moab::MB_SUCCESS;
744 }
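/* Usage sketch for gs_transfer() (illustrative only: cr is an initialized
   crystal_data, and nsend, dest_rank and payload are hypothetical caller
   data):

   moab::TupleList tl;
   tl.initialize( 2, 0, 0, 1, nsend );      // 2 ints, 0 longs, 0 ulongs, 1 real per tuple
   tl.enableWriteAccess();
   for( unsigned k = 0; k < nsend; ++k )
   {
       tl.vi_wr[2 * k]     = dest_rank[k];  // integer field 0: destination rank
       tl.vi_wr[2 * k + 1] = (int)k;        // integer field 1: application data
       tl.vr_wr[k]         = payload[k];
       tl.inc_n();
   }
   // Route every tuple to the rank in integer field 0 (pf = 0); on return the
   // list holds the tuples this rank received, with field 0 rewritten to the
   // sending rank.  dynamic = 1 lets the list grow to fit what arrives.
   moab::ErrorCode rval = cr.gs_transfer( 1, tl, 0 );
*/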
745 #endif
746 
747 /*--------------------------------------------------------------------------
748  Combined Execution
749  --------------------------------------------------------------------------*/
750 
751 void gs_data::gs_data_op( realType* u, int op )
752 {
753  local_condense( u, op, this->local_cm );
754 #ifdef MOAB_HAVE_MPI
755  this->nlinfo->nonlocal( u, op, _comm );
756 #endif
757  local_uncondense( u, this->local_cm );
758 }
759 
760 void gs_data::gs_data_op_vec( realType* u, uint n, int op )
761 {
762 #ifdef MOAB_HAVE_MPI
763  if( n > nlinfo->_maxv )
764  moab::fail( "%s: initialized with max vec size = %d,"
765  " but called with vec size = %d\n",
766  __FILE__, nlinfo->_maxv, n );
767 #endif
768  local_condense_vec( u, n, op, local_cm );
769 #ifdef MOAB_HAVE_MPI
770  this->nlinfo->nonlocal_vec( u, n, op, _comm );
771 #endif
772  local_uncondense_vec( u, n, local_cm );
773 }
774 
775 void gs_data::gs_data_op_many( realType** u, uint n, int op )
776 {
777  uint i;
778 #ifdef MOAB_HAVE_MPI
779  if( n > nlinfo->_maxv )
780  moab::fail( "%s: initialized with max vec size = %d,"
781  " but called with vec size = %d\n",
782  __FILE__, nlinfo->_maxv, n );
783 #endif
784  for( i = 0; i < n; ++i )
785  local_condense( u[i], op, local_cm );
790 
791 #ifdef MOAB_HAVE_MPI
792  this->nlinfo->nonlocal_many( u, n, op, _comm );
793 #endif
794  for( i = 0; i < n; ++i )
795  local_uncondense( u[i], local_cm );
796 }
797 
798 /*--------------------------------------------------------------------------
799  Setup
800  --------------------------------------------------------------------------*/
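/* Setup builds the data used by the execution routines above, in stages:
   1. collect the nonzero (index, label) pairs for this processor;
   2. sort them by label and build the local condense map for entries that
      share a label locally;
   3. (MPI only) assign each label to a work processor (label mod np), route
      the primary entries there with the crystal router, pair up processors
      that share a label, and route the resulting (proc1, proc2, index, label)
      tuples back to their owners;
   4. fill the nonlocal_info tables (_target, _nshared, _sh_ind and the extra
      labels) that drive the nonlocal exchange routines. */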
801 
802 ErrorCode gs_data::initialize( uint n,
803  const long* label,
804  const Ulong* ulabel,
805  uint maxv,
806  const unsigned int nlabels,
807  const unsigned int nulabels,
808  crystal_data* crystal )
809 {
810  nlinfo = NULL;
811  unsigned int j;
812  TupleList nonzero, primary;
813  ErrorCode rval;
814 #ifdef MOAB_HAVE_MPI
815  TupleList shared;
816 #else
817  TupleList::buffer buf;
818 #endif
819  (void)VALGRIND_CHECK_MEM_IS_DEFINED( label, n * nlabels * sizeof( long ) );
820  (void)VALGRIND_CHECK_MEM_IS_DEFINED( ulabel, n * nulabels * sizeof( Ulong ) );
821 #ifdef MOAB_HAVE_MPI
822  MPI_Comm_dup( crystal->_comm, &this->_comm );
823 #else
824  buf.buffer_init( 1024 );
825 #endif
826 
827  /* construct list of nonzeros: (index ^, label) */
828  nonzero.initialize( 1, nlabels, nulabels, 0, n );
829  nonzero.enableWriteAccess();
830  {
831  uint i;
832  sint* nzi;
833  long* nzl;
834  Ulong* nzul;
835  nzi = nonzero.vi_wr;
836  nzl = nonzero.vl_wr;
837  nzul = nonzero.vul_wr;
838  for( i = 0; i < n; ++i )
839  if( label[nlabels * i] != 0 )
840  {
841  nzi[0] = i;
842  for( j = 0; j < nlabels; j++ )
843  nzl[j] = label[nlabels * i + j];
844  for( j = 0; j < nulabels; j++ )
845  nzul[j] = ulabel[nulabels * i + j];
846  nzi++;
847  nzl += nlabels;
848  nzul += nulabels;
849  nonzero.inc_n();
850  }
851  }
852 
853  /* sort nonzeros by label: (index ^2, label ^1) */
854 #ifndef MOAB_HAVE_MPI
855  nonzero.sort( 1, &buf );
856 #else
857  nonzero.sort( 1, &crystal->all->buf );
858 #endif
859 
860  /* build list of unique labels w/ lowest associated index:
861  (index in nonzero ^, primary (lowest) index in label, count, label(s),
862  ulabel(s)) */
863  primary.initialize( 3, nlabels, nulabels, 0, nonzero.get_n() );
864  primary.enableWriteAccess();
865  {
866  uint i;
867  sint *nzi = nonzero.vi_wr, *pi = primary.vi_wr;
868  slong *nzl = nonzero.vl_wr, *pl = primary.vl_wr;
869  Ulong *nzul = nonzero.vul_wr, *pul = primary.vul_wr;
870  slong last = -1;
871  for( i = 0; i < nonzero.get_n(); ++i, nzi += 1, nzl += nlabels, nzul += nulabels )
872  {
873  if( nzl[0] == last )
874  {
875  ++pi[-1];
876  continue;
877  }
878  last = nzl[0];
879  pi[0] = i;
880  pi[1] = nzi[0];
881  for( j = 0; j < nlabels; j++ )
882  pl[j] = nzl[j];
883  for( j = 0; j < nulabels; j++ )
884  pul[j] = nzul[j];
885  pi[2] = 1;
886  pi += 3, pl += nlabels;
887  pul += nulabels;
888  primary.inc_n();
889  }
890  }
891 
892  /* calculate size of local condense map */
893  {
894  uint i, count = 1;
895  sint* pi = primary.vi_wr;
896  for( i = primary.get_n(); i; --i, pi += 3 )
897  if( pi[2] > 1 ) count += pi[2] + 1;
898  this->local_cm = (sint*)malloc( count * sizeof( sint ) );
899  }
900 
901  /* sort unique labels by primary index:
902  (nonzero index ^2, primary index ^1, count, label ^2) */
903 #ifndef MOAB_HAVE_MPI
904  primary.sort( 0, &buf );
905  buf.reset();
906  // buffer_free(&buf);
907 #else
908  primary.sort( 0, &crystal->all->buf );
909 #endif
910 
911  /* construct local condense map */
912  {
913  uint i, ln;
914  sint* pi = primary.vi_wr;
915  sint* cm = this->local_cm;
916  for( i = primary.get_n(); i > 0; --i, pi += 3 )
917  if( ( ln = pi[2] ) > 1 )
918  {
919  sint* nzi = nonzero.vi_wr + 1 * pi[0];
920  for( j = ln; j > 0; --j, nzi += 1 )
921  *cm++ = nzi[0];
922  *cm++ = -1;
923  }
924  *cm++ = -1;
925  }
926  nonzero.reset();
927 #ifndef MOAB_HAVE_MPI
928  primary.reset();
929 #else
930  /* assign work proc by label modulo np */
931  {
932  uint i;
933  sint* pi = primary.vi_wr;
934  slong* pl = primary.vl_wr;
935  for( i = primary.get_n(); i; --i, pi += 3, pl += nlabels )
936  pi[0] = pl[0] % crystal->_num;
937  }
938  rval = crystal->gs_transfer( 1, primary, 0 ); /* transfer to work procs */
939  if( rval != MB_SUCCESS ) return rval;
940  /* primary: (source proc, index on src, useless, label) */
941  /* sort by label */
942  primary.sort( 3, &crystal->all->buf );
943  /* add sentinel to primary list */
944  if( primary.get_n() == primary.get_max() )
945  primary.resize( ( primary.get_max() ? primary.get_max() + ( primary.get_max() + 1 ) / 2 + 1 : 2 ) );
946  primary.vl_wr[nlabels * primary.get_n()] = -1;
947  /* construct shared list: (proc1, proc2, index1, label) */
948 #ifdef MOAB_HAVE_MPI
949  shared.initialize( 3, nlabels, nulabels, 0, primary.get_n() );
950  shared.enableWriteAccess();
951 #endif
952  {
953  sint *pi1 = primary.vi_wr, *si = shared.vi_wr;
954  slong lbl, *pl1 = primary.vl_wr, *sl = shared.vl_wr;
955  Ulong *pul1 = primary.vul_wr, *sul = shared.vul_wr;
956  for( ; ( lbl = pl1[0] ) != -1; pi1 += 3, pl1 += nlabels, pul1 += nulabels )
957  {
958  sint* pi2 = pi1 + 3;
959  slong* pl2 = pl1 + nlabels;
960  Ulong* pul2 = pul1 + nulabels;
961  for( ; pl2[0] == lbl; pi2 += 3, pl2 += nlabels, pul2 += nulabels )
962  {
963  if( shared.get_n() + 2 > shared.get_max() )
964  shared.resize( ( shared.get_max() ? shared.get_max() + ( shared.get_max() + 1 ) / 2 + 1 : 2 ) ),
965  si = shared.vi_wr + shared.get_n() * 3;
966  sl = shared.vl_wr + shared.get_n() * nlabels;
967  sul = shared.vul_wr + shared.get_n() * nulabels;
968  si[0] = pi1[0];
969  si[1] = pi2[0];
970  si[2] = pi1[1];
971  for( j = 0; j < nlabels; j++ )
972  sl[j] = pl2[j];
973  for( j = 0; j < nulabels; j++ )
974  sul[j] = pul2[j];
975  si += 3;
976  sl += nlabels;
977  sul += nulabels;
978  shared.inc_n();
979  si[0] = pi2[0];
980  si[1] = pi1[0];
981  si[2] = pi2[1];
982  for( j = 0; j < nlabels; j++ )
983  sl[j] = pl1[j];
984  for( j = 0; j < nulabels; j++ )
985  sul[j] = pul1[j];
986  si += 3;
987  sl += nlabels;
988  sul += nulabels;
989  shared.inc_n();
990  }
991  }
992  }
993  primary.reset();
994  rval = crystal->gs_transfer( 1, shared, 0 ); /* segfaulting transfer to dest procs */
995  if( rval != MB_SUCCESS ) return rval;
996  /* shared list: (useless, proc2, index, label) */
997  /* sort by label */
998  shared.sort( 3, &crystal->all->buf );
999  /* sort by partner proc */
1000  shared.sort( 1, &crystal->all->buf );
1001  /* count partner procs */
1002  {
1003  uint i, count = 0;
1004  sint proc = -1, *si = shared.vi_wr;
1005  for( i = shared.get_n(); i; --i, si += 3 )
1006  if( si[1] != proc )
1007  {
1008  ++count;
1009  proc = si[1];
1010  }
1011  // this->nlinfo = new nonlocal_info();
1012  // this->nlinfo->initialize(count,shared.get_n(),
1013  // nlabels, nulabels, maxv);
1014  this->nlinfo = new nonlocal_info( count, shared.get_n(), nlabels, nulabels, maxv );
1015  }
1016  /* construct non-local info */
1017  {
1018  uint i;
1019  sint proc = -1, *si = shared.vi_wr;
1020  slong* sl = shared.vl_wr;
1021  Ulong* ul = shared.vul_wr;
1022  uint* target = this->nlinfo->_target;
1023  uint* nshared = this->nlinfo->_nshared;
1024  uint* sh_ind = this->nlinfo->_sh_ind;
1025  slong* slabels = this->nlinfo->_slabels;
1026  Ulong* ulabels = this->nlinfo->_ulabels;
1027  for( i = shared.get_n(); i; --i, si += 3 )
1028  {
1029  if( si[1] != proc )
1030  {
1031  proc = si[1];
1032  *target++ = proc;
1033  *nshared++ = 0;
1034  }
1035  ++nshared[-1];
1036  *sh_ind++ = si[2];
1037  // don't store 1st slabel
1038  sl++;
1039  for( j = 0; j < nlabels - 1; j++ )
1040  slabels[j] = sl[j];
1041  for( j = 0; j < nulabels; j++ )
1042  ulabels[j] = ul[j];
1043  slabels += nlabels - 1;
1044  ulabels += nulabels;
1045  sl += nlabels - 1;
1046  ul += nulabels;
1047  }
1048  }
1049  shared.reset();
1050 #endif
1051  return MB_SUCCESS;
1052 }
1053 
1054 void gs_data::reset()
1055 {
1056  free( local_cm );
1057  local_cm = NULL;
1058 #ifdef MOAB_HAVE_MPI
1059  if( nlinfo != NULL )
1060  {
1061  nlinfo->nlinfo_free();
1062  delete this->nlinfo;
1063  MPI_Comm_free( &_comm );
1064  nlinfo = NULL;
1065  }
1066 #endif
1067 }
1068 
1069 } // namespace moab