26 #ifndef INITIAL_BUFFER_SIZE
27 #define INITIAL_BUFFER_SIZE 2 * ( 3 * num + n * 9 )
80 #ifdef MOAB_HAVE_VALGRIND
81 #include <valgrind/memcheck.h>
82 #elif !defined( VALGRIND_CHECK_MEM_IS_DEFINED )
83 #define VALGRIND_CHECK_MEM_IS_DEFINED( a, b ) ( (void)0 )
95 #define DO_SET( a, b ) b = a
96 #define DO_ADD( a, b ) a += b
97 #define DO_MUL( a, b ) a *= b
98 #define DO_MIN( a, b ) \
99 if( ( b ) < ( a ) ) ( a ) = b
100 #define DO_MAX( a, b ) \
101 if( ( b ) > ( a ) ) ( a ) = b
102 #define DO_BPR( a, b ) \
123 while( ( i = *cm++ ) != -1 ) \
124 while( ( j = *cm++ ) != -1 ) \
161 while( ( i = *cm++ ) != -1 ) \
163 realType* pi = u + n * i; \
164 while( ( j = *cm++ ) != -1 ) \
166 realType* pj = u + n * j; \
167 for( k = n; k; --k ) \
222 _target = (
uint*)malloc( ( 2 * np + count ) *
sizeof(
uint ) );
223 _nshared = _target + np;
224 _sh_ind = _nshared + np;
226 _slabels = (
slong*)malloc( ( ( nlabels - 1 ) * count ) *
sizeof(
slong ) );
229 _ulabels = (
Ulong*)malloc( ( nulabels * count ) *
sizeof(
Ulong ) );
230 _reqs = (MPI_Request*)malloc( 2 * np *
sizeof( MPI_Request ) );
235 void gs_data::nonlocal_info::nlinfo_free()
253 void gs_data::nonlocal_info::nonlocal(
realType* u,
int op, MPI_Comm comm )
257 MPI_Request* reqs = this->_reqs;
258 uint* targ = this->_target;
259 uint* nshared = this->_nshared;
260 uint* sh_ind = this->_sh_ind;
265 MPI_Comm_rank( comm, (
int*)&i );
268 for( i = 0; i < np; ++i )
273 *buf++ = u[*sh_ind++];
274 MPI_Isend( (
void*)start, nshared[i] *
sizeof(
realType ), MPI_UNSIGNED_CHAR, targ[i],
id, comm, reqs++ );
277 for( i = 0; i < np; ++i )
279 MPI_Irecv( (
void*)start, nshared[i] *
sizeof(
realType ), MPI_UNSIGNED_CHAR, targ[i], targ[i], comm, reqs++ );
282 for( reqs = this->_reqs, i = np * 2; i; --i )
283 MPI_Wait( reqs++, &status );
284 sh_ind = this->_sh_ind;
288 for( i = 0; i < np; ++i ) \
291 for( c = nshared[i]; c; --c ) \
293 OP( u[*sh_ind], *buf ); \
319 void gs_data::nonlocal_info::nonlocal_vec(
realType* u,
uint n,
int op, MPI_Comm comm )
323 MPI_Request* reqs = this->_reqs;
324 uint* targ = this->_target;
325 uint* nshared = this->_nshared;
326 uint* sh_ind = this->_sh_ind;
332 MPI_Comm_rank( comm, (
int*)&i );
335 for( i = 0; i < np; ++i )
337 uint ns = nshared[i], c = ns;
341 memcpy( buf, u + n * ( *sh_ind++ ),
size );
344 MPI_Isend( (
void*)start, ns *
size, MPI_UNSIGNED_CHAR, targ[i],
id, comm, reqs++ );
347 for( i = 0; i < np; ++i )
349 int nsn = n * nshared[i];
350 MPI_Irecv( (
void*)start, nsn *
size, MPI_UNSIGNED_CHAR, targ[i], targ[i], comm, reqs++ );
353 for( reqs = this->_reqs, i = np * 2; i; --i )
354 MPI_Wait( reqs++, &status );
355 sh_ind = this->_sh_ind;
359 for( i = 0; i < np; ++i ) \
362 for( c = nshared[i]; c; --c ) \
364 realType* uu = u + n * ( *sh_ind++ ); \
365 for( j = n; j; --j ) \
395 void gs_data::nonlocal_info::nonlocal_many(
realType** u,
uint n,
int op, MPI_Comm comm )
399 MPI_Request* reqs = this->_reqs;
400 uint* targ = this->_target;
401 uint* nshared = this->_nshared;
402 uint* sh_ind = this->_sh_ind;
407 MPI_Comm_rank( comm, (
int*)&i );
410 for( i = 0; i < np; ++i )
412 uint c, j, ns = nshared[i];
414 for( j = 0; j < n; ++j )
417 for( c = 0; c < ns; ++c )
418 *buf++ = uu[sh_ind[c]];
421 MPI_Isend( (
void*)start, n * ns *
sizeof(
realType ), MPI_UNSIGNED_CHAR, targ[i],
id, comm, reqs++ );
424 for( i = 0; i < np; ++i )
426 int nsn = n * nshared[i];
427 MPI_Irecv( (
void*)start, nsn *
sizeof(
realType ), MPI_UNSIGNED_CHAR, targ[i], targ[i], comm, reqs++ );
430 for( reqs = this->_reqs, i = np * 2; i; --i )
431 MPI_Wait( reqs++, &status );
432 sh_ind = this->_sh_ind;
436 for( i = 0; i < np; ++i ) \
438 uint c, j, ns = nshared[i]; \
439 for( j = 0; j < n; ++j ) \
441 realType* uu = u[j]; \
442 for( c = 0; c < ns; ++c ) \
444 OP( uu[sh_ind[c]], *buf ); \
475 gs_data::crystal_data::crystal_data() {}
480 buffers[0].buf.buffer_init( 1024 );
481 buffers[1].buf.buffer_init( 1024 );
482 buffers[2].buf.buffer_init( 1024 );
486 memcpy( &( this->_comm ), &comm,
sizeof( MPI_Comm ) );
487 MPI_Comm_rank( comm, &
id );
489 MPI_Comm_size( comm, &num );
493 void gs_data::crystal_data::reset()
495 buffers[0].buf.reset();
496 buffers[1].buf.reset();
497 buffers[2].buf.reset();
504 void gs_data::crystal_data::partition(
uint cutoff, crystal_buf* lo, crystal_buf* hi )
506 const uint* src = (
uint*)all->buf.ptr;
507 const uint* end = (
uint*)src + all->n;
508 uint *target, *lop, *hip;
510 lo->buf.buffer_reserve( all->n *
sizeof(
uint ) );
511 hi->buf.buffer_reserve( all->n *
sizeof(
uint ) );
512 lop = (
uint*)lo->buf.ptr;
513 hip = (
uint*)hi->buf.ptr;
516 uint chunk_len = 3 + src[2];
517 if( src[0] < cutoff )
529 memcpy( target, src, chunk_len *
sizeof(
uint ) );
535 void gs_data::crystal_data::send_(
uint target,
int recvn )
537 MPI_Request req[3] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL };
538 MPI_Status status[3];
539 uint count[2] = { 0, 0 },
sum, *recv[2];
544 MPI_Isend( (
void*)&send->n,
sizeof(
uint ), MPI_UNSIGNED_CHAR, target, _id, _comm, &req[0] );
545 for( i = 0; i < recvn; ++i )
546 MPI_Irecv( (
void*)&count[i],
sizeof(
uint ), MPI_UNSIGNED_CHAR, target + i, target + i, _comm, &req[i + 1] );
547 MPI_Waitall( recvn + 1, req, status );
549 for( i = 0; i < recvn; ++i )
551 keep->buf.buffer_reserve(
sum *
sizeof(
uint ) );
552 recv[0] = (
uint*)keep->buf.ptr;
554 recv[1] = recv[0] + count[0];
558 MPI_Isend( (
void*)send->buf.ptr, send->n *
sizeof(
uint ), MPI_UNSIGNED_CHAR, target, _id, _comm, &req[0] );
561 MPI_Irecv( (
void*)recv[0], count[0] *
sizeof(
uint ), MPI_UNSIGNED_CHAR, target, target, _comm, &req[1] );
563 MPI_Irecv( (
void*)recv[1], count[1] *
sizeof(
uint ), MPI_UNSIGNED_CHAR, target + 1, target + 1, _comm,
566 MPI_Waitall( recvn + 1, req, status );
573 void gs_data::crystal_data::crystal_router()
575 uint bl = 0, bh, n = _num,
nl, target;
577 crystal_buf *lo, *hi;
580 nl = n / 2, bh = bl +
nl;
584 recvn = ( n & 1 && _id == bh - 1 ) ? 2 : 1;
591 recvn = ( target == bh ) ? ( --target, 0 ) : 1;
595 partition( bh, lo, hi );
596 send_( target, recvn );
607 #define UINT_PER_X( X ) ( ( sizeof( X ) + sizeof( uint ) - 1 ) / sizeof( uint ) )
608 #define UINT_PER_REAL UINT_PER_X( realType )
609 #define UINT_PER_LONG UINT_PER_X( slong )
610 #define UINT_PER_ULONG UINT_PER_X( Ulong )
618 unsigned mi, ml, mul, mr;
627 uint i, j, *buf, *len = 0, *buf_end;
632 tl.
sort( pf, &all->buf );
635 all->buf.buffer_reserve( ( tl.
get_n() * ( 3 + tsize ) ) *
sizeof(
uint ) );
637 buf = (
uint*)all->buf.ptr;
647 for( i = tl.
get_n(); i; --i )
659 for( j = 0; j < mi; ++j, ++ri )
660 if( j != pf ) *buf++ = *ri;
661 for( j = ml; j; --j, ++rl )
663 memcpy( buf, rl,
sizeof(
slong ) );
666 for( j = mul; j; --j, ++rul )
668 memcpy( buf, rul,
sizeof(
Ulong ) );
671 for( j = mr; j; --j, ++rr )
673 memcpy( buf, rr,
sizeof(
realType ) );
676 *len += tsize, all->n += tsize;
682 buf = (
uint*)all->buf.ptr;
683 buf_end = buf + all->n;
690 while( buf != buf_end )
718 for( j = 0; j < mi; ++j )
723 for( j = ml; j; --j )
725 memcpy( rl++, buf,
sizeof(
slong ) );
728 for( j = mul; j; --j )
730 memcpy( rul++, buf,
sizeof(
Ulong ) );
733 for( j = mr; j; --j )
735 memcpy( rr++, buf,
sizeof(
realType ) );
755 this->nlinfo->nonlocal( u, op, _comm );
763 if( n > nlinfo->_maxv )
764 moab::fail(
"%s: initialized with max vec size = %d,"
765 " but called with vec size = %d\n",
766 __FILE__, nlinfo->_maxv, n );
770 this->nlinfo->nonlocal_vec( u, n, op, _comm );
779 if( n > nlinfo->_maxv )
780 moab::fail(
"%s: initialized with max vec size = %d,"
781 " but called with vec size = %d\n",
782 __FILE__, nlinfo->_maxv, n );
784 for( i = 0; i < n; ++i )
787 moab::fail(
"%s: initialized with max vec size = %d,"
788 " but called with vec size = %d\n",
792 this->nlinfo->nonlocal_many( u, n, op, _comm );
794 for( i = 0; i < n; ++i )
806 const unsigned int nlabels,
807 const unsigned int nulabels,
822 MPI_Comm_dup( crystal->_comm, &this->_comm );
824 buf.buffer_init( 1024 );
828 nonzero.
initialize( 1, nlabels, nulabels, 0, n );
838 for( i = 0; i < n; ++i )
839 if( label[nlabels * i] != 0 )
842 for( j = 0; j < nlabels; j++ )
843 nzl[j] = label[nlabels * i + j];
844 for( j = 0; j < nulabels; j++ )
845 nzul[j] = ulabel[nulabels * i + j];
854 #ifndef MOAB_HAVE_MPI
855 nonzero.
sort( 1, &buf );
857 nonzero.
sort( 1, &crystal->all->buf );
871 for( i = 0; i < nonzero.
get_n(); ++i, nzi += 1, nzl += nlabels, nzul += nulabels )
881 for( j = 0; j < nlabels; j++ )
883 for( j = 0; j < nulabels; j++ )
886 pi += 3, pl += nlabels;
896 for( i = primary.
get_n(); i; --i, pi += 3 )
897 if( pi[2] > 1 ) count += pi[2] + 1;
903 #ifndef MOAB_HAVE_MPI
904 primary.
sort( 0, &buf );
908 primary.
sort( 0, &crystal->all->buf );
916 for( i = primary.
get_n(); i > 0; --i, pi += 3 )
917 if( ( ln = pi[2] ) > 1 )
920 for( j = ln; j > 0; --j, nzi += 1 )
927 #ifndef MOAB_HAVE_MPI
935 for( i = primary.
get_n(); i; --i, pi += 3, pl += nlabels )
936 pi[0] = pl[0] % crystal->_num;
938 rval = crystal->gs_transfer( 1, primary, 0 );
942 primary.
sort( 3, &crystal->all->buf );
946 primary.
vl_wr[nlabels * primary.
get_n()] = -1;
956 for( ; ( lbl = pl1[0] ) != -1; pi1 += 3, pl1 += nlabels, pul1 += nulabels )
959 slong* pl2 = pl1 + nlabels;
960 Ulong* pul2 = pul1 + nulabels;
961 for( ; pl2[0] == lbl; pi2 += 3, pl2 += nlabels, pul2 += nulabels )
971 for( j = 0; j < nlabels; j++ )
973 for( j = 0; j < nulabels; j++ )
982 for( j = 0; j < nlabels; j++ )
984 for( j = 0; j < nulabels; j++ )
994 rval = crystal->gs_transfer( 1, shared, 0 );
998 shared.
sort( 3, &crystal->all->buf );
1000 shared.
sort( 1, &crystal->all->buf );
1005 for( i = shared.
get_n(); i; --i, si += 3 )
1014 this->nlinfo =
new nonlocal_info( count, shared.
get_n(), nlabels, nulabels, maxv );
1022 uint* target = this->nlinfo->_target;
1023 uint* nshared = this->nlinfo->_nshared;
1024 uint* sh_ind = this->nlinfo->_sh_ind;
1025 slong* slabels = this->nlinfo->_slabels;
1026 Ulong* ulabels = this->nlinfo->_ulabels;
1027 for( i = shared.
get_n(); i; --i, si += 3 )
1039 for( j = 0; j < nlabels - 1; j++ )
1041 for( j = 0; j < nulabels; j++ )
1043 slabels += nlabels - 1;
1044 ulabels += nulabels;
1058 #ifdef MOAB_HAVE_MPI
1059 if( nlinfo != NULL )
1061 nlinfo->nlinfo_free();
1062 delete this->nlinfo;
1063 MPI_Comm_free( &_comm );