114 if( !skip_local_merge )
133 for(
int skin_dim =
dim; skin_dim >= 0; skin_dim-- )
156 MPI_Allreduce( (
void*)&box, gbox, 6, MPI_DOUBLE, MPI_MIN,
myPcomm->
comm() );
160 for(
int i = 3; i < 6; i++ )
190 std::vector< int > toProcs;
191 int xPart, yPart, zPart, xEps, yEps, zEps, baseProc;
192 unsigned long long tup_i = 0, tup_ul = 0, tup_r = 0, count = 0;
194 bool xDup, yDup, zDup;
204 xPart =
static_cast< int >( floor( ( x[count] - gbox[0] ) / lengths[0] ) );
205 xPart = ( xPart < parts[0] ? xPart : parts[0] - 1 );
207 yPart =
static_cast< int >( floor( ( y[count] - gbox[1] ) / lengths[1] ) );
208 yPart = ( yPart < parts[1] ? yPart : parts[1] - 1 );
210 zPart =
static_cast< int >( floor( ( z[count] - gbox[2] ) / lengths[2] ) );
211 zPart = ( zPart < parts[2] ? zPart : parts[2] - 1 );
214 xEps =
static_cast< int >( floor( ( x[count] - gbox[0] +
myEps ) / lengths[0] ) );
215 yEps =
static_cast< int >( floor( ( y[count] - gbox[1] +
myEps ) / lengths[1] ) );
216 zEps =
static_cast< int >( floor( ( z[count] - gbox[2] +
myEps ) / lengths[2] ) );
219 xDup = ( xPart != xEps && xEps < parts[0] );
220 yDup = ( yPart != yEps && yEps < parts[1] );
221 zDup = ( zPart != zEps && zEps < parts[2] );
224 baseProc = xPart + yPart * parts[0] + zPart * parts[0] * parts[1];
225 toProcs.push_back( baseProc );
228 toProcs.push_back( baseProc + 1 );
233 toProcs.push_back( baseProc + parts[0] );
238 toProcs.push_back( baseProc + parts[0] * parts[1] );
243 toProcs.push_back( baseProc + parts[0] + 1 );
248 toProcs.push_back( baseProc + parts[0] * parts[1] + 1 );
253 toProcs.push_back( baseProc + parts[0] * parts[1] + parts[0] );
255 if( xDup && yDup && zDup )
258 toProcs.push_back( baseProc + parts[0] * parts[1] + parts[0] + 1 );
267 for( std::vector< int >::iterator proc = toProcs.begin(); proc != toProcs.end(); ++proc )
290 double xLen = gbox[3] - gbox[0];
291 double yLen = gbox[4] - gbox[1];
292 double zLen = gbox[5] - gbox[2];
303 if( xLen >= yLen && xLen >= zLen )
305 parts[0] =
PartitionSide( xLen, yLen * zLen, numProcs,
true );
306 numProcs /= parts[0];
311 parts[2] = numProcs / parts[1];
317 parts[1] = numProcs / parts[2];
321 else if( yLen >= zLen )
323 parts[1] =
PartitionSide( yLen, xLen * zLen, numProcs,
true );
324 numProcs /= parts[1];
329 parts[2] = numProcs / parts[0];
335 parts[0] = numProcs / parts[2];
341 parts[2] =
PartitionSide( zLen, xLen * yLen, numProcs,
true );
342 numProcs /= parts[2];
347 parts[1] = numProcs / parts[0];
353 parts[0] = numProcs / parts[1];
358 lengths[0] = xLen / (double)parts[0];
359 lengths[1] = yLen / (double)parts[1];
360 lengths[2] = zLen / (double)parts[2];
373 double ratio = -DBL_MAX;
376 double oldRatio = ratio;
377 double oldFactor = 1;
380 double goalRatio = sideLen / restLen;
387 divisor = (double)numProcs * sideLen;
392 divisor = (double)numProcs;
397 for(
unsigned i = 2; i <= numProcs / 2; i++ )
400 if( numProcs % i == 0 )
412 ratio = pow( (
double)i, p ) / divisor;
416 if( ratio >= goalRatio )
423 if( ratio < goalRatio )
428 ratio = pow( (
double)numProcs, p ) / divisor;
432 if( fabs( ratio - goalRatio ) > fabs( oldRatio - goalRatio ) )
444 unsigned long i = 0, mat_i = 0, mat_ul = 0, j = 0, tup_r = 0;
447 uint tup_mi, tup_ml, tup_mul, tup_mr;
484 int kproc = i * tup_mi;
485 unsigned long khand = i * tup_mul;
486 for(
unsigned long k = i; k < j; k++ )
488 int lproc = kproc + tup_mi;
489 unsigned long lhand = khand + tup_mul;
490 for(
unsigned long l = k + 1; l < j; l++ )
549 proc_ents.
merge( tmp_ents );
555 proc_ents.
erase( lower, upper );
561 std::fill( sharing_procs.begin(), sharing_procs.end(), maxp );
564 std::map< std::vector< int >, std::vector< EntityHandle > > proc_nranges;
594 std::set< unsigned int > procs;
629 uint mi, ml, mul, mr;
641 uint mi, ml, mul, mr;
645 unsigned long a_val = a * mi, b_val = b * mi;
646 for(
unsigned long i = 0; i < mi; i++ )
650 tup.
vi_wr[b_val] = t;
657 for(
unsigned long i = 0; i < ml; i++ )
661 tup.
vl_wr[b_val] = t;
668 for(
unsigned long i = 0; i < mul; i++ )
679 for(
unsigned long i = 0; i < mr; i++ )
683 tup.
vr_wr[b_val] = t;
698 if( left + 1 >= right )
702 unsigned long swap = left, tup_l = left * tup_mr, tup_t = tup_l + tup_mr;
705 SwapTuples( tup, left, ( left + right ) / 2 );
708 for(
unsigned long t = left + 1; t < right; t++ )
735 while(
check < tup_mr )