10 #include <sys/resource.h>
13 extern "C" int getrusage(
int,
struct rusage* );
15 #include </usr/ucbinclude/sys/rusage.h>
36 void print_time(
const bool print_em,
double& tot_time,
double& utime,
double& stime,
long& imem,
long& rmem );
37 void build_connect(
const int nelem,
const int vstart,
int*& connect );
40 void compute_edge(
double* start,
const int nelem,
const double xint,
const int stride );
41 void compute_face(
double* a,
const int nelem,
const double xint,
const int stride1,
const int stride2 );
43 void build_connect(
const int nelem,
const int vstart,
int*& connect );
45 int main(
int argc,
char* argv[] )
50 std::cout <<
"Usage: " << argv[0] <<
" <ints_per_side> <A|B|C>" << std::endl;
54 char which_test =
'\0';
56 sscanf( argv[1],
"%d", &nelem );
57 sscanf( argv[2],
"%c", &which_test );
59 if( which_test !=
'B' && which_test !=
'C' )
61 std::cout <<
"Must indicate B or C for test." << std::endl;
65 std::cout <<
"number of elements: " << nelem <<
"; test " << which_test << std::endl;
72 assert( NULL != coords );
84 cerr <<
"Couldn't create mesh instance." << endl;
92 cerr <<
"Couldn't set geometric dimension." << endl;
119 double utime, stime, ttime0, ttime1, ttime2, ttime3, ttime4;
120 long imem0, rmem0, imem1, rmem1, imem2, rmem2, imem3, rmem3, imem4, rmem4;
122 print_time(
false, ttime0, utime, stime, imem0, rmem0 );
123 int num_verts = ( nelem + 1 ) * ( nelem + 1 ) * ( nelem + 1 );
124 int num_elems = nelem * nelem * nelem;
128 int vertices_allocated = 0, vertices_size;
131 &vertices_size, &result );
134 cerr <<
"Couldn't create vertices in bulk call" << endl;
141 int nconnect = 8 * num_elems;
144 for(
int i = 0; i < nconnect; i++ )
147 assert( connect[i] - 1 < num_verts );
148 sidl_connect[i] = vertices[connect[i] - 1];
157 int new_hexes_allocated = 0, new_hexes_size;
159 int status_allocated = 0, status_size;
162 &new_hexes_size, &status, &status_allocated, &status_size, &result );
165 cerr <<
"Couldn't create hex elements in bulk call" << endl;
168 free( sidl_connect );
172 print_time(
false, ttime1, utime, stime, imem1, rmem1 );
176 free( sidl_connect );
181 print_time(
false, ttime2, utime, stime, imem2, rmem2 );
185 print_time(
false, ttime3, utime, stime, imem3, rmem3 );
189 print_time(
false, ttime4, utime, stime, imem4, rmem4 );
191 std::cout <<
"TSTTb/MOAB_ucd_blocked: nelem, construct, e_to_v, v_to_e, after_dtor = " << nelem <<
", "
192 << ttime1 - ttime0 <<
", " << ttime2 - ttime1 <<
", " << ttime3 - ttime2 <<
", " << ttime4 - ttime3
193 <<
" seconds" << std::endl;
194 std::cout <<
"TSTTb/MOAB_ucd_blocked_memory_(rss): initial, after_construction, e-v, v-e, after_dtor:" << rmem0
195 <<
", " << rmem1 <<
", " << rmem2 <<
", " << rmem3 <<
", " << rmem4 <<
" kb" << std::endl;
200 double utime, stime, ttime0, ttime1, ttime2, ttime3, ttime4;
201 long imem0, rmem0, imem1, rmem1, imem2, rmem2, imem3, rmem3, imem4, rmem4;
202 print_time(
false, ttime0, utime, stime, imem0, rmem0 );
205 int numv = nelem + 1;
206 int numv_sq = numv * numv;
207 int num_verts = numv * numv * numv;
208 #define VINDEX( i, j, k ) ( ( i ) + ( (j)*numv ) + ( (k)*numv_sq ) )
214 for(
int i = 0; i < num_verts; i++ )
217 iMesh_createVtx(
mesh, coords[i], coords[i + num_verts], coords[i + 2 * num_verts], sidl_vertices + i,
221 cerr <<
"Couldn't create vertex in individual call" << endl;
228 for(
int i = 0; i < nelem; i++ )
230 for(
int j = 0; j < nelem; j++ )
232 for(
int k = 0; k < nelem; k++ )
234 int vijk =
VINDEX( i, j, k );
235 tmp_conn[0] = sidl_vertices[vijk];
236 tmp_conn[1] = sidl_vertices[vijk + 1];
237 tmp_conn[2] = sidl_vertices[vijk + 1 + numv];
238 tmp_conn[3] = sidl_vertices[vijk + numv];
239 tmp_conn[4] = sidl_vertices[vijk + numv * numv];
240 tmp_conn[5] = sidl_vertices[vijk + 1 + numv * numv];
241 tmp_conn[6] = sidl_vertices[vijk + 1 + numv + numv * numv];
242 tmp_conn[7] = sidl_vertices[vijk + numv + numv * numv];
250 cerr <<
"Couldn't create hex element in individual call" << endl;
257 print_time(
false, ttime1, utime, stime, imem1, rmem1 );
259 free( sidl_vertices );
264 print_time(
false, ttime2, utime, stime, imem2, rmem2 );
268 print_time(
false, ttime3, utime, stime, imem3, rmem3 );
272 print_time(
false, ttime4, utime, stime, imem4, rmem4 );
274 std::cout <<
"TSTTb/MOAB_ucd_indiv: nelem, construct, e_to_v, v_to_e, after_dtor = " << nelem <<
", "
275 << ttime1 - ttime0 <<
", " << ttime2 - ttime1 <<
", " << ttime3 - ttime2 <<
", " << ttime4 - ttime3
276 <<
" seconds" << std::endl;
277 std::cout <<
"TSTTb/MOAB_ucd_indiv_memory_(rss): initial, after_construction, e-v, v-e, after_dtor:" << rmem0
278 <<
", " << rmem1 <<
", " << rmem2 <<
", " << rmem3 <<
", " << rmem4 <<
" kb" << std::endl;
284 int all_hexes_size, all_hexes_allocated = 0;
292 cerr <<
"Couldn't get root set." << endl;
297 &all_hexes_size, &success );
300 cerr <<
"Couldn't get all hex elements in query_mesh" << endl;
306 int dum_connect_allocated = 0, dum_connect_size;
307 double* dum_coords = NULL;
308 int dum_coords_size, dum_coords_allocated = 0;
313 for(
int i = 0; i < all_hexes_size; i++ )
325 &dum_coords_size, &success );
327 double centroid[3] = { 0.0, 0.0, 0.0 };
330 for(
int j = 0; j < 8; j++ )
332 centroid[0] += dum_coords[j];
333 centroid[1] += dum_coords[8 + j];
334 centroid[2] += dum_coords[16 + j];
339 for(
int j = 0; j < 8; j++ )
341 centroid[0] += dum_coords[3 * j];
342 centroid[1] += dum_coords[3 * j + 1];
343 centroid[2] += dum_coords[3 * j + 2];
350 cerr <<
"Problem getting connectivity or vertex coords." << endl;
363 int all_verts_allocated = 0, all_verts_size;
370 cerr <<
"Couldn't get root set." << endl;
379 cerr <<
"Couldn't get all vertices in query_vert_to_elem" << endl;
386 int dum_hexes_allocated = 8, dum_hexes_size;
389 for(
int i = 0; i < all_verts_size; i++ )
398 cerr <<
"Problem getting connectivity or vertex coords." << endl;
407 void print_time(
const bool print_em,
double& tot_time,
double& utime,
double& stime,
long& imem,
long& rmem )
412 struct rusage r_usage;
413 getrusage( RUSAGE_SELF, &r_usage );
414 utime = (double)r_usage.ru_utime.tv_sec + ( (
double)r_usage.ru_utime.tv_usec / 1.e6 );
415 stime = (double)r_usage.ru_stime.tv_sec + ( (
double)r_usage.ru_stime.tv_usec / 1.e6 );
416 tot_time = utime + stime;
418 std::cout <<
"User, system, total time = " << utime <<
", " << stime <<
", " << tot_time << std::endl;
421 imem = r_usage.ru_idrss;
422 rmem = r_usage.ru_maxrss;
423 std::cout <<
"Max resident set size = " << r_usage.ru_maxrss <<
" kbytes" << std::endl;
424 std::cout <<
"Int resident set size = " << r_usage.ru_idrss <<
" kbytes" << std::endl;
427 system(
"ps o args,drs,rss | grep perf | grep -v grep" );
434 void compute_edge(
double* start,
const int nelem,
const double xint,
const int stride )
436 for(
int i = 1; i < nelem; i++ )
438 start[i * stride] = start[0] + i * xint;
439 start[nelem + 1 + i * stride] = start[nelem + 1] + i * xint;
440 start[2 * ( nelem + 1 ) + i * stride] = start[2 * ( nelem + 1 )] + i * xint;
444 void compute_face(
double* a,
const int nelem,
const double xint,
const int stride1,
const int stride2 )
447 for(
int j = 1; j < nelem; j++ )
449 double tse = j * xint;
450 for(
int i = 1; i < nelem; i++ )
452 double ada = i * xint;
454 a[i * stride1 + j * stride2] =
455 ( 1.0 - ada ) * a[i * stride1] + ada * a[i * stride1 + nelem * stride2] +
456 ( 1.0 - tse ) * a[j * stride2] + tse * a[j * stride2 + nelem * stride1] -
457 ( 1.0 - tse ) * ( 1.0 - ada ) * a[0] - ( 1.0 - tse ) * ada * a[nelem * stride1] -
458 tse * ( 1.0 - ada ) * a[nelem * stride2] - tse * ada * a[nelem * ( stride1 + stride2 )];
459 a[nelem + 1 + i * stride1 + j * stride2] =
460 ( 1.0 - ada ) * a[nelem + 1 + i * stride1] + ada * a[nelem + 1 + i * stride1 + nelem * stride2] +
461 ( 1.0 - tse ) * a[nelem + 1 + j * stride2] + tse * a[nelem + 1 + j * stride2 + nelem * stride1] -
462 ( 1.0 - tse ) * ( 1.0 - ada ) * a[nelem + 1 + 0] -
463 ( 1.0 - tse ) * ada * a[nelem + 1 + nelem * stride1] -
464 tse * ( 1.0 - ada ) * a[nelem + 1 + nelem * stride2] -
465 tse * ada * a[nelem + 1 + nelem * ( stride1 + stride2 )];
466 a[2 * ( nelem + 1 ) + i * stride1 + j * stride2] =
467 ( 1.0 - ada ) * a[2 * ( nelem + 1 ) + i * stride1] +
468 ada * a[2 * ( nelem + 1 ) + i * stride1 + nelem * stride2] +
469 ( 1.0 - tse ) * a[2 * ( nelem + 1 ) + j * stride2] +
470 tse * a[2 * ( nelem + 1 ) + j * stride2 + nelem * stride1] -
471 ( 1.0 - tse ) * ( 1.0 - ada ) * a[2 * ( nelem + 1 ) + 0] -
472 ( 1.0 - tse ) * ada * a[2 * ( nelem + 1 ) + nelem * stride1] -
473 tse * ( 1.0 - ada ) * a[2 * ( nelem + 1 ) + nelem * stride2] -
474 tse * ada * a[2 * ( nelem + 1 ) + nelem * ( stride1 + stride2 )];
481 double ttime0, ttime1, utime1, stime1;
483 print_time(
false, ttime0, utime1, stime1, imem, rmem );
486 int numv = nelem + 1;
487 int numv_sq = numv * numv;
488 int tot_numv = numv * numv * numv;
489 coords = (
double*)malloc( 3 * tot_numv *
sizeof(
double ) );
492 #define VINDEX( i, j, k ) ( ( i ) + ( (j)*numv ) + ( (k)*numv_sq ) )
494 double scale1, scale2, scale3;
532 int i000 =
VINDEX( 0, 0, 0 );
533 int ia00 =
VINDEX( nelem, 0, 0 );
534 int i0t0 =
VINDEX( 0, nelem, 0 );
535 int iat0 =
VINDEX( nelem, nelem, 0 );
536 int i00g =
VINDEX( 0, 0, nelem );
537 int ia0g =
VINDEX( nelem, 0, nelem );
538 int i0tg =
VINDEX( 0, nelem, nelem );
539 int iatg =
VINDEX( nelem, nelem, nelem );
543 int gammaInts = nelem;
545 for(
int i = 1; i < nelem; i++ )
547 for(
int j = 1; j < nelem; j++ )
549 for(
int k = 1; k < nelem; k++ )
552 double tse = i * scale1;
553 double ada = j * scale2;
554 double gamma = k * scale3;
555 double tm1 = 1.0 - tse;
556 double am1 = 1.0 - ada;
557 double gm1 = 1.0 - gamma;
559 cX = gm1 * ( am1 * ( tm1 * coords[i000] + tse * coords[i0t0] ) +
560 ada * ( tm1 * coords[ia00] + tse * coords[iat0] ) ) +
561 gamma * ( am1 * ( tm1 * coords[i00g] + tse * coords[i0tg] ) +
562 ada * ( tm1 * coords[ia0g] + tse * coords[iatg] ) );
564 cY = gm1 * ( am1 * ( tm1 * coords[i000] + tse * coords[i0t0] ) +
565 ada * ( tm1 * coords[ia00] + tse * coords[iat0] ) ) +
566 gamma * ( am1 * ( tm1 * coords[i00g] + tse * coords[i0tg] ) +
567 ada * ( tm1 * coords[ia0g] + tse * coords[iatg] ) );
569 cZ = gm1 * ( am1 * ( tm1 * coords[i000] + tse * coords[i0t0] ) +
570 ada * ( tm1 * coords[ia00] + tse * coords[iat0] ) ) +
571 gamma * ( am1 * ( tm1 * coords[i00g] + tse * coords[i0tg] ) +
572 ada * ( tm1 * coords[ia0g] + tse * coords[iatg] ) );
574 double* ai0k = &coords[
VINDEX( k, 0, i )];
575 double* aiak = &coords[
VINDEX( k, adaInts, i )];
576 double* a0jk = &coords[
VINDEX( k, j, 0 )];
577 double* atjk = &coords[
VINDEX( k, j, tseInts )];
578 double* aij0 = &coords[
VINDEX( 0, j, i )];
579 double* aijg = &coords[
VINDEX( gammaInts, j, i )];
581 coords[
VINDEX( i, j, k )] = ( am1 * ai0k[0] + ada * aiak[0] + tm1 * a0jk[0] + tse * atjk[0] +
582 gm1 * aij0[0] + gamma * aijg[0] ) /
586 coords[nelem + 1 +
VINDEX( i, j, k )] =
587 ( am1 * ai0k[nelem + 1] + ada * aiak[nelem + 1] + tm1 * a0jk[nelem + 1] + tse * atjk[nelem + 1] +
588 gm1 * aij0[nelem + 1] + gamma * aijg[nelem + 1] ) /
592 coords[2 * ( nelem + 1 ) +
VINDEX( i, j, k )] =
593 ( am1 * ai0k[2 * ( nelem + 1 )] + ada * aiak[2 * ( nelem + 1 )] + tm1 * a0jk[2 * ( nelem + 1 )] +
594 tse * atjk[2 * ( nelem + 1 )] + gm1 * aij0[2 * ( nelem + 1 )] +
595 gamma * aijg[2 * ( nelem + 1 )] ) /
603 for(
int i = 0; i < numv; i++ )
605 for(
int j = 0; j < numv; j++ )
607 for(
int k = 0; k < numv; k++ )
611 coords[idx] = i * scale1;
612 coords[tot_numv + idx] = j * scale2;
613 coords[2 * tot_numv + idx] = k * scale3;
619 print_time(
false, ttime1, utime1, stime1, imem, rmem );
620 std::cout <<
"TSTTbinding/MOAB: TFI time = " << ttime1 - ttime0 <<
" sec" << std::endl;
626 int nume_tot = nelem * nelem * nelem;
627 connect = (
int*)malloc( 8 * nume_tot *
sizeof(
int ) );
630 int numv = nelem + 1;
631 int numv_sq = numv * numv;
633 for(
int i = 0; i < nelem; i++ )
635 for(
int j = 0; j < nelem; j++ )
637 for(
int k = 0; k < nelem; k++ )
639 vijk = vstart +
VINDEX( i, j, k );
640 connect[idx++] = vijk;
641 connect[idx++] = vijk + 1;
642 connect[idx++] = vijk + 1 + numv;
643 connect[idx++] = vijk + numv;
644 connect[idx++] = vijk + numv * numv;
645 connect[idx++] = vijk + 1 + numv * numv;
646 connect[idx++] = vijk + 1 + numv + numv * numv;
647 connect[idx++] = vijk + numv + numv * numv;
648 assert( i <= numv * numv * numv );