docs/moab/tstt__perf__binding_8cpp_source.html

/** TSTT Mesh Interface brick mesh performance test

 *

 * This program tests TSTT mesh interface functions used to create a square structured

 * mesh. Boilerplate taken from tstt mesh interface test in MOAB and performance test in MOAB

 *

 */


// Different platforms follow different conventions for usage

#ifndef _WIN32

#include <sys/resource.h>

#endif

#ifdef SOLARIS

extern "C" int getrusage( int, struct rusage* );

#ifndef RUSAGE_SELF

#include </usr/ucbinclude/sys/rusage.h>

#endif

#endif


#include <cstdlib>

#include <cstdio>

#include <iostream>


#include <iostream>

#include <cassert>

#include "iMesh.h"


// needed to get the proper size for handles


using namespace std;


double LENGTH = 1.0;


// forward declare some functions

void query_elem_to_vert( iMesh_Instance mesh );

void query_vert_to_elem( iMesh_Instance mesh );

void print_time( const bool print_em, double& tot_time, double& utime, double& stime, long& imem, long& rmem );

void build_connect( const int nelem, const int vstart, int*& connect );

void testB( iMesh_Instance mesh, const int nelem, const double* coords, int* connect );

void testC( iMesh_Instance mesh, const int nelem, const double* coords );

void compute_edge( double* start, const int nelem, const double xint, const int stride );

void compute_face( double* a, const int nelem, const double xint, const int stride1, const int stride2 );

void build_coords( const int nelem, double*& coords );

void build_connect( const int nelem, const int vstart, int*& connect );


int main( int argc, char* argv[] )

{

 int nelem = 20;

 if( argc < 3 )

 {

 std::cout << "Usage: " << argv[0] << " <ints_per_side> <A|B|C>" << std::endl;

 return 1;

 }


 char which_test = '\0';


 sscanf( argv[1], "%d", &nelem );

 sscanf( argv[2], "%c", &which_test );


 if( which_test != 'B' && which_test != 'C' )

 {

 std::cout << "Must indicate B or C for test." << std::endl;

 return 1;

 }


 std::cout << "number of elements: " << nelem << "; test " << which_test << std::endl;


 // initialize the data in native format


 // pre-build the coords array

 double* coords;

 build_coords( nelem, coords );

 assert( NULL != coords );


 // test B: create mesh using bulk interface


 // create an implementation

 iMesh_Instance mesh;

 int result;

 iMesh_newMesh( NULL, &mesh, &result, 0 );

 int* connect = NULL;


 if( iBase_SUCCESS != result )

 {

 cerr << "Couldn't create mesh instance." << endl;

 iMesh_dtor( mesh, &result );

 return 1;

 }


 iMesh_setGeometricDimension( mesh, 3, &result );

 if( iBase_SUCCESS != result )

 {

 cerr << "Couldn't set geometric dimension." << endl;

 iMesh_dtor( mesh, &result );

 return 1;

 }


 switch( which_test )

 {

 case 'B':

 build_connect( nelem, 1, connect );


 // test B: create mesh using bulk interface

 testB( mesh, nelem, coords, connect );

 break;


 case 'C':

 // test C: create mesh using individual interface

 testC( mesh, nelem, coords );

 break;

 }


 free( coords );


 return 0;

}


void testB( iMesh_Instance mesh, const int nelem, const double* coords, int* connect )

{

 double utime, stime, ttime0, ttime1, ttime2, ttime3, ttime4;

 long imem0, rmem0, imem1, rmem1, imem2, rmem2, imem3, rmem3, imem4, rmem4;


 print_time( false, ttime0, utime, stime, imem0, rmem0 );

 int num_verts = ( nelem + 1 ) * ( nelem + 1 ) * ( nelem + 1 );

 int num_elems = nelem * nelem * nelem;


 // create vertices as a block; initialize to NULL so allocation is done in interface

 iBase_EntityHandle* vertices = NULL;

 int vertices_allocated = 0, vertices_size;

 int result;

 iMesh_createVtxArr( mesh, num_verts, iBase_BLOCKED, coords, 3 * num_verts, &vertices, &vertices_allocated,

 &vertices_size, &result );

 if( iBase_SUCCESS != result )

 {

 cerr << "Couldn't create vertices in bulk call" << endl;

 free( vertices );

 return;

 }


 // need to explicitly fill connectivity array, since we don't know

 // the format of entity handles

 int nconnect = 8 * num_elems;

 iBase_EntityHandle* sidl_connect = (iBase_EntityHandle*)malloc( nconnect * sizeof( iBase_EntityHandle ) );


 for( int i = 0; i < nconnect; i++ )

 {

 // use connect[i]-1 because we used starting vertex index (vstart) of 1

 assert( connect[i] - 1 < num_verts );

 sidl_connect[i] = vertices[connect[i] - 1];

 }


 // no longer need vertices and connect arrays, free here to reduce overall peak memory usage

 free( vertices );

 free( connect );


 // create the entities

 iBase_EntityHandle* new_hexes = NULL;

 int new_hexes_allocated = 0, new_hexes_size;

 int* status = NULL;

 int status_allocated = 0, status_size;


 iMesh_createEntArr( mesh, iMesh_HEXAHEDRON, sidl_connect, nconnect, &new_hexes, &new_hexes_allocated,

 &new_hexes_size, &status, &status_allocated, &status_size, &result );

 if( iBase_SUCCESS != result )

 {

 cerr << "Couldn't create hex elements in bulk call" << endl;

 free( new_hexes );

 free( status );

 free( sidl_connect );

 return;

 }


 print_time( false, ttime1, utime, stime, imem1, rmem1 );


 free( status );

 free( new_hexes );

 free( sidl_connect );


 // query the mesh 2 ways

 query_elem_to_vert( mesh );


 print_time( false, ttime2, utime, stime, imem2, rmem2 );


 query_vert_to_elem( mesh );


 print_time( false, ttime3, utime, stime, imem3, rmem3 );


 iMesh_dtor( mesh, &result );


 print_time( false, ttime4, utime, stime, imem4, rmem4 );


 std::cout << "TSTTb/MOAB_ucd_blocked: nelem, construct, e_to_v, v_to_e, after_dtor = " << nelem << ", "

 << ttime1 - ttime0 << ", " << ttime2 - ttime1 << ", " << ttime3 - ttime2 << ", " << ttime4 - ttime3

 << " seconds" << std::endl;

 std::cout << "TSTTb/MOAB_ucd_blocked_memory_(rss): initial, after_construction, e-v, v-e, after_dtor:" << rmem0

 << ", " << rmem1 << ", " << rmem2 << ", " << rmem3 << ", " << rmem4 << " kb" << std::endl;

}


void testC( iMesh_Instance mesh, const int nelem, const double* coords )

{

 double utime, stime, ttime0, ttime1, ttime2, ttime3, ttime4;

 long imem0, rmem0, imem1, rmem1, imem2, rmem2, imem3, rmem3, imem4, rmem4;

 print_time( false, ttime0, utime, stime, imem0, rmem0 );


 // need some dimensions

 int numv = nelem + 1;

 int numv_sq = numv * numv;

 int num_verts = numv * numv * numv;

#define VINDEX( i, j, k ) ( ( i ) + ( (j)*numv ) + ( (k)*numv_sq ) )


 // array to hold vertices created individually

 iBase_EntityHandle* sidl_vertices = (iBase_EntityHandle*)malloc( num_verts * sizeof( iBase_EntityHandle ) );

 int result;


 for( int i = 0; i < num_verts; i++ )

 {

 // create the vertex

 iMesh_createVtx( mesh, coords[i], coords[i + num_verts], coords[i + 2 * num_verts], sidl_vertices + i,

 &result );

 if( iBase_SUCCESS != result )

 {

 cerr << "Couldn't create vertex in individual call" << endl;

 return;

 }

 }


 iBase_EntityHandle tmp_conn[8], new_hex;


 for( int i = 0; i < nelem; i++ )

 {

 for( int j = 0; j < nelem; j++ )

 {

 for( int k = 0; k < nelem; k++ )

 {

 int vijk = VINDEX( i, j, k );

 tmp_conn[0] = sidl_vertices[vijk];

 tmp_conn[1] = sidl_vertices[vijk + 1];

 tmp_conn[2] = sidl_vertices[vijk + 1 + numv];

 tmp_conn[3] = sidl_vertices[vijk + numv];

 tmp_conn[4] = sidl_vertices[vijk + numv * numv];

 tmp_conn[5] = sidl_vertices[vijk + 1 + numv * numv];

 tmp_conn[6] = sidl_vertices[vijk + 1 + numv + numv * numv];

 tmp_conn[7] = sidl_vertices[vijk + numv + numv * numv];


 // create the entity


 int status;

 iMesh_createEnt( mesh, iMesh_HEXAHEDRON, tmp_conn, 8, &new_hex, &status, &result );

 if( iBase_SUCCESS != result )

 {

 cerr << "Couldn't create hex element in individual call" << endl;

 // return; do not return if errors, as we would leak memory

 }

 }

 }

 }


 print_time( false, ttime1, utime, stime, imem1, rmem1 );


 free( sidl_vertices );


 // query the mesh 2 ways

 query_elem_to_vert( mesh );


 print_time( false, ttime2, utime, stime, imem2, rmem2 );


 query_vert_to_elem( mesh );


 print_time( false, ttime3, utime, stime, imem3, rmem3 );


 iMesh_dtor( mesh, &result );


 print_time( false, ttime4, utime, stime, imem4, rmem4 );


 std::cout << "TSTTb/MOAB_ucd_indiv: nelem, construct, e_to_v, v_to_e, after_dtor = " << nelem << ", "

 << ttime1 - ttime0 << ", " << ttime2 - ttime1 << ", " << ttime3 - ttime2 << ", " << ttime4 - ttime3

 << " seconds" << std::endl;

 std::cout << "TSTTb/MOAB_ucd_indiv_memory_(rss): initial, after_construction, e-v, v-e, after_dtor:" << rmem0

 << ", " << rmem1 << ", " << rmem2 << ", " << rmem3 << ", " << rmem4 << " kb" << std::endl;

}


void query_elem_to_vert( iMesh_Instance mesh )

{

 iBase_EntityHandle* all_hexes = NULL;

 int all_hexes_size, all_hexes_allocated = 0;


 // get all the hex elements

 int success;

 iBase_EntitySetHandle root_set;

 iMesh_getRootSet( mesh, &root_set, &success );

 if( iBase_SUCCESS != success )

 {

 cerr << "Couldn't get root set." << endl;

 return;

 }


 iMesh_getEntities( mesh, root_set, iBase_REGION, iMesh_HEXAHEDRON, &all_hexes, &all_hexes_allocated,

 &all_hexes_size, &success );

 if( iBase_SUCCESS != success )

 {

 cerr << "Couldn't get all hex elements in query_mesh" << endl;

 return;

 }


 // now loop over elements

 iBase_EntityHandle* dum_connect = NULL;

 int dum_connect_allocated = 0, dum_connect_size;

 double* dum_coords = NULL;

 int dum_coords_size, dum_coords_allocated = 0;

 int order;

 iMesh_getDfltStorage( mesh, &order, &success );

 if( iBase_SUCCESS != success ) return;


 for( int i = 0; i < all_hexes_size; i++ )

 {

 // get the connectivity of this element; will allocate space on 1st iteration,

 // but will have correct size on subsequent ones

 iMesh_getEntAdj( mesh, all_hexes[i], iBase_VERTEX, &dum_connect, &dum_connect_allocated, &dum_connect_size,

 &success );


 if( iBase_SUCCESS == success )

 {

 // get vertex coordinates; ; will allocate space on 1st iteration,

 // but will have correct size on subsequent ones

 iMesh_getVtxArrCoords( mesh, dum_connect, dum_connect_size, order, &dum_coords, &dum_coords_allocated,

 &dum_coords_size, &success );


 double centroid[3] = { 0.0, 0.0, 0.0 };

 if( order == iBase_BLOCKED )

 {

 for( int j = 0; j < 8; j++ )

 {

 centroid[0] += dum_coords[j];

 centroid[1] += dum_coords[8 + j];

 centroid[2] += dum_coords[16 + j];

 }

 }

 else

 {

 for( int j = 0; j < 8; j++ )

 {

 centroid[0] += dum_coords[3 * j];

 centroid[1] += dum_coords[3 * j + 1];

 centroid[2] += dum_coords[3 * j + 2];

 }

 }

 }


 if( iBase_SUCCESS != success )

 {

 cerr << "Problem getting connectivity or vertex coords." << endl;

 return;

 }

 }


 free( all_hexes );

 free( dum_connect );

 free( dum_coords );

}


void query_vert_to_elem( iMesh_Instance mesh )

{

 iBase_EntityHandle* all_verts = NULL;

 int all_verts_allocated = 0, all_verts_size;


 iBase_EntitySetHandle root_set;

 int success;

 iMesh_getRootSet( mesh, &root_set, &success );

 if( iBase_SUCCESS != success )

 {

 cerr << "Couldn't get root set." << endl;

 return;

 }


 // get all the vertices elements

 iMesh_getEntities( mesh, root_set, iBase_VERTEX, iMesh_POINT, &all_verts, &all_verts_allocated, &all_verts_size,

 &success );

 if( iBase_SUCCESS != success )

 {

 cerr << "Couldn't get all vertices in query_vert_to_elem" << endl;

 return;

 }


 // for this mesh, should never be more than 8 hexes connected to a vertex

 iBase_EntityHandle* dum_hexes = (iBase_EntityHandle*)calloc( 8, sizeof( iBase_EntityHandle ) );


 int dum_hexes_allocated = 8, dum_hexes_size;


 // now loop over vertices

 for( int i = 0; i < all_verts_size; i++ )

 {


 // get the connectivity of this element; will have to allocate space on every

 // iteration, since size can vary

 iMesh_getEntAdj( mesh, all_verts[i], iBase_REGION, &dum_hexes, &dum_hexes_allocated, &dum_hexes_size,

 &success );

 if( iBase_SUCCESS != success )

 {

 cerr << "Problem getting connectivity or vertex coords." << endl;

 // do not return early, as we would leak memory

 }

 }


 free( dum_hexes );

 free( all_verts );

}


void print_time( const bool print_em, double& tot_time, double& utime, double& stime, long& imem, long& rmem )

{

 // Disabling this for windows

 // Different platforms follow different conventions for usage

#ifndef _WIN32 // Windows does not have rusage

 struct rusage r_usage;

 getrusage( RUSAGE_SELF, &r_usage );

 utime = (double)r_usage.ru_utime.tv_sec + ( (double)r_usage.ru_utime.tv_usec / 1.e6 );

 stime = (double)r_usage.ru_stime.tv_sec + ( (double)r_usage.ru_stime.tv_usec / 1.e6 );

 tot_time = utime + stime;

 if( print_em )

 std::cout << "User, system, total time = " << utime << ", " << stime << ", " << tot_time << std::endl;


#ifndef LINUX

 imem = r_usage.ru_idrss;

 rmem = r_usage.ru_maxrss;

 std::cout << "Max resident set size = " << r_usage.ru_maxrss << " kbytes" << std::endl;

 std::cout << "Int resident set size = " << r_usage.ru_idrss << " kbytes" << std::endl;

#else

 imem = rmem = 0;

 system( "ps o args,drs,rss | grep perf | grep -v grep" ); // RedHat 9.0 doesnt fill in actual

 // memory data

#endif

 // delete [] hex_array;

#endif // if not on windows

}


void compute_edge( double* start, const int nelem, const double xint, const int stride )

{

 for( int i = 1; i < nelem; i++ )

 {

 start[i * stride] = start[0] + i * xint;

 start[nelem + 1 + i * stride] = start[nelem + 1] + i * xint;

 start[2 * ( nelem + 1 ) + i * stride] = start[2 * ( nelem + 1 )] + i * xint;

 }

}


void compute_face( double* a, const int nelem, const double xint, const int stride1, const int stride2 )

{

 // 2D TFI on a face starting at a, with strides stride1 in ada and stride2 in tse

 for( int j = 1; j < nelem; j++ )

 {

 double tse = j * xint;

 for( int i = 1; i < nelem; i++ )

 {

 double ada = i * xint;


 a[i * stride1 + j * stride2] =

 ( 1.0 - ada ) * a[i * stride1] + ada * a[i * stride1 + nelem * stride2] +

 ( 1.0 - tse ) * a[j * stride2] + tse * a[j * stride2 + nelem * stride1] -

 ( 1.0 - tse ) * ( 1.0 - ada ) * a[0] - ( 1.0 - tse ) * ada * a[nelem * stride1] -

 tse * ( 1.0 - ada ) * a[nelem * stride2] - tse * ada * a[nelem * ( stride1 + stride2 )];

 a[nelem + 1 + i * stride1 + j * stride2] =

 ( 1.0 - ada ) * a[nelem + 1 + i * stride1] + ada * a[nelem + 1 + i * stride1 + nelem * stride2] +

 ( 1.0 - tse ) * a[nelem + 1 + j * stride2] + tse * a[nelem + 1 + j * stride2 + nelem * stride1] -

 ( 1.0 - tse ) * ( 1.0 - ada ) * a[nelem + 1 + 0] -

 ( 1.0 - tse ) * ada * a[nelem + 1 + nelem * stride1] -

 tse * ( 1.0 - ada ) * a[nelem + 1 + nelem * stride2] -

 tse * ada * a[nelem + 1 + nelem * ( stride1 + stride2 )];

 a[2 * ( nelem + 1 ) + i * stride1 + j * stride2] =

 ( 1.0 - ada ) * a[2 * ( nelem + 1 ) + i * stride1] +

 ada * a[2 * ( nelem + 1 ) + i * stride1 + nelem * stride2] +

 ( 1.0 - tse ) * a[2 * ( nelem + 1 ) + j * stride2] +

 tse * a[2 * ( nelem + 1 ) + j * stride2 + nelem * stride1] -

 ( 1.0 - tse ) * ( 1.0 - ada ) * a[2 * ( nelem + 1 ) + 0] -

 ( 1.0 - tse ) * ada * a[2 * ( nelem + 1 ) + nelem * stride1] -

 tse * ( 1.0 - ada ) * a[2 * ( nelem + 1 ) + nelem * stride2] -

 tse * ada * a[2 * ( nelem + 1 ) + nelem * ( stride1 + stride2 )];

 }

 }

}


void build_coords( const int nelem, double*& coords )

{

 double ttime0, ttime1, utime1, stime1;

 long imem, rmem;

 print_time( false, ttime0, utime1, stime1, imem, rmem );


 // allocate the memory

 int numv = nelem + 1;

 int numv_sq = numv * numv;

 int tot_numv = numv * numv * numv;

 coords = (double*)malloc( 3 * tot_numv * sizeof( double ) );


// use FORTRAN-like indexing

#define VINDEX( i, j, k ) ( ( i ) + ( (j)*numv ) + ( (k)*numv_sq ) )

 int idx;

 double scale1, scale2, scale3;

 // use these to prevent optimization on 1-scale, etc (real map wouldn't have

 // all these equal)

 scale1 = LENGTH / nelem;

 scale2 = LENGTH / nelem;

 scale3 = LENGTH / nelem;


#ifdef REALTFI

 // use a real TFI xform to compute coordinates

 // compute edges

 // i (stride=1)

 compute_edge( &coords[VINDEX( 0, 0, 0 )], nelem, scale1, 1 );

 compute_edge( &coords[VINDEX( 0, nelem, 0 )], nelem, scale1, 1 );

 compute_edge( &coords[VINDEX( 0, 0, nelem )], nelem, scale1, 1 );

 compute_edge( &coords[VINDEX( 0, nelem, nelem )], nelem, scale1, 1 );

 // j (stride=numv)

 compute_edge( &coords[VINDEX( 0, 0, 0 )], nelem, scale1, numv );

 compute_edge( &coords[VINDEX( nelem, 0, 0 )], nelem, scale1, numv );

 compute_edge( &coords[VINDEX( 0, 0, nelem )], nelem, scale1, numv );

 compute_edge( &coords[VINDEX( nelem, 0, nelem )], nelem, scale1, numv );

 // k (stride=numv^2)

 compute_edge( &coords[VINDEX( 0, 0, 0 )], nelem, scale1, numv_sq );

 compute_edge( &coords[VINDEX( nelem, 0, 0 )], nelem, scale1, numv_sq );

 compute_edge( &coords[VINDEX( 0, nelem, 0 )], nelem, scale1, numv_sq );

 compute_edge( &coords[VINDEX( nelem, nelem, 0 )], nelem, scale1, numv_sq );


 // compute faces

 // i=0, nelem

 compute_face( &coords[VINDEX( 0, 0, 0 )], nelem, scale1, numv, numv_sq );

 compute_face( &coords[VINDEX( nelem, 0, 0 )], nelem, scale1, numv, numv_sq );

 // j=0, nelem

 compute_face( &coords[VINDEX( 0, 0, 0 )], nelem, scale1, 1, numv_sq );

 compute_face( &coords[VINDEX( 0, nelem, 0 )], nelem, scale1, 1, numv_sq );

 // k=0, nelem

 compute_face( &coords[VINDEX( 0, 0, 0 )], nelem, scale1, 1, numv );

 compute_face( &coords[VINDEX( 0, 0, nelem )], nelem, scale1, 1, numv );


 // initialize corner indices

 int i000 = VINDEX( 0, 0, 0 );

 int ia00 = VINDEX( nelem, 0, 0 );

 int i0t0 = VINDEX( 0, nelem, 0 );

 int iat0 = VINDEX( nelem, nelem, 0 );

 int i00g = VINDEX( 0, 0, nelem );

 int ia0g = VINDEX( nelem, 0, nelem );

 int i0tg = VINDEX( 0, nelem, nelem );

 int iatg = VINDEX( nelem, nelem, nelem );

 double cX, cY, cZ;

 int adaInts = nelem;

 int tseInts = nelem;

 int gammaInts = nelem;


 for( int i = 1; i < nelem; i++ )

 {

 for( int j = 1; j < nelem; j++ )

 {

 for( int k = 1; k < nelem; k++ )

 {

 // idx = VINDEX(i,j,k);

 double tse = i * scale1;

 double ada = j * scale2;

 double gamma = k * scale3;

 double tm1 = 1.0 - tse;

 double am1 = 1.0 - ada;

 double gm1 = 1.0 - gamma;


 cX = gm1 * ( am1 * ( tm1 * coords[i000] + tse * coords[i0t0] ) +

 ada * ( tm1 * coords[ia00] + tse * coords[iat0] ) ) +

 gamma * ( am1 * ( tm1 * coords[i00g] + tse * coords[i0tg] ) +

 ada * ( tm1 * coords[ia0g] + tse * coords[iatg] ) );


 cY = gm1 * ( am1 * ( tm1 * coords[i000] + tse * coords[i0t0] ) +

 ada * ( tm1 * coords[ia00] + tse * coords[iat0] ) ) +

 gamma * ( am1 * ( tm1 * coords[i00g] + tse * coords[i0tg] ) +

 ada * ( tm1 * coords[ia0g] + tse * coords[iatg] ) );


 cZ = gm1 * ( am1 * ( tm1 * coords[i000] + tse * coords[i0t0] ) +

 ada * ( tm1 * coords[ia00] + tse * coords[iat0] ) ) +

 gamma * ( am1 * ( tm1 * coords[i00g] + tse * coords[i0tg] ) +

 ada * ( tm1 * coords[ia0g] + tse * coords[iatg] ) );


 double* ai0k = &coords[VINDEX( k, 0, i )];

 double* aiak = &coords[VINDEX( k, adaInts, i )];

 double* a0jk = &coords[VINDEX( k, j, 0 )];

 double* atjk = &coords[VINDEX( k, j, tseInts )];

 double* aij0 = &coords[VINDEX( 0, j, i )];

 double* aijg = &coords[VINDEX( gammaInts, j, i )];


 coords[VINDEX( i, j, k )] = ( am1 * ai0k[0] + ada * aiak[0] + tm1 * a0jk[0] + tse * atjk[0] +

 gm1 * aij0[0] + gamma * aijg[0] ) /

 2.0 -

 cX / 2.0;


 coords[nelem + 1 + VINDEX( i, j, k )] =

 ( am1 * ai0k[nelem + 1] + ada * aiak[nelem + 1] + tm1 * a0jk[nelem + 1] + tse * atjk[nelem + 1] +

 gm1 * aij0[nelem + 1] + gamma * aijg[nelem + 1] ) /

 2.0 -

 cY / 2.0;


 coords[2 * ( nelem + 1 ) + VINDEX( i, j, k )] =

 ( am1 * ai0k[2 * ( nelem + 1 )] + ada * aiak[2 * ( nelem + 1 )] + tm1 * a0jk[2 * ( nelem + 1 )] +

 tse * atjk[2 * ( nelem + 1 )] + gm1 * aij0[2 * ( nelem + 1 )] +

 gamma * aijg[2 * ( nelem + 1 )] ) /

 2.0 -

 cZ / 2.0;

 }

 }

 }


#else

 for( int i = 0; i < numv; i++ )

 {

 for( int j = 0; j < numv; j++ )

 {

 for( int k = 0; k < numv; k++ )

 {

 idx = VINDEX( i, j, k );

 // blocked coordinate ordering

 coords[idx] = i * scale1;

 coords[tot_numv + idx] = j * scale2;

 coords[2 * tot_numv + idx] = k * scale3;

 }

 }

 }

#endif


 print_time( false, ttime1, utime1, stime1, imem, rmem );

 std::cout << "TSTTbinding/MOAB: TFI time = " << ttime1 - ttime0 << " sec" << std::endl;

}


void build_connect( const int nelem, const int vstart, int*& connect )

{

 // allocate the memory

 int nume_tot = nelem * nelem * nelem;

 connect = (int*)malloc( 8 * nume_tot * sizeof( int ) );


 int vijk;

 int numv = nelem + 1;

 int numv_sq = numv * numv;

 int idx = 0;

 for( int i = 0; i < nelem; i++ )

 {

 for( int j = 0; j < nelem; j++ )

 {

 for( int k = 0; k < nelem; k++ )

 {

 vijk = vstart + VINDEX( i, j, k );

 connect[idx++] = vijk;

 connect[idx++] = vijk + 1;

 connect[idx++] = vijk + 1 + numv;

 connect[idx++] = vijk + numv;

 connect[idx++] = vijk + numv * numv;

 connect[idx++] = vijk + 1 + numv * numv;

 connect[idx++] = vijk + 1 + numv + numv * numv;

 connect[idx++] = vijk + numv + numv * numv;

 assert( i <= numv * numv * numv );

 }

 }

 }

}