Loading [MathJax]/extensions/tex2jax.js
Mesh Oriented datABase  (version 5.5.1)
An array-based unstructured mesh library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
findpt.c
Go to the documentation of this file.
1 #include <stdio.h> 2 #include <stdlib.h> 3 #include <stdarg.h> 4 #include <math.h> /* for cos, fabs */ 5 #include <float.h> 6 #include <string.h> /* for memcpy */ 7  8 #include "moab/FindPtFuncs.h" 9  10 const unsigned opt_no_constraints_2 = 3 + 1; 11 const unsigned opt_no_constraints_3 = 9 + 3 + 1; 12  13 /*-------------------------------------------------------------------------- 14  Lobatto Polynomial Bounds 15  16  Needed inputs are the Gauss-Lobatto quadrature nodes and weights: 17  unsigned nr = ..., ns = ...; 18  realType zr[nr], wr[nr]; 19  realType zs[ns], ws[ns]; 20  21  lobatto_nodes(zr,nr); lobatto_weights(zr,wr,nr); 22  lobatto_nodes(zs,ns); lobatto_weights(zs,ws,ns); 23  24  The number of points in the constructed piecewise (bi-)linear bounds 25  is a parameter; more points give tighter bounds 26  27  unsigned mr = 2*nr, ms = 2*ns; 28  29  The necessary setup is accomplished via: 30  lob_bnd_base b_data_r; 31  lob_bnd_ext e_data_s; 32  33  lob_bnd_base_alloc(&b_data_r,nr,mr); 34  lob_bnd_base_setup(&b_data_r,zr,wr); 35  lob_bnd_ext_alloc(&e_data_s,ns,ms); 36  lob_bnd_ext_setup(&e_data_s,zs,ws); 37  38  Bounds may then be computed via: 39  realType work1r[2*mr], work1s[2*ms], work2[2*mr + 2*mr*ns + 2*mr*ms]; 40  realType ur[nr], us[ns]; // 1-d polynomials on the zr[] and zs[] nodes 41  realType u[ns][nr]; // 2-d polynomial on zr[] (x) zs[] 42  realType bound[2]; // = { min, max } (to be computed) 43  44  lob_bnd_1(&b_data_r ,ur,bound,work1r); // compute bounds on ur 45  lob_bnd_1(&e_data_s.b,us,bound,work1s); // compute bounds on us 46  lob_bnd_2(&b_data_r, &e_data_s, 47  (const double*)&u[0][0],bound,work2); // compute bounds on u 48  The above routines access the zr,zs arrays passed to *_setup 49  (so do not delete them between calls) 50  51  Memory allocated in *_setup is freed with 52  lob_bnd_base_free(&b_data_r); 53  lob_bnd_ext_free(&e_data_s); 54  55  --------------------------------------------------------------------------*/ 56  57 typedef struct 58 { 59  unsigned n; /* number of Lobatto nodes in input */ 60  unsigned m; /* number of Chebyshev nodes used to calculate bounds */ 61  realType *Q0, *Q1; /* Q0[n], Q1[n] -- first two rows of change of basis matrix 62  from Lobatto node Lagrangian to Legendre */ 63  const realType* z; /* z[n] -- external; Lobatto nodes */ 64  realType* h; /* h[m] -- Chebyshev nodes */ 65  realType *uv, *ov; /* uv[n][m], ov[n][m] -- 66  uv[j][:] is a piecewise linear function in the nodal 67  basis with nodes h[m] that is everywhere less 68  than or equal to the jth Lagrangian basis 69  function (with the Lobatto nodes z[n]) 70  ov[j][:] is everywhere greater than or equal */ 71 } lob_bnd_base; 72  73 typedef struct 74 { 75  lob_bnd_base b; 76  realType *uvp, *uvn, *ovp, *ovn; /* [n][m] -- uv and ov split into 77  positive and negative parts */ 78 } lob_bnd_ext; 79  80 static void lob_bnd_base_alloc( lob_bnd_base* p, unsigned n, unsigned m ) 81 { 82  p->n = n, p->m = m; 83  p->Q0 = tmalloc( realType, 2 * n + m + 2 * n * m ); 84  p->Q1 = p->Q0 + n; 85  p->h = p->Q1 + n; 86  p->uv = p->h + m; 87  p->ov = p->uv + n * m; 88 } 89  90 static void lob_bnd_base_free( lob_bnd_base* p ) 91 { 92  free( p->Q0 ); 93 } 94  95 static void lob_bnd_ext_alloc( lob_bnd_ext* p, unsigned n, unsigned m ) 96 { 97  p->b.n = n, p->b.m = m; 98  p->b.Q0 = tmalloc( realType, 2 * n + m + 6 * n * m ); 99  p->b.Q1 = p->b.Q0 + n; 100  p->b.h = p->b.Q1 + n; 101  p->b.uv = p->b.h + m; 102  p->b.ov = p->b.uv + n * m; 103  p->uvp = p->b.ov + n * m; 104  p->uvn = p->uvp + n * m; 105  p->ovp = p->uvn + n * m; 106  p->ovn = p->ovp + n * m; 107 } 108  109 static void lob_bnd_ext_free( lob_bnd_ext* p ) 110 { 111  free( p->b.Q0 ); 112 } 113  114 static void lob_bnd_base_setup( lob_bnd_base* p, const realType* z, const realType* w ) 115 { 116  unsigned i, j, m = p->m, n = p->n, mm = 2 * m - 1; 117  realType *q = tmalloc( realType, ( 2 * n + 1 ) * mm + 6 * n ), *J = q + mm, *D = J + n * mm, *work = D + n * mm; 118  p->z = z; 119  for( i = 0; i < n; ++i ) 120  p->Q0[i] = w[i] / 2, p->Q1[i] = 3 * p->Q0[i] * z[i]; 121  p->h[0] = -1, p->h[m - 1] = 1; 122  for( j = 1; j < m - 1; ++j ) 123  p->h[j] = mbcos( ( m - j - 1 ) * MOAB_POLY_PI / ( m - 1 ) ); 124  for( j = 0; j < m - 1; ++j ) 125  q[2 * j] = p->h[j], q[2 * j + 1] = ( p->h[j] + p->h[j + 1] ) / 2; 126  q[mm - 1] = p->h[m - 1]; 127  lagrange_weights_deriv( z, n, q, mm, J, D, work ); 128  for( i = 0; i < n; ++i ) 129  { 130  realType *uv = p->uv + i * m, *ov = p->ov + i * m; 131  ov[0] = uv[0] = J[i]; 132  ov[m - 1] = uv[m - 1] = J[( mm - 1 ) * n + i]; 133  for( j = 1; j < m - 1; ++j ) 134  { 135  unsigned jj = 2 * j; 136  realType c0 = J[( jj - 1 ) * n + i] + ( q[jj] - q[jj - 1] ) * D[( jj - 1 ) * n + i]; 137  realType c1 = J[( jj + 1 ) * n + i] + ( q[jj] - q[jj + 1] ) * D[( jj + 1 ) * n + i]; 138  realType c2 = J[jj * n + i]; 139  rminmax_3( &uv[j], &ov[j], c0, c1, c2 ); 140  } 141  } 142  free( q ); 143 } 144  145 static void lob_bnd_ext_setup( lob_bnd_ext* p, const realType* z, const realType* w ) 146 { 147  unsigned i, mn = p->b.m * p->b.n; 148  lob_bnd_base_setup( &p->b, z, w ); 149  for( i = 0; i < mn; ++i ) 150  { 151  realType uvi = p->b.uv[i], ovi = p->b.ov[i]; 152  p->uvp[i] = p->uvn[i] = p->ovp[i] = p->ovn[i] = 0; 153  if( uvi > 0 ) 154  p->uvp[i] = uvi; 155  else 156  p->uvn[i] = uvi; 157  if( ovi > 0 ) 158  p->ovp[i] = ovi; 159  else 160  p->ovn[i] = ovi; 161  } 162 } 163  164 static void lob_bnd_lines( const lob_bnd_base* p, const realType* u, realType* a, realType* b ) 165 { 166  unsigned i, j; 167  realType a0 = 0, a1 = 0; 168  const realType *uv = p->uv, *ov = p->ov; 169  for( i = 0; i < p->n; ++i ) 170  a0 += p->Q0[i] * u[i], a1 += p->Q1[i] * u[i]; 171  for( j = 0; j < p->m; ++j ) 172  b[j] = a[j] = a0 + a1 * p->h[j]; 173  for( i = 0; i < p->n; ++i ) 174  { 175  realType w = u[i] - ( a0 + a1 * p->z[i] ); 176  if( w >= 0 ) 177  for( j = 0; j < p->m; ++j ) 178  a[j] += w * ( *uv++ ), b[j] += w * ( *ov++ ); 179  else 180  for( j = 0; j < p->m; ++j ) 181  a[j] += w * ( *ov++ ), b[j] += w * ( *uv++ ); 182  } 183 } 184  185 /* work holds p->m * 2 doubles */ 186 static void lob_bnd_1( const lob_bnd_base* p, const realType* u, realType bnd[2], realType* work ) 187 { 188  unsigned j; 189  realType *a = work, *b = work + p->m; 190  lob_bnd_lines( p, u, a, b ); 191  bnd[0] = a[0], bnd[1] = b[0]; 192  for( j = 1; j < p->m; ++j ) 193  { 194  if( a[j] < bnd[0] ) bnd[0] = a[j]; 195  if( b[j] > bnd[1] ) bnd[1] = b[j]; 196  } 197 } 198  199 /* work holds 2*mr + 2*mr*ns + 2*mr*ms doubles */ 200 static void lob_bnd_2( const lob_bnd_base* pr, 201  const lob_bnd_ext* ps, 202  const realType* u, 203  realType bnd[2], 204  realType* work ) 205 { 206  unsigned nr = pr->n, mr = pr->m, ns = ps->b.n, ms = ps->b.m; 207  realType *a0 = work, *a1 = a0 + mr, *ar_ = a1 + mr, *ar = ar_, *br_ = ar + mr * ns, *br = br_, *a_ = br + mr * ns, 208  *a = a_, *b_ = a + mr * ms, *b = b_, *uvp, *ovp, *uvn, *ovn; 209  realType b0, b1; 210  unsigned i, j, k; 211  for( i = 0; i < mr; ++i ) 212  a0[i] = a1[i] = 0; 213  for( j = 0; j < ns; ++j, u += nr ) 214  { 215  realType q0 = ps->b.Q0[j], q1 = ps->b.Q1[j]; 216  lob_bnd_lines( pr, u, ar, br ); 217  for( i = 0; i < mr; ++i ) 218  { 219  realType t = ( *ar++ + *br++ ) / 2; 220  a0[i] += q0 * t, a1[i] += q1 * t; 221  } 222  } 223  for( i = 0; i < mr; ++i ) 224  { 225  realType a0i = a0[i], a1i = a1[i]; 226  for( k = 0; k < ms; ++k ) 227  *b++ = *a++ = a0i + a1i * ps->b.h[k]; 228  } 229  ar = ar_, br = br_; 230  uvp = ps->uvp, ovp = ps->ovp, uvn = ps->uvn, ovn = ps->ovn; 231  for( j = 0; j < ns; ++j, uvp += ms, uvn += ms, ovp += ms, ovn += ms ) 232  { 233  realType zj = ps->b.z[j]; 234  a = a_, b = b_; 235  for( i = 0; i < mr; ++i ) 236  { 237  realType t = a0[i] + a1[i] * zj; 238  realType uw = *ar++ - t; 239  realType ow = *br++ - t; 240  if( uw >= 0 ) /* 0 <= uw <= ow */ 241  for( k = 0; k < ms; ++k ) 242  *a++ += uw * uvp[k] + ow * uvn[k], *b++ += ow * ovp[k] + uw * ovn[k]; 243  else if( ow <= 0 ) /* uw <= ow <= 0 */ 244  for( k = 0; k < ms; ++k ) 245  *a++ += uw * ovp[k] + ow * ovn[k], *b++ += ow * uvp[k] + uw * uvn[k]; 246  else /* uw < 0 < ow */ 247  for( k = 0; k < ms; ++k ) 248  *a++ += uw * ovp[k] + ow * uvn[k], *b++ += ow * ovp[k] + uw * uvn[k]; 249  } 250  } 251  b0 = a_[0], b1 = b_[0]; 252  for( i = 1; i < mr * ms; ++i ) 253  { 254  if( a_[i] < b0 ) b0 = a_[i]; 255  if( b_[i] > b1 ) b1 = b_[i]; 256  } 257  bnd[0] = b0, bnd[1] = b1; 258 } 259  260 /*-------------------------------------------------------------------------- 261  Small Matrix Inverse 262  --------------------------------------------------------------------------*/ 263  264 static void mat_inv_2( const realType A[4], realType inv[4] ) 265 { 266  const realType idet = 1 / ( A[0] * A[3] - A[1] * A[2] ); 267  inv[0] = idet * A[3]; 268  inv[1] = -( idet * A[1] ); 269  inv[2] = -( idet * A[2] ); 270  inv[3] = idet * A[0]; 271 } 272  273 static void mat_inv_3( const realType A[9], realType inv[9] ) 274 { 275  const realType a = A[4] * A[8] - A[5] * A[7], b = A[5] * A[6] - A[3] * A[8], c = A[3] * A[7] - A[4] * A[6], 276  idet = 1 / ( A[0] * a + A[1] * b + A[2] * c ); 277  inv[0] = idet * a; 278  inv[1] = idet * ( A[2] * A[7] - A[1] * A[8] ); 279  inv[2] = idet * ( A[1] * A[5] - A[2] * A[4] ); 280  inv[3] = idet * b; 281  inv[4] = idet * ( A[0] * A[8] - A[2] * A[6] ); 282  inv[5] = idet * ( A[2] * A[3] - A[0] * A[5] ); 283  inv[6] = idet * c; 284  inv[7] = idet * ( A[1] * A[6] - A[0] * A[7] ); 285  inv[8] = idet * ( A[0] * A[4] - A[1] * A[3] ); 286 } 287  288 static void mat_app_2r( realType y[2], const realType A[4], const realType x[2] ) 289 { 290  y[0] = A[0] * x[0] + A[1] * x[1]; 291  y[1] = A[2] * x[0] + A[3] * x[1]; 292 } 293  294 static void mat_app_2c( realType y[2], const realType A[4], const realType x[2] ) 295 { 296  y[0] = A[0] * x[0] + A[2] * x[1]; 297  y[1] = A[1] * x[0] + A[3] * x[1]; 298 } 299  300 static void mat_app_3r( realType y[3], const realType A[9], const realType x[3] ) 301 { 302  y[0] = A[0] * x[0] + A[1] * x[1] + A[2] * x[2]; 303  y[1] = A[3] * x[0] + A[4] * x[1] + A[5] * x[2]; 304  y[2] = A[6] * x[0] + A[7] * x[1] + A[8] * x[2]; 305 } 306  307 static void mat_app_3c( realType y[3], const realType A[9], const realType x[3] ) 308 { 309  y[0] = A[0] * x[0] + A[3] * x[1] + A[6] * x[2]; 310  y[1] = A[1] * x[0] + A[4] * x[1] + A[7] * x[2]; 311  y[2] = A[2] * x[0] + A[5] * x[1] + A[8] * x[2]; 312 } 313  314 static void tinyla_solve_2( realType x[2], const realType A[4], const realType b[2] ) 315 { 316  realType inv[4]; 317  mat_inv_2( A, inv ); 318  mat_app_2r( x, inv, b ); 319 } 320  321 static void tinyla_solve_3( realType x[3], const realType A[9], const realType b[3] ) 322 { 323  realType inv[9]; 324  mat_inv_3( A, inv ); 325  mat_app_3r( x, inv, b ); 326 } 327  328 /* solve 329  A[0] x0 + A[2] x1 = b0, 330  A[2] x0 + A[1] x1 = b1 331 */ 332 static void tinyla_solve_sym_2( realType* x0, realType* x1, const realType A[3], realType b0, realType b1 ) 333 { 334  const realType idet = 1 / ( A[0] * A[1] - A[2] * A[2] ); 335  *x0 = idet * ( A[1] * b0 - A[2] * b1 ); 336  *x1 = idet * ( A[0] * b1 - A[2] * b0 ); 337 } 338  339 /*-------------------------------------------------------------------------- 340  Oriented Bounding Box 341  342  Suffixes on names are _2 for 2-d and _3 for 3-d 343  344  Needed inputs are the Gauss-Lobatto quadrature nodes and weights: 345  unsigned nr = ..., ns = ...; 346  realType zr[nr], wr[nr]; 347  realType zs[ns], ws[ns]; 348  349  lobatto_nodes(zr,nr); lobatto_weights(zr,wr,nr); 350  lobatto_nodes(zs,ns); lobatto_weights(zs,ws,ns); 351  352  The number of points in the constructed piecewise (bi-)linear bounds 353  for the boundaries is a parameter; more points give tighter bounds 354  355  unsigned mr = 2*nr, ms = 2*ns; 356  357  Bounding boxes are increased by a relative amount as a parameter 358  359  realType tol = 0.01; 360  361  Setup is accomplished via: 362  363  const realType *z[2] = {zr,zs}, *w[2] = {wr,ws}; 364  const unsigned n[2] = {nr,ns}, m[2] = {mr,ms}; 365  obbox_data_2 *data = obbox_setup_2(z,w,n,m); 366  367  Bounding box data may then be computed: 368  369  obbox_2 box; // will store bounding box information 370  realType xm[ns][nr], ym[ns][nr]; // x, y coordinates of the element nodes 371  372  obbox_calc_2(data, tol, (const realType *)&xm[0][0], 373  (const realType *)&ym[0][0], &box); 374  375  A point may be tested: 376  377  const realType x[2]; // point to test 378  realType r[2]; 379  380  if( obbox_axis_test_2(&box, x) ) 381  ... // x failed axis-aligned bounding box test 382  383  if( obbox_test_2(&box, x, r) ) 384  ... // x failed oriented bounding box test 385  else 386  ... // r suitable as initial guess for parametric coords 387  388  Once all bounding box information has been computed 389  390  obbox_free_2(data); 391  392  to free the memory allocated with obbox_setup_2. 393  394  --------------------------------------------------------------------------*/ 395  396 typedef struct 397 { 398  lob_bnd_base dr, ds; 399  realType *Jr0, *Dr0, *Js0, *Ds0, *work; 400 } obbox_data_2; 401  402 typedef struct 403 { 404  lob_bnd_base dr; 405  lob_bnd_ext ds, dt; 406  realType *Jr0, *Dr0, *Js0, *Ds0, *Jt0, *Dt0, *work; 407 } obbox_data_3; 408  409 static void obbox_data_alloc_2( obbox_data_2* p, const unsigned n[2], const unsigned m[2] ) 410 { 411  const unsigned max_npm = umax_2( n[0] + m[0], n[1] + m[1] ); 412  lob_bnd_base_alloc( &p->dr, n[0], m[0] ); 413  lob_bnd_base_alloc( &p->ds, n[1], m[1] ); 414  p->Jr0 = tmalloc( realType, 2 * n[0] + 2 * n[1] + 2 * max_npm ); 415  p->Dr0 = p->Jr0 + n[0]; 416  p->Js0 = p->Dr0 + n[0]; 417  p->Ds0 = p->Js0 + n[1]; 418  p->work = p->Ds0 + n[1]; 419 } 420  421 static void obbox_data_free_2( obbox_data_2* p ) 422 { 423  lob_bnd_base_free( &p->dr ); 424  lob_bnd_base_free( &p->ds ); 425  free( p->Jr0 ); 426 } 427  428 static void obbox_data_alloc_3( obbox_data_3* p, const unsigned n[3], const unsigned m[3] ) 429 { 430  const unsigned wk1 = 3 * n[0] * n[1] + 2 * m[0] + 2 * m[0] * n[1] + 2 * m[0] * m[1]; 431  const unsigned wk2 = 3 * n[0] * n[2] + 2 * m[0] + 2 * m[0] * n[2] + 2 * m[0] * m[2]; 432  const unsigned wk3 = 3 * n[1] * n[2] + 2 * m[1] + 2 * m[1] * n[2] + 2 * m[1] * m[2]; 433  const unsigned wk_max = umax_3( wk1, wk2, wk3 ); 434  lob_bnd_base_alloc( &p->dr, n[0], m[0] ); 435  lob_bnd_ext_alloc( &p->ds, n[1], m[1] ); 436  lob_bnd_ext_alloc( &p->dt, n[2], m[2] ); 437  p->Jr0 = tmalloc( realType, 2 * n[0] + 2 * n[1] + 2 * n[2] + wk_max ); 438  p->Dr0 = p->Jr0 + n[0]; 439  p->Js0 = p->Dr0 + n[0]; 440  p->Ds0 = p->Js0 + n[1]; 441  p->Jt0 = p->Ds0 + n[1]; 442  p->Dt0 = p->Jt0 + n[2]; 443  p->work = p->Dt0 + n[2]; 444 } 445  446 static void obbox_data_free_3( obbox_data_3* p ) 447 { 448  lob_bnd_base_free( &p->dr ); 449  lob_bnd_ext_free( &p->ds ); 450  lob_bnd_ext_free( &p->dt ); 451  free( p->Jr0 ); 452 } 453  454 static obbox_data_2* obbox_setup_2( const realType* const z[2], 455  const realType* const w[2], 456  const unsigned n[2], 457  const unsigned m[2] ) 458 { 459  const realType zero = 0; 460  realType* work; 461  obbox_data_2* p = tmalloc( obbox_data_2, 1 ); 462  obbox_data_alloc_2( p, n, m ); 463  lob_bnd_base_setup( &p->dr, z[0], w[0] ); 464  lob_bnd_base_setup( &p->ds, z[1], w[1] ); 465  work = tmalloc( realType, 6 * umax_2( n[0], n[1] ) ); 466  lagrange_weights_deriv( z[0], n[0], &zero, 1, p->Jr0, p->Dr0, work ); 467  lagrange_weights_deriv( z[1], n[1], &zero, 1, p->Js0, p->Ds0, work ); 468  free( work ); 469  return p; 470 } 471  472 static obbox_data_3* obbox_setup_3( const realType* const z[3], 473  const realType* const w[3], 474  const unsigned n[3], 475  const unsigned m[3] ) 476 { 477  const realType zero = 0; 478  realType* work; 479  obbox_data_3* p = tmalloc( obbox_data_3, 1 ); 480  obbox_data_alloc_3( p, n, m ); 481  lob_bnd_base_setup( &p->dr, z[0], w[0] ); 482  lob_bnd_ext_setup( &p->ds, z[1], w[1] ); 483  lob_bnd_ext_setup( &p->dt, z[2], w[2] ); 484  work = tmalloc( realType, 6 * umax_3( n[0], n[1], n[2] ) ); 485  lagrange_weights_deriv( z[0], n[0], &zero, 1, p->Jr0, p->Dr0, work ); 486  lagrange_weights_deriv( z[1], n[1], &zero, 1, p->Js0, p->Ds0, work ); 487  lagrange_weights_deriv( z[2], n[2], &zero, 1, p->Jt0, p->Dt0, work ); 488  free( work ); 489  return p; 490 } 491  492 static void obbox_free_2( obbox_data_2* p ) 493 { 494  obbox_data_free_2( p ); 495  free( p ); 496 } 497  498 static void obbox_free_3( obbox_data_3* p ) 499 { 500  obbox_data_free_3( p ); 501  free( p ); 502 } 503  504 int obbox_axis_test_2( const obbox_2* p, const realType x[2] ) 505 { 506  return ( x[0] < p->axis_bnd[0] || x[0] > p->axis_bnd[1] || x[1] < p->axis_bnd[2] || x[1] > p->axis_bnd[3] ); 507 } 508  509 int obbox_axis_test_3( const obbox_3* p, const realType x[3] ) 510 { 511  return ( x[0] < p->axis_bnd[0] || x[0] > p->axis_bnd[1] || x[1] < p->axis_bnd[2] || x[1] > p->axis_bnd[3] || 512  x[2] < p->axis_bnd[4] || x[2] > p->axis_bnd[5] ); 513 } 514  515 int obbox_test_2( const obbox_2* p, const realType x[2], realType r[2] ) 516 { 517  realType xt[2]; 518  xt[0] = x[0] - p->x[0]; 519  xt[1] = x[1] - p->x[1]; 520  r[0] = p->A[0] * xt[0] + p->A[1] * xt[1]; 521  if( mbabs( r[0] ) > 1 ) return 1; 522  r[1] = p->A[2] * xt[0] + p->A[3] * xt[1]; 523  return mbabs( r[1] ) > 1; 524 } 525  526 int obbox_test_3( const obbox_3* p, const realType x[3], realType r[3] ) 527 { 528  realType xt[3]; 529  xt[0] = x[0] - p->x[0]; 530  xt[1] = x[1] - p->x[1]; 531  xt[2] = x[2] - p->x[2]; 532  r[0] = p->A[0] * xt[0] + p->A[1] * xt[1] + p->A[2] * xt[2]; 533  if( mbabs( r[0] ) > 1 ) return 1; 534  r[1] = p->A[3] * xt[0] + p->A[4] * xt[1] + p->A[5] * xt[2]; 535  if( mbabs( r[1] ) > 1 ) return 1; 536  r[2] = p->A[6] * xt[0] + p->A[7] * xt[1] + p->A[8] * xt[2]; 537  return mbabs( r[2] ) > 1; 538 } 539  540 void obbox_calc_tfm_2( const realType* x, 541  const realType* y, 542  unsigned n, 543  unsigned s, 544  const realType c0[2], 545  const realType A[4], 546  realType* u ) 547 { 548  unsigned i; 549  realType* v = u + n; 550  for( i = 0; i < n; ++i, x += s, y += s ) 551  { 552  const realType xt = *x - c0[0], yt = *y - c0[1]; 553  u[i] = A[0] * xt + A[1] * yt; 554  v[i] = A[2] * xt + A[3] * yt; 555  } 556 } 557  558 void obbox_calc_tfm_3( const realType* x, 559  const realType* y, 560  const realType* z, 561  unsigned nr, 562  unsigned sr, 563  unsigned ns, 564  unsigned ss, 565  const realType c0[3], 566  const realType A[9], 567  realType* u ) 568 { 569  unsigned i, j; 570  realType *v = u + nr * ns, *w = v + nr * ns; 571  for( j = 0; j < ns; ++j, x += ss, y += ss, z += ss ) 572  { 573  for( i = 0; i < nr; ++i, x += sr, y += sr, z += sr ) 574  { 575  const realType xt = *x - c0[0], yt = *y - c0[1], zt = *z - c0[2]; 576  *u++ = A[0] * xt + A[1] * yt + A[2] * zt; 577  *v++ = A[3] * xt + A[4] * yt + A[5] * zt; 578  *w++ = A[6] * xt + A[7] * yt + A[8] * zt; 579  } 580  } 581 } 582  583 void obbox_merge_2( realType* b, const realType* ob ) 584 { 585  if( ob[0] < b[0] ) b[0] = ob[0]; 586  if( ob[1] > b[1] ) b[1] = ob[1]; 587  if( ob[2] < b[2] ) b[2] = ob[2]; 588  if( ob[3] > b[3] ) b[3] = ob[3]; 589 } 590  591 void obbox_merge_3( realType* b, const realType* ob ) 592 { 593  if( ob[0] < b[0] ) b[0] = ob[0]; 594  if( ob[1] > b[1] ) b[1] = ob[1]; 595  if( ob[2] < b[2] ) b[2] = ob[2]; 596  if( ob[3] > b[3] ) b[3] = ob[3]; 597  if( ob[4] < b[4] ) b[4] = ob[4]; 598  if( ob[5] > b[5] ) b[5] = ob[5]; 599 } 600  601 /* work holds 2*n + 2*m realTypes */ 602 void obbox_side_2( const realType* x, 603  const realType* y, 604  unsigned n, 605  unsigned s, 606  const realType c0[2], 607  const realType A[4], 608  realType* work, 609  const lob_bnd_base* lbd, 610  realType bnd[4] ) 611 { 612  obbox_calc_tfm_2( x, y, n, s, c0, A, work ); 613  lob_bnd_1( lbd, work, bnd, work + 2 * n ); 614  lob_bnd_1( lbd, work + n, bnd + 2, work + 2 * n ); 615 } 616  617 /* work holds 3*nr*ns + 2*mr + 2*mr*ns + 2*mr*ms realTypes */ 618 void obbox_side_3( const realType* x, 619  const realType* y, 620  const realType* z, 621  unsigned nr, 622  unsigned sr, 623  unsigned ns, 624  unsigned ss, 625  const realType c0[3], 626  const realType A[9], 627  realType* work, 628  const lob_bnd_base* dr, 629  const lob_bnd_ext* ds, 630  realType bnd[6] ) 631 { 632  obbox_calc_tfm_3( x, y, z, nr, sr, ns, ss, c0, A, work ); 633  lob_bnd_2( dr, ds, work, bnd, work + 3 * nr * ns ); 634  lob_bnd_2( dr, ds, work + nr * ns, bnd + 2, work + 3 * nr * ns ); 635  lob_bnd_2( dr, ds, work + 2 * nr * ns, bnd + 4, work + 3 * nr * ns ); 636 } 637  638 /* return bounds on u = A (x - c0) 639  bnd[0] <= u_0 <= bnd[1] 640  bnd[2] <= u_1 <= bnd[3] */ 641 void obbox_bnd_2( const obbox_data_2* p, 642  const realType* x, 643  const realType* y, 644  const realType c0[2], 645  const realType A[4], 646  realType bnd[4] ) 647 { 648  unsigned i, nr = p->dr.n, ns = p->ds.n; 649  realType obnd[4]; 650  651  i = nr * ( ns - 1 ); 652  obbox_side_2( x, y, nr, 1, c0, A, p->work, &p->dr, bnd ); 653  obbox_side_2( x + i, y + i, nr, 1, c0, A, p->work, &p->dr, obnd ); 654  obbox_merge_2( bnd, obnd ); 655  656  i = nr - 1; 657  obbox_side_2( x, y, ns, nr, c0, A, p->work, &p->ds, obnd ); 658  obbox_merge_2( bnd, obnd ); 659  obbox_side_2( x + i, y + i, nr, nr, c0, A, p->work, &p->ds, obnd ); 660  obbox_merge_2( bnd, obnd ); 661 } 662  663 /* return bounds on u = A (x - c0) 664  bnd[0] <= u_0 <= bnd[1] 665  bnd[2] <= u_1 <= bnd[3] 666  bnd[4] <= u_2 <= bnd[5] */ 667 void obbox_bnd_3( const obbox_data_3* p, 668  const realType* x, 669  const realType* y, 670  const realType* z, 671  const realType c0[3], 672  const realType A[9], 673  realType bnd[6] ) 674 { 675  unsigned i, nr = p->dr.n, ns = p->ds.b.n, nt = p->dt.b.n; 676  realType obnd[6]; 677  678  i = nr * ns * ( nt - 1 ); 679  obbox_side_3( x, y, z, nr, 1, ns, 0, c0, A, p->work, &p->dr, &p->ds, bnd ); 680  obbox_side_3( x + i, y + i, z + i, nr, 1, ns, 0, c0, A, p->work, &p->dr, &p->ds, obnd ); 681  obbox_merge_3( bnd, obnd ); 682  683  i = nr * ( ns - 1 ); 684  obbox_side_3( x, y, z, nr, 1, nt, i, c0, A, p->work, &p->dr, &p->dt, obnd ); 685  obbox_merge_3( bnd, obnd ); 686  obbox_side_3( x + i, y + i, z + i, nr, 1, nt, i, c0, A, p->work, &p->dr, &p->dt, obnd ); 687  obbox_merge_3( bnd, obnd ); 688  689  i = nr - 1; 690  obbox_side_3( x, y, z, ns, nr, nt, 0, c0, A, p->work, &p->ds.b, &p->dt, obnd ); 691  obbox_merge_3( bnd, obnd ); 692  obbox_side_3( x + i, y + i, z + i, ns, nr, nt, 0, c0, A, p->work, &p->ds.b, &p->dt, obnd ); 693  obbox_merge_3( bnd, obnd ); 694 } 695  696 void obbox_calc_2( const obbox_data_2* p, realType tol, const realType* x, const realType* y, obbox_2* b ) 697 { 698  const realType zero[2] = { 0, 0 }, id[4] = { 1, 0, 0, 1 }; 699  realType c0[2], jac[4], inv[4], bnd[4], u0[2], d[2]; 700  701  obbox_bnd_2( p, x, y, zero, id, b->axis_bnd ); 702  d[0] = b->axis_bnd[1] - b->axis_bnd[0]; 703  d[1] = b->axis_bnd[3] - b->axis_bnd[2]; 704  b->axis_bnd[0] -= tol * d[0], b->axis_bnd[1] += tol * d[0]; 705  b->axis_bnd[2] -= tol * d[1], b->axis_bnd[3] += tol * d[1]; 706  707  c0[0] = tensor_ig2( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.n, x, jac, p->work ); 708  c0[1] = tensor_ig2( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.n, y, jac + 2, p->work ); 709  mat_inv_2( jac, inv ); 710  711  obbox_bnd_2( p, x, y, c0, inv, bnd ); 712  713  u0[0] = ( bnd[0] + bnd[1] ) / 2; 714  u0[1] = ( bnd[2] + bnd[3] ) / 2; 715  d[0] = 2 / ( ( 1 + tol ) * ( bnd[1] - bnd[0] ) ); 716  d[1] = 2 / ( ( 1 + tol ) * ( bnd[3] - bnd[2] ) ); 717  b->x[0] = c0[0] + jac[0] * u0[0] + jac[1] * u0[1]; 718  b->x[1] = c0[1] + jac[2] * u0[0] + jac[3] * u0[1]; 719  b->A[0] = d[0] * inv[0], b->A[1] = d[0] * inv[1]; 720  b->A[2] = d[1] * inv[2], b->A[3] = d[1] * inv[3]; 721 } 722  723 void obbox_calc_3( const obbox_data_3* p, 724  realType tol, 725  const realType* x, 726  const realType* y, 727  const realType* z, 728  obbox_3* b ) 729 { 730  const realType zero[3] = { 0, 0 }, id[9] = { 1, 0, 0, 0, 1, 0, 0, 0, 1 }; 731  realType c0[3], jac[9], inv[9], bnd[6], u0[3], d[3]; 732  733  obbox_bnd_3( p, x, y, z, zero, id, b->axis_bnd ); 734  d[0] = b->axis_bnd[1] - b->axis_bnd[0]; 735  d[1] = b->axis_bnd[3] - b->axis_bnd[2]; 736  d[2] = b->axis_bnd[5] - b->axis_bnd[4]; 737  b->axis_bnd[0] -= tol * d[0], b->axis_bnd[1] += tol * d[0]; 738  b->axis_bnd[2] -= tol * d[1], b->axis_bnd[3] += tol * d[1]; 739  b->axis_bnd[4] -= tol * d[2], b->axis_bnd[5] += tol * d[2]; 740  741  c0[0] = 742  tensor_ig3( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.b.n, p->Jt0, p->Dt0, p->dt.b.n, x, jac, p->work ); 743  c0[1] = tensor_ig3( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.b.n, p->Jt0, p->Dt0, p->dt.b.n, y, jac + 3, 744  p->work ); 745  c0[2] = tensor_ig3( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.b.n, p->Jt0, p->Dt0, p->dt.b.n, z, jac + 6, 746  p->work ); 747  mat_inv_3( jac, inv ); 748  749  obbox_bnd_3( p, x, y, z, c0, inv, bnd ); 750  751  u0[0] = ( bnd[0] + bnd[1] ) / 2; 752  u0[1] = ( bnd[2] + bnd[3] ) / 2; 753  u0[2] = ( bnd[4] + bnd[5] ) / 2; 754  d[0] = 2 / ( ( 1 + tol ) * ( bnd[1] - bnd[0] ) ); 755  d[1] = 2 / ( ( 1 + tol ) * ( bnd[3] - bnd[2] ) ); 756  d[2] = 2 / ( ( 1 + tol ) * ( bnd[5] - bnd[4] ) ); 757  b->x[0] = c0[0] + jac[0] * u0[0] + jac[1] * u0[1] + jac[2] * u0[2]; 758  b->x[1] = c0[1] + jac[3] * u0[0] + jac[4] * u0[1] + jac[5] * u0[2]; 759  b->x[2] = c0[2] + jac[6] * u0[0] + jac[7] * u0[1] + jac[8] * u0[2]; 760  b->A[0] = d[0] * inv[0], b->A[1] = d[0] * inv[1], b->A[2] = d[0] * inv[2]; 761  b->A[3] = d[1] * inv[3], b->A[4] = d[1] * inv[4], b->A[5] = d[1] * inv[5]; 762  b->A[6] = d[2] * inv[6], b->A[7] = d[2] * inv[7], b->A[8] = d[2] * inv[8]; 763 } 764  765 /*-------------------------------------------------------------------------- 766  Point to Possible Elements Hashing 767  768  Initializing the data: 769  unsigned nel; // number of elements 770  const unsigned n[3]; // number of nodes in r, s, t directions 771  const realType *xm[3]; // n[0]*n[1]*n[2]*nel x,y,z coordinates 772  realType tol = 0.01; // how far point is allowed to be outside element 773  // relative to element size 774  unsigned max_size = n[0]*n[1]*n[2]*nel; // maximum size of hash table 775  776  hash_data_3 data; 777  hash_build_3(&data, xm, n, nel, max_size, tol); 778  779  Using the data: 780  realType x[3]; // point to find 781  782  unsigned index = hash_index_3(&data, x); 783  unsigned i, b = data.offset[index], e = data.offset[index+1]; 784  785  // point may be in elements 786  // data.offset[b], data.offset[b+1], ... , data.offset[e-1] 787  // 788  // list has maximum size data.max (e.g., e-b <= data.max) 789  790  for(i=b; i!=e; ++i) { 791  unsigned el = data.offset[i]; 792  const obbox_3 *obb = &data.obb[el]; // bounding box data for element el 793  ... 794  } 795  796  When done: 797  hash_free_3(&data); 798  799  --------------------------------------------------------------------------*/ 800  801 static int ifloor( realType x ) 802 { 803  /* 804  int y = x; 805  return (double)y > x ? y-1 : y; 806  */ 807  return mbfloor( x ); 808 } 809  810 static int iceil( realType x ) 811 { 812  /* 813  int y = x; 814  return (double)y < x ? y+1 : y; 815  */ 816  return mbceil( x ); 817 } 818  819 static unsigned hash_index_helper( realType low, realType fac, unsigned n, realType x ) 820 { 821  const int i = ifloor( ( x - low ) * fac ); 822  if( i < 0 ) return 0; 823  return umin_2( i, n - 1 ); 824 } 825  826 static uint hash_index_2( const hash_data_2* p, const realType x[2] ) 827 { 828  const unsigned n = p->hash_n; 829  return (uint)hash_index_helper( p->bnd[2], p->fac[1], n, x[1] ) * n + 830  hash_index_helper( p->bnd[0], p->fac[0], n, x[0] ); 831 } 832  833 static uint hash_index_3( const hash_data_3* p, const realType x[3] ) 834 { 835  const unsigned n = p->hash_n; 836  return ( (uint)hash_index_helper( p->bnd[4], p->fac[2], n, x[2] ) * n + 837  hash_index_helper( p->bnd[2], p->fac[1], n, x[1] ) ) * 838  n + 839  hash_index_helper( p->bnd[0], p->fac[0], n, x[0] ); 840 } 841  842 static void hash_setfac_2( hash_data_2* p, unsigned n ) 843 { 844  p->hash_n = n; 845  p->fac[0] = n / ( p->bnd[1] - p->bnd[0] ); 846  p->fac[1] = n / ( p->bnd[3] - p->bnd[2] ); 847 } 848  849 static void hash_setfac_3( hash_data_3* p, unsigned n ) 850 { 851  p->hash_n = n; 852  p->fac[0] = n / ( p->bnd[1] - p->bnd[0] ); 853  p->fac[1] = n / ( p->bnd[3] - p->bnd[2] ); 854  p->fac[2] = n / ( p->bnd[5] - p->bnd[4] ); 855 } 856  857 static void hash_range_2( const hash_data_2* p, uint i, unsigned d, unsigned* ia, unsigned* ib ) 858 { 859  const realType a = p->obb[i].axis_bnd[d * 2]; 860  const realType b = p->obb[i].axis_bnd[d * 2 + 1]; 861  const int i0 = ifloor( ( a - p->bnd[d * 2] ) * p->fac[d] ); 862  const unsigned i1 = iceil( ( b - p->bnd[d * 2] ) * p->fac[d] ); 863  *ia = imax_2( 0, i0 ); 864  *ib = imin_2( i1, p->hash_n ); 865  if( *ib == *ia ) ++( *ib ); 866 } 867  868 static void hash_range_3( const hash_data_3* p, uint i, unsigned d, unsigned* ia, unsigned* ib ) 869 { 870  const realType a = p->obb[i].axis_bnd[d * 2]; 871  const realType b = p->obb[i].axis_bnd[d * 2 + 1]; 872  const int i0 = ifloor( ( a - p->bnd[d * 2] ) * p->fac[d] ); 873  const unsigned i1 = iceil( ( b - p->bnd[d * 2] ) * p->fac[d] ); 874  *ia = imax_2( 0, i0 ); 875  *ib = imin_2( i1, p->hash_n ); 876  if( *ib == *ia ) ++( *ib ); 877 } 878  879 static uint hash_count_2( hash_data_2* p, uint nel, unsigned n ) 880 { 881  uint i, count = 0; 882  hash_setfac_2( p, n ); 883  for( i = 0; i < nel; ++i ) 884  { 885  unsigned ia, ib; 886  uint ci; 887  hash_range_2( p, i, 0, &ia, &ib ); 888  ci = ib - ia; 889  hash_range_2( p, i, 1, &ia, &ib ); 890  ci *= ib - ia; 891  count += ci; 892  } 893  return count; 894 } 895  896 static uint hash_count_3( hash_data_3* p, uint nel, unsigned n ) 897 { 898  uint i, count = 0; 899  hash_setfac_3( p, n ); 900  for( i = 0; i < nel; ++i ) 901  { 902  unsigned ia, ib; 903  uint ci; 904  hash_range_3( p, i, 0, &ia, &ib ); 905  ci = ib - ia; 906  hash_range_3( p, i, 1, &ia, &ib ); 907  ci *= ib - ia; 908  hash_range_3( p, i, 2, &ia, &ib ); 909  ci *= ib - ia; 910  count += ci; 911  } 912  return count; 913 } 914  915 static uint hash_opt_size_2( hash_data_2* p, uint nel, uint max_size ) 916 { 917  unsigned nl = 1, nu = ceil( sqrt( max_size - nel ) ); 918  uint size_low = 2 + nel; 919  while( nu - nl > 1 ) 920  { 921  unsigned nm = nl + ( nu - nl ) / 2; 922  uint size = (uint)nm * nm + 1 + hash_count_2( p, nel, nm ); 923  if( size <= max_size ) 924  nl = nm, size_low = size; 925  else 926  nu = nm; 927  } 928  hash_setfac_2( p, nl ); 929  return size_low; 930 } 931  932 static uint hash_opt_size_3( hash_data_3* p, uint nel, uint max_size ) 933 { 934  unsigned nl = 1, nu = ceil( pow( max_size - nel, 1.0 / 3 ) ); 935  uint size_low = 2 + nel; 936  while( nu - nl > 1 ) 937  { 938  unsigned nm = nl + ( nu - nl ) / 2; 939  uint size = (uint)nm * nm * nm + 1 + hash_count_3( p, nel, nm ); 940  if( size <= max_size ) 941  nl = nm, size_low = size; 942  else 943  nu = nm; 944  } 945  hash_setfac_3( p, nl ); 946  return size_low; 947 } 948  949 static void hash_getbb_2( hash_data_2* p, const realType* const elx[2], const unsigned n[2], uint nel, realType tol ) 950 { 951  obbox_data_2* data; 952  realType *z[2], *w[2]; 953  uint i; 954  unsigned d; 955  const unsigned nn = n[0] * n[1]; 956  unsigned int m[2]; 957  const realType* x[2]; 958  959  for( i = 0; i < 2; ++i ) 960  { 961  x[i] = elx[i]; 962  m[i] = 2 * n[i]; 963  } 964  965  z[0] = tmalloc( realType, 2 * ( n[0] + n[1] ) ); 966  w[0] = z[0] + n[0]; 967  z[1] = w[0] + n[0], w[1] = z[1] + n[1]; 968  for( d = 0; d < 2; ++d ) 969  lobatto_nodes( z[d], n[d] ), lobatto_weights( z[d], w[d], n[d] ); 970  data = obbox_setup_2( (const realType* const*)z, (const realType* const*)w, n, m ); 971  obbox_calc_2( data, tol, x[0], x[1], &p->obb[0] ); 972  memcpy( &p->bnd[0], (const realType*)&p->obb[0].axis_bnd[0], 4 * sizeof( realType ) ); 973  for( i = 0; i < nel; ++i, x[0] += nn, x[1] += nn ) 974  { 975  obbox_calc_2( data, tol, x[0], x[1], &p->obb[i] ); 976  obbox_merge_2( &p->bnd[0], (const realType*)&p->obb[i].axis_bnd[0] ); 977  } 978  obbox_free_2( data ); 979  free( z[0] ); 980 } 981  982 static void hash_getbb_3( hash_data_3* p, const realType* const elx[3], const unsigned n[3], uint nel, realType tol ) 983 { 984  obbox_data_3* data; 985  const realType* x[3]; 986  realType *z[3], *w[3]; 987  uint i; 988  unsigned d; 989  const unsigned nn = n[0] * n[1] * n[2]; 990  unsigned int m[3]; 991  992  for( i = 0; i < 3; ++i ) 993  { 994  x[i] = elx[i]; 995  m[i] = 2 * n[i]; 996  } 997  998  z[0] = tmalloc( realType, 2 * ( n[0] + n[1] + n[2] ) ); 999  w[0] = z[0] + n[0]; 1000  for( d = 1; d < 3; ++d ) 1001  z[d] = w[d - 1] + n[d - 1], w[d] = z[d] + n[d]; 1002  for( d = 0; d < 3; ++d ) 1003  lobatto_nodes( z[d], n[d] ), lobatto_weights( z[d], w[d], n[d] ); 1004  data = obbox_setup_3( (const realType* const*)z, (const realType* const*)w, n, m ); 1005  obbox_calc_3( data, tol, x[0], x[1], x[2], &p->obb[0] ); 1006  memcpy( &p->bnd[0], (const realType*)&p->obb[0].axis_bnd[0], 6 * sizeof( realType ) ); 1007  for( i = 0; i < nel; ++i, x[0] += nn, x[1] += nn, x[2] += nn ) 1008  { 1009  obbox_calc_3( data, tol, x[0], x[1], x[2], &p->obb[i] ); 1010  obbox_merge_3( &p->bnd[0], (const realType*)&p->obb[i].axis_bnd[0] ); 1011  } 1012  obbox_free_3( data ); 1013  free( z[0] ); 1014 } 1015  1016 static void hash_build_2( hash_data_2* p, 1017  const realType* const x[2], 1018  const unsigned n[2], 1019  uint nel, 1020  uint max_hash_size, 1021  realType tol ) 1022 { 1023  uint i, el, size, hn2, sum; 1024  unsigned hn; 1025  unsigned* count; 1026  p->obb = tmalloc( obbox_2, nel ); 1027  hash_getbb_2( p, x, n, nel, tol ); 1028  size = hash_opt_size_2( p, nel, max_hash_size ); 1029  p->offset = tmalloc( uint, size ); 1030  hn = p->hash_n; 1031  hn2 = (uint)hn * hn; 1032  count = tcalloc( unsigned, hn2 ); 1033  for( el = 0; el < nel; ++el ) 1034  { 1035  unsigned ia, ib, j, ja, jb; 1036  hash_range_2( p, el, 0, &ia, &ib ); 1037  hash_range_2( p, el, 1, &ja, &jb ); 1038  for( j = ja; j < jb; ++j ) 1039  for( i = ia; i < ib; ++i ) 1040  ++count[(uint)j * hn + i]; 1041  } 1042  sum = hn2 + 1, p->max = count[0]; 1043  p->offset[0] = sum; 1044  for( i = 0; i < hn2; ++i ) 1045  { 1046  if( count[i] > p->max ) p->max = count[i]; 1047  sum += count[i]; 1048  p->offset[i + 1] = sum; 1049  } 1050  for( el = 0; el < nel; ++el ) 1051  { 1052  unsigned ia, ib, j, ja, jb; 1053  hash_range_2( p, el, 0, &ia, &ib ); 1054  hash_range_2( p, el, 1, &ja, &jb ); 1055  for( j = ja; j < jb; ++j ) 1056  for( i = ia; i < ib; ++i ) 1057  { 1058  uint arrindex = (uint)j * hn + i; 1059  p->offset[p->offset[arrindex + 1] - count[arrindex]] = el; 1060  --count[arrindex]; 1061  } 1062  } 1063  free( count ); 1064 } 1065  1066 static void hash_build_3( hash_data_3* p, 1067  const realType* const x[3], 1068  const unsigned n[3], 1069  uint nel, 1070  uint max_hash_size, 1071  realType tol ) 1072 { 1073  uint i, el, size, hn3, sum; 1074  unsigned hn; 1075  unsigned* count; 1076  p->obb = tmalloc( obbox_3, nel ); 1077  hash_getbb_3( p, x, n, nel, tol ); 1078  size = hash_opt_size_3( p, nel, max_hash_size ); 1079  p->offset = tmalloc( uint, size ); 1080  hn = p->hash_n; 1081  hn3 = (uint)hn * hn * hn; 1082  count = tcalloc( unsigned, hn3 ); 1083  for( el = 0; el < nel; ++el ) 1084  { 1085  unsigned ia, ib, j, ja, jb, k, ka, kb; 1086  hash_range_3( p, el, 0, &ia, &ib ); 1087  hash_range_3( p, el, 1, &ja, &jb ); 1088  hash_range_3( p, el, 2, &ka, &kb ); 1089  for( k = ka; k < kb; ++k ) 1090  for( j = ja; j < jb; ++j ) 1091  for( i = ia; i < ib; ++i ) 1092  ++count[( (uint)k * hn + j ) * hn + i]; 1093  } 1094  sum = hn3 + 1, p->max = count[0]; 1095  p->offset[0] = sum; 1096  for( i = 0; i < hn3; ++i ) 1097  { 1098  if( count[i] > p->max ) p->max = count[i]; 1099  sum += count[i]; 1100  p->offset[i + 1] = sum; 1101  } 1102  for( el = 0; el < nel; ++el ) 1103  { 1104  unsigned ia, ib, j, ja, jb, k, ka, kb; 1105  hash_range_3( p, el, 0, &ia, &ib ); 1106  hash_range_3( p, el, 1, &ja, &jb ); 1107  hash_range_3( p, el, 2, &ka, &kb ); 1108  for( k = ka; k < kb; ++k ) 1109  for( j = ja; j < jb; ++j ) 1110  for( i = ia; i < ib; ++i ) 1111  { 1112  uint arrindex = ( (uint)k * hn + j ) * hn + i; 1113  p->offset[p->offset[arrindex + 1] - count[arrindex]] = el; 1114  --count[arrindex]; 1115  } 1116  } 1117  free( count ); 1118 } 1119  1120 static void hash_free_2( hash_data_2* p ) 1121 { 1122  free( p->obb ); 1123  free( p->offset ); 1124 } 1125  1126 static void hash_free_3( hash_data_3* p ) 1127 { 1128  free( p->obb ); 1129  free( p->offset ); 1130 } 1131  1132 /*-------------------------------------------------------------------------- 1133  Optimization algorithm to find a point within an element 1134  1135  Given x(r) (as values of x,y,z at all Lobatto nodes) and x_star, 1136  find the r that minimizes || x_star - x(r) ||_2 1137  1138  As a minimization problem, the Newton step is 1139  1140  __ 3 1141  [ J^T J - >_ d=1 resid_d H_d ] dr = J^t resid 1142  1143  where resid = x_star - x(r), J = [ dx_i/dr_j ], 1144  and H_d = [ d^2 x_d/dr_i dr_j ]. 1145  1146  This is the appropriate step to take whenever constraints are active, 1147  and the current iterate is on a boundary of the element. When the current 1148  iterate is inside, J is square ( dim r = dim x ), resid will become small, 1149  and the step 1150  1151  J dr = resid 1152  1153  may be used instead, still giving quadratic convergence. 1154  1155  1156  Names use a _3 suffix for 3-d and _2 for 2-d. 1157  The routines require an initialized lagrange_data array as input: 1158  unsigned d, n[3] = { ... }; 1159  realType *z[3] = { tmalloc(realType, n[0]), ... }; 1160  for(d=0;d<3;++d) lobatto_nodes(z[d],n[d]); 1161  1162  lagrange_data ld[3]; 1163  for(d=0;d<3;++d) lagrange_setup(&ld[d],z[d],n[d]); 1164  1165  Initialization: 1166  opt_data_3 data; 1167  opt_alloc_3(&data, ld); 1168  1169  Use: 1170  const realType *xm[3]; // 3 pointers, each to n[0]*n[1]*n[2] realTypes 1171  // giving the nodal x, y, or z coordinates 1172  1173  const realType x_star[3] = { ... }; // point to find 1174  realType r[3] = { 0,0,0 }; // initial guess with 1175  unsigned c = opt_no_constraints_3; // these constraints active 1176  1177  realType dist = opt_findpt_3(&data,xm,x_star,r,&c); 1178  // minimizer is r with constraints c; 2-norm of resid is dist 1179  1180  Clean-up: 1181  opt_free_3(&data); 1182  1183  for(d=0;d<3;++d) lagrange_free(&ld[d]); 1184  for(d=0;d<3;++d) free(z[d]); 1185  1186  The constraint number works as follows. Let cr be the constraints 1187  on the r variable: 1188  cr = 0 r fixed at -1 1189  cr = 1 r not fixed 1190  cr = 2 r fixed at 1 1191  Then the constraint number is (ct*3+cs)*3+cr 1192  1193  --------------------------------------------------------------------------*/ 1194  1195 /* how many directions are constrained? */ 1196 static const char opt_constr_num_2[9] = { 2, 1, 2, 1, 0, 1, 2, 1, 2 }; 1197 static const char opt_constr_num_3[27] = { 3, 2, 3, 2, 1, 2, 3, 2, 3, 2, 1, 2, 1, 0, 1198  1, 2, 1, 2, 3, 2, 3, 2, 1, 2, 3, 2, 3 }; 1199  1200 /* which direction is constrained? */ 1201 /* static const char opt_constr_dir_2[9] = {-1, 1,-1, 0,-1, 0, -1, 1,-1}; */ 1202 static const char opt_constr_dir_3[27] = { -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, 1, -1, 0, -1, 1203  0, -1, 1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1 }; 1204  1205 /* which direction is not constrained? */ 1206 static const char opt_constr_not[27] = { -1, 0, -1, 1, -1, 1, -1, 0, -1, 2, -1, 2, -1, -1, 1207  -1, 2, -1, 2, -1, 0, -1, 1, -1, 1, -1, 0, -1 }; 1208  1209 static const char opt_constr_wide[27] = { 0x00, 0x01, 0x02, 0x04, 0x05, 0x06, 0x08, 0x09, 0x0a, 1210  0x10, 0x11, 0x12, 0x14, 0x15, 0x16, 0x18, 0x19, 0x1a, 1211  0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a }; 1212  1213 static const unsigned opt_other1_3[3] = { 1, 0, 0 }, opt_other2_3[3] = { 2, 2, 1 }; 1214  1215 static unsigned opt_constr( unsigned constraints, unsigned d ) 1216 { 1217  return ( opt_constr_wide[constraints] >> ( d * 2 ) ) & 3; 1218 } 1219  1220 static void opt_constr_unpack_2( unsigned constraints, unsigned* c ) 1221 { 1222  const char cw = opt_constr_wide[constraints]; 1223  c[0] = cw & 3; 1224  c[1] = cw >> 2; 1225 } 1226  1227 static void opt_constr_unpack_3( unsigned constraints, unsigned* c ) 1228 { 1229  const char cw = opt_constr_wide[constraints]; 1230  c[0] = cw & 3; 1231  c[1] = ( cw >> 2 ) & 3; 1232  c[2] = cw >> 4; 1233 } 1234  1235 static unsigned opt_constr_pack_2( const unsigned* c ) 1236 { 1237  return c[1] * 3 + c[0]; 1238 } 1239  1240 static unsigned opt_constr_pack_3( const unsigned* c ) 1241 { 1242  return ( c[2] * 3 + c[1] ) * 3 + c[0]; 1243 } 1244  1245 /*-------------------------------------------------------------------------- 1246  1247  3 - D 1248  1249  --------------------------------------------------------------------------*/ 1250  1251 void opt_alloc_3( opt_data_3* p, lagrange_data* ld ) 1252 { 1253  const unsigned nr = ld[0].n, ns = ld[1].n, nt = ld[2].n, nf = umax_3( nr * ns, nr * nt, ns * nt ), 1254  ne = umax_3( nr, ns, nt ), nw = 2 * ns * nt + 3 * ns; 1255  p->size[0] = 1; 1256  p->size[1] = nr; 1257  p->size[2] = nr * ns; 1258  p->size[3] = p->size[2] * nt; 1259  p->ld = ld; 1260  p->work = tmalloc( realType, 6 * nf + 9 * ne + nw ); 1261  p->fd.x[0] = p->work + nw; 1262  p->fd.x[1] = p->fd.x[0] + nf; 1263  p->fd.x[2] = p->fd.x[1] + nf; 1264  p->fd.fdn[0] = p->fd.x[2] + nf; 1265  p->fd.fdn[1] = p->fd.fdn[0] + nf; 1266  p->fd.fdn[2] = p->fd.fdn[1] + nf; 1267  p->ed.x[0] = p->fd.fdn[2] + nf; 1268  p->ed.x[1] = p->ed.x[0] + ne; 1269  p->ed.x[2] = p->ed.x[1] + ne; 1270  p->ed.fd1[0] = p->ed.x[2] + ne; 1271  p->ed.fd1[1] = p->ed.fd1[0] + ne; 1272  p->ed.fd1[2] = p->ed.fd1[1] + ne; 1273  p->ed.fd2[0] = p->ed.fd1[2] + ne; 1274  p->ed.fd2[1] = p->ed.fd2[0] + ne; 1275  p->ed.fd2[2] = p->ed.fd2[1] + ne; 1276 } 1277  1278 void opt_free_3( opt_data_3* p ) 1279 { 1280  free( p->work ); 1281 } 1282  1283 static void opt_vol_set_3( opt_data_3* p, const realType r[3] ) 1284 { 1285  lagrange_1( &p->ld[0], r[0] ); 1286  lagrange_1( &p->ld[1], r[1] ); 1287  lagrange_1( &p->ld[2], r[2] ); 1288 } 1289  1290 /* work holds 2*ns*nt + 3*ns realTypes */ 1291 static void opt_vol_intp_3( opt_data_3* p ) 1292 { 1293  unsigned d; 1294  const lagrange_data* ld = p->ld; 1295  1296  for( d = 0; d < 3; ++d ) 1297  p->x[d] = tensor_ig3( ld[0].J, ld[0].D, ld[0].n, ld[1].J, ld[1].D, ld[1].n, ld[2].J, ld[2].D, ld[2].n, 1298  p->elx[d], &p->jac[d * 3], p->work ); 1299 } 1300  1301 void opt_vol_set_intp_3( opt_data_3* p, const realType r[3] ) 1302 { 1303  opt_vol_set_3( p, r ); 1304  opt_vol_intp_3( p ); 1305 } 1306  1307 static void opt_face_proj_3( opt_data_3* p ) 1308 { 1309  unsigned d, off = 0; 1310  const unsigned dn = p->fd.dn, d1 = p->fd.d1, d2 = p->fd.d2, so = p->size[d2] - p->size[d1 + 1], s1 = p->size[d1], 1311  sn = p->size[dn], n1 = p->ld[d1].n, n2 = p->ld[d2].n, nn = p->ld[dn].n; 1312  const realType* D = p->ld[dn].D_z0; 1313  if( opt_constr( p->fd.constraints, dn ) == 2 ) off = p->size[dn + 1] - p->size[dn], D = p->ld[dn].D_zn; 1314  for( d = 0; d < 3; ++d ) 1315  { 1316  unsigned i, j, k, arrindex = 0; 1317  const realType* in = p->elx[d] + off; 1318  for( j = n2; j; --j, in += so ) 1319  for( i = n1; i; --i, ++arrindex, in += s1 ) 1320  { 1321  const realType* ind = in - off; 1322  realType* fdn = &p->fd.fdn[d][arrindex]; 1323  p->fd.x[d][arrindex] = *in; 1324  *fdn = 0; 1325  for( k = 0; k < nn; ++k, ind += sn ) 1326  *fdn += *ind * D[k]; 1327  } 1328  } 1329 } 1330  1331 static void opt_face_set_3( opt_data_3* p, const realType r[3], unsigned constr ) 1332 { 1333  if( p->fd.constraints != constr ) 1334  { 1335  p->fd.constraints = constr; 1336  p->fd.dn = opt_constr_dir_3[constr]; 1337  p->fd.d1 = opt_other1_3[p->fd.dn]; 1338  p->fd.d2 = opt_other2_3[p->fd.dn]; 1339  opt_face_proj_3( p ); 1340  } 1341  lagrange_1( &p->ld[p->fd.d1], r[p->fd.d1] ); 1342  lagrange_1( &p->ld[p->fd.d2], r[p->fd.d2] ); 1343 } 1344  1345 /* work holds 2*ld[d2].n realTypes */ 1346 static void opt_face_intp_3( opt_data_3* p ) 1347 { 1348  unsigned d; 1349  const unsigned dn = p->fd.dn, d1 = p->fd.d1, d2 = p->fd.d2, n1 = p->ld[d1].n, n2 = p->ld[d2].n; 1350  const realType *J1 = p->ld[d1].J, *J2 = p->ld[d2].J, *D1 = p->ld[d1].D, *D2 = p->ld[d2].D; 1351  1352  for( d = 0; d < 3; ++d ) 1353  { 1354  realType g[2]; 1355  p->x[d] = tensor_ig2( J1, D1, n1, J2, D2, n2, p->fd.x[d], &g[0], p->work ); 1356  p->jac[d * 3 + d1] = g[0]; 1357  p->jac[d * 3 + d2] = g[1]; 1358  p->jac[d * 3 + dn] = tensor_i2( J1, n1, J2, n2, p->fd.fdn[d], p->work ); 1359  } 1360 } 1361  1362 static void opt_face_set_intp_3( opt_data_3* p, const realType r[3], unsigned constr ) 1363 { 1364  opt_face_set_3( p, r, constr ); 1365  opt_face_intp_3( p ); 1366 } 1367  1368 static void opt_face_hess_3( opt_data_3* p, realType hess[9] ) 1369 { 1370  unsigned d; 1371  const unsigned d1 = p->fd.d1, d2 = p->fd.d2, n1 = p->ld[d1].n, n2 = p->ld[d2].n; 1372  const realType *J1 = p->ld[d1].J, *J2 = p->ld[d2].J, *D1 = p->ld[d1].D, *D2 = p->ld[d2].D, *S1 = p->ld[d1].D2, 1373  *S2 = p->ld[d2].D2; 1374  1375  lagrange_2u( &p->ld[d1] ); 1376  lagrange_2u( &p->ld[d2] ); 1377  1378  for( d = 0; d < 3; ++d ) 1379  { 1380  (void)tensor_ig2( J1, S1, n1, J2, S2, n2, p->fd.x[d], hess + d * 3, p->work ); 1381  hess[d * 3 + 0] = tensor_i2( S1, n1, J2, n2, p->fd.x[d], p->work ); 1382  hess[d * 3 + 1] = tensor_i2( J1, n1, S2, n2, p->fd.x[d], p->work ); 1383  hess[d * 3 + 2] = tensor_i2( D1, n1, D2, n2, p->fd.x[d], p->work ); 1384  } 1385 } 1386  1387 static void opt_edge_proj_3( opt_data_3* p ) 1388 { 1389  unsigned d, off, off1 = 0, off2 = 0; 1390  const unsigned de = p->ed.de, d1 = p->ed.d1, d2 = p->ed.d2, se = p->size[de], s1 = p->size[d1], s2 = p->size[d2], 1391  ne = p->ld[de].n, n1 = p->ld[d1].n, n2 = p->ld[d2].n; 1392  const realType *fD1, *fD2; 1393  if( opt_constr( p->ed.constraints, d1 ) == 0 ) 1394  fD1 = p->ld[d1].D_z0; 1395  else 1396  fD1 = p->ld[d1].D_zn, off1 = p->size[d1 + 1] - p->size[d1]; 1397  if( opt_constr( p->ed.constraints, d2 ) == 0 ) 1398  fD2 = p->ld[d2].D_z0; 1399  else 1400  fD2 = p->ld[d2].D_zn, off2 = p->size[d2 + 1] - p->size[d2]; 1401  off = off1 + off2; 1402  for( d = 0; d < 3; ++d ) 1403  { 1404  unsigned i, j; 1405  const realType* in = p->elx[d] + off; 1406  for( i = 0; i < ne; ++i, in += se ) 1407  { 1408  const realType *in1 = in - off1, *in2 = in - off2; 1409  realType *fd1 = &p->ed.fd1[d][i], *fd2 = &p->ed.fd2[d][i]; 1410  p->ed.x[d][i] = *in; 1411  *fd1 = *fd2 = 0; 1412  for( j = 0; j < n1; ++j, in1 += s1 ) 1413  *fd1 += *in1 * fD1[j]; 1414  for( j = 0; j < n2; ++j, in2 += s2 ) 1415  *fd2 += *in2 * fD2[j]; 1416  } 1417  } 1418 } 1419  1420 static void opt_edge_set_3( opt_data_3* p, const realType r[3], unsigned constr ) 1421 { 1422  if( p->ed.constraints != constr ) 1423  { 1424  p->ed.constraints = constr; 1425  p->ed.de = opt_constr_not[constr]; 1426  p->ed.d1 = opt_other1_3[p->ed.de]; 1427  p->ed.d2 = opt_other2_3[p->ed.de]; 1428  opt_edge_proj_3( p ); 1429  } 1430  lagrange_1( &p->ld[p->ed.de], r[p->ed.de] ); 1431 } 1432  1433 static void opt_edge_intp_3( opt_data_3* p ) 1434 { 1435  unsigned d; 1436  const unsigned de = p->ed.de, d1 = p->ed.d1, d2 = p->ed.d2, n = p->ld[de].n; 1437  const realType *J = p->ld[de].J, *D = p->ld[de].D; 1438  1439  for( d = 0; d < 3; ++d ) 1440  { 1441  p->x[d] = tensor_ig1( J, D, n, p->ed.x[d], &p->jac[d * 3 + de] ); 1442  p->jac[d * 3 + d1] = tensor_i1( J, n, p->ed.fd1[d] ); 1443  p->jac[d * 3 + d2] = tensor_i1( J, n, p->ed.fd2[d] ); 1444  } 1445 } 1446  1447 static void opt_edge_set_intp_3( opt_data_3* p, const realType r[3], unsigned constr ) 1448 { 1449  opt_edge_set_3( p, r, constr ); 1450  opt_edge_intp_3( p ); 1451 } 1452  1453 static void opt_edge_hess_3( opt_data_3* p, realType hess[3] ) 1454 { 1455  unsigned d; 1456  const unsigned de = p->ed.de, n = p->ld[de].n; 1457  const realType* D2 = p->ld[de].D2; 1458  lagrange_2u( &p->ld[de] ); 1459  for( d = 0; d < 3; ++d ) 1460  hess[d] = tensor_i1( D2, n, p->ed.x[d] ); 1461 } 1462  1463 static void opt_point_proj_3( opt_data_3* p ) 1464 { 1465  unsigned off[3], offt, d, c[3]; 1466  const realType* fD[3]; 1467  opt_constr_unpack_3( p->pd.constraints, c ); 1468  for( d = 0; d < 3; ++d ) 1469  if( c[d] == 0 ) 1470  fD[d] = p->ld[d].D_z0, off[d] = 0; 1471  else 1472  fD[d] = p->ld[d].D_zn, off[d] = p->size[d + 1] - p->size[d]; 1473  offt = off[0] + off[1] + off[2]; 1474  for( d = 0; d < 9; ++d ) 1475  p->pd.jac[d] = 0; 1476  for( d = 0; d < 3; ++d ) 1477  { 1478  unsigned i, j; 1479  p->pd.x[d] = p->elx[d][offt]; 1480  for( i = 0; i < 3; ++i ) 1481  { 1482  const realType* in = p->elx[d] + offt - off[i]; 1483  for( j = 0; j < p->ld[i].n; ++j, in += p->size[i] ) 1484  p->pd.jac[d * 3 + i] += *in * fD[i][j]; 1485  } 1486  } 1487 } 1488  1489 static void opt_point_set_3( opt_data_3* p, unsigned constr ) 1490 { 1491  if( p->pd.constraints != constr ) 1492  { 1493  p->pd.constraints = constr; 1494  opt_point_proj_3( p ); 1495  } 1496 } 1497  1498 static void opt_point_intp_3( opt_data_3* p ) 1499 { 1500  memcpy( p->x, p->pd.x, 3 * sizeof( realType ) ); 1501  memcpy( p->jac, p->pd.jac, 9 * sizeof( realType ) ); 1502 } 1503  1504 static void opt_point_set_intp_3( opt_data_3* p, unsigned constr ) 1505 { 1506  opt_point_set_3( p, constr ); 1507  opt_point_intp_3( p ); 1508 } 1509  1510 #define DIAGNOSTICS 0 1511  1512 double opt_findpt_3( opt_data_3* p, 1513  const realType* const elx[3], 1514  const realType xstar[3], 1515  realType r[3], 1516  unsigned* constr ) 1517 { 1518  realType dr[3], resid[3], steep[3]; 1519  1520  unsigned c = *constr, ac, d, cc[3], step = 0; 1521  1522  p->elx[0] = elx[0], p->elx[1] = elx[1], p->elx[2] = elx[2]; 1523  1524  p->fd.constraints = opt_no_constraints_3; 1525  p->ed.constraints = opt_no_constraints_3; 1526  p->pd.constraints = opt_no_constraints_3; 1527  1528 #if DIAGNOSTICS 1529  printf( "opt_findpt: xstar = %g, %g, %g\n", xstar[0], xstar[1], xstar[2] ); 1530 #endif 1531  1532  do 1533  { 1534  ++step; 1535  if( step == 50 ) /*fail("%s: opt_findpt_3 did not converge\n",__FILE__);*/ 1536  return 1.e+30; 1537 #if DIAGNOSTICS 1538  printf( " iteration %u\n", step ); 1539  printf( " %d constraint(s) active\n", (int)opt_constr_num_3[c] ); 1540 #endif 1541  /* update face/edge/point data if necessary, 1542  and evaluate x(r) as well as the jacobian */ 1543  switch( opt_constr_num_3[c] ) 1544  { 1545  case 0: 1546  opt_vol_set_intp_3( p, r ); 1547  break; 1548  case 1: 1549  opt_face_set_intp_3( p, r, c ); 1550  break; 1551  case 2: 1552  opt_edge_set_intp_3( p, r, c ); 1553  break; 1554  case 3: 1555  opt_point_set_intp_3( p, c ); 1556  break; 1557  } 1558 #if DIAGNOSTICS 1559  printf( " r = %g, %g, %g\n", r[0], r[1], r[2] ); 1560  printf( " x = %g, %g, %g\n", p->x[0], p->x[1], p->x[2] ); 1561 #endif 1562  /* compute residual */ 1563  for( d = 0; d < 3; ++d ) 1564  resid[d] = xstar[d] - p->x[d]; 1565 #if DIAGNOSTICS 1566  printf( " resid = %g, %g, %g\n", resid[0], resid[1], resid[2] ); 1567  printf( " 2-norm = %g\n", r2norm_3( resid[0], resid[1], resid[2] ) ); 1568 #endif 1569  /* check constraints against steepest descent direction */ 1570  ac = c; 1571  if( opt_constr_num_3[c] ) 1572  { 1573  opt_constr_unpack_3( c, cc ); 1574  mat_app_3c( steep, p->jac, resid ); /* steepest descent = J^T r */ 1575 #if DIAGNOSTICS 1576  printf( " steepest descent = %g, %g, %g\n", steep[0], steep[1], steep[2] ); 1577 #endif 1578  for( d = 0; d < 3; ++d ) 1579  if( ( cc[d] == 0 && steep[d] > 0 ) || ( cc[d] == 2 && steep[d] < 0 ) ) cc[d] = 1; 1580  ac = opt_constr_pack_3( cc ); 1581  } 1582  /* update face/edge/point data if necessary */ 1583  if( ac != c ) 1584  { 1585  c = ac; 1586 #if DIAGNOSTICS 1587  printf( " relaxed to %d constraints\n", (int)opt_constr_num_3[c] ); 1588 #endif 1589  switch( opt_constr_num_3[c] ) 1590  { 1591  case 1: 1592  opt_face_set_3( p, r, c ); 1593  break; 1594  case 2: 1595  opt_edge_set_3( p, r, c ); 1596  break; 1597  case 3: 1598  opt_point_set_3( p, c ); 1599  break; 1600  } 1601  } 1602  /* compute Newton step */ 1603  switch( opt_constr_num_3[c] ) 1604  { 1605  case 0: 1606  tinyla_solve_3( dr, p->jac, resid ); 1607  break; 1608  case 1: { 1609  const unsigned dn = p->fd.dn, d1 = p->fd.d1, d2 = p->fd.d2; 1610  realType A[4], H[9]; 1611  const realType* J = p->jac; 1612  opt_face_hess_3( p, H ); 1613  A[0] = J[d1] * J[d1] + J[3 + d1] * J[3 + d1] + J[6 + d1] * J[6 + d1]; 1614  A[1] = J[d2] * J[d2] + J[3 + d2] * J[3 + d2] + J[6 + d2] * J[6 + d2]; 1615  A[2] = J[d1] * J[d2] + J[3 + d1] * J[3 + d2] + J[6 + d1] * J[6 + d2]; 1616  A[0] -= resid[0] * H[0] + resid[1] * H[3] + resid[2] * H[6]; 1617  A[1] -= resid[0] * H[1] + resid[1] * H[4] + resid[2] * H[7]; 1618  A[2] -= resid[0] * H[2] + resid[1] * H[5] + resid[2] * H[8]; 1619  tinyla_solve_sym_2( &dr[d1], &dr[d2], A, steep[d1], steep[d2] ); 1620  dr[dn] = 0; 1621  } 1622  break; 1623  case 2: { 1624  const unsigned de = p->ed.de, d1 = p->ed.d1, d2 = p->ed.d2; 1625  realType fac, H[3]; 1626  const realType* J = p->jac + de; 1627  opt_edge_hess_3( p, H ); 1628  fac = J[0] * J[0] + J[3] * J[3] + J[6] * J[6] - ( resid[0] * H[0] + resid[1] * H[1] + resid[2] * H[2] ); 1629  dr[de] = steep[de] / fac; 1630  dr[d1] = 0, dr[d2] = 0; 1631  } 1632  break; 1633  case 3: 1634  dr[0] = dr[1] = dr[2] = 0; 1635  break; 1636  } 1637 #if DIAGNOSTICS 1638  printf( " dr = %g, %g, %g\n", dr[0], dr[1], dr[2] ); 1639 #endif 1640  /* project new iteration onto [-1,1]^3 */ 1641  opt_constr_unpack_3( c, cc ); 1642  for( d = 0; d < 3; ++d ) 1643  { 1644  if( cc[d] != 1 ) continue; 1645  r[d] += dr[d]; 1646  if( r[d] <= -1 ) 1647  dr[d] -= r[d] + 1, r[d] = -1, cc[d] = 0; 1648  else if( r[d] >= 1 ) 1649  dr[d] -= r[d] - 1, r[d] = 1, cc[d] = 2; 1650  } 1651  c = opt_constr_pack_3( cc ); 1652  } while( r1norm_3( dr[0], dr[1], dr[2] ) > 3 * MOAB_POLY_EPS ); 1653  *constr = c; 1654 #if 0 1655  printf("opt_findpt_3 converged in %u iterations\n", step); 1656 #endif 1657  return r2norm_3( resid[0], resid[1], resid[2] ); 1658 } 1659  1660 #undef DIAGNOSTICS 1661  1662 void opt_alloc_2( opt_data_2* p, lagrange_data* ld ) 1663 { 1664  const unsigned nr = ld[0].n, ns = ld[1].n, ne = umax_2( nr, ns ), nw = 2 * ns; 1665  p->size[0] = 1; 1666  p->size[1] = nr; 1667  p->size[2] = nr * ns; 1668  p->ld = ld; 1669  p->work = tmalloc( realType, 4 * ne + nw ); 1670  p->ed.x[0] = p->work + nw; 1671  p->ed.x[1] = p->ed.x[0] + ne; 1672  p->ed.fd1[0] = p->ed.x[1] + ne; 1673  p->ed.fd1[1] = p->ed.fd1[0] + ne; 1674 } 1675  1676 void opt_free_2( opt_data_2* p ) 1677 { 1678  free( p->work ); 1679 } 1680  1681 static void opt_area_set_2( opt_data_2* p, const realType r[2] ) 1682 { 1683  lagrange_1( &p->ld[0], r[0] ); 1684  lagrange_1( &p->ld[1], r[1] ); 1685 } 1686  1687 /* work holds 2*ns realTypes */ 1688 static void opt_area_intp_2( opt_data_2* p ) 1689 { 1690  unsigned d; 1691  const lagrange_data* ld = p->ld; 1692  1693  for( d = 0; d < 2; ++d ) 1694  p->x[d] = 1695  tensor_ig2( ld[0].J, ld[0].D, ld[0].n, ld[1].J, ld[1].D, ld[1].n, p->elx[d], &p->jac[d * 2], p->work ); 1696 } 1697  1698 static void opt_area_set_intp_2( opt_data_2* p, const realType r[2] ) 1699 { 1700  opt_area_set_2( p, r ); 1701  opt_area_intp_2( p ); 1702 } 1703  1704 static void opt_edge_proj_2( opt_data_2* p ) 1705 { 1706  unsigned d, off = 0; 1707  const unsigned de = p->ed.de, d1 = p->ed.d1, se = p->size[de], s1 = p->size[d1], ne = p->ld[de].n, n1 = p->ld[d1].n; 1708  const realType* fD1; 1709  if( opt_constr( p->ed.constraints, d1 ) == 0 ) 1710  fD1 = p->ld[d1].D_z0; 1711  else 1712  fD1 = p->ld[d1].D_zn, off = p->size[d1 + 1] - p->size[d1]; 1713  for( d = 0; d < 2; ++d ) 1714  { 1715  unsigned i, j; 1716  const realType* in = p->elx[d] + off; 1717  for( i = 0; i < ne; ++i, in += se ) 1718  { 1719  const realType* in1 = in - off; 1720  realType* fd1 = &p->ed.fd1[d][i]; 1721  p->ed.x[d][i] = *in; 1722  *fd1 = 0; 1723  for( j = 0; j < n1; ++j, in1 += s1 ) 1724  *fd1 += *in1 * fD1[j]; 1725  } 1726  } 1727 } 1728  1729 static void opt_edge_set_2( opt_data_2* p, const realType r[2], unsigned constr ) 1730 { 1731  if( p->ed.constraints != constr ) 1732  { 1733  p->ed.constraints = constr; 1734  p->ed.de = opt_constr_not[constr]; 1735  p->ed.d1 = 1 - p->ed.de; 1736  opt_edge_proj_2( p ); 1737  } 1738  lagrange_1( &p->ld[p->ed.de], r[p->ed.de] ); 1739 } 1740  1741 static void opt_edge_intp_2( opt_data_2* p ) 1742 { 1743  unsigned d; 1744  const unsigned de = p->ed.de, d1 = p->ed.d1, n = p->ld[de].n; 1745  const realType *J = p->ld[de].J, *D = p->ld[de].D; 1746  for( d = 0; d < 2; ++d ) 1747  { 1748  p->x[d] = tensor_ig1( J, D, n, p->ed.x[d], &p->jac[d * 2 + de] ); 1749  p->jac[d * 2 + d1] = tensor_i1( J, n, p->ed.fd1[d] ); 1750  } 1751 } 1752  1753 static void opt_edge_set_intp_2( opt_data_2* p, const realType r[2], unsigned constr ) 1754 { 1755  opt_edge_set_2( p, r, constr ); 1756  opt_edge_intp_2( p ); 1757 } 1758  1759 static void opt_edge_hess_2( opt_data_2* p, realType hess[2] ) 1760 { 1761  unsigned d; 1762  const unsigned de = p->ed.de, n = p->ld[de].n; 1763  const realType* D2 = p->ld[de].D2; 1764  lagrange_2u( &p->ld[de] ); 1765  for( d = 0; d < 2; ++d ) 1766  hess[d] = tensor_i1( D2, n, p->ed.x[d] ); 1767 } 1768  1769 static void opt_point_proj_2( opt_data_2* p ) 1770 { 1771  unsigned off[2], offt, d, c[2]; 1772  const realType* fD[2]; 1773  opt_constr_unpack_2( p->pd.constraints, c ); 1774  for( d = 0; d < 2; ++d ) 1775  if( c[d] == 0 ) 1776  fD[d] = p->ld[d].D_z0, off[d] = 0; 1777  else 1778  fD[d] = p->ld[d].D_zn, off[d] = p->size[d + 1] - p->size[d]; 1779  offt = off[0] + off[1]; 1780  for( d = 0; d < 4; ++d ) 1781  p->pd.jac[d] = 0; 1782  for( d = 0; d < 2; ++d ) 1783  { 1784  unsigned i, j; 1785  p->pd.x[d] = p->elx[d][offt]; 1786  for( i = 0; i < 2; ++i ) 1787  { 1788  const realType* in = p->elx[d] + offt - off[i]; 1789  for( j = 0; j < p->ld[i].n; ++j, in += p->size[i] ) 1790  p->pd.jac[d * 2 + i] += *in * fD[i][j]; 1791  } 1792  } 1793 } 1794  1795 static void opt_point_set_2( opt_data_2* p, unsigned constr ) 1796 { 1797  if( p->pd.constraints != constr ) 1798  { 1799  p->pd.constraints = constr; 1800  opt_point_proj_2( p ); 1801  } 1802 } 1803  1804 static void opt_point_intp_2( opt_data_2* p ) 1805 { 1806  memcpy( p->x, p->pd.x, 2 * sizeof( realType ) ); 1807  memcpy( p->jac, p->pd.jac, 4 * sizeof( realType ) ); 1808 } 1809  1810 static void opt_point_set_intp_2( opt_data_2* p, unsigned constr ) 1811 { 1812  opt_point_set_2( p, constr ); 1813  opt_point_intp_2( p ); 1814 } 1815  1816 #define DIAGNOSTICS 0 1817  1818 double opt_findpt_2( opt_data_2* p, 1819  const realType* const elx[2], 1820  const realType xstar[2], 1821  realType r[2], 1822  unsigned* constr ) 1823 { 1824  realType dr[2], resid[2], steep[2]; 1825  1826  unsigned c = *constr, ac, d, cc[2], step = 0; 1827  1828  p->elx[0] = elx[0], p->elx[1] = elx[1]; 1829  1830  p->ed.constraints = opt_no_constraints_2; 1831  p->pd.constraints = opt_no_constraints_2; 1832  1833 #if DIAGNOSTICS 1834  printf( "opt_findpt: xstar = %g, %g\n", xstar[0], xstar[1] ); 1835 #endif 1836  1837  do 1838  { 1839  ++step; 1840  if( step == 50 ) /*fail("%s: opt_findpt_2 did not converge\n",__FILE__);*/ 1841  return 1.e+30; 1842 #if DIAGNOSTICS 1843  printf( " iteration %u\n", step ); 1844  printf( " %d constraint(s) active\n", (int)opt_constr_num_2[c] ); 1845 #endif 1846  /* update face/edge/point data if necessary, 1847  and evaluate x(r) as well as the jacobian */ 1848  switch( opt_constr_num_2[c] ) 1849  { 1850  case 0: 1851  opt_area_set_intp_2( p, r ); 1852  break; 1853  case 1: 1854  opt_edge_set_intp_2( p, r, c ); 1855  break; 1856  case 2: 1857  opt_point_set_intp_2( p, c ); 1858  break; 1859  } 1860 #if DIAGNOSTICS 1861  printf( " r = %g, %g\n", r[0], r[1] ); 1862  printf( " x = %g, %g\n", p->x[0], p->x[1] ); 1863 #endif 1864  /* compute residual */ 1865  for( d = 0; d < 2; ++d ) 1866  resid[d] = xstar[d] - p->x[d]; 1867 #if DIAGNOSTICS 1868  printf( " resid = %g, %g\n", resid[0], resid[1] ); 1869  printf( " 2-norm = %g\n", r2norm_2( resid[0], resid[1] ) ); 1870 #endif 1871  /* check constraints against steepest descent direction */ 1872  ac = c; 1873  if( opt_constr_num_2[c] ) 1874  { 1875  opt_constr_unpack_2( c, cc ); 1876  mat_app_2c( steep, p->jac, resid ); /* steepest descent = J^T r */ 1877 #if DIAGNOSTICS 1878  printf( " steepest descent = %g, %g\n", steep[0], steep[1] ); 1879 #endif 1880  for( d = 0; d < 2; ++d ) 1881  if( ( cc[d] == 0 && steep[d] > 0 ) || ( cc[d] == 2 && steep[d] < 0 ) ) cc[d] = 1; 1882  ac = opt_constr_pack_2( cc ); 1883  } 1884  /* update face/edge/point data if necessary */ 1885  if( ac != c ) 1886  { 1887  c = ac; 1888 #if DIAGNOSTICS 1889  printf( " relaxed to %d constraints\n", (int)opt_constr_num_2[c] ); 1890 #endif 1891  switch( opt_constr_num_2[c] ) 1892  { 1893  case 1: 1894  opt_edge_set_2( p, r, c ); 1895  break; 1896  case 2: 1897  opt_point_set_2( p, c ); 1898  break; 1899  } 1900  } 1901  /* compute Newton step */ 1902  switch( opt_constr_num_2[c] ) 1903  { 1904  case 0: 1905  tinyla_solve_2( dr, p->jac, resid ); 1906  break; 1907  case 1: { 1908  const unsigned de = p->ed.de, d1 = p->ed.d1; 1909  realType fac, H[2]; 1910  const realType* J = p->jac + de; 1911  opt_edge_hess_2( p, H ); 1912  fac = J[0] * J[0] + J[2] * J[2] - ( resid[0] * H[0] + resid[1] * H[1] ); 1913  dr[de] = steep[de] / fac; 1914  dr[d1] = 0; 1915  } 1916  break; 1917  case 2: 1918  dr[0] = dr[1] = 0; 1919  break; 1920  } 1921 #if DIAGNOSTICS 1922  printf( " dr = %g, %g\n", dr[0], dr[1] ); 1923 #endif 1924  /* project new iteration onto [-1,1]^2 */ 1925  opt_constr_unpack_2( c, cc ); 1926  for( d = 0; d < 2; ++d ) 1927  { 1928  if( cc[d] != 1 ) continue; 1929  r[d] += dr[d]; 1930  if( r[d] <= -1 ) 1931  dr[d] -= r[d] + 1, r[d] = -1, cc[d] = 0; 1932  else if( r[d] >= 1 ) 1933  dr[d] -= r[d] - 1, r[d] = 1, cc[d] = 2; 1934  } 1935  c = opt_constr_pack_2( cc ); 1936  } while( r1norm_2( dr[0], dr[1] ) > 2 * MOAB_POLY_EPS ); 1937  *constr = c; 1938  return r2norm_2( resid[0], resid[1] ); 1939 } 1940  1941 #undef DIAGNOSTICS 1942  1943 /*-------------------------------------------------------------------------- 1944  Point Finding (interface/top-level) 1945  1946  Initializing the data: 1947  unsigned nel; // number of elements 1948  const unsigned n[3]; // number of nodes in r, s, t directions 1949  const realType *xm[3]; // n[0]*n[1]*n[2]*nel x,y,z coordinates 1950  realType tol = 0.01; // how far point is allowed to be outside element 1951  // relative to element size 1952  unsigned max_size = n[0]*n[1]*n[2]*nel; // maximum size of hash table 1953  1954  findpt_data_3 *data = findpt_setup_3(xm,n,nel,max_size,tol); 1955  1956  Using the data: 1957  realType x[3] = { ... }; // point to find 1958  int el; // element number 1959  realType r[3]; // parametric coordinates 1960  int guess = 0; // do we use (el,r,s,t) as an initial guess? 1961  int code; // 0 : normal, -1 : outside all elements, 1962  // 1 : border, or outside but within tolerance 1963  realType dist; // distance in xyz space from returned (el,r,s,t) to given 1964  // (x,y,z) 1965  1966  code = findpt_3(data, x, guess, &el, r, &dist); 1967  1968  When done: 1969  findpt_free_3(&data); 1970  1971  --------------------------------------------------------------------------*/ 1972  1973 /* heap sort on A[0:n-1] with key A[i]->dist 1974  precondition: n!=0 */ 1975 static void findpt_list_sort( findpt_listel** A, unsigned n ) 1976 { 1977  unsigned i; 1978  --A; /* make A have a base index of 1 */ 1979  /* build heap */ 1980  for( i = 2; i <= n; ++i ) 1981  { 1982  findpt_listel* item = A[i]; 1983  unsigned hole = i, parent = hole >> 1; 1984  if( A[parent]->dist >= item->dist ) continue; 1985  do 1986  { 1987  A[hole] = A[parent]; 1988  hole = parent; 1989  parent >>= 1; 1990  } while( parent && A[parent]->dist < item->dist ); 1991  A[hole] = item; 1992  } 1993  /* extract */ 1994  for( i = n - 1; i; --i ) 1995  { 1996  findpt_listel* item = A[i + 1]; 1997  unsigned hole = 1; 1998  A[i + 1] = A[1]; 1999  for( ;; ) 2000  { 2001  unsigned ch = hole << 1, r = ch + 1; 2002  if( r <= i && A[ch]->dist < A[r]->dist ) ch = r; 2003  if( ch > i || item->dist >= A[ch]->dist ) break; 2004  A[hole] = A[ch]; 2005  hole = ch; 2006  } 2007  A[hole] = item; 2008  } 2009 } 2010  2011 findpt_data_2* findpt_setup_2( const realType* const xw[2], 2012  const unsigned n[2], 2013  uint nel, 2014  uint max_hash_size, 2015  realType bbox_tol ) 2016 { 2017  unsigned d; 2018  findpt_data_2* p = tmalloc( findpt_data_2, 1 ); 2019  2020  p->hash = tmalloc( hash_data_2, 1 ); 2021  p->od = tmalloc( opt_data_2, 1 ); 2022  2023  for( d = 0; d < 2; ++d ) 2024  p->xw[d] = xw[d]; 2025  p->nptel = n[0] * n[1]; 2026  2027  hash_build_2( p->hash, xw, n, nel, max_hash_size, bbox_tol ); 2028  2029  for( d = 0; d < 2; ++d ) 2030  { 2031  p->z[d] = tmalloc( realType, n[d] ); 2032  lobatto_nodes( p->z[d], n[d] ); 2033  lagrange_setup( &p->ld[d], p->z[d], n[d] ); 2034  } 2035  2036  p->list = tmalloc( findpt_listel, p->hash->max ); 2037  p->sorted = tmalloc( findpt_listel*, p->hash->max ); 2038  2039  opt_alloc_2( p->od, p->ld ); 2040  p->od_work = p->od->work; 2041  2042  return p; 2043 } 2044  2045 findpt_data_3* findpt_setup_3( const realType* const xw[3], 2046  const unsigned n[3], 2047  uint nel, 2048  uint max_hash_size, 2049  realType bbox_tol ) 2050 { 2051  unsigned d; 2052  findpt_data_3* p = tmalloc( findpt_data_3, 1 ); 2053  2054  p->hash = tmalloc( hash_data_3, 1 ); 2055  p->od = tmalloc( opt_data_3, 1 ); 2056  2057  for( d = 0; d < 3; ++d ) 2058  p->xw[d] = xw[d]; 2059  p->nptel = n[0] * n[1] * n[2]; 2060  2061  hash_build_3( p->hash, xw, n, nel, max_hash_size, bbox_tol ); 2062  2063  for( d = 0; d < 3; ++d ) 2064  { 2065  p->z[d] = tmalloc( realType, n[d] ); 2066  lobatto_nodes( p->z[d], n[d] ); 2067  lagrange_setup( &p->ld[d], p->z[d], n[d] ); 2068  } 2069  2070  p->list = tmalloc( findpt_listel, p->hash->max ); 2071  p->sorted = tmalloc( findpt_listel*, p->hash->max ); 2072  2073  opt_alloc_3( p->od, p->ld ); 2074  p->od_work = p->od->work; 2075  2076  return p; 2077 } 2078  2079 void findpt_free_2( findpt_data_2* p ) 2080 { 2081  unsigned d; 2082  opt_free_2( p->od ); 2083  free( p->od ); 2084  hash_free_2( p->hash ); 2085  free( p->hash ); 2086  free( p->list ); 2087  free( p->sorted ); 2088  for( d = 0; d < 2; ++d ) 2089  free( p->z[d] ); 2090  free( p ); 2091 } 2092  2093 void findpt_free_3( findpt_data_3* p ) 2094 { 2095  unsigned d; 2096  opt_free_3( p->od ); 2097  free( p->od ); 2098  hash_free_3( p->hash ); 2099  free( p->hash ); 2100  free( p->list ); 2101  free( p->sorted ); 2102  for( d = 0; d < 3; ++d ) 2103  free( p->z[d] ); 2104  free( p ); 2105 } 2106  2107 const realType* findpt_allbnd_2( const findpt_data_2* p ) 2108 { 2109  return p->hash->bnd; 2110 } 2111  2112 const realType* findpt_allbnd_3( const findpt_data_3* p ) 2113 { 2114  return p->hash->bnd; 2115 } 2116  2117 static void findpt_hash_2( findpt_data_2* p, const realType x[2] ) 2118 { 2119  findpt_listel *list = p->list, **sorted = p->sorted; 2120  const uint hi = hash_index_2( p->hash, x ); 2121  const uint* offset = p->hash->offset; 2122  uint i; 2123  const uint b = offset[hi], e = offset[hi + 1]; 2124  for( i = b; i != e; ++i ) 2125  { 2126  const uint el = offset[i]; 2127  realType* r = &list->r[0]; 2128  const obbox_2* obb = &p->hash->obb[el]; 2129  if( obbox_axis_test_2( obb, x ) ) continue; 2130  if( obbox_test_2( obb, x, r ) ) continue; 2131  list->el = el; 2132  list->dist = r1norm_2( r[0], r[1] ); 2133  *sorted++ = list++; 2134  } 2135  p->end = sorted; 2136  if( p->end != p->sorted ) findpt_list_sort( p->sorted, p->end - p->sorted ); 2137 } 2138  2139 static void findpt_hash_3( findpt_data_3* p, const realType x[3] ) 2140 { 2141  findpt_listel *list = p->list, **sorted = p->sorted; 2142  const uint hi = hash_index_3( p->hash, x ); 2143  const uint* offset = p->hash->offset; 2144  uint i; 2145  const uint b = offset[hi], e = offset[hi + 1]; 2146  for( i = b; i != e; ++i ) 2147  { 2148  const uint el = offset[i]; 2149  realType* r = &list->r[0]; 2150  const obbox_3* obb = &p->hash->obb[el]; 2151  if( obbox_axis_test_3( obb, x ) ) continue; 2152  if( obbox_test_3( obb, x, r ) ) continue; 2153  list->el = el; 2154  list->dist = r1norm_3( r[0], r[1], r[2] ); 2155  *sorted++ = list++; 2156  } 2157  p->end = sorted; 2158  if( p->end != p->sorted ) findpt_list_sort( p->sorted, p->end - p->sorted ); 2159 } 2160  2161 static int findpt_guess_2( findpt_data_2* p, const realType x[2], uint el, realType r[2], realType* dist ) 2162 { 2163  const uint arrindex = p->nptel * el; 2164  const realType* elx[2]; 2165  realType g[2]; 2166  unsigned c = opt_no_constraints_2; 2167  const obbox_2* obb = &p->hash->obb[el]; 2168  elx[0] = p->xw[0] + arrindex; 2169  elx[1] = p->xw[1] + arrindex; 2170  if( obbox_axis_test_2( obb, x ) || obbox_test_2( obb, x, g ) ) return 0; 2171  *dist = opt_findpt_2( p->od, elx, x, r, &c ); 2172  return c == opt_no_constraints_2; 2173 } 2174  2175 static int findpt_guess_3( findpt_data_3* p, const realType x[3], uint el, realType r[3], realType* dist ) 2176 { 2177  const uint arrindex = p->nptel * el; 2178  const realType* elx[3]; 2179  realType g[3]; 2180  unsigned c = opt_no_constraints_3; 2181  const obbox_3* obb = &p->hash->obb[el]; 2182  elx[0] = p->xw[0] + arrindex; 2183  elx[1] = p->xw[1] + arrindex; 2184  elx[2] = p->xw[2] + arrindex; 2185  if( obbox_axis_test_3( obb, x ) || obbox_test_3( obb, x, g ) ) return 0; 2186  *dist = opt_findpt_3( p->od, elx, x, r, &c ); 2187  return c == opt_no_constraints_3; 2188 } 2189  2190 #define DIAGNOSTICS 0 2191  2192 static int findpt_pass_2( findpt_data_2* p, const realType x[2], uint* el, realType r[2], realType* dist_min ) 2193 { 2194  findpt_listel** qq = p->sorted; 2195  const realType* bnd; 2196  realType dist; 2197  do 2198  { 2199  findpt_listel* q = *qq; 2200  const uint arrindex = p->nptel * q->el; 2201  const realType* elx[2]; 2202  unsigned c = opt_no_constraints_2; 2203  elx[0] = p->xw[0] + arrindex; 2204  elx[1] = p->xw[1] + arrindex; 2205  dist = opt_findpt_2( p->od, elx, x, q->r, &c ); 2206  if( qq == p->sorted || dist < *dist_min || c == opt_no_constraints_2 ) 2207  { 2208  *dist_min = dist; 2209  *el = q->el; 2210  memcpy( r, q->r, 2 * sizeof( realType ) ); 2211  if( c == opt_no_constraints_2 ) return 0; 2212  } 2213  } while( ++qq != p->end ); 2214  bnd = p->hash->obb[*el].axis_bnd; 2215  return *dist_min > r2norm_2( bnd[1] - bnd[0], bnd[3] - bnd[2] ) ? -1 : 1; 2216 } 2217  2218 static int findpt_pass_3( findpt_data_3* p, const realType x[3], uint* el, realType r[3], realType* dist_min ) 2219 { 2220  findpt_listel** qq = p->sorted; 2221  const realType* bnd; 2222  realType dist; 2223  do 2224  { 2225  findpt_listel* q = *qq; 2226  const uint arrindex = p->nptel * q->el; 2227  const realType* elx[3]; 2228  unsigned c = opt_no_constraints_3; 2229  elx[0] = p->xw[0] + arrindex; 2230  elx[1] = p->xw[1] + arrindex; 2231  elx[2] = p->xw[2] + arrindex; 2232  dist = opt_findpt_3( p->od, elx, x, q->r, &c ); 2233  if( qq == p->sorted || dist < *dist_min || c == opt_no_constraints_3 ) 2234  { 2235  *dist_min = dist; 2236  *el = q->el; 2237  memcpy( r, q->r, 3 * sizeof( realType ) ); 2238  if( c == opt_no_constraints_3 ) 2239  { 2240 #if DIAGNOSTICS 2241  printf( "point found in element #%d\n", qq - p->sorted ); 2242 #endif 2243  return 0; 2244  } 2245  } 2246  } while( ++qq != p->end ); 2247  bnd = p->hash->obb[*el].axis_bnd; 2248  return *dist_min > r2norm_3( bnd[1] - bnd[0], bnd[3] - bnd[2], bnd[5] - bnd[4] ) ? -1 : 1; 2249 } 2250  2251 int findpt_2( findpt_data_2* p, const realType x[2], int guess, uint* el, realType r[2], realType* dist ) 2252 { 2253  if( guess && findpt_guess_2( p, x, *el, r, dist ) ) return 0; 2254  findpt_hash_2( p, x ); 2255  if( p->sorted == p->end ) return -1; 2256  return findpt_pass_2( p, x, el, r, dist ); 2257 } 2258  2259 int findpt_3( findpt_data_3* p, const realType x[3], int guess, uint* el, realType r[3], realType* dist ) 2260 { 2261  if( guess && findpt_guess_3( p, x, *el, r, dist ) ) return 0; 2262  findpt_hash_3( p, x ); 2263 #if DIAGNOSTICS 2264  printf( "hashing leaves %d elements to consider\n", p->end - p->sorted ); 2265 #endif 2266  if( p->sorted == p->end ) return -1; 2267  return findpt_pass_3( p, x, el, r, dist ); 2268 } 2269  2270 void findpt_weights_2( findpt_data_2* p, const realType r[2] ) 2271 { 2272  lagrange_0( &p->ld[0], r[0] ); 2273  lagrange_0( &p->ld[1], r[1] ); 2274 } 2275  2276 void findpt_weights_3( findpt_data_3* p, const realType r[3] ) 2277 { 2278  lagrange_0( &p->ld[0], r[0] ); 2279  lagrange_0( &p->ld[1], r[1] ); 2280  lagrange_0( &p->ld[2], r[2] ); 2281 } 2282  2283 double findpt_eval_2( findpt_data_2* p, const realType* u ) 2284 { 2285  return tensor_i2( p->ld[0].J, p->ld[0].n, p->ld[1].J, p->ld[1].n, u, p->od_work ); 2286 } 2287  2288 double findpt_eval_3( findpt_data_3* p, const realType* u ) 2289 { 2290  return tensor_i3( p->ld[0].J, p->ld[0].n, p->ld[1].J, p->ld[1].n, p->ld[2].J, p->ld[2].n, u, p->od_work ); 2291 }