1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <stdarg.h>
4 #include <math.h>
5 #include <float.h>
6 #include <string.h>
7
8 #include "moab/FindPtFuncs.h"
9
10 const unsigned opt_no_constraints_2 = 3 + 1;
11 const unsigned opt_no_constraints_3 = 9 + 3 + 1;
12
13
56
57 typedef struct
58 {
59 unsigned n;
60 unsigned m;
61 realType *Q0, *Q1;
63 const realType* z;
64 realType* h;
65 realType *uv, *ov;
71 } lob_bnd_base;
72
73 typedef struct
74 {
75 lob_bnd_base b;
76 realType *uvp, *uvn, *ovp, *ovn;
78 } lob_bnd_ext;
79
80 static void lob_bnd_base_alloc( lob_bnd_base* p, unsigned n, unsigned m )
81 {
82 p->n = n, p->m = m;
83 p->Q0 = tmalloc( realType, 2 * n + m + 2 * n * m );
84 p->Q1 = p->Q0 + n;
85 p->h = p->Q1 + n;
86 p->uv = p->h + m;
87 p->ov = p->uv + n * m;
88 }
89
90 static void lob_bnd_base_free( lob_bnd_base* p )
91 {
92 free( p->Q0 );
93 }
94
95 static void lob_bnd_ext_alloc( lob_bnd_ext* p, unsigned n, unsigned m )
96 {
97 p->b.n = n, p->b.m = m;
98 p->b.Q0 = tmalloc( realType, 2 * n + m + 6 * n * m );
99 p->b.Q1 = p->b.Q0 + n;
100 p->b.h = p->b.Q1 + n;
101 p->b.uv = p->b.h + m;
102 p->b.ov = p->b.uv + n * m;
103 p->uvp = p->b.ov + n * m;
104 p->uvn = p->uvp + n * m;
105 p->ovp = p->uvn + n * m;
106 p->ovn = p->ovp + n * m;
107 }
108
109 static void lob_bnd_ext_free( lob_bnd_ext* p )
110 {
111 free( p->b.Q0 );
112 }
113
114 static void lob_bnd_base_setup( lob_bnd_base* p, const realType* z, const realType* w )
115 {
116 unsigned i, j, m = p->m, n = p->n, mm = 2 * m - 1;
117 realType *q = tmalloc( realType, ( 2 * n + 1 ) * mm + 6 * n ), *J = q + mm, *D = J + n * mm, *work = D + n * mm;
118 p->z = z;
119 for( i = 0; i < n; ++i )
120 p->Q0[i] = w[i] / 2, p->Q1[i] = 3 * p->Q0[i] * z[i];
121 p->h[0] = -1, p->h[m - 1] = 1;
122 for( j = 1; j < m - 1; ++j )
123 p->h[j] = mbcos( ( m - j - 1 ) * MOAB_POLY_PI / ( m - 1 ) );
124 for( j = 0; j < m - 1; ++j )
125 q[2 * j] = p->h[j], q[2 * j + 1] = ( p->h[j] + p->h[j + 1] ) / 2;
126 q[mm - 1] = p->h[m - 1];
127 lagrange_weights_deriv( z, n, q, mm, J, D, work );
128 for( i = 0; i < n; ++i )
129 {
130 realType *uv = p->uv + i * m, *ov = p->ov + i * m;
131 ov[0] = uv[0] = J[i];
132 ov[m - 1] = uv[m - 1] = J[( mm - 1 ) * n + i];
133 for( j = 1; j < m - 1; ++j )
134 {
135 unsigned jj = 2 * j;
136 realType c0 = J[( jj - 1 ) * n + i] + ( q[jj] - q[jj - 1] ) * D[( jj - 1 ) * n + i];
137 realType c1 = J[( jj + 1 ) * n + i] + ( q[jj] - q[jj + 1] ) * D[( jj + 1 ) * n + i];
138 realType c2 = J[jj * n + i];
139 rminmax_3( &uv[j], &ov[j], c0, c1, c2 );
140 }
141 }
142 free( q );
143 }
144
145 static void lob_bnd_ext_setup( lob_bnd_ext* p, const realType* z, const realType* w )
146 {
147 unsigned i, mn = p->b.m * p->b.n;
148 lob_bnd_base_setup( &p->b, z, w );
149 for( i = 0; i < mn; ++i )
150 {
151 realType uvi = p->b.uv[i], ovi = p->b.ov[i];
152 p->uvp[i] = p->uvn[i] = p->ovp[i] = p->ovn[i] = 0;
153 if( uvi > 0 )
154 p->uvp[i] = uvi;
155 else
156 p->uvn[i] = uvi;
157 if( ovi > 0 )
158 p->ovp[i] = ovi;
159 else
160 p->ovn[i] = ovi;
161 }
162 }
163
164 static void lob_bnd_lines( const lob_bnd_base* p, const realType* u, realType* a, realType* b )
165 {
166 unsigned i, j;
167 realType a0 = 0, a1 = 0;
168 const realType *uv = p->uv, *ov = p->ov;
169 for( i = 0; i < p->n; ++i )
170 a0 += p->Q0[i] * u[i], a1 += p->Q1[i] * u[i];
171 for( j = 0; j < p->m; ++j )
172 b[j] = a[j] = a0 + a1 * p->h[j];
173 for( i = 0; i < p->n; ++i )
174 {
175 realType w = u[i] - ( a0 + a1 * p->z[i] );
176 if( w >= 0 )
177 for( j = 0; j < p->m; ++j )
178 a[j] += w * ( *uv++ ), b[j] += w * ( *ov++ );
179 else
180 for( j = 0; j < p->m; ++j )
181 a[j] += w * ( *ov++ ), b[j] += w * ( *uv++ );
182 }
183 }
184
185
186 static void lob_bnd_1( const lob_bnd_base* p, const realType* u, realType bnd[2], realType* work )
187 {
188 unsigned j;
189 realType *a = work, *b = work + p->m;
190 lob_bnd_lines( p, u, a, b );
191 bnd[0] = a[0], bnd[1] = b[0];
192 for( j = 1; j < p->m; ++j )
193 {
194 if( a[j] < bnd[0] ) bnd[0] = a[j];
195 if( b[j] > bnd[1] ) bnd[1] = b[j];
196 }
197 }
198
199
200 static void lob_bnd_2( const lob_bnd_base* pr,
201 const lob_bnd_ext* ps,
202 const realType* u,
203 realType bnd[2],
204 realType* work )
205 {
206 unsigned nr = pr->n, mr = pr->m, ns = ps->b.n, ms = ps->b.m;
207 realType *a0 = work, *a1 = a0 + mr, *ar_ = a1 + mr, *ar = ar_, *br_ = ar + mr * ns, *br = br_, *a_ = br + mr * ns,
208 *a = a_, *b_ = a + mr * ms, *b = b_, *uvp, *ovp, *uvn, *ovn;
209 realType b0, b1;
210 unsigned i, j, k;
211 for( i = 0; i < mr; ++i )
212 a0[i] = a1[i] = 0;
213 for( j = 0; j < ns; ++j, u += nr )
214 {
215 realType q0 = ps->b.Q0[j], q1 = ps->b.Q1[j];
216 lob_bnd_lines( pr, u, ar, br );
217 for( i = 0; i < mr; ++i )
218 {
219 realType t = ( *ar++ + *br++ ) / 2;
220 a0[i] += q0 * t, a1[i] += q1 * t;
221 }
222 }
223 for( i = 0; i < mr; ++i )
224 {
225 realType a0i = a0[i], a1i = a1[i];
226 for( k = 0; k < ms; ++k )
227 *b++ = *a++ = a0i + a1i * ps->b.h[k];
228 }
229 ar = ar_, br = br_;
230 uvp = ps->uvp, ovp = ps->ovp, uvn = ps->uvn, ovn = ps->ovn;
231 for( j = 0; j < ns; ++j, uvp += ms, uvn += ms, ovp += ms, ovn += ms )
232 {
233 realType zj = ps->b.z[j];
234 a = a_, b = b_;
235 for( i = 0; i < mr; ++i )
236 {
237 realType t = a0[i] + a1[i] * zj;
238 realType uw = *ar++ - t;
239 realType ow = *br++ - t;
240 if( uw >= 0 )
241 for( k = 0; k < ms; ++k )
242 *a++ += uw * uvp[k] + ow * uvn[k], *b++ += ow * ovp[k] + uw * ovn[k];
243 else if( ow <= 0 )
244 for( k = 0; k < ms; ++k )
245 *a++ += uw * ovp[k] + ow * ovn[k], *b++ += ow * uvp[k] + uw * uvn[k];
246 else
247 for( k = 0; k < ms; ++k )
248 *a++ += uw * ovp[k] + ow * uvn[k], *b++ += ow * ovp[k] + uw * uvn[k];
249 }
250 }
251 b0 = a_[0], b1 = b_[0];
252 for( i = 1; i < mr * ms; ++i )
253 {
254 if( a_[i] < b0 ) b0 = a_[i];
255 if( b_[i] > b1 ) b1 = b_[i];
256 }
257 bnd[0] = b0, bnd[1] = b1;
258 }
259
260
263
264 static void mat_inv_2( const realType A[4], realType inv[4] )
265 {
266 const realType idet = 1 / ( A[0] * A[3] - A[1] * A[2] );
267 inv[0] = idet * A[3];
268 inv[1] = -( idet * A[1] );
269 inv[2] = -( idet * A[2] );
270 inv[3] = idet * A[0];
271 }
272
273 static void mat_inv_3( const realType A[9], realType inv[9] )
274 {
275 const realType a = A[4] * A[8] - A[5] * A[7], b = A[5] * A[6] - A[3] * A[8], c = A[3] * A[7] - A[4] * A[6],
276 idet = 1 / ( A[0] * a + A[1] * b + A[2] * c );
277 inv[0] = idet * a;
278 inv[1] = idet * ( A[2] * A[7] - A[1] * A[8] );
279 inv[2] = idet * ( A[1] * A[5] - A[2] * A[4] );
280 inv[3] = idet * b;
281 inv[4] = idet * ( A[0] * A[8] - A[2] * A[6] );
282 inv[5] = idet * ( A[2] * A[3] - A[0] * A[5] );
283 inv[6] = idet * c;
284 inv[7] = idet * ( A[1] * A[6] - A[0] * A[7] );
285 inv[8] = idet * ( A[0] * A[4] - A[1] * A[3] );
286 }
287
288 static void mat_app_2r( realType y[2], const realType A[4], const realType x[2] )
289 {
290 y[0] = A[0] * x[0] + A[1] * x[1];
291 y[1] = A[2] * x[0] + A[3] * x[1];
292 }
293
294 static void mat_app_2c( realType y[2], const realType A[4], const realType x[2] )
295 {
296 y[0] = A[0] * x[0] + A[2] * x[1];
297 y[1] = A[1] * x[0] + A[3] * x[1];
298 }
299
300 static void mat_app_3r( realType y[3], const realType A[9], const realType x[3] )
301 {
302 y[0] = A[0] * x[0] + A[1] * x[1] + A[2] * x[2];
303 y[1] = A[3] * x[0] + A[4] * x[1] + A[5] * x[2];
304 y[2] = A[6] * x[0] + A[7] * x[1] + A[8] * x[2];
305 }
306
307 static void mat_app_3c( realType y[3], const realType A[9], const realType x[3] )
308 {
309 y[0] = A[0] * x[0] + A[3] * x[1] + A[6] * x[2];
310 y[1] = A[1] * x[0] + A[4] * x[1] + A[7] * x[2];
311 y[2] = A[2] * x[0] + A[5] * x[1] + A[8] * x[2];
312 }
313
314 static void tinyla_solve_2( realType x[2], const realType A[4], const realType b[2] )
315 {
316 realType inv[4];
317 mat_inv_2( A, inv );
318 mat_app_2r( x, inv, b );
319 }
320
321 static void tinyla_solve_3( realType x[3], const realType A[9], const realType b[3] )
322 {
323 realType inv[9];
324 mat_inv_3( A, inv );
325 mat_app_3r( x, inv, b );
326 }
327
328
332 static void tinyla_solve_sym_2( realType* x0, realType* x1, const realType A[3], realType b0, realType b1 )
333 {
334 const realType idet = 1 / ( A[0] * A[1] - A[2] * A[2] );
335 *x0 = idet * ( A[1] * b0 - A[2] * b1 );
336 *x1 = idet * ( A[0] * b1 - A[2] * b0 );
337 }
338
339
395
396 typedef struct
397 {
398 lob_bnd_base dr, ds;
399 realType *Jr0, *Dr0, *Js0, *Ds0, *work;
400 } obbox_data_2;
401
402 typedef struct
403 {
404 lob_bnd_base dr;
405 lob_bnd_ext ds, dt;
406 realType *Jr0, *Dr0, *Js0, *Ds0, *Jt0, *Dt0, *work;
407 } obbox_data_3;
408
409 static void obbox_data_alloc_2( obbox_data_2* p, const unsigned n[2], const unsigned m[2] )
410 {
411 const unsigned max_npm = umax_2( n[0] + m[0], n[1] + m[1] );
412 lob_bnd_base_alloc( &p->dr, n[0], m[0] );
413 lob_bnd_base_alloc( &p->ds, n[1], m[1] );
414 p->Jr0 = tmalloc( realType, 2 * n[0] + 2 * n[1] + 2 * max_npm );
415 p->Dr0 = p->Jr0 + n[0];
416 p->Js0 = p->Dr0 + n[0];
417 p->Ds0 = p->Js0 + n[1];
418 p->work = p->Ds0 + n[1];
419 }
420
421 static void obbox_data_free_2( obbox_data_2* p )
422 {
423 lob_bnd_base_free( &p->dr );
424 lob_bnd_base_free( &p->ds );
425 free( p->Jr0 );
426 }
427
428 static void obbox_data_alloc_3( obbox_data_3* p, const unsigned n[3], const unsigned m[3] )
429 {
430 const unsigned wk1 = 3 * n[0] * n[1] + 2 * m[0] + 2 * m[0] * n[1] + 2 * m[0] * m[1];
431 const unsigned wk2 = 3 * n[0] * n[2] + 2 * m[0] + 2 * m[0] * n[2] + 2 * m[0] * m[2];
432 const unsigned wk3 = 3 * n[1] * n[2] + 2 * m[1] + 2 * m[1] * n[2] + 2 * m[1] * m[2];
433 const unsigned wk_max = umax_3( wk1, wk2, wk3 );
434 lob_bnd_base_alloc( &p->dr, n[0], m[0] );
435 lob_bnd_ext_alloc( &p->ds, n[1], m[1] );
436 lob_bnd_ext_alloc( &p->dt, n[2], m[2] );
437 p->Jr0 = tmalloc( realType, 2 * n[0] + 2 * n[1] + 2 * n[2] + wk_max );
438 p->Dr0 = p->Jr0 + n[0];
439 p->Js0 = p->Dr0 + n[0];
440 p->Ds0 = p->Js0 + n[1];
441 p->Jt0 = p->Ds0 + n[1];
442 p->Dt0 = p->Jt0 + n[2];
443 p->work = p->Dt0 + n[2];
444 }
445
446 static void obbox_data_free_3( obbox_data_3* p )
447 {
448 lob_bnd_base_free( &p->dr );
449 lob_bnd_ext_free( &p->ds );
450 lob_bnd_ext_free( &p->dt );
451 free( p->Jr0 );
452 }
453
454 static obbox_data_2* obbox_setup_2( const realType* const z[2],
455 const realType* const w[2],
456 const unsigned n[2],
457 const unsigned m[2] )
458 {
459 const realType zero = 0;
460 realType* work;
461 obbox_data_2* p = tmalloc( obbox_data_2, 1 );
462 obbox_data_alloc_2( p, n, m );
463 lob_bnd_base_setup( &p->dr, z[0], w[0] );
464 lob_bnd_base_setup( &p->ds, z[1], w[1] );
465 work = tmalloc( realType, 6 * umax_2( n[0], n[1] ) );
466 lagrange_weights_deriv( z[0], n[0], &zero, 1, p->Jr0, p->Dr0, work );
467 lagrange_weights_deriv( z[1], n[1], &zero, 1, p->Js0, p->Ds0, work );
468 free( work );
469 return p;
470 }
471
472 static obbox_data_3* obbox_setup_3( const realType* const z[3],
473 const realType* const w[3],
474 const unsigned n[3],
475 const unsigned m[3] )
476 {
477 const realType zero = 0;
478 realType* work;
479 obbox_data_3* p = tmalloc( obbox_data_3, 1 );
480 obbox_data_alloc_3( p, n, m );
481 lob_bnd_base_setup( &p->dr, z[0], w[0] );
482 lob_bnd_ext_setup( &p->ds, z[1], w[1] );
483 lob_bnd_ext_setup( &p->dt, z[2], w[2] );
484 work = tmalloc( realType, 6 * umax_3( n[0], n[1], n[2] ) );
485 lagrange_weights_deriv( z[0], n[0], &zero, 1, p->Jr0, p->Dr0, work );
486 lagrange_weights_deriv( z[1], n[1], &zero, 1, p->Js0, p->Ds0, work );
487 lagrange_weights_deriv( z[2], n[2], &zero, 1, p->Jt0, p->Dt0, work );
488 free( work );
489 return p;
490 }
491
492 static void obbox_free_2( obbox_data_2* p )
493 {
494 obbox_data_free_2( p );
495 free( p );
496 }
497
498 static void obbox_free_3( obbox_data_3* p )
499 {
500 obbox_data_free_3( p );
501 free( p );
502 }
503
504 int obbox_axis_test_2( const obbox_2* p, const realType x[2] )
505 {
506 return ( x[0] < p->axis_bnd[0] || x[0] > p->axis_bnd[1] || x[1] < p->axis_bnd[2] || x[1] > p->axis_bnd[3] );
507 }
508
509 int obbox_axis_test_3( const obbox_3* p, const realType x[3] )
510 {
511 return ( x[0] < p->axis_bnd[0] || x[0] > p->axis_bnd[1] || x[1] < p->axis_bnd[2] || x[1] > p->axis_bnd[3] ||
512 x[2] < p->axis_bnd[4] || x[2] > p->axis_bnd[5] );
513 }
514
515 int obbox_test_2( const obbox_2* p, const realType x[2], realType r[2] )
516 {
517 realType xt[2];
518 xt[0] = x[0] - p->x[0];
519 xt[1] = x[1] - p->x[1];
520 r[0] = p->A[0] * xt[0] + p->A[1] * xt[1];
521 if( mbabs( r[0] ) > 1 ) return 1;
522 r[1] = p->A[2] * xt[0] + p->A[3] * xt[1];
523 return mbabs( r[1] ) > 1;
524 }
525
526 int obbox_test_3( const obbox_3* p, const realType x[3], realType r[3] )
527 {
528 realType xt[3];
529 xt[0] = x[0] - p->x[0];
530 xt[1] = x[1] - p->x[1];
531 xt[2] = x[2] - p->x[2];
532 r[0] = p->A[0] * xt[0] + p->A[1] * xt[1] + p->A[2] * xt[2];
533 if( mbabs( r[0] ) > 1 ) return 1;
534 r[1] = p->A[3] * xt[0] + p->A[4] * xt[1] + p->A[5] * xt[2];
535 if( mbabs( r[1] ) > 1 ) return 1;
536 r[2] = p->A[6] * xt[0] + p->A[7] * xt[1] + p->A[8] * xt[2];
537 return mbabs( r[2] ) > 1;
538 }
539
540 void obbox_calc_tfm_2( const realType* x,
541 const realType* y,
542 unsigned n,
543 unsigned s,
544 const realType c0[2],
545 const realType A[4],
546 realType* u )
547 {
548 unsigned i;
549 realType* v = u + n;
550 for( i = 0; i < n; ++i, x += s, y += s )
551 {
552 const realType xt = *x - c0[0], yt = *y - c0[1];
553 u[i] = A[0] * xt + A[1] * yt;
554 v[i] = A[2] * xt + A[3] * yt;
555 }
556 }
557
558 void obbox_calc_tfm_3( const realType* x,
559 const realType* y,
560 const realType* z,
561 unsigned nr,
562 unsigned sr,
563 unsigned ns,
564 unsigned ss,
565 const realType c0[3],
566 const realType A[9],
567 realType* u )
568 {
569 unsigned i, j;
570 realType *v = u + nr * ns, *w = v + nr * ns;
571 for( j = 0; j < ns; ++j, x += ss, y += ss, z += ss )
572 {
573 for( i = 0; i < nr; ++i, x += sr, y += sr, z += sr )
574 {
575 const realType xt = *x - c0[0], yt = *y - c0[1], zt = *z - c0[2];
576 *u++ = A[0] * xt + A[1] * yt + A[2] * zt;
577 *v++ = A[3] * xt + A[4] * yt + A[5] * zt;
578 *w++ = A[6] * xt + A[7] * yt + A[8] * zt;
579 }
580 }
581 }
582
583 void obbox_merge_2( realType* b, const realType* ob )
584 {
585 if( ob[0] < b[0] ) b[0] = ob[0];
586 if( ob[1] > b[1] ) b[1] = ob[1];
587 if( ob[2] < b[2] ) b[2] = ob[2];
588 if( ob[3] > b[3] ) b[3] = ob[3];
589 }
590
591 void obbox_merge_3( realType* b, const realType* ob )
592 {
593 if( ob[0] < b[0] ) b[0] = ob[0];
594 if( ob[1] > b[1] ) b[1] = ob[1];
595 if( ob[2] < b[2] ) b[2] = ob[2];
596 if( ob[3] > b[3] ) b[3] = ob[3];
597 if( ob[4] < b[4] ) b[4] = ob[4];
598 if( ob[5] > b[5] ) b[5] = ob[5];
599 }
600
601
602 void obbox_side_2( const realType* x,
603 const realType* y,
604 unsigned n,
605 unsigned s,
606 const realType c0[2],
607 const realType A[4],
608 realType* work,
609 const lob_bnd_base* lbd,
610 realType bnd[4] )
611 {
612 obbox_calc_tfm_2( x, y, n, s, c0, A, work );
613 lob_bnd_1( lbd, work, bnd, work + 2 * n );
614 lob_bnd_1( lbd, work + n, bnd + 2, work + 2 * n );
615 }
616
617
618 void obbox_side_3( const realType* x,
619 const realType* y,
620 const realType* z,
621 unsigned nr,
622 unsigned sr,
623 unsigned ns,
624 unsigned ss,
625 const realType c0[3],
626 const realType A[9],
627 realType* work,
628 const lob_bnd_base* dr,
629 const lob_bnd_ext* ds,
630 realType bnd[6] )
631 {
632 obbox_calc_tfm_3( x, y, z, nr, sr, ns, ss, c0, A, work );
633 lob_bnd_2( dr, ds, work, bnd, work + 3 * nr * ns );
634 lob_bnd_2( dr, ds, work + nr * ns, bnd + 2, work + 3 * nr * ns );
635 lob_bnd_2( dr, ds, work + 2 * nr * ns, bnd + 4, work + 3 * nr * ns );
636 }
637
638
641 void obbox_bnd_2( const obbox_data_2* p,
642 const realType* x,
643 const realType* y,
644 const realType c0[2],
645 const realType A[4],
646 realType bnd[4] )
647 {
648 unsigned i, nr = p->dr.n, ns = p->ds.n;
649 realType obnd[4];
650
651 i = nr * ( ns - 1 );
652 obbox_side_2( x, y, nr, 1, c0, A, p->work, &p->dr, bnd );
653 obbox_side_2( x + i, y + i, nr, 1, c0, A, p->work, &p->dr, obnd );
654 obbox_merge_2( bnd, obnd );
655
656 i = nr - 1;
657 obbox_side_2( x, y, ns, nr, c0, A, p->work, &p->ds, obnd );
658 obbox_merge_2( bnd, obnd );
659 obbox_side_2( x + i, y + i, nr, nr, c0, A, p->work, &p->ds, obnd );
660 obbox_merge_2( bnd, obnd );
661 }
662
663
667 void obbox_bnd_3( const obbox_data_3* p,
668 const realType* x,
669 const realType* y,
670 const realType* z,
671 const realType c0[3],
672 const realType A[9],
673 realType bnd[6] )
674 {
675 unsigned i, nr = p->dr.n, ns = p->ds.b.n, nt = p->dt.b.n;
676 realType obnd[6];
677
678 i = nr * ns * ( nt - 1 );
679 obbox_side_3( x, y, z, nr, 1, ns, 0, c0, A, p->work, &p->dr, &p->ds, bnd );
680 obbox_side_3( x + i, y + i, z + i, nr, 1, ns, 0, c0, A, p->work, &p->dr, &p->ds, obnd );
681 obbox_merge_3( bnd, obnd );
682
683 i = nr * ( ns - 1 );
684 obbox_side_3( x, y, z, nr, 1, nt, i, c0, A, p->work, &p->dr, &p->dt, obnd );
685 obbox_merge_3( bnd, obnd );
686 obbox_side_3( x + i, y + i, z + i, nr, 1, nt, i, c0, A, p->work, &p->dr, &p->dt, obnd );
687 obbox_merge_3( bnd, obnd );
688
689 i = nr - 1;
690 obbox_side_3( x, y, z, ns, nr, nt, 0, c0, A, p->work, &p->ds.b, &p->dt, obnd );
691 obbox_merge_3( bnd, obnd );
692 obbox_side_3( x + i, y + i, z + i, ns, nr, nt, 0, c0, A, p->work, &p->ds.b, &p->dt, obnd );
693 obbox_merge_3( bnd, obnd );
694 }
695
696 void obbox_calc_2( const obbox_data_2* p, realType tol, const realType* x, const realType* y, obbox_2* b )
697 {
698 const realType zero[2] = { 0, 0 }, id[4] = { 1, 0, 0, 1 };
699 realType c0[2], jac[4], inv[4], bnd[4], u0[2], d[2];
700
701 obbox_bnd_2( p, x, y, zero, id, b->axis_bnd );
702 d[0] = b->axis_bnd[1] - b->axis_bnd[0];
703 d[1] = b->axis_bnd[3] - b->axis_bnd[2];
704 b->axis_bnd[0] -= tol * d[0], b->axis_bnd[1] += tol * d[0];
705 b->axis_bnd[2] -= tol * d[1], b->axis_bnd[3] += tol * d[1];
706
707 c0[0] = tensor_ig2( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.n, x, jac, p->work );
708 c0[1] = tensor_ig2( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.n, y, jac + 2, p->work );
709 mat_inv_2( jac, inv );
710
711 obbox_bnd_2( p, x, y, c0, inv, bnd );
712
713 u0[0] = ( bnd[0] + bnd[1] ) / 2;
714 u0[1] = ( bnd[2] + bnd[3] ) / 2;
715 d[0] = 2 / ( ( 1 + tol ) * ( bnd[1] - bnd[0] ) );
716 d[1] = 2 / ( ( 1 + tol ) * ( bnd[3] - bnd[2] ) );
717 b->x[0] = c0[0] + jac[0] * u0[0] + jac[1] * u0[1];
718 b->x[1] = c0[1] + jac[2] * u0[0] + jac[3] * u0[1];
719 b->A[0] = d[0] * inv[0], b->A[1] = d[0] * inv[1];
720 b->A[2] = d[1] * inv[2], b->A[3] = d[1] * inv[3];
721 }
722
723 void obbox_calc_3( const obbox_data_3* p,
724 realType tol,
725 const realType* x,
726 const realType* y,
727 const realType* z,
728 obbox_3* b )
729 {
730 const realType zero[3] = { 0, 0 }, id[9] = { 1, 0, 0, 0, 1, 0, 0, 0, 1 };
731 realType c0[3], jac[9], inv[9], bnd[6], u0[3], d[3];
732
733 obbox_bnd_3( p, x, y, z, zero, id, b->axis_bnd );
734 d[0] = b->axis_bnd[1] - b->axis_bnd[0];
735 d[1] = b->axis_bnd[3] - b->axis_bnd[2];
736 d[2] = b->axis_bnd[5] - b->axis_bnd[4];
737 b->axis_bnd[0] -= tol * d[0], b->axis_bnd[1] += tol * d[0];
738 b->axis_bnd[2] -= tol * d[1], b->axis_bnd[3] += tol * d[1];
739 b->axis_bnd[4] -= tol * d[2], b->axis_bnd[5] += tol * d[2];
740
741 c0[0] =
742 tensor_ig3( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.b.n, p->Jt0, p->Dt0, p->dt.b.n, x, jac, p->work );
743 c0[1] = tensor_ig3( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.b.n, p->Jt0, p->Dt0, p->dt.b.n, y, jac + 3,
744 p->work );
745 c0[2] = tensor_ig3( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.b.n, p->Jt0, p->Dt0, p->dt.b.n, z, jac + 6,
746 p->work );
747 mat_inv_3( jac, inv );
748
749 obbox_bnd_3( p, x, y, z, c0, inv, bnd );
750
751 u0[0] = ( bnd[0] + bnd[1] ) / 2;
752 u0[1] = ( bnd[2] + bnd[3] ) / 2;
753 u0[2] = ( bnd[4] + bnd[5] ) / 2;
754 d[0] = 2 / ( ( 1 + tol ) * ( bnd[1] - bnd[0] ) );
755 d[1] = 2 / ( ( 1 + tol ) * ( bnd[3] - bnd[2] ) );
756 d[2] = 2 / ( ( 1 + tol ) * ( bnd[5] - bnd[4] ) );
757 b->x[0] = c0[0] + jac[0] * u0[0] + jac[1] * u0[1] + jac[2] * u0[2];
758 b->x[1] = c0[1] + jac[3] * u0[0] + jac[4] * u0[1] + jac[5] * u0[2];
759 b->x[2] = c0[2] + jac[6] * u0[0] + jac[7] * u0[1] + jac[8] * u0[2];
760 b->A[0] = d[0] * inv[0], b->A[1] = d[0] * inv[1], b->A[2] = d[0] * inv[2];
761 b->A[3] = d[1] * inv[3], b->A[4] = d[1] * inv[4], b->A[5] = d[1] * inv[5];
762 b->A[6] = d[2] * inv[6], b->A[7] = d[2] * inv[7], b->A[8] = d[2] * inv[8];
763 }
764
765
800
801 static int ifloor( realType x )
802 {
803
807 return mbfloor( x );
808 }
809
810 static int iceil( realType x )
811 {
812
816 return mbceil( x );
817 }
818
819 static unsigned hash_index_helper( realType low, realType fac, unsigned n, realType x )
820 {
821 const int i = ifloor( ( x - low ) * fac );
822 if( i < 0 ) return 0;
823 return umin_2( i, n - 1 );
824 }
825
826 static uint hash_index_2( const hash_data_2* p, const realType x[2] )
827 {
828 const unsigned n = p->hash_n;
829 return (uint)hash_index_helper( p->bnd[2], p->fac[1], n, x[1] ) * n +
830 hash_index_helper( p->bnd[0], p->fac[0], n, x[0] );
831 }
832
833 static uint hash_index_3( const hash_data_3* p, const realType x[3] )
834 {
835 const unsigned n = p->hash_n;
836 return ( (uint)hash_index_helper( p->bnd[4], p->fac[2], n, x[2] ) * n +
837 hash_index_helper( p->bnd[2], p->fac[1], n, x[1] ) ) *
838 n +
839 hash_index_helper( p->bnd[0], p->fac[0], n, x[0] );
840 }
841
842 static void hash_setfac_2( hash_data_2* p, unsigned n )
843 {
844 p->hash_n = n;
845 p->fac[0] = n / ( p->bnd[1] - p->bnd[0] );
846 p->fac[1] = n / ( p->bnd[3] - p->bnd[2] );
847 }
848
849 static void hash_setfac_3( hash_data_3* p, unsigned n )
850 {
851 p->hash_n = n;
852 p->fac[0] = n / ( p->bnd[1] - p->bnd[0] );
853 p->fac[1] = n / ( p->bnd[3] - p->bnd[2] );
854 p->fac[2] = n / ( p->bnd[5] - p->bnd[4] );
855 }
856
857 static void hash_range_2( const hash_data_2* p, uint i, unsigned d, unsigned* ia, unsigned* ib )
858 {
859 const realType a = p->obb[i].axis_bnd[d * 2];
860 const realType b = p->obb[i].axis_bnd[d * 2 + 1];
861 const int i0 = ifloor( ( a - p->bnd[d * 2] ) * p->fac[d] );
862 const unsigned i1 = iceil( ( b - p->bnd[d * 2] ) * p->fac[d] );
863 *ia = imax_2( 0, i0 );
864 *ib = imin_2( i1, p->hash_n );
865 if( *ib == *ia ) ++( *ib );
866 }
867
868 static void hash_range_3( const hash_data_3* p, uint i, unsigned d, unsigned* ia, unsigned* ib )
869 {
870 const realType a = p->obb[i].axis_bnd[d * 2];
871 const realType b = p->obb[i].axis_bnd[d * 2 + 1];
872 const int i0 = ifloor( ( a - p->bnd[d * 2] ) * p->fac[d] );
873 const unsigned i1 = iceil( ( b - p->bnd[d * 2] ) * p->fac[d] );
874 *ia = imax_2( 0, i0 );
875 *ib = imin_2( i1, p->hash_n );
876 if( *ib == *ia ) ++( *ib );
877 }
878
879 static uint hash_count_2( hash_data_2* p, uint nel, unsigned n )
880 {
881 uint i, count = 0;
882 hash_setfac_2( p, n );
883 for( i = 0; i < nel; ++i )
884 {
885 unsigned ia, ib;
886 uint ci;
887 hash_range_2( p, i, 0, &ia, &ib );
888 ci = ib - ia;
889 hash_range_2( p, i, 1, &ia, &ib );
890 ci *= ib - ia;
891 count += ci;
892 }
893 return count;
894 }
895
896 static uint hash_count_3( hash_data_3* p, uint nel, unsigned n )
897 {
898 uint i, count = 0;
899 hash_setfac_3( p, n );
900 for( i = 0; i < nel; ++i )
901 {
902 unsigned ia, ib;
903 uint ci;
904 hash_range_3( p, i, 0, &ia, &ib );
905 ci = ib - ia;
906 hash_range_3( p, i, 1, &ia, &ib );
907 ci *= ib - ia;
908 hash_range_3( p, i, 2, &ia, &ib );
909 ci *= ib - ia;
910 count += ci;
911 }
912 return count;
913 }
914
915 static uint hash_opt_size_2( hash_data_2* p, uint nel, uint max_size )
916 {
917 unsigned nl = 1, nu = ceil( sqrt( max_size - nel ) );
918 uint size_low = 2 + nel;
919 while( nu - nl > 1 )
920 {
921 unsigned nm = nl + ( nu - nl ) / 2;
922 uint size = (uint)nm * nm + 1 + hash_count_2( p, nel, nm );
923 if( size <= max_size )
924 nl = nm, size_low = size;
925 else
926 nu = nm;
927 }
928 hash_setfac_2( p, nl );
929 return size_low;
930 }
931
932 static uint hash_opt_size_3( hash_data_3* p, uint nel, uint max_size )
933 {
934 unsigned nl = 1, nu = ceil( pow( max_size - nel, 1.0 / 3 ) );
935 uint size_low = 2 + nel;
936 while( nu - nl > 1 )
937 {
938 unsigned nm = nl + ( nu - nl ) / 2;
939 uint size = (uint)nm * nm * nm + 1 + hash_count_3( p, nel, nm );
940 if( size <= max_size )
941 nl = nm, size_low = size;
942 else
943 nu = nm;
944 }
945 hash_setfac_3( p, nl );
946 return size_low;
947 }
948
949 static void hash_getbb_2( hash_data_2* p, const realType* const elx[2], const unsigned n[2], uint nel, realType tol )
950 {
951 obbox_data_2* data;
952 realType *z[2], *w[2];
953 uint i;
954 unsigned d;
955 const unsigned nn = n[0] * n[1];
956 unsigned int m[2];
957 const realType* x[2];
958
959 for( i = 0; i < 2; ++i )
960 {
961 x[i] = elx[i];
962 m[i] = 2 * n[i];
963 }
964
965 z[0] = tmalloc( realType, 2 * ( n[0] + n[1] ) );
966 w[0] = z[0] + n[0];
967 z[1] = w[0] + n[0], w[1] = z[1] + n[1];
968 for( d = 0; d < 2; ++d )
969 lobatto_nodes( z[d], n[d] ), lobatto_weights( z[d], w[d], n[d] );
970 data = obbox_setup_2( (const realType* const*)z, (const realType* const*)w, n, m );
971 obbox_calc_2( data, tol, x[0], x[1], &p->obb[0] );
972 memcpy( &p->bnd[0], (const realType*)&p->obb[0].axis_bnd[0], 4 * sizeof( realType ) );
973 for( i = 0; i < nel; ++i, x[0] += nn, x[1] += nn )
974 {
975 obbox_calc_2( data, tol, x[0], x[1], &p->obb[i] );
976 obbox_merge_2( &p->bnd[0], (const realType*)&p->obb[i].axis_bnd[0] );
977 }
978 obbox_free_2( data );
979 free( z[0] );
980 }
981
982 static void hash_getbb_3( hash_data_3* p, const realType* const elx[3], const unsigned n[3], uint nel, realType tol )
983 {
984 obbox_data_3* data;
985 const realType* x[3];
986 realType *z[3], *w[3];
987 uint i;
988 unsigned d;
989 const unsigned nn = n[0] * n[1] * n[2];
990 unsigned int m[3];
991
992 for( i = 0; i < 3; ++i )
993 {
994 x[i] = elx[i];
995 m[i] = 2 * n[i];
996 }
997
998 z[0] = tmalloc( realType, 2 * ( n[0] + n[1] + n[2] ) );
999 w[0] = z[0] + n[0];
1000 for( d = 1; d < 3; ++d )
1001 z[d] = w[d - 1] + n[d - 1], w[d] = z[d] + n[d];
1002 for( d = 0; d < 3; ++d )
1003 lobatto_nodes( z[d], n[d] ), lobatto_weights( z[d], w[d], n[d] );
1004 data = obbox_setup_3( (const realType* const*)z, (const realType* const*)w, n, m );
1005 obbox_calc_3( data, tol, x[0], x[1], x[2], &p->obb[0] );
1006 memcpy( &p->bnd[0], (const realType*)&p->obb[0].axis_bnd[0], 6 * sizeof( realType ) );
1007 for( i = 0; i < nel; ++i, x[0] += nn, x[1] += nn, x[2] += nn )
1008 {
1009 obbox_calc_3( data, tol, x[0], x[1], x[2], &p->obb[i] );
1010 obbox_merge_3( &p->bnd[0], (const realType*)&p->obb[i].axis_bnd[0] );
1011 }
1012 obbox_free_3( data );
1013 free( z[0] );
1014 }
1015
1016 static void hash_build_2( hash_data_2* p,
1017 const realType* const x[2],
1018 const unsigned n[2],
1019 uint nel,
1020 uint max_hash_size,
1021 realType tol )
1022 {
1023 uint i, el, size, hn2, sum;
1024 unsigned hn;
1025 unsigned* count;
1026 p->obb = tmalloc( obbox_2, nel );
1027 hash_getbb_2( p, x, n, nel, tol );
1028 size = hash_opt_size_2( p, nel, max_hash_size );
1029 p->offset = tmalloc( uint, size );
1030 hn = p->hash_n;
1031 hn2 = (uint)hn * hn;
1032 count = tcalloc( unsigned, hn2 );
1033 for( el = 0; el < nel; ++el )
1034 {
1035 unsigned ia, ib, j, ja, jb;
1036 hash_range_2( p, el, 0, &ia, &ib );
1037 hash_range_2( p, el, 1, &ja, &jb );
1038 for( j = ja; j < jb; ++j )
1039 for( i = ia; i < ib; ++i )
1040 ++count[(uint)j * hn + i];
1041 }
1042 sum = hn2 + 1, p->max = count[0];
1043 p->offset[0] = sum;
1044 for( i = 0; i < hn2; ++i )
1045 {
1046 if( count[i] > p->max ) p->max = count[i];
1047 sum += count[i];
1048 p->offset[i + 1] = sum;
1049 }
1050 for( el = 0; el < nel; ++el )
1051 {
1052 unsigned ia, ib, j, ja, jb;
1053 hash_range_2( p, el, 0, &ia, &ib );
1054 hash_range_2( p, el, 1, &ja, &jb );
1055 for( j = ja; j < jb; ++j )
1056 for( i = ia; i < ib; ++i )
1057 {
1058 uint arrindex = (uint)j * hn + i;
1059 p->offset[p->offset[arrindex + 1] - count[arrindex]] = el;
1060 --count[arrindex];
1061 }
1062 }
1063 free( count );
1064 }
1065
1066 static void hash_build_3( hash_data_3* p,
1067 const realType* const x[3],
1068 const unsigned n[3],
1069 uint nel,
1070 uint max_hash_size,
1071 realType tol )
1072 {
1073 uint i, el, size, hn3, sum;
1074 unsigned hn;
1075 unsigned* count;
1076 p->obb = tmalloc( obbox_3, nel );
1077 hash_getbb_3( p, x, n, nel, tol );
1078 size = hash_opt_size_3( p, nel, max_hash_size );
1079 p->offset = tmalloc( uint, size );
1080 hn = p->hash_n;
1081 hn3 = (uint)hn * hn * hn;
1082 count = tcalloc( unsigned, hn3 );
1083 for( el = 0; el < nel; ++el )
1084 {
1085 unsigned ia, ib, j, ja, jb, k, ka, kb;
1086 hash_range_3( p, el, 0, &ia, &ib );
1087 hash_range_3( p, el, 1, &ja, &jb );
1088 hash_range_3( p, el, 2, &ka, &kb );
1089 for( k = ka; k < kb; ++k )
1090 for( j = ja; j < jb; ++j )
1091 for( i = ia; i < ib; ++i )
1092 ++count[( (uint)k * hn + j ) * hn + i];
1093 }
1094 sum = hn3 + 1, p->max = count[0];
1095 p->offset[0] = sum;
1096 for( i = 0; i < hn3; ++i )
1097 {
1098 if( count[i] > p->max ) p->max = count[i];
1099 sum += count[i];
1100 p->offset[i + 1] = sum;
1101 }
1102 for( el = 0; el < nel; ++el )
1103 {
1104 unsigned ia, ib, j, ja, jb, k, ka, kb;
1105 hash_range_3( p, el, 0, &ia, &ib );
1106 hash_range_3( p, el, 1, &ja, &jb );
1107 hash_range_3( p, el, 2, &ka, &kb );
1108 for( k = ka; k < kb; ++k )
1109 for( j = ja; j < jb; ++j )
1110 for( i = ia; i < ib; ++i )
1111 {
1112 uint arrindex = ( (uint)k * hn + j ) * hn + i;
1113 p->offset[p->offset[arrindex + 1] - count[arrindex]] = el;
1114 --count[arrindex];
1115 }
1116 }
1117 free( count );
1118 }
1119
1120 static void hash_free_2( hash_data_2* p )
1121 {
1122 free( p->obb );
1123 free( p->offset );
1124 }
1125
1126 static void hash_free_3( hash_data_3* p )
1127 {
1128 free( p->obb );
1129 free( p->offset );
1130 }
1131
1132
1194
1195
1196 static const char opt_constr_num_2[9] = { 2, 1, 2, 1, 0, 1, 2, 1, 2 };
1197 static const char opt_constr_num_3[27] = { 3, 2, 3, 2, 1, 2, 3, 2, 3, 2, 1, 2, 1, 0,
1198 1, 2, 1, 2, 3, 2, 3, 2, 1, 2, 3, 2, 3 };
1199
1200
1201
1202 static const char opt_constr_dir_3[27] = { -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, 1, -1, 0, -1,
1203 0, -1, 1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1 };
1204
1205
1206 static const char opt_constr_not[27] = { -1, 0, -1, 1, -1, 1, -1, 0, -1, 2, -1, 2, -1, -1,
1207 -1, 2, -1, 2, -1, 0, -1, 1, -1, 1, -1, 0, -1 };
1208
1209 static const char opt_constr_wide[27] = { 0x00, 0x01, 0x02, 0x04, 0x05, 0x06, 0x08, 0x09, 0x0a,
1210 0x10, 0x11, 0x12, 0x14, 0x15, 0x16, 0x18, 0x19, 0x1a,
1211 0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a };
1212
1213 static const unsigned opt_other1_3[3] = { 1, 0, 0 }, opt_other2_3[3] = { 2, 2, 1 };
1214
1215 static unsigned opt_constr( unsigned constraints, unsigned d )
1216 {
1217 return ( opt_constr_wide[constraints] >> ( d * 2 ) ) & 3;
1218 }
1219
1220 static void opt_constr_unpack_2( unsigned constraints, unsigned* c )
1221 {
1222 const char cw = opt_constr_wide[constraints];
1223 c[0] = cw & 3;
1224 c[1] = cw >> 2;
1225 }
1226
1227 static void opt_constr_unpack_3( unsigned constraints, unsigned* c )
1228 {
1229 const char cw = opt_constr_wide[constraints];
1230 c[0] = cw & 3;
1231 c[1] = ( cw >> 2 ) & 3;
1232 c[2] = cw >> 4;
1233 }
1234
1235 static unsigned opt_constr_pack_2( const unsigned* c )
1236 {
1237 return c[1] * 3 + c[0];
1238 }
1239
1240 static unsigned opt_constr_pack_3( const unsigned* c )
1241 {
1242 return ( c[2] * 3 + c[1] ) * 3 + c[0];
1243 }
1244
1245
1250
1251 void opt_alloc_3( opt_data_3* p, lagrange_data* ld )
1252 {
1253 const unsigned nr = ld[0].n, ns = ld[1].n, nt = ld[2].n, nf = umax_3( nr * ns, nr * nt, ns * nt ),
1254 ne = umax_3( nr, ns, nt ), nw = 2 * ns * nt + 3 * ns;
1255 p->size[0] = 1;
1256 p->size[1] = nr;
1257 p->size[2] = nr * ns;
1258 p->size[3] = p->size[2] * nt;
1259 p->ld = ld;
1260 p->work = tmalloc( realType, 6 * nf + 9 * ne + nw );
1261 p->fd.x[0] = p->work + nw;
1262 p->fd.x[1] = p->fd.x[0] + nf;
1263 p->fd.x[2] = p->fd.x[1] + nf;
1264 p->fd.fdn[0] = p->fd.x[2] + nf;
1265 p->fd.fdn[1] = p->fd.fdn[0] + nf;
1266 p->fd.fdn[2] = p->fd.fdn[1] + nf;
1267 p->ed.x[0] = p->fd.fdn[2] + nf;
1268 p->ed.x[1] = p->ed.x[0] + ne;
1269 p->ed.x[2] = p->ed.x[1] + ne;
1270 p->ed.fd1[0] = p->ed.x[2] + ne;
1271 p->ed.fd1[1] = p->ed.fd1[0] + ne;
1272 p->ed.fd1[2] = p->ed.fd1[1] + ne;
1273 p->ed.fd2[0] = p->ed.fd1[2] + ne;
1274 p->ed.fd2[1] = p->ed.fd2[0] + ne;
1275 p->ed.fd2[2] = p->ed.fd2[1] + ne;
1276 }
1277
1278 void opt_free_3( opt_data_3* p )
1279 {
1280 free( p->work );
1281 }
1282
1283 static void opt_vol_set_3( opt_data_3* p, const realType r[3] )
1284 {
1285 lagrange_1( &p->ld[0], r[0] );
1286 lagrange_1( &p->ld[1], r[1] );
1287 lagrange_1( &p->ld[2], r[2] );
1288 }
1289
1290
1291 static void opt_vol_intp_3( opt_data_3* p )
1292 {
1293 unsigned d;
1294 const lagrange_data* ld = p->ld;
1295
1296 for( d = 0; d < 3; ++d )
1297 p->x[d] = tensor_ig3( ld[0].J, ld[0].D, ld[0].n, ld[1].J, ld[1].D, ld[1].n, ld[2].J, ld[2].D, ld[2].n,
1298 p->elx[d], &p->jac[d * 3], p->work );
1299 }
1300
1301 void opt_vol_set_intp_3( opt_data_3* p, const realType r[3] )
1302 {
1303 opt_vol_set_3( p, r );
1304 opt_vol_intp_3( p );
1305 }
1306
1307 static void opt_face_proj_3( opt_data_3* p )
1308 {
1309 unsigned d, off = 0;
1310 const unsigned dn = p->fd.dn, d1 = p->fd.d1, d2 = p->fd.d2, so = p->size[d2] - p->size[d1 + 1], s1 = p->size[d1],
1311 sn = p->size[dn], n1 = p->ld[d1].n, n2 = p->ld[d2].n, nn = p->ld[dn].n;
1312 const realType* D = p->ld[dn].D_z0;
1313 if( opt_constr( p->fd.constraints, dn ) == 2 ) off = p->size[dn + 1] - p->size[dn], D = p->ld[dn].D_zn;
1314 for( d = 0; d < 3; ++d )
1315 {
1316 unsigned i, j, k, arrindex = 0;
1317 const realType* in = p->elx[d] + off;
1318 for( j = n2; j; --j, in += so )
1319 for( i = n1; i; --i, ++arrindex, in += s1 )
1320 {
1321 const realType* ind = in - off;
1322 realType* fdn = &p->fd.fdn[d][arrindex];
1323 p->fd.x[d][arrindex] = *in;
1324 *fdn = 0;
1325 for( k = 0; k < nn; ++k, ind += sn )
1326 *fdn += *ind * D[k];
1327 }
1328 }
1329 }
1330
1331 static void opt_face_set_3( opt_data_3* p, const realType r[3], unsigned constr )
1332 {
1333 if( p->fd.constraints != constr )
1334 {
1335 p->fd.constraints = constr;
1336 p->fd.dn = opt_constr_dir_3[constr];
1337 p->fd.d1 = opt_other1_3[p->fd.dn];
1338 p->fd.d2 = opt_other2_3[p->fd.dn];
1339 opt_face_proj_3( p );
1340 }
1341 lagrange_1( &p->ld[p->fd.d1], r[p->fd.d1] );
1342 lagrange_1( &p->ld[p->fd.d2], r[p->fd.d2] );
1343 }
1344
1345
1346 static void opt_face_intp_3( opt_data_3* p )
1347 {
1348 unsigned d;
1349 const unsigned dn = p->fd.dn, d1 = p->fd.d1, d2 = p->fd.d2, n1 = p->ld[d1].n, n2 = p->ld[d2].n;
1350 const realType *J1 = p->ld[d1].J, *J2 = p->ld[d2].J, *D1 = p->ld[d1].D, *D2 = p->ld[d2].D;
1351
1352 for( d = 0; d < 3; ++d )
1353 {
1354 realType g[2];
1355 p->x[d] = tensor_ig2( J1, D1, n1, J2, D2, n2, p->fd.x[d], &g[0], p->work );
1356 p->jac[d * 3 + d1] = g[0];
1357 p->jac[d * 3 + d2] = g[1];
1358 p->jac[d * 3 + dn] = tensor_i2( J1, n1, J2, n2, p->fd.fdn[d], p->work );
1359 }
1360 }
1361
1362 static void opt_face_set_intp_3( opt_data_3* p, const realType r[3], unsigned constr )
1363 {
1364 opt_face_set_3( p, r, constr );
1365 opt_face_intp_3( p );
1366 }
1367
1368 static void opt_face_hess_3( opt_data_3* p, realType hess[9] )
1369 {
1370 unsigned d;
1371 const unsigned d1 = p->fd.d1, d2 = p->fd.d2, n1 = p->ld[d1].n, n2 = p->ld[d2].n;
1372 const realType *J1 = p->ld[d1].J, *J2 = p->ld[d2].J, *D1 = p->ld[d1].D, *D2 = p->ld[d2].D, *S1 = p->ld[d1].D2,
1373 *S2 = p->ld[d2].D2;
1374
1375 lagrange_2u( &p->ld[d1] );
1376 lagrange_2u( &p->ld[d2] );
1377
1378 for( d = 0; d < 3; ++d )
1379 {
1380 (void)tensor_ig2( J1, S1, n1, J2, S2, n2, p->fd.x[d], hess + d * 3, p->work );
1381 hess[d * 3 + 0] = tensor_i2( S1, n1, J2, n2, p->fd.x[d], p->work );
1382 hess[d * 3 + 1] = tensor_i2( J1, n1, S2, n2, p->fd.x[d], p->work );
1383 hess[d * 3 + 2] = tensor_i2( D1, n1, D2, n2, p->fd.x[d], p->work );
1384 }
1385 }
1386
1387 static void opt_edge_proj_3( opt_data_3* p )
1388 {
1389 unsigned d, off, off1 = 0, off2 = 0;
1390 const unsigned de = p->ed.de, d1 = p->ed.d1, d2 = p->ed.d2, se = p->size[de], s1 = p->size[d1], s2 = p->size[d2],
1391 ne = p->ld[de].n, n1 = p->ld[d1].n, n2 = p->ld[d2].n;
1392 const realType *fD1, *fD2;
1393 if( opt_constr( p->ed.constraints, d1 ) == 0 )
1394 fD1 = p->ld[d1].D_z0;
1395 else
1396 fD1 = p->ld[d1].D_zn, off1 = p->size[d1 + 1] - p->size[d1];
1397 if( opt_constr( p->ed.constraints, d2 ) == 0 )
1398 fD2 = p->ld[d2].D_z0;
1399 else
1400 fD2 = p->ld[d2].D_zn, off2 = p->size[d2 + 1] - p->size[d2];
1401 off = off1 + off2;
1402 for( d = 0; d < 3; ++d )
1403 {
1404 unsigned i, j;
1405 const realType* in = p->elx[d] + off;
1406 for( i = 0; i < ne; ++i, in += se )
1407 {
1408 const realType *in1 = in - off1, *in2 = in - off2;
1409 realType *fd1 = &p->ed.fd1[d][i], *fd2 = &p->ed.fd2[d][i];
1410 p->ed.x[d][i] = *in;
1411 *fd1 = *fd2 = 0;
1412 for( j = 0; j < n1; ++j, in1 += s1 )
1413 *fd1 += *in1 * fD1[j];
1414 for( j = 0; j < n2; ++j, in2 += s2 )
1415 *fd2 += *in2 * fD2[j];
1416 }
1417 }
1418 }
1419
1420 static void opt_edge_set_3( opt_data_3* p, const realType r[3], unsigned constr )
1421 {
1422 if( p->ed.constraints != constr )
1423 {
1424 p->ed.constraints = constr;
1425 p->ed.de = opt_constr_not[constr];
1426 p->ed.d1 = opt_other1_3[p->ed.de];
1427 p->ed.d2 = opt_other2_3[p->ed.de];
1428 opt_edge_proj_3( p );
1429 }
1430 lagrange_1( &p->ld[p->ed.de], r[p->ed.de] );
1431 }
1432
1433 static void opt_edge_intp_3( opt_data_3* p )
1434 {
1435 unsigned d;
1436 const unsigned de = p->ed.de, d1 = p->ed.d1, d2 = p->ed.d2, n = p->ld[de].n;
1437 const realType *J = p->ld[de].J, *D = p->ld[de].D;
1438
1439 for( d = 0; d < 3; ++d )
1440 {
1441 p->x[d] = tensor_ig1( J, D, n, p->ed.x[d], &p->jac[d * 3 + de] );
1442 p->jac[d * 3 + d1] = tensor_i1( J, n, p->ed.fd1[d] );
1443 p->jac[d * 3 + d2] = tensor_i1( J, n, p->ed.fd2[d] );
1444 }
1445 }
1446
1447 static void opt_edge_set_intp_3( opt_data_3* p, const realType r[3], unsigned constr )
1448 {
1449 opt_edge_set_3( p, r, constr );
1450 opt_edge_intp_3( p );
1451 }
1452
1453 static void opt_edge_hess_3( opt_data_3* p, realType hess[3] )
1454 {
1455 unsigned d;
1456 const unsigned de = p->ed.de, n = p->ld[de].n;
1457 const realType* D2 = p->ld[de].D2;
1458 lagrange_2u( &p->ld[de] );
1459 for( d = 0; d < 3; ++d )
1460 hess[d] = tensor_i1( D2, n, p->ed.x[d] );
1461 }
1462
1463 static void opt_point_proj_3( opt_data_3* p )
1464 {
1465 unsigned off[3], offt, d, c[3];
1466 const realType* fD[3];
1467 opt_constr_unpack_3( p->pd.constraints, c );
1468 for( d = 0; d < 3; ++d )
1469 if( c[d] == 0 )
1470 fD[d] = p->ld[d].D_z0, off[d] = 0;
1471 else
1472 fD[d] = p->ld[d].D_zn, off[d] = p->size[d + 1] - p->size[d];
1473 offt = off[0] + off[1] + off[2];
1474 for( d = 0; d < 9; ++d )
1475 p->pd.jac[d] = 0;
1476 for( d = 0; d < 3; ++d )
1477 {
1478 unsigned i, j;
1479 p->pd.x[d] = p->elx[d][offt];
1480 for( i = 0; i < 3; ++i )
1481 {
1482 const realType* in = p->elx[d] + offt - off[i];
1483 for( j = 0; j < p->ld[i].n; ++j, in += p->size[i] )
1484 p->pd.jac[d * 3 + i] += *in * fD[i][j];
1485 }
1486 }
1487 }
1488
1489 static void opt_point_set_3( opt_data_3* p, unsigned constr )
1490 {
1491 if( p->pd.constraints != constr )
1492 {
1493 p->pd.constraints = constr;
1494 opt_point_proj_3( p );
1495 }
1496 }
1497
1498 static void opt_point_intp_3( opt_data_3* p )
1499 {
1500 memcpy( p->x, p->pd.x, 3 * sizeof( realType ) );
1501 memcpy( p->jac, p->pd.jac, 9 * sizeof( realType ) );
1502 }
1503
1504 static void opt_point_set_intp_3( opt_data_3* p, unsigned constr )
1505 {
1506 opt_point_set_3( p, constr );
1507 opt_point_intp_3( p );
1508 }
1509
1510 #define DIAGNOSTICS 0
1511
1512 double opt_findpt_3( opt_data_3* p,
1513 const realType* const elx[3],
1514 const realType xstar[3],
1515 realType r[3],
1516 unsigned* constr )
1517 {
1518 realType dr[3], resid[3], steep[3];
1519
1520 unsigned c = *constr, ac, d, cc[3], step = 0;
1521
1522 p->elx[0] = elx[0], p->elx[1] = elx[1], p->elx[2] = elx[2];
1523
1524 p->fd.constraints = opt_no_constraints_3;
1525 p->ed.constraints = opt_no_constraints_3;
1526 p->pd.constraints = opt_no_constraints_3;
1527
1528 #if DIAGNOSTICS
1529 printf( "opt_findpt: xstar = %g, %g, %g\n", xstar[0], xstar[1], xstar[2] );
1530 #endif
1531
1532 do
1533 {
1534 ++step;
1535 if( step == 50 )
1536 return 1.e+30;
1537 #if DIAGNOSTICS
1538 printf( " iteration %u\n", step );
1539 printf( " %d constraint(s) active\n", (int)opt_constr_num_3[c] );
1540 #endif
1541
1543 switch( opt_constr_num_3[c] )
1544 {
1545 case 0:
1546 opt_vol_set_intp_3( p, r );
1547 break;
1548 case 1:
1549 opt_face_set_intp_3( p, r, c );
1550 break;
1551 case 2:
1552 opt_edge_set_intp_3( p, r, c );
1553 break;
1554 case 3:
1555 opt_point_set_intp_3( p, c );
1556 break;
1557 }
1558 #if DIAGNOSTICS
1559 printf( " r = %g, %g, %g\n", r[0], r[1], r[2] );
1560 printf( " x = %g, %g, %g\n", p->x[0], p->x[1], p->x[2] );
1561 #endif
1562
1563 for( d = 0; d < 3; ++d )
1564 resid[d] = xstar[d] - p->x[d];
1565 #if DIAGNOSTICS
1566 printf( " resid = %g, %g, %g\n", resid[0], resid[1], resid[2] );
1567 printf( " 2-norm = %g\n", r2norm_3( resid[0], resid[1], resid[2] ) );
1568 #endif
1569
1570 ac = c;
1571 if( opt_constr_num_3[c] )
1572 {
1573 opt_constr_unpack_3( c, cc );
1574 mat_app_3c( steep, p->jac, resid );
1575 #if DIAGNOSTICS
1576 printf( " steepest descent = %g, %g, %g\n", steep[0], steep[1], steep[2] );
1577 #endif
1578 for( d = 0; d < 3; ++d )
1579 if( ( cc[d] == 0 && steep[d] > 0 ) || ( cc[d] == 2 && steep[d] < 0 ) ) cc[d] = 1;
1580 ac = opt_constr_pack_3( cc );
1581 }
1582
1583 if( ac != c )
1584 {
1585 c = ac;
1586 #if DIAGNOSTICS
1587 printf( " relaxed to %d constraints\n", (int)opt_constr_num_3[c] );
1588 #endif
1589 switch( opt_constr_num_3[c] )
1590 {
1591 case 1:
1592 opt_face_set_3( p, r, c );
1593 break;
1594 case 2:
1595 opt_edge_set_3( p, r, c );
1596 break;
1597 case 3:
1598 opt_point_set_3( p, c );
1599 break;
1600 }
1601 }
1602
1603 switch( opt_constr_num_3[c] )
1604 {
1605 case 0:
1606 tinyla_solve_3( dr, p->jac, resid );
1607 break;
1608 case 1: {
1609 const unsigned dn = p->fd.dn, d1 = p->fd.d1, d2 = p->fd.d2;
1610 realType A[4], H[9];
1611 const realType* J = p->jac;
1612 opt_face_hess_3( p, H );
1613 A[0] = J[d1] * J[d1] + J[3 + d1] * J[3 + d1] + J[6 + d1] * J[6 + d1];
1614 A[1] = J[d2] * J[d2] + J[3 + d2] * J[3 + d2] + J[6 + d2] * J[6 + d2];
1615 A[2] = J[d1] * J[d2] + J[3 + d1] * J[3 + d2] + J[6 + d1] * J[6 + d2];
1616 A[0] -= resid[0] * H[0] + resid[1] * H[3] + resid[2] * H[6];
1617 A[1] -= resid[0] * H[1] + resid[1] * H[4] + resid[2] * H[7];
1618 A[2] -= resid[0] * H[2] + resid[1] * H[5] + resid[2] * H[8];
1619 tinyla_solve_sym_2( &dr[d1], &dr[d2], A, steep[d1], steep[d2] );
1620 dr[dn] = 0;
1621 }
1622 break;
1623 case 2: {
1624 const unsigned de = p->ed.de, d1 = p->ed.d1, d2 = p->ed.d2;
1625 realType fac, H[3];
1626 const realType* J = p->jac + de;
1627 opt_edge_hess_3( p, H );
1628 fac = J[0] * J[0] + J[3] * J[3] + J[6] * J[6] - ( resid[0] * H[0] + resid[1] * H[1] + resid[2] * H[2] );
1629 dr[de] = steep[de] / fac;
1630 dr[d1] = 0, dr[d2] = 0;
1631 }
1632 break;
1633 case 3:
1634 dr[0] = dr[1] = dr[2] = 0;
1635 break;
1636 }
1637 #if DIAGNOSTICS
1638 printf( " dr = %g, %g, %g\n", dr[0], dr[1], dr[2] );
1639 #endif
1640
1641 opt_constr_unpack_3( c, cc );
1642 for( d = 0; d < 3; ++d )
1643 {
1644 if( cc[d] != 1 ) continue;
1645 r[d] += dr[d];
1646 if( r[d] <= -1 )
1647 dr[d] -= r[d] + 1, r[d] = -1, cc[d] = 0;
1648 else if( r[d] >= 1 )
1649 dr[d] -= r[d] - 1, r[d] = 1, cc[d] = 2;
1650 }
1651 c = opt_constr_pack_3( cc );
1652 } while( r1norm_3( dr[0], dr[1], dr[2] ) > 3 * MOAB_POLY_EPS );
1653 *constr = c;
1654 #if 0
1655 printf("opt_findpt_3 converged in %u iterations\n", step);
1656 #endif
1657 return r2norm_3( resid[0], resid[1], resid[2] );
1658 }
1659
1660 #undef DIAGNOSTICS
1661
1662 void opt_alloc_2( opt_data_2* p, lagrange_data* ld )
1663 {
1664 const unsigned nr = ld[0].n, ns = ld[1].n, ne = umax_2( nr, ns ), nw = 2 * ns;
1665 p->size[0] = 1;
1666 p->size[1] = nr;
1667 p->size[2] = nr * ns;
1668 p->ld = ld;
1669 p->work = tmalloc( realType, 4 * ne + nw );
1670 p->ed.x[0] = p->work + nw;
1671 p->ed.x[1] = p->ed.x[0] + ne;
1672 p->ed.fd1[0] = p->ed.x[1] + ne;
1673 p->ed.fd1[1] = p->ed.fd1[0] + ne;
1674 }
1675
1676 void opt_free_2( opt_data_2* p )
1677 {
1678 free( p->work );
1679 }
1680
1681 static void opt_area_set_2( opt_data_2* p, const realType r[2] )
1682 {
1683 lagrange_1( &p->ld[0], r[0] );
1684 lagrange_1( &p->ld[1], r[1] );
1685 }
1686
1687
1688 static void opt_area_intp_2( opt_data_2* p )
1689 {
1690 unsigned d;
1691 const lagrange_data* ld = p->ld;
1692
1693 for( d = 0; d < 2; ++d )
1694 p->x[d] =
1695 tensor_ig2( ld[0].J, ld[0].D, ld[0].n, ld[1].J, ld[1].D, ld[1].n, p->elx[d], &p->jac[d * 2], p->work );
1696 }
1697
1698 static void opt_area_set_intp_2( opt_data_2* p, const realType r[2] )
1699 {
1700 opt_area_set_2( p, r );
1701 opt_area_intp_2( p );
1702 }
1703
1704 static void opt_edge_proj_2( opt_data_2* p )
1705 {
1706 unsigned d, off = 0;
1707 const unsigned de = p->ed.de, d1 = p->ed.d1, se = p->size[de], s1 = p->size[d1], ne = p->ld[de].n, n1 = p->ld[d1].n;
1708 const realType* fD1;
1709 if( opt_constr( p->ed.constraints, d1 ) == 0 )
1710 fD1 = p->ld[d1].D_z0;
1711 else
1712 fD1 = p->ld[d1].D_zn, off = p->size[d1 + 1] - p->size[d1];
1713 for( d = 0; d < 2; ++d )
1714 {
1715 unsigned i, j;
1716 const realType* in = p->elx[d] + off;
1717 for( i = 0; i < ne; ++i, in += se )
1718 {
1719 const realType* in1 = in - off;
1720 realType* fd1 = &p->ed.fd1[d][i];
1721 p->ed.x[d][i] = *in;
1722 *fd1 = 0;
1723 for( j = 0; j < n1; ++j, in1 += s1 )
1724 *fd1 += *in1 * fD1[j];
1725 }
1726 }
1727 }
1728
1729 static void opt_edge_set_2( opt_data_2* p, const realType r[2], unsigned constr )
1730 {
1731 if( p->ed.constraints != constr )
1732 {
1733 p->ed.constraints = constr;
1734 p->ed.de = opt_constr_not[constr];
1735 p->ed.d1 = 1 - p->ed.de;
1736 opt_edge_proj_2( p );
1737 }
1738 lagrange_1( &p->ld[p->ed.de], r[p->ed.de] );
1739 }
1740
1741 static void opt_edge_intp_2( opt_data_2* p )
1742 {
1743 unsigned d;
1744 const unsigned de = p->ed.de, d1 = p->ed.d1, n = p->ld[de].n;
1745 const realType *J = p->ld[de].J, *D = p->ld[de].D;
1746 for( d = 0; d < 2; ++d )
1747 {
1748 p->x[d] = tensor_ig1( J, D, n, p->ed.x[d], &p->jac[d * 2 + de] );
1749 p->jac[d * 2 + d1] = tensor_i1( J, n, p->ed.fd1[d] );
1750 }
1751 }
1752
1753 static void opt_edge_set_intp_2( opt_data_2* p, const realType r[2], unsigned constr )
1754 {
1755 opt_edge_set_2( p, r, constr );
1756 opt_edge_intp_2( p );
1757 }
1758
1759 static void opt_edge_hess_2( opt_data_2* p, realType hess[2] )
1760 {
1761 unsigned d;
1762 const unsigned de = p->ed.de, n = p->ld[de].n;
1763 const realType* D2 = p->ld[de].D2;
1764 lagrange_2u( &p->ld[de] );
1765 for( d = 0; d < 2; ++d )
1766 hess[d] = tensor_i1( D2, n, p->ed.x[d] );
1767 }
1768
1769 static void opt_point_proj_2( opt_data_2* p )
1770 {
1771 unsigned off[2], offt, d, c[2];
1772 const realType* fD[2];
1773 opt_constr_unpack_2( p->pd.constraints, c );
1774 for( d = 0; d < 2; ++d )
1775 if( c[d] == 0 )
1776 fD[d] = p->ld[d].D_z0, off[d] = 0;
1777 else
1778 fD[d] = p->ld[d].D_zn, off[d] = p->size[d + 1] - p->size[d];
1779 offt = off[0] + off[1];
1780 for( d = 0; d < 4; ++d )
1781 p->pd.jac[d] = 0;
1782 for( d = 0; d < 2; ++d )
1783 {
1784 unsigned i, j;
1785 p->pd.x[d] = p->elx[d][offt];
1786 for( i = 0; i < 2; ++i )
1787 {
1788 const realType* in = p->elx[d] + offt - off[i];
1789 for( j = 0; j < p->ld[i].n; ++j, in += p->size[i] )
1790 p->pd.jac[d * 2 + i] += *in * fD[i][j];
1791 }
1792 }
1793 }
1794
1795 static void opt_point_set_2( opt_data_2* p, unsigned constr )
1796 {
1797 if( p->pd.constraints != constr )
1798 {
1799 p->pd.constraints = constr;
1800 opt_point_proj_2( p );
1801 }
1802 }
1803
1804 static void opt_point_intp_2( opt_data_2* p )
1805 {
1806 memcpy( p->x, p->pd.x, 2 * sizeof( realType ) );
1807 memcpy( p->jac, p->pd.jac, 4 * sizeof( realType ) );
1808 }
1809
1810 static void opt_point_set_intp_2( opt_data_2* p, unsigned constr )
1811 {
1812 opt_point_set_2( p, constr );
1813 opt_point_intp_2( p );
1814 }
1815
1816 #define DIAGNOSTICS 0
1817
1818 double opt_findpt_2( opt_data_2* p,
1819 const realType* const elx[2],
1820 const realType xstar[2],
1821 realType r[2],
1822 unsigned* constr )
1823 {
1824 realType dr[2], resid[2], steep[2];
1825
1826 unsigned c = *constr, ac, d, cc[2], step = 0;
1827
1828 p->elx[0] = elx[0], p->elx[1] = elx[1];
1829
1830 p->ed.constraints = opt_no_constraints_2;
1831 p->pd.constraints = opt_no_constraints_2;
1832
1833 #if DIAGNOSTICS
1834 printf( "opt_findpt: xstar = %g, %g\n", xstar[0], xstar[1] );
1835 #endif
1836
1837 do
1838 {
1839 ++step;
1840 if( step == 50 )
1841 return 1.e+30;
1842 #if DIAGNOSTICS
1843 printf( " iteration %u\n", step );
1844 printf( " %d constraint(s) active\n", (int)opt_constr_num_2[c] );
1845 #endif
1846
1848 switch( opt_constr_num_2[c] )
1849 {
1850 case 0:
1851 opt_area_set_intp_2( p, r );
1852 break;
1853 case 1:
1854 opt_edge_set_intp_2( p, r, c );
1855 break;
1856 case 2:
1857 opt_point_set_intp_2( p, c );
1858 break;
1859 }
1860 #if DIAGNOSTICS
1861 printf( " r = %g, %g\n", r[0], r[1] );
1862 printf( " x = %g, %g\n", p->x[0], p->x[1] );
1863 #endif
1864
1865 for( d = 0; d < 2; ++d )
1866 resid[d] = xstar[d] - p->x[d];
1867 #if DIAGNOSTICS
1868 printf( " resid = %g, %g\n", resid[0], resid[1] );
1869 printf( " 2-norm = %g\n", r2norm_2( resid[0], resid[1] ) );
1870 #endif
1871
1872 ac = c;
1873 if( opt_constr_num_2[c] )
1874 {
1875 opt_constr_unpack_2( c, cc );
1876 mat_app_2c( steep, p->jac, resid );
1877 #if DIAGNOSTICS
1878 printf( " steepest descent = %g, %g\n", steep[0], steep[1] );
1879 #endif
1880 for( d = 0; d < 2; ++d )
1881 if( ( cc[d] == 0 && steep[d] > 0 ) || ( cc[d] == 2 && steep[d] < 0 ) ) cc[d] = 1;
1882 ac = opt_constr_pack_2( cc );
1883 }
1884
1885 if( ac != c )
1886 {
1887 c = ac;
1888 #if DIAGNOSTICS
1889 printf( " relaxed to %d constraints\n", (int)opt_constr_num_2[c] );
1890 #endif
1891 switch( opt_constr_num_2[c] )
1892 {
1893 case 1:
1894 opt_edge_set_2( p, r, c );
1895 break;
1896 case 2:
1897 opt_point_set_2( p, c );
1898 break;
1899 }
1900 }
1901
1902 switch( opt_constr_num_2[c] )
1903 {
1904 case 0:
1905 tinyla_solve_2( dr, p->jac, resid );
1906 break;
1907 case 1: {
1908 const unsigned de = p->ed.de, d1 = p->ed.d1;
1909 realType fac, H[2];
1910 const realType* J = p->jac + de;
1911 opt_edge_hess_2( p, H );
1912 fac = J[0] * J[0] + J[2] * J[2] - ( resid[0] * H[0] + resid[1] * H[1] );
1913 dr[de] = steep[de] / fac;
1914 dr[d1] = 0;
1915 }
1916 break;
1917 case 2:
1918 dr[0] = dr[1] = 0;
1919 break;
1920 }
1921 #if DIAGNOSTICS
1922 printf( " dr = %g, %g\n", dr[0], dr[1] );
1923 #endif
1924
1925 opt_constr_unpack_2( c, cc );
1926 for( d = 0; d < 2; ++d )
1927 {
1928 if( cc[d] != 1 ) continue;
1929 r[d] += dr[d];
1930 if( r[d] <= -1 )
1931 dr[d] -= r[d] + 1, r[d] = -1, cc[d] = 0;
1932 else if( r[d] >= 1 )
1933 dr[d] -= r[d] - 1, r[d] = 1, cc[d] = 2;
1934 }
1935 c = opt_constr_pack_2( cc );
1936 } while( r1norm_2( dr[0], dr[1] ) > 2 * MOAB_POLY_EPS );
1937 *constr = c;
1938 return r2norm_2( resid[0], resid[1] );
1939 }
1940
1941 #undef DIAGNOSTICS
1942
1943
1972
1973
1975 static void findpt_list_sort( findpt_listel** A, unsigned n )
1976 {
1977 unsigned i;
1978 --A;
1979
1980 for( i = 2; i <= n; ++i )
1981 {
1982 findpt_listel* item = A[i];
1983 unsigned hole = i, parent = hole >> 1;
1984 if( A[parent]->dist >= item->dist ) continue;
1985 do
1986 {
1987 A[hole] = A[parent];
1988 hole = parent;
1989 parent >>= 1;
1990 } while( parent && A[parent]->dist < item->dist );
1991 A[hole] = item;
1992 }
1993
1994 for( i = n - 1; i; --i )
1995 {
1996 findpt_listel* item = A[i + 1];
1997 unsigned hole = 1;
1998 A[i + 1] = A[1];
1999 for( ;; )
2000 {
2001 unsigned ch = hole << 1, r = ch + 1;
2002 if( r <= i && A[ch]->dist < A[r]->dist ) ch = r;
2003 if( ch > i || item->dist >= A[ch]->dist ) break;
2004 A[hole] = A[ch];
2005 hole = ch;
2006 }
2007 A[hole] = item;
2008 }
2009 }
2010
2011 findpt_data_2* findpt_setup_2( const realType* const xw[2],
2012 const unsigned n[2],
2013 uint nel,
2014 uint max_hash_size,
2015 realType bbox_tol )
2016 {
2017 unsigned d;
2018 findpt_data_2* p = tmalloc( findpt_data_2, 1 );
2019
2020 p->hash = tmalloc( hash_data_2, 1 );
2021 p->od = tmalloc( opt_data_2, 1 );
2022
2023 for( d = 0; d < 2; ++d )
2024 p->xw[d] = xw[d];
2025 p->nptel = n[0] * n[1];
2026
2027 hash_build_2( p->hash, xw, n, nel, max_hash_size, bbox_tol );
2028
2029 for( d = 0; d < 2; ++d )
2030 {
2031 p->z[d] = tmalloc( realType, n[d] );
2032 lobatto_nodes( p->z[d], n[d] );
2033 lagrange_setup( &p->ld[d], p->z[d], n[d] );
2034 }
2035
2036 p->list = tmalloc( findpt_listel, p->hash->max );
2037 p->sorted = tmalloc( findpt_listel*, p->hash->max );
2038
2039 opt_alloc_2( p->od, p->ld );
2040 p->od_work = p->od->work;
2041
2042 return p;
2043 }
2044
2045 findpt_data_3* findpt_setup_3( const realType* const xw[3],
2046 const unsigned n[3],
2047 uint nel,
2048 uint max_hash_size,
2049 realType bbox_tol )
2050 {
2051 unsigned d;
2052 findpt_data_3* p = tmalloc( findpt_data_3, 1 );
2053
2054 p->hash = tmalloc( hash_data_3, 1 );
2055 p->od = tmalloc( opt_data_3, 1 );
2056
2057 for( d = 0; d < 3; ++d )
2058 p->xw[d] = xw[d];
2059 p->nptel = n[0] * n[1] * n[2];
2060
2061 hash_build_3( p->hash, xw, n, nel, max_hash_size, bbox_tol );
2062
2063 for( d = 0; d < 3; ++d )
2064 {
2065 p->z[d] = tmalloc( realType, n[d] );
2066 lobatto_nodes( p->z[d], n[d] );
2067 lagrange_setup( &p->ld[d], p->z[d], n[d] );
2068 }
2069
2070 p->list = tmalloc( findpt_listel, p->hash->max );
2071 p->sorted = tmalloc( findpt_listel*, p->hash->max );
2072
2073 opt_alloc_3( p->od, p->ld );
2074 p->od_work = p->od->work;
2075
2076 return p;
2077 }
2078
2079 void findpt_free_2( findpt_data_2* p )
2080 {
2081 unsigned d;
2082 opt_free_2( p->od );
2083 free( p->od );
2084 hash_free_2( p->hash );
2085 free( p->hash );
2086 free( p->list );
2087 free( p->sorted );
2088 for( d = 0; d < 2; ++d )
2089 free( p->z[d] );
2090 free( p );
2091 }
2092
2093 void findpt_free_3( findpt_data_3* p )
2094 {
2095 unsigned d;
2096 opt_free_3( p->od );
2097 free( p->od );
2098 hash_free_3( p->hash );
2099 free( p->hash );
2100 free( p->list );
2101 free( p->sorted );
2102 for( d = 0; d < 3; ++d )
2103 free( p->z[d] );
2104 free( p );
2105 }
2106
2107 const realType* findpt_allbnd_2( const findpt_data_2* p )
2108 {
2109 return p->hash->bnd;
2110 }
2111
2112 const realType* findpt_allbnd_3( const findpt_data_3* p )
2113 {
2114 return p->hash->bnd;
2115 }
2116
2117 static void findpt_hash_2( findpt_data_2* p, const realType x[2] )
2118 {
2119 findpt_listel *list = p->list, **sorted = p->sorted;
2120 const uint hi = hash_index_2( p->hash, x );
2121 const uint* offset = p->hash->offset;
2122 uint i;
2123 const uint b = offset[hi], e = offset[hi + 1];
2124 for( i = b; i != e; ++i )
2125 {
2126 const uint el = offset[i];
2127 realType* r = &list->r[0];
2128 const obbox_2* obb = &p->hash->obb[el];
2129 if( obbox_axis_test_2( obb, x ) ) continue;
2130 if( obbox_test_2( obb, x, r ) ) continue;
2131 list->el = el;
2132 list->dist = r1norm_2( r[0], r[1] );
2133 *sorted++ = list++;
2134 }
2135 p->end = sorted;
2136 if( p->end != p->sorted ) findpt_list_sort( p->sorted, p->end - p->sorted );
2137 }
2138
2139 static void findpt_hash_3( findpt_data_3* p, const realType x[3] )
2140 {
2141 findpt_listel *list = p->list, **sorted = p->sorted;
2142 const uint hi = hash_index_3( p->hash, x );
2143 const uint* offset = p->hash->offset;
2144 uint i;
2145 const uint b = offset[hi], e = offset[hi + 1];
2146 for( i = b; i != e; ++i )
2147 {
2148 const uint el = offset[i];
2149 realType* r = &list->r[0];
2150 const obbox_3* obb = &p->hash->obb[el];
2151 if( obbox_axis_test_3( obb, x ) ) continue;
2152 if( obbox_test_3( obb, x, r ) ) continue;
2153 list->el = el;
2154 list->dist = r1norm_3( r[0], r[1], r[2] );
2155 *sorted++ = list++;
2156 }
2157 p->end = sorted;
2158 if( p->end != p->sorted ) findpt_list_sort( p->sorted, p->end - p->sorted );
2159 }
2160
2161 static int findpt_guess_2( findpt_data_2* p, const realType x[2], uint el, realType r[2], realType* dist )
2162 {
2163 const uint arrindex = p->nptel * el;
2164 const realType* elx[2];
2165 realType g[2];
2166 unsigned c = opt_no_constraints_2;
2167 const obbox_2* obb = &p->hash->obb[el];
2168 elx[0] = p->xw[0] + arrindex;
2169 elx[1] = p->xw[1] + arrindex;
2170 if( obbox_axis_test_2( obb, x ) || obbox_test_2( obb, x, g ) ) return 0;
2171 *dist = opt_findpt_2( p->od, elx, x, r, &c );
2172 return c == opt_no_constraints_2;
2173 }
2174
2175 static int findpt_guess_3( findpt_data_3* p, const realType x[3], uint el, realType r[3], realType* dist )
2176 {
2177 const uint arrindex = p->nptel * el;
2178 const realType* elx[3];
2179 realType g[3];
2180 unsigned c = opt_no_constraints_3;
2181 const obbox_3* obb = &p->hash->obb[el];
2182 elx[0] = p->xw[0] + arrindex;
2183 elx[1] = p->xw[1] + arrindex;
2184 elx[2] = p->xw[2] + arrindex;
2185 if( obbox_axis_test_3( obb, x ) || obbox_test_3( obb, x, g ) ) return 0;
2186 *dist = opt_findpt_3( p->od, elx, x, r, &c );
2187 return c == opt_no_constraints_3;
2188 }
2189
2190 #define DIAGNOSTICS 0
2191
2192 static int findpt_pass_2( findpt_data_2* p, const realType x[2], uint* el, realType r[2], realType* dist_min )
2193 {
2194 findpt_listel** qq = p->sorted;
2195 const realType* bnd;
2196 realType dist;
2197 do
2198 {
2199 findpt_listel* q = *qq;
2200 const uint arrindex = p->nptel * q->el;
2201 const realType* elx[2];
2202 unsigned c = opt_no_constraints_2;
2203 elx[0] = p->xw[0] + arrindex;
2204 elx[1] = p->xw[1] + arrindex;
2205 dist = opt_findpt_2( p->od, elx, x, q->r, &c );
2206 if( qq == p->sorted || dist < *dist_min || c == opt_no_constraints_2 )
2207 {
2208 *dist_min = dist;
2209 *el = q->el;
2210 memcpy( r, q->r, 2 * sizeof( realType ) );
2211 if( c == opt_no_constraints_2 ) return 0;
2212 }
2213 } while( ++qq != p->end );
2214 bnd = p->hash->obb[*el].axis_bnd;
2215 return *dist_min > r2norm_2( bnd[1] - bnd[0], bnd[3] - bnd[2] ) ? -1 : 1;
2216 }
2217
2218 static int findpt_pass_3( findpt_data_3* p, const realType x[3], uint* el, realType r[3], realType* dist_min )
2219 {
2220 findpt_listel** qq = p->sorted;
2221 const realType* bnd;
2222 realType dist;
2223 do
2224 {
2225 findpt_listel* q = *qq;
2226 const uint arrindex = p->nptel * q->el;
2227 const realType* elx[3];
2228 unsigned c = opt_no_constraints_3;
2229 elx[0] = p->xw[0] + arrindex;
2230 elx[1] = p->xw[1] + arrindex;
2231 elx[2] = p->xw[2] + arrindex;
2232 dist = opt_findpt_3( p->od, elx, x, q->r, &c );
2233 if( qq == p->sorted || dist < *dist_min || c == opt_no_constraints_3 )
2234 {
2235 *dist_min = dist;
2236 *el = q->el;
2237 memcpy( r, q->r, 3 * sizeof( realType ) );
2238 if( c == opt_no_constraints_3 )
2239 {
2240 #if DIAGNOSTICS
2241 printf( "point found in element #%d\n", qq - p->sorted );
2242 #endif
2243 return 0;
2244 }
2245 }
2246 } while( ++qq != p->end );
2247 bnd = p->hash->obb[*el].axis_bnd;
2248 return *dist_min > r2norm_3( bnd[1] - bnd[0], bnd[3] - bnd[2], bnd[5] - bnd[4] ) ? -1 : 1;
2249 }
2250
2251 int findpt_2( findpt_data_2* p, const realType x[2], int guess, uint* el, realType r[2], realType* dist )
2252 {
2253 if( guess && findpt_guess_2( p, x, *el, r, dist ) ) return 0;
2254 findpt_hash_2( p, x );
2255 if( p->sorted == p->end ) return -1;
2256 return findpt_pass_2( p, x, el, r, dist );
2257 }
2258
2259 int findpt_3( findpt_data_3* p, const realType x[3], int guess, uint* el, realType r[3], realType* dist )
2260 {
2261 if( guess && findpt_guess_3( p, x, *el, r, dist ) ) return 0;
2262 findpt_hash_3( p, x );
2263 #if DIAGNOSTICS
2264 printf( "hashing leaves %d elements to consider\n", p->end - p->sorted );
2265 #endif
2266 if( p->sorted == p->end ) return -1;
2267 return findpt_pass_3( p, x, el, r, dist );
2268 }
2269
2270 void findpt_weights_2( findpt_data_2* p, const realType r[2] )
2271 {
2272 lagrange_0( &p->ld[0], r[0] );
2273 lagrange_0( &p->ld[1], r[1] );
2274 }
2275
2276 void findpt_weights_3( findpt_data_3* p, const realType r[3] )
2277 {
2278 lagrange_0( &p->ld[0], r[0] );
2279 lagrange_0( &p->ld[1], r[1] );
2280 lagrange_0( &p->ld[2], r[2] );
2281 }
2282
2283 double findpt_eval_2( findpt_data_2* p, const realType* u )
2284 {
2285 return tensor_i2( p->ld[0].J, p->ld[0].n, p->ld[1].J, p->ld[1].n, u, p->od_work );
2286 }
2287
2288 double findpt_eval_3( findpt_data_3* p, const realType* u )
2289 {
2290 return tensor_i3( p->ld[0].J, p->ld[0].n, p->ld[1].J, p->ld[1].n, p->ld[2].J, p->ld[2].n, u, p->od_work );
2291 }