87 p->
ov = p->
uv + n * m;
97 p->
b.
n = n, p->
b.
m = m;
100 p->
b.
h = p->
b.
Q1 + n;
101 p->
b.
uv = p->
b.
h + m;
102 p->
b.
ov = p->
b.
uv + n * m;
116 unsigned i, j, m = p->
m, n = p->
n, mm = 2 * m - 1;
117 realType *q =
tmalloc(
realType, ( 2 * n + 1 ) * mm + 6 * n ), *J = q + mm, *D = J + n * mm, *work = D + n * mm;
119 for( i = 0; i < n; ++i )
120 p->
Q0[i] = w[i] / 2, p->
Q1[i] = 3 * p->
Q0[i] * z[i];
121 p->
h[0] = -1, p->
h[m - 1] = 1;
122 for( j = 1; j < m - 1; ++j )
124 for( j = 0; j < m - 1; ++j )
125 q[2 * j] = p->
h[j], q[2 * j + 1] = ( p->
h[j] + p->
h[j + 1] ) / 2;
126 q[mm - 1] = p->
h[m - 1];
128 for( i = 0; i < n; ++i )
131 ov[0] = uv[0] = J[i];
132 ov[m - 1] = uv[m - 1] = J[( mm - 1 ) * n + i];
133 for( j = 1; j < m - 1; ++j )
136 realType c0 = J[( jj - 1 ) * n + i] + ( q[jj] - q[jj - 1] ) * D[( jj - 1 ) * n + i];
137 realType c1 = J[( jj + 1 ) * n + i] + ( q[jj] - q[jj + 1] ) * D[( jj + 1 ) * n + i];
139 rminmax_3( &uv[j], &ov[j], c0, c1, c2 );
147 unsigned i, mn = p->
b.
m * p->
b.
n;
149 for( i = 0; i < mn; ++i )
169 for( i = 0; i < p->
n; ++i )
170 a0 += p->
Q0[i] * u[i], a1 += p->
Q1[i] * u[i];
171 for( j = 0; j < p->m; ++j )
172 b[j] = a[j] = a0 + a1 * p->
h[j];
173 for( i = 0; i < p->
n; ++i )
175 realType w = u[i] - ( a0 + a1 * p->
z[i] );
177 for( j = 0; j < p->
m; ++j )
178 a[j] += w * ( *uv++ ), b[j] += w * ( *ov++ );
180 for( j = 0; j < p->
m; ++j )
181 a[j] += w * ( *ov++ ), b[j] += w * ( *uv++ );
191 bnd[0] = a[0], bnd[1] = b[0];
192 for( j = 1; j < p->
m; ++j )
194 if( a[j] < bnd[0] ) bnd[0] = a[j];
195 if( b[j] > bnd[1] ) bnd[1] = b[j];
206 unsigned nr = pr->
n, mr = pr->
m, ns = ps->
b.
n, ms = ps->
b.
m;
207 realType *a0 = work, *a1 = a0 + mr, *ar_ = a1 + mr, *ar = ar_, *br_ = ar + mr * ns, *br = br_, *a_ = br + mr * ns,
208 *a = a_, *b_ = a + mr * ms, *b = b_, *uvp, *ovp, *uvn, *ovn;
211 for( i = 0; i < mr; ++i )
213 for( j = 0; j < ns; ++j, u +=
nr )
217 for( i = 0; i < mr; ++i )
220 a0[i] += q0 * t, a1[i] += q1 * t;
223 for( i = 0; i < mr; ++i )
226 for( k = 0; k < ms; ++k )
227 *b++ = *a++ = a0i + a1i * ps->
b.
h[k];
230 uvp = ps->
uvp, ovp = ps->
ovp, uvn = ps->
uvn, ovn = ps->
ovn;
231 for( j = 0; j < ns; ++j, uvp += ms, uvn += ms, ovp += ms, ovn += ms )
235 for( i = 0; i < mr; ++i )
241 for( k = 0; k < ms; ++k )
242 *a++ += uw * uvp[k] + ow * uvn[k], *b++ += ow * ovp[k] + uw * ovn[k];
244 for( k = 0; k < ms; ++k )
245 *a++ += uw * ovp[k] + ow * ovn[k], *b++ += ow * uvp[k] + uw * uvn[k];
247 for( k = 0; k < ms; ++k )
248 *a++ += uw * ovp[k] + ow * uvn[k], *b++ += ow * ovp[k] + uw * uvn[k];
251 b0 = a_[0], b1 = b_[0];
252 for( i = 1; i < mr * ms; ++i )
254 if( a_[i] < b0 ) b0 = a_[i];
255 if( b_[i] > b1 ) b1 = b_[i];
257 bnd[0] = b0, bnd[1] = b1;
266 const realType idet = 1 / ( A[0] * A[3] - A[1] * A[2] );
267 inv[0] = idet * A[3];
268 inv[1] = -( idet * A[1] );
269 inv[2] = -( idet * A[2] );
270 inv[3] = idet * A[0];
275 const realType a = A[4] * A[8] - A[5] * A[7], b = A[5] * A[6] - A[3] * A[8], c = A[3] * A[7] - A[4] * A[6],
276 idet = 1 / ( A[0] * a + A[1] * b + A[2] * c );
278 inv[1] = idet * ( A[2] * A[7] - A[1] * A[8] );
279 inv[2] = idet * ( A[1] * A[5] - A[2] * A[4] );
281 inv[4] = idet * ( A[0] * A[8] - A[2] * A[6] );
282 inv[5] = idet * ( A[2] * A[3] - A[0] * A[5] );
284 inv[7] = idet * ( A[1] * A[6] - A[0] * A[7] );
285 inv[8] = idet * ( A[0] * A[4] - A[1] * A[3] );
290 y[0] = A[0] * x[0] + A[1] * x[1];
291 y[1] = A[2] * x[0] + A[3] * x[1];
296 y[0] = A[0] * x[0] + A[2] * x[1];
297 y[1] = A[1] * x[0] + A[3] * x[1];
302 y[0] = A[0] * x[0] + A[1] * x[1] + A[2] * x[2];
303 y[1] = A[3] * x[0] + A[4] * x[1] + A[5] * x[2];
304 y[2] = A[6] * x[0] + A[7] * x[1] + A[8] * x[2];
309 y[0] = A[0] * x[0] + A[3] * x[1] + A[6] * x[2];
310 y[1] = A[1] * x[0] + A[4] * x[1] + A[7] * x[2];
311 y[2] = A[2] * x[0] + A[5] * x[1] + A[8] * x[2];
334 const realType idet = 1 / ( A[0] * A[1] - A[2] * A[2] );
335 *x0 = idet * ( A[1] * b0 - A[2] * b1 );
336 *x1 = idet * ( A[0] * b1 - A[2] * b0 );
411 const unsigned max_npm =
umax_2( n[0] + m[0], n[1] + m[1] );
430 const unsigned wk1 = 3 * n[0] * n[1] + 2 * m[0] + 2 * m[0] * n[1] + 2 * m[0] * m[1];
431 const unsigned wk2 = 3 * n[0] * n[2] + 2 * m[0] + 2 * m[0] * n[2] + 2 * m[0] * m[2];
432 const unsigned wk3 = 3 * n[1] * n[2] + 2 * m[1] + 2 * m[1] * n[2] + 2 * m[1] * m[2];
433 const unsigned wk_max = umax_3( wk1, wk2, wk3 );
457 const unsigned m[2] )
475 const unsigned m[3] )
518 xt[0] = x[0] - p->
x[0];
519 xt[1] = x[1] - p->
x[1];
520 r[0] = p->
A[0] * xt[0] + p->
A[1] * xt[1];
521 if(
mbabs( r[0] ) > 1 )
return 1;
522 r[1] = p->
A[2] * xt[0] + p->
A[3] * xt[1];
523 return mbabs( r[1] ) > 1;
529 xt[0] = x[0] - p->
x[0];
530 xt[1] = x[1] - p->
x[1];
531 xt[2] = x[2] - p->
x[2];
532 r[0] = p->
A[0] * xt[0] + p->
A[1] * xt[1] + p->
A[2] * xt[2];
533 if(
mbabs( r[0] ) > 1 )
return 1;
534 r[1] = p->
A[3] * xt[0] + p->
A[4] * xt[1] + p->
A[5] * xt[2];
535 if(
mbabs( r[1] ) > 1 )
return 1;
536 r[2] = p->
A[6] * xt[0] + p->
A[7] * xt[1] + p->
A[8] * xt[2];
537 return mbabs( r[2] ) > 1;
550 for( i = 0; i < n; ++i, x += s, y += s )
552 const realType xt = *x - c0[0], yt = *y - c0[1];
553 u[i] = A[0] * xt + A[1] * yt;
554 v[i] = A[2] * xt + A[3] * yt;
571 for( j = 0; j < ns; ++j, x += ss, y += ss, z += ss )
573 for( i = 0; i <
nr; ++i, x += sr, y += sr, z += sr )
575 const realType xt = *x - c0[0], yt = *y - c0[1], zt = *z - c0[2];
576 *u++ = A[0] * xt + A[1] * yt + A[2] * zt;
577 *v++ = A[3] * xt + A[4] * yt + A[5] * zt;
578 *w++ = A[6] * xt + A[7] * yt + A[8] * zt;
585 if( ob[0] < b[0] ) b[0] = ob[0];
586 if( ob[1] > b[1] ) b[1] = ob[1];
587 if( ob[2] < b[2] ) b[2] = ob[2];
588 if( ob[3] > b[3] ) b[3] = ob[3];
593 if( ob[0] < b[0] ) b[0] = ob[0];
594 if( ob[1] > b[1] ) b[1] = ob[1];
595 if( ob[2] < b[2] ) b[2] = ob[2];
596 if( ob[3] > b[3] ) b[3] = ob[3];
597 if( ob[4] < b[4] ) b[4] = ob[4];
598 if( ob[5] > b[5] ) b[5] = ob[5];
613 lob_bnd_1( lbd, work, bnd, work + 2 * n );
614 lob_bnd_1( lbd, work + n, bnd + 2, work + 2 * n );
633 lob_bnd_2( dr, ds, work, bnd, work + 3 *
nr * ns );
634 lob_bnd_2( dr, ds, work +
nr * ns, bnd + 2, work + 3 *
nr * ns );
635 lob_bnd_2( dr, ds, work + 2 *
nr * ns, bnd + 4, work + 3 *
nr * ns );
648 unsigned i,
nr = p->
dr.
n, ns = p->
ds.
n;
678 i =
nr * ns * ( nt - 1 );
679 obbox_side_3( x, y, z,
nr, 1, ns, 0, c0, A, p->
work, &p->
dr, &p->
ds, bnd );
680 obbox_side_3( x + i, y + i, z + i,
nr, 1, ns, 0, c0, A, p->
work, &p->
dr, &p->
ds, obnd );
684 obbox_side_3( x, y, z,
nr, 1, nt, i, c0, A, p->
work, &p->
dr, &p->
dt, obnd );
686 obbox_side_3( x + i, y + i, z + i,
nr, 1, nt, i, c0, A, p->
work, &p->
dr, &p->
dt, obnd );
690 obbox_side_3( x, y, z, ns,
nr, nt, 0, c0, A, p->
work, &p->
ds.
b, &p->
dt, obnd );
692 obbox_side_3( x + i, y + i, z + i, ns,
nr, nt, 0, c0, A, p->
work, &p->
ds.
b, &p->
dt, obnd );
698 const realType zero[2] = { 0, 0 },
id[4] = { 1, 0, 0, 1 };
699 realType c0[2], jac[4], inv[4], bnd[4], u0[2], d[2];
713 u0[0] = ( bnd[0] + bnd[1] ) / 2;
714 u0[1] = ( bnd[2] + bnd[3] ) / 2;
715 d[0] = 2 / ( ( 1 + tol ) * ( bnd[1] - bnd[0] ) );
716 d[1] = 2 / ( ( 1 + tol ) * ( bnd[3] - bnd[2] ) );
717 b->
x[0] = c0[0] + jac[0] * u0[0] + jac[1] * u0[1];
718 b->
x[1] = c0[1] + jac[2] * u0[0] + jac[3] * u0[1];
719 b->
A[0] = d[0] * inv[0], b->
A[1] = d[0] * inv[1];
720 b->
A[2] = d[1] * inv[2], b->
A[3] = d[1] * inv[3];
730 const realType zero[3] = { 0, 0 },
id[9] = { 1, 0, 0, 0, 1, 0, 0, 0, 1 };
731 realType c0[3], jac[9], inv[9], bnd[6], u0[3], d[3];
742 tensor_ig3( p->
Jr0, p->
Dr0, p->
dr.
n, p->
Js0, p->
Ds0, p->
ds.
b.
n, p->
Jt0, p->
Dt0, p->
dt.
b.
n, x, jac, p->
work );
743 c0[1] =
tensor_ig3( p->
Jr0, p->
Dr0, p->
dr.
n, p->
Js0, p->
Ds0, p->
ds.
b.
n, p->
Jt0, p->
Dt0, p->
dt.
b.
n, y, jac + 3,
745 c0[2] =
tensor_ig3( p->
Jr0, p->
Dr0, p->
dr.
n, p->
Js0, p->
Ds0, p->
ds.
b.
n, p->
Jt0, p->
Dt0, p->
dt.
b.
n, z, jac + 6,
751 u0[0] = ( bnd[0] + bnd[1] ) / 2;
752 u0[1] = ( bnd[2] + bnd[3] ) / 2;
753 u0[2] = ( bnd[4] + bnd[5] ) / 2;
754 d[0] = 2 / ( ( 1 + tol ) * ( bnd[1] - bnd[0] ) );
755 d[1] = 2 / ( ( 1 + tol ) * ( bnd[3] - bnd[2] ) );
756 d[2] = 2 / ( ( 1 + tol ) * ( bnd[5] - bnd[4] ) );
757 b->
x[0] = c0[0] + jac[0] * u0[0] + jac[1] * u0[1] + jac[2] * u0[2];
758 b->
x[1] = c0[1] + jac[3] * u0[0] + jac[4] * u0[1] + jac[5] * u0[2];
759 b->
x[2] = c0[2] + jac[6] * u0[0] + jac[7] * u0[1] + jac[8] * u0[2];
760 b->
A[0] = d[0] * inv[0], b->
A[1] = d[0] * inv[1], b->
A[2] = d[0] * inv[2];
761 b->
A[3] = d[1] * inv[3], b->
A[4] = d[1] * inv[4], b->
A[5] = d[1] * inv[5];
762 b->
A[6] = d[2] * inv[6], b->
A[7] = d[2] * inv[7], b->
A[8] = d[2] * inv[8];
821 const int i =
ifloor( ( x - low ) * fac );
822 if( i < 0 )
return 0;
823 return umin_2( i, n - 1 );
828 const unsigned n = p->
hash_n;
835 const unsigned n = p->
hash_n;
845 p->
fac[0] = n / ( p->
bnd[1] - p->
bnd[0] );
846 p->
fac[1] = n / ( p->
bnd[3] - p->
bnd[2] );
852 p->
fac[0] = n / ( p->
bnd[1] - p->
bnd[0] );
853 p->
fac[1] = n / ( p->
bnd[3] - p->
bnd[2] );
854 p->
fac[2] = n / ( p->
bnd[5] - p->
bnd[4] );
861 const int i0 =
ifloor( ( a - p->
bnd[d * 2] ) * p->
fac[d] );
862 const unsigned i1 =
iceil( ( b - p->
bnd[d * 2] ) * p->
fac[d] );
863 *ia = imax_2( 0, i0 );
864 *ib = imin_2( i1, p->
hash_n );
865 if( *ib == *ia ) ++( *ib );
872 const int i0 =
ifloor( ( a - p->
bnd[d * 2] ) * p->
fac[d] );
873 const unsigned i1 =
iceil( ( b - p->
bnd[d * 2] ) * p->
fac[d] );
874 *ia = imax_2( 0, i0 );
875 *ib = imin_2( i1, p->
hash_n );
876 if( *ib == *ia ) ++( *ib );
883 for( i = 0; i < nel; ++i )
900 for( i = 0; i < nel; ++i )
917 unsigned nl = 1, nu = ceil( sqrt( max_size - nel ) );
918 uint size_low = 2 + nel;
921 unsigned nm =
nl + ( nu -
nl ) / 2;
923 if(
size <= max_size )
934 unsigned nl = 1, nu = ceil( pow( max_size - nel, 1.0 / 3 ) );
935 uint size_low = 2 + nel;
938 unsigned nm =
nl + ( nu -
nl ) / 2;
940 if(
size <= max_size )
955 const unsigned nn = n[0] * n[1];
959 for( i = 0; i < 2; ++i )
967 z[1] = w[0] + n[0], w[1] = z[1] + n[1];
968 for( d = 0; d < 2; ++d )
973 for( i = 0; i < nel; ++i, x[0] += nn, x[1] += nn )
989 const unsigned nn = n[0] * n[1] * n[2];
992 for( i = 0; i < 3; ++i )
1000 for( d = 1; d < 3; ++d )
1001 z[d] = w[d - 1] + n[d - 1], w[d] = z[d] + n[d];
1002 for( d = 0; d < 3; ++d )
1007 for( i = 0; i < nel; ++i, x[0] += nn, x[1] += nn, x[2] += nn )
1018 const unsigned n[2],
1031 hn2 = (
uint)hn * hn;
1032 count =
tcalloc(
unsigned, hn2 );
1033 for( el = 0; el < nel; ++el )
1035 unsigned ia, ib, j, ja, jb;
1038 for( j = ja; j < jb; ++j )
1039 for( i = ia; i < ib; ++i )
1040 ++count[(
uint)j * hn + i];
1042 sum = hn2 + 1, p->
max = count[0];
1044 for( i = 0; i < hn2; ++i )
1046 if( count[i] > p->
max ) p->
max = count[i];
1050 for( el = 0; el < nel; ++el )
1052 unsigned ia, ib, j, ja, jb;
1055 for( j = ja; j < jb; ++j )
1056 for( i = ia; i < ib; ++i )
1059 p->
offset[p->
offset[arrindex + 1] - count[arrindex]] = el;
1068 const unsigned n[3],
1081 hn3 = (
uint)hn * hn * hn;
1082 count =
tcalloc(
unsigned, hn3 );
1083 for( el = 0; el < nel; ++el )
1085 unsigned ia, ib, j, ja, jb, k, ka, kb;
1089 for( k = ka; k < kb; ++k )
1090 for( j = ja; j < jb; ++j )
1091 for( i = ia; i < ib; ++i )
1092 ++count[( (
uint)k * hn + j ) * hn + i];
1094 sum = hn3 + 1, p->
max = count[0];
1096 for( i = 0; i < hn3; ++i )
1098 if( count[i] > p->
max ) p->
max = count[i];
1102 for( el = 0; el < nel; ++el )
1104 unsigned ia, ib, j, ja, jb, k, ka, kb;
1108 for( k = ka; k < kb; ++k )
1109 for( j = ja; j < jb; ++j )
1110 for( i = ia; i < ib; ++i )
1112 uint arrindex = ( (
uint)k * hn + j ) * hn + i;
1113 p->
offset[p->
offset[arrindex + 1] - count[arrindex]] = el;
1197 static const char opt_constr_num_3[27] = { 3, 2, 3, 2, 1, 2, 3, 2, 3, 2, 1, 2, 1, 0,
1198 1, 2, 1, 2, 3, 2, 3, 2, 1, 2, 3, 2, 3 };
1202 static const char opt_constr_dir_3[27] = { -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, 1, -1, 0, -1,
1203 0, -1, 1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1 };
1206 static const char opt_constr_not[27] = { -1, 0, -1, 1, -1, 1, -1, 0, -1, 2, -1, 2, -1, -1,
1207 -1, 2, -1, 2, -1, 0, -1, 1, -1, 1, -1, 0, -1 };
1209 static const char opt_constr_wide[27] = { 0x00, 0x01, 0x02, 0x04, 0x05, 0x06, 0x08, 0x09, 0x0a,
1210 0x10, 0x11, 0x12, 0x14, 0x15, 0x16, 0x18, 0x19, 0x1a,
1211 0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a };
1231 c[1] = ( cw >> 2 ) & 3;
1237 return c[1] * 3 + c[0];
1242 return ( c[2] * 3 + c[1] ) * 3 + c[0];
1253 const unsigned nr = ld[0].
n, ns = ld[1].
n, nt = ld[2].
n, nf = umax_3(
nr * ns,
nr * nt, ns * nt ),
1254 ne = umax_3(
nr, ns, nt ), nw = 2 * ns * nt + 3 * ns;
1262 p->
fd.
x[1] = p->
fd.
x[0] + nf;
1263 p->
fd.
x[2] = p->
fd.
x[1] + nf;
1268 p->
ed.
x[1] = p->
ed.
x[0] + ne;
1269 p->
ed.
x[2] = p->
ed.
x[1] + ne;
1296 for( d = 0; d < 3; ++d )
1297 p->
x[d] =
tensor_ig3( ld[0].J, ld[0].D, ld[0].n, ld[1].J, ld[1].D, ld[1].n, ld[2].J, ld[2].D, ld[2].n,
1309 unsigned d, off = 0;
1311 sn = p->
size[dn], n1 = p->
ld[d1].
n, n2 = p->
ld[d2].
n, nn = p->
ld[dn].
n;
1314 for( d = 0; d < 3; ++d )
1316 unsigned i, j, k, arrindex = 0;
1318 for( j = n2; j; --j, in += so )
1319 for( i = n1; i; --i, ++arrindex, in += s1 )
1323 p->
fd.
x[d][arrindex] = *in;
1325 for( k = 0; k < nn; ++k, ind += sn )
1326 *fdn += *ind * D[k];
1349 const unsigned dn = p->
fd.
dn, d1 = p->
fd.
d1, d2 = p->
fd.
d2, n1 = p->
ld[d1].
n, n2 = p->
ld[d2].
n;
1352 for( d = 0; d < 3; ++d )
1356 p->
jac[d * 3 + d1] = g[0];
1357 p->
jac[d * 3 + d2] = g[1];
1371 const unsigned d1 = p->
fd.
d1, d2 = p->
fd.
d2, n1 = p->
ld[d1].
n, n2 = p->
ld[d2].
n;
1378 for( d = 0; d < 3; ++d )
1389 unsigned d, off, off1 = 0, off2 = 0;
1391 ne = p->
ld[de].
n, n1 = p->
ld[d1].
n, n2 = p->
ld[d2].
n;
1402 for( d = 0; d < 3; ++d )
1406 for( i = 0; i < ne; ++i, in += se )
1408 const realType *in1 = in - off1, *in2 = in - off2;
1410 p->
ed.
x[d][i] = *in;
1412 for( j = 0; j < n1; ++j, in1 += s1 )
1413 *fd1 += *in1 * fD1[j];
1414 for( j = 0; j < n2; ++j, in2 += s2 )
1415 *fd2 += *in2 * fD2[j];
1436 const unsigned de = p->
ed.
de, d1 = p->
ed.
d1, d2 = p->
ed.
d2, n = p->
ld[de].
n;
1439 for( d = 0; d < 3; ++d )
1456 const unsigned de = p->
ed.
de, n = p->
ld[de].
n;
1459 for( d = 0; d < 3; ++d )
1465 unsigned off[3], offt, d, c[3];
1468 for( d = 0; d < 3; ++d )
1470 fD[d] = p->
ld[d].
D_z0, off[d] = 0;
1473 offt = off[0] + off[1] + off[2];
1474 for( d = 0; d < 9; ++d )
1476 for( d = 0; d < 3; ++d )
1479 p->
pd.
x[d] = p->
elx[d][offt];
1480 for( i = 0; i < 3; ++i )
1483 for( j = 0; j < p->
ld[i].
n; ++j, in += p->
size[i] )
1484 p->
pd.
jac[d * 3 + i] += *in * fD[i][j];
1510 #define DIAGNOSTICS 0
1518 realType dr[3], resid[3], steep[3];
1520 unsigned c = *constr, ac, d, cc[3], step = 0;
1522 p->
elx[0] = elx[0], p->
elx[1] = elx[1], p->
elx[2] = elx[2];
1529 printf(
"opt_findpt: xstar = %g, %g, %g\n", xstar[0], xstar[1], xstar[2] );
1538 printf(
" iteration %u\n", step );
1559 printf(
" r = %g, %g, %g\n", r[0], r[1], r[2] );
1560 printf(
" x = %g, %g, %g\n", p->
x[0], p->
x[1], p->
x[2] );
1563 for( d = 0; d < 3; ++d )
1564 resid[d] = xstar[d] - p->
x[d];
1566 printf(
" resid = %g, %g, %g\n", resid[0], resid[1], resid[2] );
1567 printf(
" 2-norm = %g\n",
r2norm_3( resid[0], resid[1], resid[2] ) );
1576 printf(
" steepest descent = %g, %g, %g\n", steep[0], steep[1], steep[2] );
1578 for( d = 0; d < 3; ++d )
1579 if( ( cc[d] == 0 && steep[d] > 0 ) || ( cc[d] == 2 && steep[d] < 0 ) ) cc[d] = 1;
1609 const unsigned dn = p->
fd.
dn, d1 = p->
fd.
d1, d2 = p->
fd.
d2;
1613 A[0] = J[d1] * J[d1] + J[3 + d1] * J[3 + d1] + J[6 + d1] * J[6 + d1];
1614 A[1] = J[d2] * J[d2] + J[3 + d2] * J[3 + d2] + J[6 + d2] * J[6 + d2];
1615 A[2] = J[d1] * J[d2] + J[3 + d1] * J[3 + d2] + J[6 + d1] * J[6 + d2];
1616 A[0] -= resid[0] * H[0] + resid[1] * H[3] + resid[2] * H[6];
1617 A[1] -= resid[0] * H[1] + resid[1] * H[4] + resid[2] * H[7];
1618 A[2] -= resid[0] * H[2] + resid[1] * H[5] + resid[2] * H[8];
1624 const unsigned de = p->
ed.
de, d1 = p->
ed.
d1, d2 = p->
ed.
d2;
1628 fac = J[0] * J[0] + J[3] * J[3] + J[6] * J[6] - ( resid[0] * H[0] + resid[1] * H[1] + resid[2] * H[2] );
1629 dr[de] = steep[de] / fac;
1630 dr[d1] = 0, dr[d2] = 0;
1634 dr[0] = dr[1] = dr[2] = 0;
1638 printf(
" dr = %g, %g, %g\n", dr[0], dr[1], dr[2] );
1642 for( d = 0; d < 3; ++d )
1644 if( cc[d] != 1 )
continue;
1647 dr[d] -= r[d] + 1, r[d] = -1, cc[d] = 0;
1648 else if( r[d] >= 1 )
1649 dr[d] -= r[d] - 1, r[d] = 1, cc[d] = 2;
1655 printf(
"opt_findpt_3 converged in %u iterations\n", step);
1657 return r2norm_3( resid[0], resid[1], resid[2] );
1664 const unsigned nr = ld[0].
n, ns = ld[1].
n, ne =
umax_2(
nr, ns ), nw = 2 * ns;
1671 p->
ed.
x[1] = p->
ed.
x[0] + ne;
1693 for( d = 0; d < 2; ++d )
1695 tensor_ig2( ld[0].J, ld[0].D, ld[0].n, ld[1].J, ld[1].D, ld[1].n, p->
elx[d], &p->
jac[d * 2], p->
work );
1706 unsigned d, off = 0;
1707 const unsigned de = p->
ed.
de, d1 = p->
ed.
d1, se = p->
size[de], s1 = p->
size[d1], ne = p->
ld[de].
n, n1 = p->
ld[d1].
n;
1713 for( d = 0; d < 2; ++d )
1717 for( i = 0; i < ne; ++i, in += se )
1721 p->
ed.
x[d][i] = *in;
1723 for( j = 0; j < n1; ++j, in1 += s1 )
1724 *fd1 += *in1 * fD1[j];
1744 const unsigned de = p->
ed.
de, d1 = p->
ed.
d1, n = p->
ld[de].
n;
1746 for( d = 0; d < 2; ++d )
1762 const unsigned de = p->
ed.
de, n = p->
ld[de].
n;
1765 for( d = 0; d < 2; ++d )
1771 unsigned off[2], offt, d, c[2];
1774 for( d = 0; d < 2; ++d )
1776 fD[d] = p->
ld[d].
D_z0, off[d] = 0;
1779 offt = off[0] + off[1];
1780 for( d = 0; d < 4; ++d )
1782 for( d = 0; d < 2; ++d )
1785 p->
pd.
x[d] = p->
elx[d][offt];
1786 for( i = 0; i < 2; ++i )
1789 for( j = 0; j < p->
ld[i].
n; ++j, in += p->
size[i] )
1790 p->
pd.
jac[d * 2 + i] += *in * fD[i][j];
1816 #define DIAGNOSTICS 0
1824 realType dr[2], resid[2], steep[2];
1826 unsigned c = *constr, ac, d, cc[2], step = 0;
1828 p->
elx[0] = elx[0], p->
elx[1] = elx[1];
1834 printf(
"opt_findpt: xstar = %g, %g\n", xstar[0], xstar[1] );
1843 printf(
" iteration %u\n", step );
1861 printf(
" r = %g, %g\n", r[0], r[1] );
1862 printf(
" x = %g, %g\n", p->
x[0], p->
x[1] );
1865 for( d = 0; d < 2; ++d )
1866 resid[d] = xstar[d] - p->
x[d];
1868 printf(
" resid = %g, %g\n", resid[0], resid[1] );
1869 printf(
" 2-norm = %g\n",
r2norm_2( resid[0], resid[1] ) );
1878 printf(
" steepest descent = %g, %g\n", steep[0], steep[1] );
1880 for( d = 0; d < 2; ++d )
1881 if( ( cc[d] == 0 && steep[d] > 0 ) || ( cc[d] == 2 && steep[d] < 0 ) ) cc[d] = 1;
1908 const unsigned de = p->
ed.
de, d1 = p->
ed.
d1;
1912 fac = J[0] * J[0] + J[2] * J[2] - ( resid[0] * H[0] + resid[1] * H[1] );
1913 dr[de] = steep[de] / fac;
1922 printf(
" dr = %g, %g\n", dr[0], dr[1] );
1926 for( d = 0; d < 2; ++d )
1928 if( cc[d] != 1 )
continue;
1931 dr[d] -= r[d] + 1, r[d] = -1, cc[d] = 0;
1932 else if( r[d] >= 1 )
1933 dr[d] -= r[d] - 1, r[d] = 1, cc[d] = 2;
1938 return r2norm_2( resid[0], resid[1] );
1980 for( i = 2; i <= n; ++i )
1983 unsigned hole = i, parent = hole >> 1;
1984 if( A[parent]->dist >= item->
dist )
continue;
1987 A[hole] = A[parent];
1990 }
while( parent && A[parent]->dist < item->dist );
1994 for( i = n - 1; i; --i )
2001 unsigned ch = hole << 1, r = ch + 1;
2002 if( r <= i && A[ch]->dist < A[r]->dist ) ch = r;
2003 if( ch > i || item->
dist >= A[ch]->
dist )
break;
2012 const unsigned n[2],
2023 for( d = 0; d < 2; ++d )
2025 p->
nptel = n[0] * n[1];
2029 for( d = 0; d < 2; ++d )
2046 const unsigned n[3],
2057 for( d = 0; d < 3; ++d )
2059 p->
nptel = n[0] * n[1] * n[2];
2063 for( d = 0; d < 3; ++d )
2088 for( d = 0; d < 2; ++d )
2102 for( d = 0; d < 3; ++d )
2123 const uint b = offset[hi], e = offset[hi + 1];
2124 for( i = b; i != e; ++i )
2126 const uint el = offset[i];
2145 const uint b = offset[hi], e = offset[hi + 1];
2146 for( i = b; i != e; ++i )
2148 const uint el = offset[i];
2168 elx[0] = p->
xw[0] + arrindex;
2169 elx[1] = p->
xw[1] + arrindex;
2182 elx[0] = p->
xw[0] + arrindex;
2183 elx[1] = p->
xw[1] + arrindex;
2184 elx[2] = p->
xw[2] + arrindex;
2190 #define DIAGNOSTICS 0
2203 elx[0] = p->
xw[0] + arrindex;
2204 elx[1] = p->
xw[1] + arrindex;
2210 memcpy( r, q->
r, 2 *
sizeof(
realType ) );
2213 }
while( ++qq != p->
end );
2215 return *dist_min >
r2norm_2( bnd[1] - bnd[0], bnd[3] - bnd[2] ) ? -1 : 1;
2229 elx[0] = p->
xw[0] + arrindex;
2230 elx[1] = p->
xw[1] + arrindex;
2231 elx[2] = p->
xw[2] + arrindex;
2237 memcpy( r, q->
r, 3 *
sizeof(
realType ) );
2241 printf(
"point found in element #%d\n", qq - p->
sorted );
2246 }
while( ++qq != p->
end );
2248 return *dist_min >
r2norm_3( bnd[1] - bnd[0], bnd[3] - bnd[2], bnd[5] - bnd[4] ) ? -1 : 1;
2264 printf(
"hashing leaves %d elements to consider\n", p->
end - p->
sorted );