1 #include "moab/FindPtFuncs.h"
2
3 /*--------------------------------------------------------------------------
4 Matrix-Matrix Multiplication
5
6 mxm_ab (A,na,B,nb,C,nc) :
7 gives C = A B where A is na x nb, B is nb x nc, C is na x nc
8 a := r | c to indicate A is in row- or column- major format
9 b := r | c to indicate B is in row- or column- major format
10 C is always column-major
11 --------------------------------------------------------------------------*/
12
13 static void mxm_cc( const realType* A, unsigned na, const realType* B, unsigned nb, realType* C, unsigned nc )
14 {
15 unsigned i, j, k;
16 realType* Ccol = C;
17 const realType* Bcol = B;
18 for( j = 0; j < nc; ++j, Ccol += na, Bcol += nb )
19 {
20 const realType* Acol = A;
21 for( i = 0; i < na; ++i )
22 Ccol[i] = 0;
23 for( k = 0; k < nb; ++k, Acol += na )
24 for( i = 0; i < na; ++i )
25 Ccol[i] += Acol[i] * Bcol[k];
26 }
27 }
28
29 static void mxm_rc( const realType* A, unsigned na, const realType* B, unsigned nb, realType* C, unsigned nc )
30 {
31 unsigned i, j, k;
32 realType* Ccol = C;
33 const realType* Bcol = B;
34 for( j = 0; j < nc; ++j, Ccol += na, Bcol += nb )
35 {
36 const realType* Arow = A;
37 for( i = 0; i < na; ++i, Arow += nb )
38 {
39 Ccol[i] = 0;
40 for( k = 0; k < nb; ++k )
41 Ccol[i] += Arow[k] * Bcol[k];
42 }
43 }
44 }
45
46 static void mxm_cr( const realType* A, unsigned na, const realType* B, unsigned nb, realType* C, unsigned nc )
47 {
48 unsigned i, j, k;
49 const realType *Acol = A, *Brow = B;
50 for( i = 0; i < na * nc; ++i )
51 C[i] = 0;
52 for( k = 0; k < nb; ++k, Acol += na, Brow += nc )
53 {
54 realType* Ccol = C;
55 for( j = 0; j < nc; ++j, Ccol += na )
56 for( i = 0; i < na; ++i )
57 Ccol[i] += Acol[i] * Brow[j];
58 }
59 }
60
61 /*
62 static void mxm_rr(const realType *A, unsigned na,
63 const realType *B, unsigned nb,
64 realType *C, unsigned nc)
65 {
66 unsigned i,j,k;
67 realType *Ccol = C;
68 const realType *Bcol = B;
69 for(j=0;j<nc;++j,Ccol+=na,++Bcol) {
70 const realType *Arow = A;
71 for(i=0;i<na;++i,Arow+=nb) {
72 const realType *Bkj = Bcol;
73 Ccol[i]=0.0;
74 for(k=0;k<nb;++k,Bkj+=nc)
75 Ccol[i] += Arow[k] * *Bkj;
76 }
77 }
78 }
79 */
80
81 /*--------------------------------------------------------------------------
82 Matrix-Vector Multiplication
83
84 mxv_f (y,ny,A,x,nx) :
85 gives y = A x where A is ny x nx
86 f := r | c to indicate A is in row- or column- major format
87 --------------------------------------------------------------------------*/
88
89 static void mxv_c( realType* y, unsigned ny, const realType* A, const realType* x, unsigned nx )
90 {
91 realType *yp = y, *y_end = y + ny;
92 const realType* x_end = x + nx;
93 realType xk = *x;
94 do
95 {
96 *yp++ = *A++ * xk;
97 } while( yp != y_end );
98 for( ++x; x != x_end; ++x )
99 {
100 xk = *x;
101 yp = y;
102 do
103 {
104 *yp++ += *A++ * xk;
105 } while( yp != y_end );
106 }
107 }
108
109 static void mxv_r( realType* y, unsigned ny, const realType* A, const realType* x, unsigned nx )
110 {
111 realType* y_end = y + ny;
112 const realType* x_end = x + nx;
113 do
114 {
115 const realType* xp = x;
116 realType sum = *A++ * *xp++;
117 while( xp != x_end )
118 {
119 sum += *A++ * *xp++;
120 }
121 *y++ = sum;
122 } while( y != y_end );
123 }
124
125 /*--------------------------------------------------------------------------
126 Vector-Vector Multiplication
127
128 inner (u,v,n) : inner product
129 --------------------------------------------------------------------------*/
130
131 /* precondition: n>=1 */
132 static realType inner( const realType* u, const realType* v, unsigned n )
133 {
134 const realType* u_end = u + n;
135 realType sum = *u++ * *v++;
136 while( u != u_end )
137 {
138 sum += *u++ * *v++;
139 }
140 return sum;
141 }
142
143 /*--------------------------------------------------------------------------
144 1-,2-,3-d Tensor Application
145
146 the 3d case:
147 tensor_f3(R,mr,nr, S,ms,ns, T,mt,nt, u,v, work1,work2)
148 gives v = [ R (x) S (x) T ] u
149 where R is mr x nr, S is ms x ns, T is mt x nt,
150 each in row- or column-major format according to f := r | c
151 u is nr x ns x nt in column-major format (inner index is r)
152 v is mr x ms x mt in column-major format (inner index is r)
153 --------------------------------------------------------------------------*/
154
155 void tensor_c1( const realType* R, unsigned mr, unsigned nr, const realType* u, realType* v )
156 {
157 mxv_c( v, mr, R, u, nr );
158 }
159
160 void tensor_r1( const realType* R, unsigned mr, unsigned nr, const realType* u, realType* v )
161 {
162 mxv_r( v, mr, R, u, nr );
163 }
164
165 /* W holds mr*ns reals */
166 void tensor_c2( const realType* R,
167 unsigned mr,
168 unsigned nr,
169 const realType* S,
170 unsigned ms,
171 unsigned ns,
172 const realType* u,
173 realType* v,
174 realType* W )
175 {
176 mxm_cc( R, mr, u, nr, W, ns );
177 mxm_cr( W, mr, S, ns, v, ms );
178 }
179
180 /* W holds mr*ns reals */
181 void tensor_r2( const realType* R,
182 unsigned mr,
183 unsigned nr,
184 const realType* S,
185 unsigned ms,
186 unsigned ns,
187 const realType* u,
188 realType* v,
189 realType* W )
190 {
191 mxm_rc( R, mr, u, nr, W, ns );
192 mxm_cc( W, mr, S, ns, v, ms );
193 }
194
195 /* W holds mr*ns*nt reals,
196 Z holds mr*ms*nt reals */
197 void tensor_c3( const realType* R,
198 unsigned mr,
199 unsigned nr,
200 const realType* S,
201 unsigned ms,
202 unsigned ns,
203 const realType* T,
204 unsigned mt,
205 unsigned nt,
206 const realType* u,
207 realType* v,
208 realType* W,
209 realType* Z )
210 {
211 unsigned n, mrns = mr * ns, mrms = mr * ms;
212 realType* Zp = Z;
213 mxm_cc( R, mr, u, nr, W, ns * nt );
214 for( n = 0; n < nt; ++n, W += mrns, Zp += mrms )
215 mxm_cr( W, mr, S, ns, Zp, ms );
216 mxm_cr( Z, mrms, T, nt, v, mt );
217 }
218
219 /* W holds mr*ns*nt reals,
220 Z holds mr*ms*nt reals */
221 void tensor_r3( const realType* R,
222 unsigned mr,
223 unsigned nr,
224 const realType* S,
225 unsigned ms,
226 unsigned ns,
227 const realType* T,
228 unsigned mt,
229 unsigned nt,
230 const realType* u,
231 realType* v,
232 realType* W,
233 realType* Z )
234 {
235 unsigned n, mrns = mr * ns, mrms = mr * ms;
236 realType* Zp = Z;
237 mxm_rc( R, mr, u, nr, W, ns * nt );
238 for( n = 0; n < nt; ++n, W += mrns, Zp += mrms )
239 mxm_cc( W, mr, S, ns, Zp, ms );
240 mxm_cc( Z, mrms, T, nt, v, mt );
241 }
242
243 /*--------------------------------------------------------------------------
244 1-,2-,3-d Tensor Application of Row Vectors (for Interpolation)
245
246 the 3d case:
247 v = tensor_i3(Jr,nr, Js,ns, Jt,nt, u, work)
248 same effect as tensor_r3(Jr,1,nr, Js,1,ns, Jt,1,nt, u,&v, work1,work2):
249 gives v = [ Jr (x) Js (x) Jt ] u
250 where Jr, Js, Jt are row vectors (interpolation weights)
251 u is nr x ns x nt in column-major format (inner index is r)
252 v is a scalar
253 --------------------------------------------------------------------------*/
254
255 realType tensor_i1( const realType* Jr, unsigned nr, const realType* u )
256 {
257 return inner( Jr, u, nr );
258 }
259
260 /* work holds ns reals */
261 realType tensor_i2( const realType* Jr,
262 unsigned nr,
263 const realType* Js,
264 unsigned ns,
265 const realType* u,
266 realType* work )
267 {
268 mxv_r( work, ns, u, Jr, nr );
269 return inner( Js, work, ns );
270 }
271
272 /* work holds ns*nt + nt reals */
273 realType tensor_i3( const realType* Jr,
274 unsigned nr,
275 const realType* Js,
276 unsigned ns,
277 const realType* Jt,
278 unsigned nt,
279 const realType* u,
280 realType* work )
281 {
282 realType* work2 = work + nt;
283 mxv_r( work2, ns * nt, u, Jr, nr );
284 mxv_r( work, nt, work2, Js, ns );
285 return inner( Jt, work, nt );
286 }
287
288 /*--------------------------------------------------------------------------
289 1-,2-,3-d Tensor Application of Row Vectors
290 for simultaneous Interpolation and Gradient computation
291
292 the 3d case:
293 v = tensor_ig3(Jr,Dr,nr, Js,Ds,ns, Jt,Dt,nt, u,g, work)
294 gives v = [ Jr (x) Js (x) Jt ] u
295 g_0 = [ Dr (x) Js (x) Jt ] u
296 g_1 = [ Jr (x) Ds (x) Jt ] u
297 g_2 = [ Jr (x) Js (x) Dt ] u
298 where Jr,Dr,Js,Ds,Jt,Dt are row vectors
299 (interpolation & derivative weights)
300 u is nr x ns x nt in column-major format (inner index is r)
301 v is a scalar, g is an array of 3 reals
302 --------------------------------------------------------------------------*/
303
304 realType tensor_ig1( const realType* Jr, const realType* Dr, unsigned nr, const realType* u, realType* g )
305 {
306 *g = inner( Dr, u, nr );
307 return inner( Jr, u, nr );
308 }
309
310 /* work holds 2*ns reals */
311 realType tensor_ig2( const realType* Jr,
312 const realType* Dr,
313 unsigned nr,
314 const realType* Js,
315 const realType* Ds,
316 unsigned ns,
317 const realType* u,
318 realType* g,
319 realType* work )
320 {
321 realType *a = work, *ar = a + ns;
322 mxv_r( a, ns, u, Jr, nr );
323 mxv_r( ar, ns, u, Dr, nr );
324 g[0] = inner( Js, ar, ns );
325 g[1] = inner( Ds, a, ns );
326 return inner( Js, a, ns );
327 }
328
329 /* work holds 2*ns*nt + 3*ns reals */
330 realType tensor_ig3( const realType* Jr,
331 const realType* Dr,
332 unsigned nr,
333 const realType* Js,
334 const realType* Ds,
335 unsigned ns,
336 const realType* Jt,
337 const realType* Dt,
338 unsigned nt,
339 const realType* u,
340 realType* g,
341 realType* work )
342 {
343 unsigned nsnt = ns * nt;
344 realType *a = work, *ar = a + nsnt, *b = ar + nsnt, *br = b + ns, *bs = br + ns;
345 mxv_r( a, nsnt, u, Jr, nr );
346 mxv_r( ar, nsnt, u, Dr, nr );
347 mxv_r( b, nt, a, Js, ns );
348 mxv_r( br, nt, ar, Js, ns );
349 mxv_r( bs, nt, a, Ds, ns );
350 g[0] = inner( Jt, br, nt );
351 g[1] = inner( Jt, bs, nt );
352 g[2] = inner( Dt, b, nt );
353 return inner( Jt, b, nt );
354 }