Actual source code: dgefa2.c
petsc-3.4.5 2014-06-29
2: /*
3: Inverts 2 by 2 matrix using partial pivoting.
5: Used by the sparse factorization routines in
6: src/mat/impls/baij/seq
9: This is a combination of the Linpack routines
10: dgefa() and dgedi() specialized for a size of 2.
12: */
13: #include <petscsys.h>
17: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift)
18: {
19: PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3;
20: PetscInt k4,j3;
21: MatScalar *aa,*ax,*ay,work[4],stmp;
22: MatReal tmp,max;
24: /* gaussian elimination with partial pivoting */
27: shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3]));
28: /* Parameter adjustments */
29: a -= 3;
31: /*for (k = 1; k <= 1; ++k) {*/
32: k = 1;
33: kp1 = k + 1;
34: k3 = 2*k;
35: k4 = k3 + k;
36: /* find l = pivot index */
38: i__2 = 3 - k;
39: aa = &a[k4];
40: max = PetscAbsScalar(aa[0]);
41: l = 1;
42: for (ll=1; ll<i__2; ll++) {
43: tmp = PetscAbsScalar(aa[ll]);
44: if (tmp > max) { max = tmp; l = ll+1;}
45: }
46: l += k - 1;
47: ipvt[k-1] = l;
49: if (a[l + k3] == 0.0) {
50: if (shift == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
51: else {
52: a[l + k3] = shift;
53: }
54: }
56: /* interchange if necessary */
58: if (l != k) {
59: stmp = a[l + k3];
60: a[l + k3] = a[k4];
61: a[k4] = stmp;
62: }
64: /* compute multipliers */
66: stmp = -1. / a[k4];
67: i__2 = 2 - k;
68: aa = &a[1 + k4];
69: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
71: /* row elimination with column indexing */
73: ax = &a[k4+1];
74: for (j = kp1; j <= 2; ++j) {
75: j3 = 2*j;
76: stmp = a[l + j3];
77: if (l != k) {
78: a[l + j3] = a[k + j3];
79: a[k + j3] = stmp;
80: }
82: i__3 = 2 - k;
83: ay = &a[1+k+j3];
84: for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
85: }
87: /*}*/
88: ipvt[1] = 2;
89: if (a[6] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1);
91: /*
92: Now form the inverse
93: */
95: /* compute inverse(u) */
97: for (k = 1; k <= 2; ++k) {
98: k3 = 2*k;
99: k4 = k3 + k;
100: a[k4] = 1.0 / a[k4];
101: stmp = -a[k4];
102: i__2 = k - 1;
103: aa = &a[k3 + 1];
104: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
105: kp1 = k + 1;
106: if (2 < kp1) continue;
107: ax = aa;
108: for (j = kp1; j <= 2; ++j) {
109: j3 = 2*j;
110: stmp = a[k + j3];
111: a[k + j3] = 0.0;
112: ay = &a[j3 + 1];
113: for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
114: }
115: }
117: /* form inverse(u)*inverse(l) */
119: /*for (kb = 1; kb <= 1; ++kb) {*/
121: k = 1;
122: k3 = 2*k;
123: kp1 = k + 1;
124: aa = a + k3;
125: for (i = kp1; i <= 2; ++i) {
126: work[i-1] = aa[i];
127: aa[i] = 0.0;
128: }
129: for (j = kp1; j <= 2; ++j) {
130: stmp = work[j-1];
131: ax = &a[2*j + 1];
132: ay = &a[k3 + 1];
133: ay[0] += stmp*ax[0];
134: ay[1] += stmp*ax[1];
135: }
136: l = ipvt[k-1];
137: if (l != k) {
138: ax = &a[k3 + 1];
139: ay = &a[2*l + 1];
140: stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
141: stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
142: }
143: return(0);
144: }
148: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift)
149: {
150: PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3;
151: PetscInt k4,j3;
152: MatScalar *aa,*ax,*ay,work[81],stmp;
153: MatReal tmp,max;
155: /* gaussian elimination with partial pivoting */
158: /* Parameter adjustments */
159: a -= 10;
161: for (k = 1; k <= 8; ++k) {
162: kp1 = k + 1;
163: k3 = 9*k;
164: k4 = k3 + k;
165: /* find l = pivot index */
167: i__2 = 10 - k;
168: aa = &a[k4];
169: max = PetscAbsScalar(aa[0]);
170: l = 1;
171: for (ll=1; ll<i__2; ll++) {
172: tmp = PetscAbsScalar(aa[ll]);
173: if (tmp > max) { max = tmp; l = ll+1;}
174: }
175: l += k - 1;
176: ipvt[k-1] = l;
178: if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
180: /* interchange if necessary */
182: if (l != k) {
183: stmp = a[l + k3];
184: a[l + k3] = a[k4];
185: a[k4] = stmp;
186: }
188: /* compute multipliers */
190: stmp = -1. / a[k4];
191: i__2 = 9 - k;
192: aa = &a[1 + k4];
193: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
195: /* row elimination with column indexing */
197: ax = &a[k4+1];
198: for (j = kp1; j <= 9; ++j) {
199: j3 = 9*j;
200: stmp = a[l + j3];
201: if (l != k) {
202: a[l + j3] = a[k + j3];
203: a[k + j3] = stmp;
204: }
206: i__3 = 9 - k;
207: ay = &a[1+k+j3];
208: for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
209: }
210: }
211: ipvt[8] = 9;
212: if (a[90] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6);
214: /*
215: Now form the inverse
216: */
218: /* compute inverse(u) */
220: for (k = 1; k <= 9; ++k) {
221: k3 = 9*k;
222: k4 = k3 + k;
223: a[k4] = 1.0 / a[k4];
224: stmp = -a[k4];
225: i__2 = k - 1;
226: aa = &a[k3 + 1];
227: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
228: kp1 = k + 1;
229: if (9 < kp1) continue;
230: ax = aa;
231: for (j = kp1; j <= 9; ++j) {
232: j3 = 9*j;
233: stmp = a[k + j3];
234: a[k + j3] = 0.0;
235: ay = &a[j3 + 1];
236: for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
237: }
238: }
240: /* form inverse(u)*inverse(l) */
242: for (kb = 1; kb <= 8; ++kb) {
243: k = 9 - kb;
244: k3 = 9*k;
245: kp1 = k + 1;
246: aa = a + k3;
247: for (i = kp1; i <= 9; ++i) {
248: work[i-1] = aa[i];
249: aa[i] = 0.0;
250: }
251: for (j = kp1; j <= 9; ++j) {
252: stmp = work[j-1];
253: ax = &a[9*j + 1];
254: ay = &a[k3 + 1];
255: ay[0] += stmp*ax[0];
256: ay[1] += stmp*ax[1];
257: ay[2] += stmp*ax[2];
258: ay[3] += stmp*ax[3];
259: ay[4] += stmp*ax[4];
260: ay[5] += stmp*ax[5];
261: ay[6] += stmp*ax[6];
262: ay[7] += stmp*ax[7];
263: ay[8] += stmp*ax[8];
264: }
265: l = ipvt[k-1];
266: if (l != k) {
267: ax = &a[k3 + 1];
268: ay = &a[9*l + 1];
269: stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
270: stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
271: stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
272: stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
273: stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
274: stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
275: stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
276: stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
277: stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
278: }
279: }
280: return(0);
281: }
283: /*
284: Inverts 15 by 15 matrix using partial pivoting.
286: Used by the sparse factorization routines in
287: src/mat/impls/baij/seq
289: This is a combination of the Linpack routines
290: dgefa() and dgedi() specialized for a size of 15.
292: */
296: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift)
297: {
298: PetscInt i__2,i__3,kp1,j,k,l,ll,i,kb,k3;
299: PetscInt k4,j3;
300: MatScalar *aa,*ax,*ay,stmp;
301: MatReal tmp,max;
303: /* gaussian elimination with partial pivoting */
306: /* Parameter adjustments */
307: a -= 16;
309: for (k = 1; k <= 14; ++k) {
310: kp1 = k + 1;
311: k3 = 15*k;
312: k4 = k3 + k;
313: /* find l = pivot index */
315: i__2 = 16 - k;
316: aa = &a[k4];
317: max = PetscAbsScalar(aa[0]);
318: l = 1;
319: for (ll=1; ll<i__2; ll++) {
320: tmp = PetscAbsScalar(aa[ll]);
321: if (tmp > max) { max = tmp; l = ll+1;}
322: }
323: l += k - 1;
324: ipvt[k-1] = l;
326: if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
328: /* interchange if necessary */
330: if (l != k) {
331: stmp = a[l + k3];
332: a[l + k3] = a[k4];
333: a[k4] = stmp;
334: }
336: /* compute multipliers */
338: stmp = -1. / a[k4];
339: i__2 = 15 - k;
340: aa = &a[1 + k4];
341: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
343: /* row elimination with column indexing */
345: ax = &a[k4+1];
346: for (j = kp1; j <= 15; ++j) {
347: j3 = 15*j;
348: stmp = a[l + j3];
349: if (l != k) {
350: a[l + j3] = a[k + j3];
351: a[k + j3] = stmp;
352: }
354: i__3 = 15 - k;
355: ay = &a[1+k+j3];
356: for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
357: }
358: }
359: ipvt[14] = 15;
360: if (a[240] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6);
362: /*
363: Now form the inverse
364: */
366: /* compute inverse(u) */
368: for (k = 1; k <= 15; ++k) {
369: k3 = 15*k;
370: k4 = k3 + k;
371: a[k4] = 1.0 / a[k4];
372: stmp = -a[k4];
373: i__2 = k - 1;
374: aa = &a[k3 + 1];
375: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
376: kp1 = k + 1;
377: if (15 < kp1) continue;
378: ax = aa;
379: for (j = kp1; j <= 15; ++j) {
380: j3 = 15*j;
381: stmp = a[k + j3];
382: a[k + j3] = 0.0;
383: ay = &a[j3 + 1];
384: for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
385: }
386: }
388: /* form inverse(u)*inverse(l) */
390: for (kb = 1; kb <= 14; ++kb) {
391: k = 15 - kb;
392: k3 = 15*k;
393: kp1 = k + 1;
394: aa = a + k3;
395: for (i = kp1; i <= 15; ++i) {
396: work[i-1] = aa[i];
397: aa[i] = 0.0;
398: }
399: for (j = kp1; j <= 15; ++j) {
400: stmp = work[j-1];
401: ax = &a[15*j + 1];
402: ay = &a[k3 + 1];
403: ay[0] += stmp*ax[0];
404: ay[1] += stmp*ax[1];
405: ay[2] += stmp*ax[2];
406: ay[3] += stmp*ax[3];
407: ay[4] += stmp*ax[4];
408: ay[5] += stmp*ax[5];
409: ay[6] += stmp*ax[6];
410: ay[7] += stmp*ax[7];
411: ay[8] += stmp*ax[8];
412: ay[9] += stmp*ax[9];
413: ay[10] += stmp*ax[10];
414: ay[11] += stmp*ax[11];
415: ay[12] += stmp*ax[12];
416: ay[13] += stmp*ax[13];
417: ay[14] += stmp*ax[14];
418: }
419: l = ipvt[k-1];
420: if (l != k) {
421: ax = &a[k3 + 1];
422: ay = &a[15*l + 1];
423: stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
424: stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
425: stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
426: stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
427: stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
428: stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
429: stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
430: stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
431: stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
432: stmp = ax[9]; ax[9] = ay[9]; ay[9] = stmp;
433: stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp;
434: stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp;
435: stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp;
436: stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp;
437: stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp;
438: }
439: }
440: return(0);
441: }