Actual source code: dgefa2.c
petsc-3.3-p7 2013-05-11
/*
   Inverts 2 by 2 matrix using partial pivoting.

   Used by the sparse factorization routines in
   src/mat/impls/baij/seq

   This is a combination of the Linpack routines
   dgefa() and dgedi() specialized for a size of 2.
*/
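/*
   For reference, assuming the 2 by 2 block is stored by columns as
   a[0..3] = {a00, a10, a01, a11} (the layout the indexing below uses),
   the in-place result is the usual closed-form inverse

       inv(A) = 1/(a00*a11 - a01*a10) * [  a11  -a01 ]
                                        [ -a10   a00 ]

   The routine obtains it through an LU factorization (dgefa) followed by the
   inversion steps of dgedi, so the same pattern carries over to the larger
   block sizes in this file.
*/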
#include <petscsys.h>

PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift)
{
  PetscInt  i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3;
  PetscInt  k4,j3;
  MatScalar *aa,*ax,*ay,work[4],stmp;
  MatReal   tmp,max;

  /* gaussian elimination with partial pivoting */

  shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3]));
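  /* the caller-supplied shift is rescaled by the magnitude of the diagonal
     (plus a small floor) and is used below only to replace a pivot that is
     exactly zero */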
  /* Parameter adjustments */
  a -= 3;
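  /* shifting the base pointer by -3 allows Fortran-style 1-based, column-major
     indexing a[row + 2*col] below, matching the original Linpack code */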
  /*for (k = 1; k <= 1; ++k) {*/
  k   = 1;
  kp1 = k + 1;
  k3  = 2*k;
  k4  = k3 + k;

  /* find l = pivot index */
  i__2 = 3 - k;
  aa   = &a[k4];
  max  = PetscAbsScalar(aa[0]);
  l    = 1;
  for (ll=1; ll<i__2; ll++) {
    tmp = PetscAbsScalar(aa[ll]);
    if (tmp > max) { max = tmp; l = ll+1;}
  }
  l        += k - 1;
  ipvt[k-1] = l;

  if (a[l + k3] == 0.0) {
    if (shift == 0.0) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
    } else {
      a[l + k3] = shift;
    }
  }

  /* interchange if necessary */
  if (l != k) {
    stmp      = a[l + k3];
    a[l + k3] = a[k4];
    a[k4]     = stmp;
  }

  /* compute multipliers */
  stmp = -1. / a[k4];
  i__2 = 2 - k;
  aa   = &a[1 + k4];
  for (ll=0; ll<i__2; ll++) {
    aa[ll] *= stmp;
  }

  /* row elimination with column indexing */
  ax = &a[k4+1];
  for (j = kp1; j <= 2; ++j) {
    j3   = 2*j;
    stmp = a[l + j3];
    if (l != k) {
      a[l + j3] = a[k + j3];
      a[k + j3] = stmp;
    }

    i__3 = 2 - k;
    ay   = &a[1+k+j3];
    for (ll=0; ll<i__3; ll++) {
      ay[ll] += stmp*ax[ll];
    }
  }
  /*}*/
  ipvt[1] = 2;
  if (a[6] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1);

  /*
     Now form the inverse
  */

  /* compute inverse(u) */
  for (k = 1; k <= 2; ++k) {
    k3    = 2*k;
    k4    = k3 + k;
    a[k4] = 1.0 / a[k4];
    stmp  = -a[k4];
    i__2  = k - 1;
    aa    = &a[k3 + 1];
    for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
    kp1 = k + 1;
    if (2 < kp1) continue;
    ax = aa;
    for (j = kp1; j <= 2; ++j) {
      j3        = 2*j;
      stmp      = a[k + j3];
      a[k + j3] = 0.0;
      ay        = &a[j3 + 1];
      for (ll=0; ll<k; ll++) {
        ay[ll] += stmp*ax[ll];
      }
    }
  }

  /* form inverse(u)*inverse(l) */
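  /* as in dgedi: the multipliers stored below the diagonal (the inverse(l)
     factors) are saved in work, folded back into the columns of inverse(u),
     and the pivot interchange is undone by swapping columns, leaving
     inverse(a) in place */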
  /*for (kb = 1; kb <= 1; ++kb) {*/

  k   = 1;
  k3  = 2*k;
  kp1 = k + 1;
  aa  = a + k3;
  for (i = kp1; i <= 2; ++i) {
    work[i-1] = aa[i];
    aa[i]     = 0.0;
  }
  for (j = kp1; j <= 2; ++j) {
    stmp   = work[j-1];
    ax     = &a[2*j + 1];
    ay     = &a[k3 + 1];
    ay[0] += stmp*ax[0];
    ay[1] += stmp*ax[1];
  }
  l = ipvt[k-1];
  if (l != k) {
    ax   = &a[k3 + 1];
    ay   = &a[2*l + 1];
    stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
    stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
  }

  return(0);
}
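/*
   A minimal calling sketch (illustrative names and values, assuming the
   column-major 2 by 2 layout described above):

     MatScalar block[4] = {4.0, 1.0, 2.0, 3.0};            A = [4 2; 1 3]
     PetscErrorCode ierr = PetscKernel_A_gets_inverse_A_2(block,0.0);
     if (ierr) return ierr;
     block now holds {0.3, -0.1, -0.2, 0.4}, i.e. inv(A) = [0.3 -0.2; -0.1 0.4]
*/

/* Same dgefa()/dgedi() combination as above, specialized for a size of 9 */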
PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift)
{
  PetscInt  i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3;
  PetscInt  k4,j3;
  MatScalar *aa,*ax,*ay,work[81],stmp;
  MatReal   tmp,max;

  /* gaussian elimination with partial pivoting */

  /* Parameter adjustments */
  a -= 10;

  for (k = 1; k <= 8; ++k) {
    kp1 = k + 1;
    k3  = 9*k;
    k4  = k3 + k;

    /* find l = pivot index */
    i__2 = 10 - k;
    aa   = &a[k4];
    max  = PetscAbsScalar(aa[0]);
    l    = 1;
    for (ll=1; ll<i__2; ll++) {
      tmp = PetscAbsScalar(aa[ll]);
      if (tmp > max) { max = tmp; l = ll+1;}
    }
    l        += k - 1;
    ipvt[k-1] = l;

    if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);

    /* interchange if necessary */
    if (l != k) {
      stmp      = a[l + k3];
      a[l + k3] = a[k4];
      a[k4]     = stmp;
    }

    /* compute multipliers */
    stmp = -1. / a[k4];
    i__2 = 9 - k;
    aa   = &a[1 + k4];
    for (ll=0; ll<i__2; ll++) {
      aa[ll] *= stmp;
    }

    /* row elimination with column indexing */
    ax = &a[k4+1];
    for (j = kp1; j <= 9; ++j) {
      j3   = 9*j;
      stmp = a[l + j3];
      if (l != k) {
        a[l + j3] = a[k + j3];
        a[k + j3] = stmp;
      }

      i__3 = 9 - k;
      ay   = &a[1+k+j3];
      for (ll=0; ll<i__3; ll++) {
        ay[ll] += stmp*ax[ll];
      }
    }
  }
  ipvt[8] = 9;
  if (a[90] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",8);
  /*
     Now form the inverse
  */

  /* compute inverse(u) */
  for (k = 1; k <= 9; ++k) {
    k3    = 9*k;
    k4    = k3 + k;
    a[k4] = 1.0 / a[k4];
    stmp  = -a[k4];
    i__2  = k - 1;
    aa    = &a[k3 + 1];
    for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
    kp1 = k + 1;
    if (9 < kp1) continue;
    ax = aa;
    for (j = kp1; j <= 9; ++j) {
      j3        = 9*j;
      stmp      = a[k + j3];
      a[k + j3] = 0.0;
      ay        = &a[j3 + 1];
      for (ll=0; ll<k; ll++) {
        ay[ll] += stmp*ax[ll];
      }
    }
  }

  /* form inverse(u)*inverse(l) */
  for (kb = 1; kb <= 8; ++kb) {
    k   = 9 - kb;
    k3  = 9*k;
    kp1 = k + 1;
    aa  = a + k3;
    for (i = kp1; i <= 9; ++i) {
      work[i-1] = aa[i];
      aa[i]     = 0.0;
    }
    for (j = kp1; j <= 9; ++j) {
      stmp   = work[j-1];
      ax     = &a[9*j + 1];
      ay     = &a[k3 + 1];
      ay[0] += stmp*ax[0];
      ay[1] += stmp*ax[1];
      ay[2] += stmp*ax[2];
      ay[3] += stmp*ax[3];
      ay[4] += stmp*ax[4];
      ay[5] += stmp*ax[5];
      ay[6] += stmp*ax[6];
      ay[7] += stmp*ax[7];
      ay[8] += stmp*ax[8];
    }
    l = ipvt[k-1];
    if (l != k) {
      ax   = &a[k3 + 1];
      ay   = &a[9*l + 1];
      stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
      stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
      stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
      stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
      stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
      stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
      stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
      stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
      stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
    }
  }
  return(0);
}
/*
   Inverts 15 by 15 matrix using partial pivoting.

   Used by the sparse factorization routines in
   src/mat/impls/baij/seq

   This is a combination of the Linpack routines
   dgefa() and dgedi() specialized for a size of 15.
*/
PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift)
{
  PetscInt  i__2,i__3,kp1,j,k,l,ll,i,kb,k3;
  PetscInt  k4,j3;
  MatScalar *aa,*ax,*ay,stmp;
  MatReal   tmp,max;

  /* gaussian elimination with partial pivoting */

  /* Parameter adjustments */
  a -= 16;

  for (k = 1; k <= 14; ++k) {
    kp1 = k + 1;
    k3  = 15*k;
    k4  = k3 + k;

    /* find l = pivot index */
    i__2 = 16 - k;
    aa   = &a[k4];
    max  = PetscAbsScalar(aa[0]);
    l    = 1;
    for (ll=1; ll<i__2; ll++) {
      tmp = PetscAbsScalar(aa[ll]);
      if (tmp > max) { max = tmp; l = ll+1;}
    }
    l        += k - 1;
    ipvt[k-1] = l;

    if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);

    /* interchange if necessary */
    if (l != k) {
      stmp      = a[l + k3];
      a[l + k3] = a[k4];
      a[k4]     = stmp;
    }

    /* compute multipliers */
    stmp = -1. / a[k4];
    i__2 = 15 - k;
    aa   = &a[1 + k4];
    for (ll=0; ll<i__2; ll++) {
      aa[ll] *= stmp;
    }

    /* row elimination with column indexing */
    ax = &a[k4+1];
    for (j = kp1; j <= 15; ++j) {
      j3   = 15*j;
      stmp = a[l + j3];
      if (l != k) {
        a[l + j3] = a[k + j3];
        a[k + j3] = stmp;
      }

      i__3 = 15 - k;
      ay   = &a[1+k+j3];
      for (ll=0; ll<i__3; ll++) {
        ay[ll] += stmp*ax[ll];
      }
    }
  }
  ipvt[14] = 15;
  if (a[240] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",14);
  /*
     Now form the inverse
  */

  /* compute inverse(u) */
  for (k = 1; k <= 15; ++k) {
    k3    = 15*k;
    k4    = k3 + k;
    a[k4] = 1.0 / a[k4];
    stmp  = -a[k4];
    i__2  = k - 1;
    aa    = &a[k3 + 1];
    for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
    kp1 = k + 1;
    if (15 < kp1) continue;
    ax = aa;
    for (j = kp1; j <= 15; ++j) {
      j3        = 15*j;
      stmp      = a[k + j3];
      a[k + j3] = 0.0;
      ay        = &a[j3 + 1];
      for (ll=0; ll<k; ll++) {
        ay[ll] += stmp*ax[ll];
      }
    }
  }

  /* form inverse(u)*inverse(l) */
  for (kb = 1; kb <= 14; ++kb) {
    k   = 15 - kb;
    k3  = 15*k;
    kp1 = k + 1;
    aa  = a + k3;
    for (i = kp1; i <= 15; ++i) {
      work[i-1] = aa[i];
      aa[i]     = 0.0;
    }
    for (j = kp1; j <= 15; ++j) {
      stmp    = work[j-1];
      ax      = &a[15*j + 1];
      ay      = &a[k3 + 1];
      ay[0]  += stmp*ax[0];
      ay[1]  += stmp*ax[1];
      ay[2]  += stmp*ax[2];
      ay[3]  += stmp*ax[3];
      ay[4]  += stmp*ax[4];
      ay[5]  += stmp*ax[5];
      ay[6]  += stmp*ax[6];
      ay[7]  += stmp*ax[7];
      ay[8]  += stmp*ax[8];
      ay[9]  += stmp*ax[9];
      ay[10] += stmp*ax[10];
      ay[11] += stmp*ax[11];
      ay[12] += stmp*ax[12];
      ay[13] += stmp*ax[13];
      ay[14] += stmp*ax[14];
    }
    l = ipvt[k-1];
    if (l != k) {
      ax   = &a[k3 + 1];
      ay   = &a[15*l + 1];
      stmp = ax[0];  ax[0]  = ay[0];  ay[0]  = stmp;
      stmp = ax[1];  ax[1]  = ay[1];  ay[1]  = stmp;
      stmp = ax[2];  ax[2]  = ay[2];  ay[2]  = stmp;
      stmp = ax[3];  ax[3]  = ay[3];  ay[3]  = stmp;
      stmp = ax[4];  ax[4]  = ay[4];  ay[4]  = stmp;
      stmp = ax[5];  ax[5]  = ay[5];  ay[5]  = stmp;
      stmp = ax[6];  ax[6]  = ay[6];  ay[6]  = stmp;
      stmp = ax[7];  ax[7]  = ay[7];  ay[7]  = stmp;
      stmp = ax[8];  ax[8]  = ay[8];  ay[8]  = stmp;
      stmp = ax[9];  ax[9]  = ay[9];  ay[9]  = stmp;
      stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp;
      stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp;
      stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp;
      stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp;
      stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp;
    }
  }
  return(0);
}
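/*
   A minimal calling sketch for the 15 by 15 kernel (illustrative only): unlike
   the 2 by 2 and 9 by 9 versions, the pivot and work arrays are supplied by
   the caller.

     MatScalar      block[225],work[15];
     PetscInt       ipvt[15];
     PetscErrorCode ierr;

     ... fill block with the 15 by 15 matrix, stored by columns ...
     ierr = PetscKernel_A_gets_inverse_A_15(block,ipvt,work,0.0);
     if (ierr) return ierr;
     block now holds the inverse, in the same column-major layout
*/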