Actual source code: dgefa2.c
petsc-3.7.3 2016-08-01
2: /*
     Inverts a 2 by 2 matrix in place using Gaussian elimination with partial pivoting.
5: Used by the sparse factorization routines in
6: src/mat/impls/baij/seq
9: This is a combination of the Linpack routines
10: dgefa() and dgedi() specialized for a size of 2.
12: */
13: #include <petscsys.h>
17: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected)
18: {
19: PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3;
20: PetscInt k4,j3;
21: MatScalar *aa,*ax,*ay,work[4],stmp;
22: MatReal tmp,max;
25: if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
26: shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3]));
28: /* Parameter adjustments */
29: a -= 3;
31: k = 1;
32: kp1 = k + 1;
33: k3 = 2*k;
34: k4 = k3 + k;
36: /* find l = pivot index */
37: i__2 = 3 - k;
38: aa = &a[k4];
39: max = PetscAbsScalar(aa[0]);
40: l = 1;
41: for (ll=1; ll<i__2; ll++) {
42: tmp = PetscAbsScalar(aa[ll]);
43: if (tmp > max) { max = tmp; l = ll+1;}
44: }
45: l += k - 1;
46: ipvt[k-1] = l;
48: if (a[l + k3] == 0.0) {
49: if (shift == 0.0) {
50: if (allowzeropivot) {
52: PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);
53: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
54: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
55: } else {
56: a[l + k3] = shift;
57: }
58: }
60: /* interchange if necessary */
61: if (l != k) {
62: stmp = a[l + k3];
63: a[l + k3] = a[k4];
64: a[k4] = stmp;
65: }
67: /* compute multipliers */
68: stmp = -1. / a[k4];
69: i__2 = 2 - k;
70: aa = &a[1 + k4];
71: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
73: /* row elimination with column indexing */
74: ax = &a[k4+1];
75: for (j = kp1; j <= 2; ++j) {
76: j3 = 2*j;
77: stmp = a[l + j3];
78: if (l != k) {
79: a[l + j3] = a[k + j3];
80: a[k + j3] = stmp;
81: }
83: i__3 = 2 - k;
84: ay = &a[1+k+j3];
85: for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
86: }
88: ipvt[1] = 2;
89: if (a[6] == 0.0) {
90: if (allowzeropivot) {
92: PetscInfo1(NULL,"Zero pivot, row %D\n",1);
93: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
94: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1);
95: }
97: /* Now form the inverse */
98: /* compute inverse(u) */
99: for (k = 1; k <= 2; ++k) {
100: k3 = 2*k;
101: k4 = k3 + k;
102: a[k4] = 1.0 / a[k4];
103: stmp = -a[k4];
104: i__2 = k - 1;
105: aa = &a[k3 + 1];
106: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
107: kp1 = k + 1;
108: if (2 < kp1) continue;
109: ax = aa;
110: for (j = kp1; j <= 2; ++j) {
111: j3 = 2*j;
112: stmp = a[k + j3];
113: a[k + j3] = 0.0;
114: ay = &a[j3 + 1];
115: for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
116: }
117: }
119: /* form inverse(u)*inverse(l) */
120: k = 1;
121: k3 = 2*k;
122: kp1 = k + 1;
123: aa = a + k3;
124: for (i = kp1; i <= 2; ++i) {
125: work[i-1] = aa[i];
126: aa[i] = 0.0;
127: }
128: for (j = kp1; j <= 2; ++j) {
129: stmp = work[j-1];
130: ax = &a[2*j + 1];
131: ay = &a[k3 + 1];
132: ay[0] += stmp*ax[0];
133: ay[1] += stmp*ax[1];
134: }
135: l = ipvt[k-1];
136: if (l != k) {
137: ax = &a[k3 + 1];
138: ay = &a[2*l + 1];
139: stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
140: stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
141: }
142: return(0);
143: }
/* Inverts a 9 by 9 matrix in place using Gaussian elimination with partial pivoting */
148: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected)
149: {
150: PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3;
151: PetscInt k4,j3;
152: MatScalar *aa,*ax,*ay,work[81],stmp;
153: MatReal tmp,max;
156: if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
158: /* Parameter adjustments */
159: a -= 10;
161: for (k = 1; k <= 8; ++k) {
162: kp1 = k + 1;
163: k3 = 9*k;
164: k4 = k3 + k;
166: /* find l = pivot index */
167: i__2 = 10 - k;
168: aa = &a[k4];
169: max = PetscAbsScalar(aa[0]);
170: l = 1;
171: for (ll=1; ll<i__2; ll++) {
172: tmp = PetscAbsScalar(aa[ll]);
173: if (tmp > max) { max = tmp; l = ll+1;}
174: }
175: l += k - 1;
176: ipvt[k-1] = l;
178: if (a[l + k3] == 0.0) {
179: if (shift == 0.0) {
180: if (allowzeropivot) {
182: PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);
183: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
184: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
185: } else {
186: a[l + k3] = shift;
187: }
188: }
190: /* interchange if necessary */
191: if (l != k) {
192: stmp = a[l + k3];
193: a[l + k3] = a[k4];
194: a[k4] = stmp;
195: }
197: /* compute multipliers */
198: stmp = -1. / a[k4];
199: i__2 = 9 - k;
200: aa = &a[1 + k4];
201: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
203: /* row elimination with column indexing */
204: ax = &a[k4+1];
205: for (j = kp1; j <= 9; ++j) {
206: j3 = 9*j;
207: stmp = a[l + j3];
208: if (l != k) {
209: a[l + j3] = a[k + j3];
210: a[k + j3] = stmp;
211: }
213: i__3 = 9 - k;
214: ay = &a[1+k+j3];
215: for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
216: }
217: }
218: ipvt[8] = 9;
219: if (a[90] == 0.0) {
220: if (allowzeropivot) {
222: PetscInfo1(NULL,"Zero pivot, row %D\n",6);
223: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
224: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6);
225: }
227: /* Now form the inverse */
228: /* compute inverse(u) */
229: for (k = 1; k <= 9; ++k) {
230: k3 = 9*k;
231: k4 = k3 + k;
232: a[k4] = 1.0 / a[k4];
233: stmp = -a[k4];
234: i__2 = k - 1;
235: aa = &a[k3 + 1];
236: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
237: kp1 = k + 1;
238: if (9 < kp1) continue;
239: ax = aa;
240: for (j = kp1; j <= 9; ++j) {
241: j3 = 9*j;
242: stmp = a[k + j3];
243: a[k + j3] = 0.0;
244: ay = &a[j3 + 1];
245: for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
246: }
247: }
249: /* form inverse(u)*inverse(l) */
250: for (kb = 1; kb <= 8; ++kb) {
251: k = 9 - kb;
252: k3 = 9*k;
253: kp1 = k + 1;
254: aa = a + k3;
255: for (i = kp1; i <= 9; ++i) {
256: work[i-1] = aa[i];
257: aa[i] = 0.0;
258: }
259: for (j = kp1; j <= 9; ++j) {
260: stmp = work[j-1];
261: ax = &a[9*j + 1];
262: ay = &a[k3 + 1];
263: ay[0] += stmp*ax[0];
264: ay[1] += stmp*ax[1];
265: ay[2] += stmp*ax[2];
266: ay[3] += stmp*ax[3];
267: ay[4] += stmp*ax[4];
268: ay[5] += stmp*ax[5];
269: ay[6] += stmp*ax[6];
270: ay[7] += stmp*ax[7];
271: ay[8] += stmp*ax[8];
272: }
273: l = ipvt[k-1];
274: if (l != k) {
275: ax = &a[k3 + 1];
276: ay = &a[9*l + 1];
277: stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
278: stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
279: stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
280: stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
281: stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
282: stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
283: stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
284: stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
285: stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
286: }
287: }
288: return(0);
289: }
291: /*
      Inverts a 15 by 15 matrix in place using Gaussian elimination with partial pivoting.
294: Used by the sparse factorization routines in
295: src/mat/impls/baij/seq
297: This is a combination of the Linpack routines
298: dgefa() and dgedi() specialized for a size of 15.
300: */
304: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected)
305: {
306: PetscInt i__2,i__3,kp1,j,k,l,ll,i,kb,k3;
307: PetscInt k4,j3;
308: MatScalar *aa,*ax,*ay,stmp;
309: MatReal tmp,max;
312: if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
314: /* Parameter adjustments */
315: a -= 16;
317: for (k = 1; k <= 14; ++k) {
318: kp1 = k + 1;
319: k3 = 15*k;
320: k4 = k3 + k;
322: /* find l = pivot index */
323: i__2 = 16 - k;
324: aa = &a[k4];
325: max = PetscAbsScalar(aa[0]);
326: l = 1;
327: for (ll=1; ll<i__2; ll++) {
328: tmp = PetscAbsScalar(aa[ll]);
329: if (tmp > max) { max = tmp; l = ll+1;}
330: }
331: l += k - 1;
332: ipvt[k-1] = l;
334: if (a[l + k3] == 0.0) {
335: if (shift == 0.0) {
336: if (allowzeropivot) {
338: PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);
339: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
340: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
341: } else {
342: a[l + k3] = shift;
343: }
344: }
346: /* interchange if necessary */
347: if (l != k) {
348: stmp = a[l + k3];
349: a[l + k3] = a[k4];
350: a[k4] = stmp;
351: }
353: /* compute multipliers */
354: stmp = -1. / a[k4];
355: i__2 = 15 - k;
356: aa = &a[1 + k4];
357: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
359: /* row elimination with column indexing */
360: ax = &a[k4+1];
361: for (j = kp1; j <= 15; ++j) {
362: j3 = 15*j;
363: stmp = a[l + j3];
364: if (l != k) {
365: a[l + j3] = a[k + j3];
366: a[k + j3] = stmp;
367: }
369: i__3 = 15 - k;
370: ay = &a[1+k+j3];
371: for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
372: }
373: }
374: ipvt[14] = 15;
375: if (a[240] == 0.0) {
376: if (allowzeropivot) {
378: PetscInfo1(NULL,"Zero pivot, row %D\n",6);
379: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
380: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6);
381: }
383: /* Now form the inverse */
384: /* compute inverse(u) */
385: for (k = 1; k <= 15; ++k) {
386: k3 = 15*k;
387: k4 = k3 + k;
388: a[k4] = 1.0 / a[k4];
389: stmp = -a[k4];
390: i__2 = k - 1;
391: aa = &a[k3 + 1];
392: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
393: kp1 = k + 1;
394: if (15 < kp1) continue;
395: ax = aa;
396: for (j = kp1; j <= 15; ++j) {
397: j3 = 15*j;
398: stmp = a[k + j3];
399: a[k + j3] = 0.0;
400: ay = &a[j3 + 1];
401: for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
402: }
403: }
405: /* form inverse(u)*inverse(l) */
406: for (kb = 1; kb <= 14; ++kb) {
407: k = 15 - kb;
408: k3 = 15*k;
409: kp1 = k + 1;
410: aa = a + k3;
411: for (i = kp1; i <= 15; ++i) {
412: work[i-1] = aa[i];
413: aa[i] = 0.0;
414: }
415: for (j = kp1; j <= 15; ++j) {
416: stmp = work[j-1];
417: ax = &a[15*j + 1];
418: ay = &a[k3 + 1];
419: ay[0] += stmp*ax[0];
420: ay[1] += stmp*ax[1];
421: ay[2] += stmp*ax[2];
422: ay[3] += stmp*ax[3];
423: ay[4] += stmp*ax[4];
424: ay[5] += stmp*ax[5];
425: ay[6] += stmp*ax[6];
426: ay[7] += stmp*ax[7];
427: ay[8] += stmp*ax[8];
428: ay[9] += stmp*ax[9];
429: ay[10] += stmp*ax[10];
430: ay[11] += stmp*ax[11];
431: ay[12] += stmp*ax[12];
432: ay[13] += stmp*ax[13];
433: ay[14] += stmp*ax[14];
434: }
435: l = ipvt[k-1];
436: if (l != k) {
437: ax = &a[k3 + 1];
438: ay = &a[15*l + 1];
439: stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
440: stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
441: stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
442: stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
443: stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
444: stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
445: stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
446: stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
447: stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
448: stmp = ax[9]; ax[9] = ay[9]; ay[9] = stmp;
449: stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp;
450: stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp;
451: stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp;
452: stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp;
453: stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp;
454: }
455: }
456: return(0);
457: }