Actual source code: dgefa2.c
petsc-3.8.4 2018-03-24
2: /*
3: Inverts 2 by 2 matrix using gaussian elimination with partial pivoting.
5: Used by the sparse factorization routines in
6: src/mat/impls/baij/seq
9: This is a combination of the Linpack routines
10: dgefa() and dgedi() specialized for a size of 2.
12: */
13: #include <petscsys.h>
15: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected)
16: {
17: PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3;
18: PetscInt k4,j3;
19: MatScalar *aa,*ax,*ay,work[4],stmp;
20: MatReal tmp,max;
23: if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
24: shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3]));
26: /* Parameter adjustments */
27: a -= 3;
29: k = 1;
30: kp1 = k + 1;
31: k3 = 2*k;
32: k4 = k3 + k;
34: /* find l = pivot index */
35: i__2 = 3 - k;
36: aa = &a[k4];
37: max = PetscAbsScalar(aa[0]);
38: l = 1;
39: for (ll=1; ll<i__2; ll++) {
40: tmp = PetscAbsScalar(aa[ll]);
41: if (tmp > max) { max = tmp; l = ll+1;}
42: }
43: l += k - 1;
44: ipvt[k-1] = l;
46: if (a[l + k3] == 0.0) {
47: if (shift == 0.0) {
48: if (allowzeropivot) {
50: PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);
51: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
52: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
53: } else {
54: a[l + k3] = shift;
55: }
56: }
58: /* interchange if necessary */
59: if (l != k) {
60: stmp = a[l + k3];
61: a[l + k3] = a[k4];
62: a[k4] = stmp;
63: }
65: /* compute multipliers */
66: stmp = -1. / a[k4];
67: i__2 = 2 - k;
68: aa = &a[1 + k4];
69: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
71: /* row elimination with column indexing */
72: ax = &a[k4+1];
73: for (j = kp1; j <= 2; ++j) {
74: j3 = 2*j;
75: stmp = a[l + j3];
76: if (l != k) {
77: a[l + j3] = a[k + j3];
78: a[k + j3] = stmp;
79: }
81: i__3 = 2 - k;
82: ay = &a[1+k+j3];
83: for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
84: }
86: ipvt[1] = 2;
87: if (a[6] == 0.0) {
88: if (allowzeropivot) {
90: PetscInfo1(NULL,"Zero pivot, row %D\n",1);
91: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
92: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1);
93: }
95: /* Now form the inverse */
96: /* compute inverse(u) */
97: for (k = 1; k <= 2; ++k) {
98: k3 = 2*k;
99: k4 = k3 + k;
100: a[k4] = 1.0 / a[k4];
101: stmp = -a[k4];
102: i__2 = k - 1;
103: aa = &a[k3 + 1];
104: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
105: kp1 = k + 1;
106: if (2 < kp1) continue;
107: ax = aa;
108: for (j = kp1; j <= 2; ++j) {
109: j3 = 2*j;
110: stmp = a[k + j3];
111: a[k + j3] = 0.0;
112: ay = &a[j3 + 1];
113: for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
114: }
115: }
117: /* form inverse(u)*inverse(l) */
118: k = 1;
119: k3 = 2*k;
120: kp1 = k + 1;
121: aa = a + k3;
122: for (i = kp1; i <= 2; ++i) {
123: work[i-1] = aa[i];
124: aa[i] = 0.0;
125: }
126: for (j = kp1; j <= 2; ++j) {
127: stmp = work[j-1];
128: ax = &a[2*j + 1];
129: ay = &a[k3 + 1];
130: ay[0] += stmp*ax[0];
131: ay[1] += stmp*ax[1];
132: }
133: l = ipvt[k-1];
134: if (l != k) {
135: ax = &a[k3 + 1];
136: ay = &a[2*l + 1];
137: stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
138: stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
139: }
140: return(0);
141: }
143: /* gaussian elimination with partial pivoting */
144: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected)
145: {
146: PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3;
147: PetscInt k4,j3;
148: MatScalar *aa,*ax,*ay,work[81],stmp;
149: MatReal tmp,max;
152: if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
154: /* Parameter adjustments */
155: a -= 10;
157: for (k = 1; k <= 8; ++k) {
158: kp1 = k + 1;
159: k3 = 9*k;
160: k4 = k3 + k;
162: /* find l = pivot index */
163: i__2 = 10 - k;
164: aa = &a[k4];
165: max = PetscAbsScalar(aa[0]);
166: l = 1;
167: for (ll=1; ll<i__2; ll++) {
168: tmp = PetscAbsScalar(aa[ll]);
169: if (tmp > max) { max = tmp; l = ll+1;}
170: }
171: l += k - 1;
172: ipvt[k-1] = l;
174: if (a[l + k3] == 0.0) {
175: if (shift == 0.0) {
176: if (allowzeropivot) {
178: PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);
179: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
180: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
181: } else {
182: a[l + k3] = shift;
183: }
184: }
186: /* interchange if necessary */
187: if (l != k) {
188: stmp = a[l + k3];
189: a[l + k3] = a[k4];
190: a[k4] = stmp;
191: }
193: /* compute multipliers */
194: stmp = -1. / a[k4];
195: i__2 = 9 - k;
196: aa = &a[1 + k4];
197: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
199: /* row elimination with column indexing */
200: ax = &a[k4+1];
201: for (j = kp1; j <= 9; ++j) {
202: j3 = 9*j;
203: stmp = a[l + j3];
204: if (l != k) {
205: a[l + j3] = a[k + j3];
206: a[k + j3] = stmp;
207: }
209: i__3 = 9 - k;
210: ay = &a[1+k+j3];
211: for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
212: }
213: }
214: ipvt[8] = 9;
215: if (a[90] == 0.0) {
216: if (allowzeropivot) {
218: PetscInfo1(NULL,"Zero pivot, row %D\n",6);
219: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
220: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6);
221: }
223: /* Now form the inverse */
224: /* compute inverse(u) */
225: for (k = 1; k <= 9; ++k) {
226: k3 = 9*k;
227: k4 = k3 + k;
228: a[k4] = 1.0 / a[k4];
229: stmp = -a[k4];
230: i__2 = k - 1;
231: aa = &a[k3 + 1];
232: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
233: kp1 = k + 1;
234: if (9 < kp1) continue;
235: ax = aa;
236: for (j = kp1; j <= 9; ++j) {
237: j3 = 9*j;
238: stmp = a[k + j3];
239: a[k + j3] = 0.0;
240: ay = &a[j3 + 1];
241: for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
242: }
243: }
245: /* form inverse(u)*inverse(l) */
246: for (kb = 1; kb <= 8; ++kb) {
247: k = 9 - kb;
248: k3 = 9*k;
249: kp1 = k + 1;
250: aa = a + k3;
251: for (i = kp1; i <= 9; ++i) {
252: work[i-1] = aa[i];
253: aa[i] = 0.0;
254: }
255: for (j = kp1; j <= 9; ++j) {
256: stmp = work[j-1];
257: ax = &a[9*j + 1];
258: ay = &a[k3 + 1];
259: ay[0] += stmp*ax[0];
260: ay[1] += stmp*ax[1];
261: ay[2] += stmp*ax[2];
262: ay[3] += stmp*ax[3];
263: ay[4] += stmp*ax[4];
264: ay[5] += stmp*ax[5];
265: ay[6] += stmp*ax[6];
266: ay[7] += stmp*ax[7];
267: ay[8] += stmp*ax[8];
268: }
269: l = ipvt[k-1];
270: if (l != k) {
271: ax = &a[k3 + 1];
272: ay = &a[9*l + 1];
273: stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
274: stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
275: stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
276: stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
277: stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
278: stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
279: stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
280: stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
281: stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
282: }
283: }
284: return(0);
285: }
287: /*
288: Inverts 15 by 15 matrix using gaussian elimination with partial pivoting.
290: Used by the sparse factorization routines in
291: src/mat/impls/baij/seq
293: This is a combination of the Linpack routines
294: dgefa() and dgedi() specialized for a size of 15.
296: */
298: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift,PetscBool allowzeropivot,PetscBool *zeropivotdetected)
299: {
300: PetscInt i__2,i__3,kp1,j,k,l,ll,i,kb,k3;
301: PetscInt k4,j3;
302: MatScalar *aa,*ax,*ay,stmp;
303: MatReal tmp,max;
306: if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE;
308: /* Parameter adjustments */
309: a -= 16;
311: for (k = 1; k <= 14; ++k) {
312: kp1 = k + 1;
313: k3 = 15*k;
314: k4 = k3 + k;
316: /* find l = pivot index */
317: i__2 = 16 - k;
318: aa = &a[k4];
319: max = PetscAbsScalar(aa[0]);
320: l = 1;
321: for (ll=1; ll<i__2; ll++) {
322: tmp = PetscAbsScalar(aa[ll]);
323: if (tmp > max) { max = tmp; l = ll+1;}
324: }
325: l += k - 1;
326: ipvt[k-1] = l;
328: if (a[l + k3] == 0.0) {
329: if (shift == 0.0) {
330: if (allowzeropivot) {
332: PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);
333: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
334: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
335: } else {
336: a[l + k3] = shift;
337: }
338: }
340: /* interchange if necessary */
341: if (l != k) {
342: stmp = a[l + k3];
343: a[l + k3] = a[k4];
344: a[k4] = stmp;
345: }
347: /* compute multipliers */
348: stmp = -1. / a[k4];
349: i__2 = 15 - k;
350: aa = &a[1 + k4];
351: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
353: /* row elimination with column indexing */
354: ax = &a[k4+1];
355: for (j = kp1; j <= 15; ++j) {
356: j3 = 15*j;
357: stmp = a[l + j3];
358: if (l != k) {
359: a[l + j3] = a[k + j3];
360: a[k + j3] = stmp;
361: }
363: i__3 = 15 - k;
364: ay = &a[1+k+j3];
365: for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
366: }
367: }
368: ipvt[14] = 15;
369: if (a[240] == 0.0) {
370: if (allowzeropivot) {
372: PetscInfo1(NULL,"Zero pivot, row %D\n",6);
373: if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE;
374: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6);
375: }
377: /* Now form the inverse */
378: /* compute inverse(u) */
379: for (k = 1; k <= 15; ++k) {
380: k3 = 15*k;
381: k4 = k3 + k;
382: a[k4] = 1.0 / a[k4];
383: stmp = -a[k4];
384: i__2 = k - 1;
385: aa = &a[k3 + 1];
386: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
387: kp1 = k + 1;
388: if (15 < kp1) continue;
389: ax = aa;
390: for (j = kp1; j <= 15; ++j) {
391: j3 = 15*j;
392: stmp = a[k + j3];
393: a[k + j3] = 0.0;
394: ay = &a[j3 + 1];
395: for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
396: }
397: }
399: /* form inverse(u)*inverse(l) */
400: for (kb = 1; kb <= 14; ++kb) {
401: k = 15 - kb;
402: k3 = 15*k;
403: kp1 = k + 1;
404: aa = a + k3;
405: for (i = kp1; i <= 15; ++i) {
406: work[i-1] = aa[i];
407: aa[i] = 0.0;
408: }
409: for (j = kp1; j <= 15; ++j) {
410: stmp = work[j-1];
411: ax = &a[15*j + 1];
412: ay = &a[k3 + 1];
413: ay[0] += stmp*ax[0];
414: ay[1] += stmp*ax[1];
415: ay[2] += stmp*ax[2];
416: ay[3] += stmp*ax[3];
417: ay[4] += stmp*ax[4];
418: ay[5] += stmp*ax[5];
419: ay[6] += stmp*ax[6];
420: ay[7] += stmp*ax[7];
421: ay[8] += stmp*ax[8];
422: ay[9] += stmp*ax[9];
423: ay[10] += stmp*ax[10];
424: ay[11] += stmp*ax[11];
425: ay[12] += stmp*ax[12];
426: ay[13] += stmp*ax[13];
427: ay[14] += stmp*ax[14];
428: }
429: l = ipvt[k-1];
430: if (l != k) {
431: ax = &a[k3 + 1];
432: ay = &a[15*l + 1];
433: stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
434: stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
435: stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
436: stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
437: stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
438: stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
439: stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
440: stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
441: stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
442: stmp = ax[9]; ax[9] = ay[9]; ay[9] = stmp;
443: stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp;
444: stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp;
445: stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp;
446: stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp;
447: stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp;
448: }
449: }
450: return(0);
451: }