Actual source code: dgefa2.c

petsc-3.4.5 2014-06-29
  2: /*
  3:      Inverts 2 by 2 matrix using partial pivoting.

  5:        Used by the sparse factorization routines in
  6:      src/mat/impls/baij/seq


  9:        This is a combination of the Linpack routines
 10:     dgefa() and dgedi() specialized for a size of 2.

 12: */
 13: #include <petscsys.h>

 17: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift)
 18: {
 19:   PetscInt  i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3;
 20:   PetscInt  k4,j3;
 21:   MatScalar *aa,*ax,*ay,work[4],stmp;
 22:   MatReal   tmp,max;

 24: /*     gaussian elimination with partial pivoting */

 27:   shift = .25*shift*(1.e-12 + PetscAbsScalar(a[0]) + PetscAbsScalar(a[3]));
 28:   /* Parameter adjustments */
 29:   a -= 3;

 31:   /*for (k = 1; k <= 1; ++k) {*/
 32:   k   = 1;
 33:   kp1 = k + 1;
 34:   k3  = 2*k;
 35:   k4  = k3 + k;
 36: /*        find l = pivot index */

 38:   i__2 = 3 - k;
 39:   aa   = &a[k4];
 40:   max  = PetscAbsScalar(aa[0]);
 41:   l    = 1;
 42:   for (ll=1; ll<i__2; ll++) {
 43:     tmp = PetscAbsScalar(aa[ll]);
 44:     if (tmp > max) { max = tmp; l = ll+1;}
 45:   }
 46:   l        += k - 1;
 47:   ipvt[k-1] = l;

 49:   if (a[l + k3] == 0.0) {
 50:     if (shift == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
 51:     else {
 52:       a[l + k3] = shift;
 53:     }
 54:   }

 56: /*           interchange if necessary */

 58:   if (l != k) {
 59:     stmp      = a[l + k3];
 60:     a[l + k3] = a[k4];
 61:     a[k4]     = stmp;
 62:   }

 64: /*           compute multipliers */

 66:   stmp = -1. / a[k4];
 67:   i__2 = 2 - k;
 68:   aa = &a[1 + k4];
 69:   for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;

 71: /*           row elimination with column indexing */

 73:   ax = &a[k4+1];
 74:   for (j = kp1; j <= 2; ++j) {
 75:     j3   = 2*j;
 76:     stmp = a[l + j3];
 77:     if (l != k) {
 78:       a[l + j3] = a[k + j3];
 79:       a[k + j3] = stmp;
 80:     }

 82:     i__3 = 2 - k;
 83:     ay   = &a[1+k+j3];
 84:     for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
 85:   }

 87:   /*}*/
 88:   ipvt[1] = 2;
 89:   if (a[6] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1);

 91:   /*
 92:         Now form the inverse
 93:   */

 95:   /*     compute inverse(u) */

 97:   for (k = 1; k <= 2; ++k) {
 98:     k3    = 2*k;
 99:     k4    = k3 + k;
100:     a[k4] = 1.0 / a[k4];
101:     stmp  = -a[k4];
102:     i__2  = k - 1;
103:     aa    = &a[k3 + 1];
104:     for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
105:     kp1 = k + 1;
106:     if (2 < kp1) continue;
107:     ax = aa;
108:     for (j = kp1; j <= 2; ++j) {
109:       j3        = 2*j;
110:       stmp      = a[k + j3];
111:       a[k + j3] = 0.0;
112:       ay        = &a[j3 + 1];
113:       for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
114:     }
115:   }

117: /*    form inverse(u)*inverse(l) */

119: /*for (kb = 1; kb <= 1; ++kb) {*/

121:   k   = 1;
122:   k3  = 2*k;
123:   kp1 = k + 1;
124:   aa  = a + k3;
125:   for (i = kp1; i <= 2; ++i) {
126:     work[i-1] = aa[i];
127:     aa[i]     = 0.0;
128:   }
129:   for (j = kp1; j <= 2; ++j) {
130:     stmp   = work[j-1];
131:     ax     = &a[2*j + 1];
132:     ay     = &a[k3 + 1];
133:     ay[0] += stmp*ax[0];
134:     ay[1] += stmp*ax[1];
135:   }
136:   l = ipvt[k-1];
137:   if (l != k) {
138:     ax   = &a[k3 + 1];
139:     ay   = &a[2*l + 1];
140:     stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
141:     stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
142:   }
143:   return(0);
144: }

148: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift)
149: {
150:   PetscInt  i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3;
151:   PetscInt  k4,j3;
152:   MatScalar *aa,*ax,*ay,work[81],stmp;
153:   MatReal   tmp,max;

155: /*     gaussian elimination with partial pivoting */

158:   /* Parameter adjustments */
159:   a -= 10;

161:   for (k = 1; k <= 8; ++k) {
162:     kp1 = k + 1;
163:     k3  = 9*k;
164:     k4  = k3 + k;
165: /*        find l = pivot index */

167:     i__2 = 10 - k;
168:     aa   = &a[k4];
169:     max  = PetscAbsScalar(aa[0]);
170:     l    = 1;
171:     for (ll=1; ll<i__2; ll++) {
172:       tmp = PetscAbsScalar(aa[ll]);
173:       if (tmp > max) { max = tmp; l = ll+1;}
174:     }
175:     l        += k - 1;
176:     ipvt[k-1] = l;

178:     if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);

180: /*           interchange if necessary */

182:     if (l != k) {
183:       stmp      = a[l + k3];
184:       a[l + k3] = a[k4];
185:       a[k4]     = stmp;
186:     }

188: /*           compute multipliers */

190:     stmp = -1. / a[k4];
191:     i__2 = 9 - k;
192:     aa = &a[1 + k4];
193:     for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;

195: /*           row elimination with column indexing */

197:     ax = &a[k4+1];
198:     for (j = kp1; j <= 9; ++j) {
199:       j3   = 9*j;
200:       stmp = a[l + j3];
201:       if (l != k) {
202:         a[l + j3] = a[k + j3];
203:         a[k + j3] = stmp;
204:       }

206:       i__3 = 9 - k;
207:       ay = &a[1+k+j3];
208:       for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
209:     }
210:   }
211:   ipvt[8] = 9;
212:   if (a[90] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6);

214:   /*
215:         Now form the inverse
216:   */

218:   /*     compute inverse(u) */

220:   for (k = 1; k <= 9; ++k) {
221:     k3    = 9*k;
222:     k4    = k3 + k;
223:     a[k4] = 1.0 / a[k4];
224:     stmp  = -a[k4];
225:     i__2  = k - 1;
226:     aa    = &a[k3 + 1];
227:     for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
228:     kp1 = k + 1;
229:     if (9 < kp1) continue;
230:     ax = aa;
231:     for (j = kp1; j <= 9; ++j) {
232:       j3        = 9*j;
233:       stmp      = a[k + j3];
234:       a[k + j3] = 0.0;
235:       ay        = &a[j3 + 1];
236:       for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
237:     }
238:   }

240:   /*    form inverse(u)*inverse(l) */

242:   for (kb = 1; kb <= 8; ++kb) {
243:     k   = 9 - kb;
244:     k3  = 9*k;
245:     kp1 = k + 1;
246:     aa  = a + k3;
247:     for (i = kp1; i <= 9; ++i) {
248:       work[i-1] = aa[i];
249:       aa[i]     = 0.0;
250:     }
251:     for (j = kp1; j <= 9; ++j) {
252:       stmp   = work[j-1];
253:       ax     = &a[9*j + 1];
254:       ay     = &a[k3 + 1];
255:       ay[0] += stmp*ax[0];
256:       ay[1] += stmp*ax[1];
257:       ay[2] += stmp*ax[2];
258:       ay[3] += stmp*ax[3];
259:       ay[4] += stmp*ax[4];
260:       ay[5] += stmp*ax[5];
261:       ay[6] += stmp*ax[6];
262:       ay[7] += stmp*ax[7];
263:       ay[8] += stmp*ax[8];
264:     }
265:     l = ipvt[k-1];
266:     if (l != k) {
267:       ax   = &a[k3 + 1];
268:       ay   = &a[9*l + 1];
269:       stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
270:       stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
271:       stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
272:       stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
273:       stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
274:       stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
275:       stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
276:       stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
277:       stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
278:     }
279:   }
280:   return(0);
281: }

283: /*
284:       Inverts 15 by 15 matrix using partial pivoting.

286:        Used by the sparse factorization routines in
287:      src/mat/impls/baij/seq

289:        This is a combination of the Linpack routines
290:     dgefa() and dgedi() specialized for a size of 15.

292: */

296: PETSC_EXTERN PetscErrorCode PetscKernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift)
297: {
298:   PetscInt  i__2,i__3,kp1,j,k,l,ll,i,kb,k3;
299:   PetscInt  k4,j3;
300:   MatScalar *aa,*ax,*ay,stmp;
301:   MatReal   tmp,max;

303: /*     gaussian elimination with partial pivoting */

306:   /* Parameter adjustments */
307:   a -= 16;

309:   for (k = 1; k <= 14; ++k) {
310:     kp1 = k + 1;
311:     k3  = 15*k;
312:     k4  = k3 + k;
313: /*        find l = pivot index */

315:     i__2 = 16 - k;
316:     aa   = &a[k4];
317:     max  = PetscAbsScalar(aa[0]);
318:     l    = 1;
319:     for (ll=1; ll<i__2; ll++) {
320:       tmp = PetscAbsScalar(aa[ll]);
321:       if (tmp > max) { max = tmp; l = ll+1;}
322:     }
323:     l        += k - 1;
324:     ipvt[k-1] = l;

326:     if (a[l + k3] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);

328: /*           interchange if necessary */

330:     if (l != k) {
331:       stmp      = a[l + k3];
332:       a[l + k3] = a[k4];
333:       a[k4]     = stmp;
334:     }

336: /*           compute multipliers */

338:     stmp = -1. / a[k4];
339:     i__2 = 15 - k;
340:     aa = &a[1 + k4];
341:     for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;

343: /*           row elimination with column indexing */

345:     ax = &a[k4+1];
346:     for (j = kp1; j <= 15; ++j) {
347:       j3   = 15*j;
348:       stmp = a[l + j3];
349:       if (l != k) {
350:         a[l + j3] = a[k + j3];
351:         a[k + j3] = stmp;
352:       }

354:       i__3 = 15 - k;
355:       ay = &a[1+k+j3];
356:       for (ll=0; ll<i__3; ll++) ay[ll] += stmp*ax[ll];
357:     }
358:   }
359:   ipvt[14] = 15;
360:   if (a[240] == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6);

362:   /*
363:         Now form the inverse
364:   */

366:   /*     compute inverse(u) */

368:   for (k = 1; k <= 15; ++k) {
369:     k3    = 15*k;
370:     k4    = k3 + k;
371:     a[k4] = 1.0 / a[k4];
372:     stmp  = -a[k4];
373:     i__2  = k - 1;
374:     aa    = &a[k3 + 1];
375:     for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
376:     kp1 = k + 1;
377:     if (15 < kp1) continue;
378:     ax = aa;
379:     for (j = kp1; j <= 15; ++j) {
380:       j3        = 15*j;
381:       stmp      = a[k + j3];
382:       a[k + j3] = 0.0;
383:       ay        = &a[j3 + 1];
384:       for (ll=0; ll<k; ll++) ay[ll] += stmp*ax[ll];
385:     }
386:   }

388:   /*    form inverse(u)*inverse(l) */

390:   for (kb = 1; kb <= 14; ++kb) {
391:     k   = 15 - kb;
392:     k3  = 15*k;
393:     kp1 = k + 1;
394:     aa  = a + k3;
395:     for (i = kp1; i <= 15; ++i) {
396:       work[i-1] = aa[i];
397:       aa[i]     = 0.0;
398:     }
399:     for (j = kp1; j <= 15; ++j) {
400:       stmp    = work[j-1];
401:       ax      = &a[15*j + 1];
402:       ay      = &a[k3 + 1];
403:       ay[0]  += stmp*ax[0];
404:       ay[1]  += stmp*ax[1];
405:       ay[2]  += stmp*ax[2];
406:       ay[3]  += stmp*ax[3];
407:       ay[4]  += stmp*ax[4];
408:       ay[5]  += stmp*ax[5];
409:       ay[6]  += stmp*ax[6];
410:       ay[7]  += stmp*ax[7];
411:       ay[8]  += stmp*ax[8];
412:       ay[9]  += stmp*ax[9];
413:       ay[10] += stmp*ax[10];
414:       ay[11] += stmp*ax[11];
415:       ay[12] += stmp*ax[12];
416:       ay[13] += stmp*ax[13];
417:       ay[14] += stmp*ax[14];
418:     }
419:     l = ipvt[k-1];
420:     if (l != k) {
421:       ax   = &a[k3 + 1];
422:       ay   = &a[15*l + 1];
423:       stmp = ax[0];  ax[0]  = ay[0];  ay[0]  = stmp;
424:       stmp = ax[1];  ax[1]  = ay[1];  ay[1]  = stmp;
425:       stmp = ax[2];  ax[2]  = ay[2];  ay[2]  = stmp;
426:       stmp = ax[3];  ax[3]  = ay[3];  ay[3]  = stmp;
427:       stmp = ax[4];  ax[4]  = ay[4];  ay[4]  = stmp;
428:       stmp = ax[5];  ax[5]  = ay[5];  ay[5]  = stmp;
429:       stmp = ax[6];  ax[6]  = ay[6];  ay[6]  = stmp;
430:       stmp = ax[7];  ax[7]  = ay[7];  ay[7]  = stmp;
431:       stmp = ax[8];  ax[8]  = ay[8];  ay[8]  = stmp;
432:       stmp = ax[9];  ax[9]  = ay[9];  ay[9]  = stmp;
433:       stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp;
434:       stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp;
435:       stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp;
436:       stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp;
437:       stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp;
438:     }
439:   }
440:   return(0);
441: }