Actual source code: sbaijfact.c


  2: #include <../src/mat/impls/baij/seq/baij.h>
  3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  4: #include <petsc/private/kernels/blockinvert.h>
  5: #include <petscis.h>

  7: PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
  8: {
  9:   Mat_SeqSBAIJ *fact=(Mat_SeqSBAIJ*)F->data;
 10:   MatScalar    *dd=fact->a;
 11:   PetscInt     mbs=fact->mbs,bs=F->rmap->bs,i,nneg_tmp,npos_tmp,*fi=fact->diag;


 15:   nneg_tmp = 0; npos_tmp = 0;
 16:   for (i=0; i<mbs; i++) {
 17:     if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
 18:     else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
 19:     fi++;
 20:   }
 21:   if (nneg)  *nneg  = nneg_tmp;
 22:   if (npos)  *npos  = npos_tmp;
 23:   if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
 24:   return 0;
 25: }

 27: /*
 28:   Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
 29:   Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
 30: */
 31: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F,Mat A,IS perm,const MatFactorInfo *info)
 32: {
 33:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
 34:   const PetscInt *rip,*ai,*aj;
 35:   PetscInt       i,mbs = a->mbs,*jutmp,bs = A->rmap->bs,bs2=a->bs2;
 36:   PetscInt       m,reallocs = 0,prow;
 37:   PetscInt       *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
 38:   PetscReal      f = info->fill;
 39:   PetscBool      perm_identity;

 41:   /* check whether perm is the identity mapping */
 42:   ISIdentity(perm,&perm_identity);
 43:   ISGetIndices(perm,&rip);

 45:   if (perm_identity) { /* without permutation */
 46:     a->permute = PETSC_FALSE;

 48:     ai = a->i; aj = a->j;
 49:   } else {            /* non-trivial permutation */
 50:     a->permute = PETSC_TRUE;

 52:     MatReorderingSeqSBAIJ(A,perm);

 54:     ai = a->inew; aj = a->jnew;
 55:   }

 57:   /* initialization */
 58:   PetscMalloc1(mbs+1,&iu);
 59:   umax  = (PetscInt)(f*ai[mbs] + 1); umax += mbs + 1;
 60:   PetscMalloc1(umax,&ju);
 61:   iu[0] = mbs+1;
 62:   juidx = mbs + 1; /* index for ju */
 63:   /* jl linked list for pivot row -- linked list for col index */
 64:   PetscMalloc2(mbs,&jl,mbs,&q);
 65:   for (i=0; i<mbs; i++) {
 66:     jl[i] = mbs;
 67:     q[i]  = 0;
 68:   }

 70:   /* for each row k */
 71:   for (k=0; k<mbs; k++) {
 72:     for (i=0; i<mbs; i++) q[i] = 0;  /* to be removed! */
 73:     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
 74:     q[k] = mbs;
 75:     /* initialize nonzero structure of k-th row to row rip[k] of A */
 76:     jmin = ai[rip[k]] +1; /* exclude diag[k] */
 77:     jmax = ai[rip[k]+1];
 78:     for (j=jmin; j<jmax; j++) {
 79:       vj = rip[aj[j]]; /* col. value */
 80:       if (vj > k) {
 81:         qm = k;
 82:         do {
 83:           m = qm; qm = q[m];
 84:         } while (qm < vj);
 86:         nzk++;
 87:         q[m]  = vj;
 88:         q[vj] = qm;
 89:       } /* if (vj > k) */
 90:     } /* for (j=jmin; j<jmax; j++) */

 92:     /* modify nonzero structure of k-th row by computing fill-in
 93:        for each row i to be merged in */
 94:     prow = k;
 95:     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */

 97:     while (prow < k) {
 98:       /* merge row prow into k-th row */
 99:       jmin = iu[prow] + 1; jmax = iu[prow+1];
100:       qm   = k;
101:       for (j=jmin; j<jmax; j++) {
102:         vj = ju[j];
103:         do {
104:           m = qm; qm = q[m];
105:         } while (qm < vj);
106:         if (qm != vj) {
107:           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
108:         }
109:       }
110:       prow = jl[prow]; /* next pivot row */
111:     }

113:     /* add k to row list for first nonzero element in k-th row */
114:     if (nzk > 0) {
115:       i     = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
116:       jl[k] = jl[i]; jl[i] = k;
117:     }
118:     iu[k+1] = iu[k] + nzk;

120:     /* allocate more space to ju if needed */
121:     if (iu[k+1] > umax) {
122:       /* estimate how much additional space we will need */
123:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
124:       /* just double the memory each time */
125:       maxadd = umax;
126:       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
127:       umax += maxadd;

129:       /* allocate a longer ju */
130:       PetscMalloc1(umax,&jutmp);
131:       PetscArraycpy(jutmp,ju,iu[k]);
132:       PetscFree(ju);
133:       ju   = jutmp;
134:       reallocs++; /* count how many times we realloc */
135:     }

137:     /* save nonzero structure of k-th row in ju */
138:     i=k;
139:     while (nzk--) {
140:       i           = q[i];
141:       ju[juidx++] = i;
142:     }
143:   }

145: #if defined(PETSC_USE_INFO)
146:   if (ai[mbs] != 0) {
147:     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
148:     PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
149:     PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
150:     PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af);
151:     PetscInfo(A,"for best performance.\n");
152:   } else {
153:     PetscInfo(A,"Empty matrix\n");
154:   }
155: #endif

157:   ISRestoreIndices(perm,&rip);
158:   PetscFree2(jl,q);

160:   /* put together the new matrix */
161:   MatSeqSBAIJSetPreallocation(F,bs,MAT_SKIP_ALLOCATION,NULL);

163:   /* PetscLogObjectParent((PetscObject)B,(PetscObject)iperm); */
164:   b                = (Mat_SeqSBAIJ*)(F)->data;
165:   b->singlemalloc  = PETSC_FALSE;
166:   b->free_a        = PETSC_TRUE;
167:   b->free_ij       = PETSC_TRUE;

169:   PetscMalloc1((iu[mbs]+1)*bs2,&b->a);
170:   b->j    = ju;
171:   b->i    = iu;
172:   b->diag = NULL;
173:   b->ilen = NULL;
174:   b->imax = NULL;
175:   b->row  = perm;

177:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

179:   PetscObjectReference((PetscObject)perm);

181:   b->icol = perm;
182:   PetscObjectReference((PetscObject)perm);
183:   PetscMalloc1(bs*mbs+bs,&b->solve_work);
184:   /* In b structure:  Free imax, ilen, old a, old j.
185:      Allocate idnew, solve_work, new a, new j */
186:   PetscLogObjectMemory((PetscObject)F,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
187:   b->maxnz = b->nz = iu[mbs];

189:   (F)->info.factor_mallocs   = reallocs;
190:   (F)->info.fill_ratio_given = f;
191:   if (ai[mbs] != 0) {
192:     (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
193:   } else {
194:     (F)->info.fill_ratio_needed = 0.0;
195:   }
196:   MatSeqSBAIJSetNumericFactorization_inplace(F,perm_identity);
197:   return 0;
198: }
199: /*
200:     Symbolic U^T*D*U factorization for SBAIJ format.
201:     See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
202: */
203: #include <petscbt.h>
204: #include <../src/mat/utils/freespace.h>
205: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
206: {
207:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
208:   Mat_SeqSBAIJ       *b;
209:   PetscBool          perm_identity,missing;
210:   PetscReal          fill = info->fill;
211:   const PetscInt     *rip,*ai=a->i,*aj=a->j;
212:   PetscInt           i,mbs=a->mbs,bs=A->rmap->bs,reallocs=0,prow;
213:   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
214:   PetscInt           nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
215:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
216:   PetscBT            lnkbt;

219:   MatMissingDiagonal(A,&missing,&i);
221:   if (bs > 1) {
222:     MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact,A,perm,info);
223:     return 0;
224:   }

226:   /* check whether perm is the identity mapping */
227:   ISIdentity(perm,&perm_identity);
229:   a->permute = PETSC_FALSE;
230:   ISGetIndices(perm,&rip);

232:   /* initialization */
233:   PetscMalloc1(mbs+1,&ui);
234:   PetscMalloc1(mbs+1,&udiag);
235:   ui[0] = 0;

237:   /* jl: linked list for storing indices of the pivot rows
238:      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
239:   PetscMalloc4(mbs,&ui_ptr,mbs,&il,mbs,&jl,mbs,&cols);
240:   for (i=0; i<mbs; i++) {
241:     jl[i] = mbs; il[i] = 0;
242:   }

244:   /* create and initialize a linked list for storing column indices of the active row k */
245:   nlnk = mbs + 1;
246:   PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);

248:   /* initial FreeSpace size is fill*(ai[mbs]+1) */
249:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[mbs]+1),&free_space);
250:   current_space = free_space;

252:   for (k=0; k<mbs; k++) {  /* for each active row k */
253:     /* initialize lnk by the column indices of row rip[k] of A */
254:     nzk   = 0;
255:     ncols = ai[k+1] - ai[k];
257:     for (j=0; j<ncols; j++) {
258:       i       = *(aj + ai[k] + j);
259:       cols[j] = i;
260:     }
261:     PetscLLAdd(ncols,cols,mbs,&nlnk,lnk,lnkbt);
262:     nzk += nlnk;

264:     /* update lnk by computing fill-in for each pivot row to be merged in */
265:     prow = jl[k]; /* 1st pivot row */

267:     while (prow < k) {
268:       nextprow = jl[prow];
269:       /* merge prow into k-th row */
270:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
271:       jmax   = ui[prow+1];
272:       ncols  = jmax-jmin;
273:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
274:       PetscLLAddSorted(ncols,uj_ptr,mbs,&nlnk,lnk,lnkbt);
275:       nzk   += nlnk;

277:       /* update il and jl for prow */
278:       if (jmin < jmax) {
279:         il[prow] = jmin;
280:         j        = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
281:       }
282:       prow = nextprow;
283:     }

285:     /* if free space is not available, make more free space */
286:     if (current_space->local_remaining<nzk) {
287:       i    = mbs - k + 1; /* num of unfactored rows */
288:       i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
289:       PetscFreeSpaceGet(i,&current_space);
290:       reallocs++;
291:     }

293:     /* copy data into free space, then initialize lnk */
294:     PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);

296:     /* add the k-th row into il and jl */
297:     if (nzk > 1) {
298:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
299:       jl[k] = jl[i]; jl[i] = k;
300:       il[k] = ui[k] + 1;
301:     }
302:     ui_ptr[k] = current_space->array;

304:     current_space->array           += nzk;
305:     current_space->local_used      += nzk;
306:     current_space->local_remaining -= nzk;

308:     ui[k+1] = ui[k] + nzk;
309:   }

311:   ISRestoreIndices(perm,&rip);
312:   PetscFree4(ui_ptr,il,jl,cols);

314:   /* destroy list of free space and other temporary array(s) */
315:   PetscMalloc1(ui[mbs]+1,&uj);
316:   PetscFreeSpaceContiguous_Cholesky(&free_space,uj,mbs,ui,udiag); /* store matrix factor */
317:   PetscLLDestroy(lnk,lnkbt);

319:   /* put together the new matrix in MATSEQSBAIJ format */
320:   MatSeqSBAIJSetPreallocation(fact,bs,MAT_SKIP_ALLOCATION,NULL);

322:   b               = (Mat_SeqSBAIJ*)fact->data;
323:   b->singlemalloc = PETSC_FALSE;
324:   b->free_a       = PETSC_TRUE;
325:   b->free_ij      = PETSC_TRUE;

327:   PetscMalloc1(ui[mbs]+1,&b->a);

329:   b->j         = uj;
330:   b->i         = ui;
331:   b->diag      = udiag;
332:   b->free_diag = PETSC_TRUE;
333:   b->ilen      = NULL;
334:   b->imax      = NULL;
335:   b->row       = perm;
336:   b->icol      = perm;

338:   PetscObjectReference((PetscObject)perm);
339:   PetscObjectReference((PetscObject)perm);

341:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

343:   PetscMalloc1(mbs+1,&b->solve_work);
344:   PetscLogObjectMemory((PetscObject)fact,ui[mbs]*(sizeof(PetscInt)+sizeof(MatScalar)));

346:   b->maxnz = b->nz = ui[mbs];

348:   fact->info.factor_mallocs   = reallocs;
349:   fact->info.fill_ratio_given = fill;
350:   if (ai[mbs] != 0) {
351:     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs])/ai[mbs];
352:   } else {
353:     fact->info.fill_ratio_needed = 0.0;
354:   }
355: #if defined(PETSC_USE_INFO)
356:   if (ai[mbs] != 0) {
357:     PetscReal af = fact->info.fill_ratio_needed;
358:     PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
359:     PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
360:     PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
361:   } else {
362:     PetscInfo(A,"Empty matrix\n");
363:   }
364: #endif
365:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
366:   return 0;
367: }

369: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
370: {
371:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
372:   Mat_SeqSBAIJ       *b;
373:   PetscBool          perm_identity,missing;
374:   PetscReal          fill = info->fill;
375:   const PetscInt     *rip,*ai,*aj;
376:   PetscInt           i,mbs=a->mbs,bs=A->rmap->bs,reallocs=0,prow,d;
377:   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
378:   PetscInt           nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr;
379:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
380:   PetscBT            lnkbt;

382:   MatMissingDiagonal(A,&missing,&d);

385:   /*
386:    This code originally uses Modified Sparse Row (MSR) storage
387:    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise!
388:    Then it is rewritten so the factor B takes seqsbaij format. However the associated
389:    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
390:    thus the original code in MSR format is still used for these cases.
391:    The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
392:    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
393:   */
394:   if (bs > 1) {
395:     MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info);
396:     return 0;
397:   }

399:   /* check whether perm is the identity mapping */
400:   ISIdentity(perm,&perm_identity);
402:   a->permute = PETSC_FALSE;
403:   ai = a->i; aj = a->j;
404:   ISGetIndices(perm,&rip);

406:   /* initialization */
407:   PetscMalloc1(mbs+1,&ui);
408:   ui[0] = 0;

410:   /* jl: linked list for storing indices of the pivot rows
411:      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
412:   PetscMalloc4(mbs,&ui_ptr,mbs,&il,mbs,&jl,mbs,&cols);
413:   for (i=0; i<mbs; i++) {
414:     jl[i] = mbs; il[i] = 0;
415:   }

417:   /* create and initialize a linked list for storing column indices of the active row k */
418:   nlnk = mbs + 1;
419:   PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);

421:   /* initial FreeSpace size is fill*(ai[mbs]+1) */
422:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[mbs]+1),&free_space);
423:   current_space = free_space;

425:   for (k=0; k<mbs; k++) {  /* for each active row k */
426:     /* initialize lnk by the column indices of row rip[k] of A */
427:     nzk   = 0;
428:     ncols = ai[rip[k]+1] - ai[rip[k]];
429:     for (j=0; j<ncols; j++) {
430:       i       = *(aj + ai[rip[k]] + j);
431:       cols[j] = rip[i];
432:     }
433:     PetscLLAdd(ncols,cols,mbs,&nlnk,lnk,lnkbt);
434:     nzk += nlnk;

436:     /* update lnk by computing fill-in for each pivot row to be merged in */
437:     prow = jl[k]; /* 1st pivot row */

439:     while (prow < k) {
440:       nextprow = jl[prow];
441:       /* merge prow into k-th row */
442:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
443:       jmax   = ui[prow+1];
444:       ncols  = jmax-jmin;
445:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
446:       PetscLLAddSorted(ncols,uj_ptr,mbs,&nlnk,lnk,lnkbt);
447:       nzk   += nlnk;

449:       /* update il and jl for prow */
450:       if (jmin < jmax) {
451:         il[prow] = jmin;

453:         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
454:       }
455:       prow = nextprow;
456:     }

458:     /* if free space is not available, make more free space */
459:     if (current_space->local_remaining<nzk) {
460:       i = mbs - k + 1;          /* num of unfactored rows */
461:       i = PetscMin(PetscIntMultTruncate(i,nzk), PetscIntMultTruncate(i,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
462:       PetscFreeSpaceGet(i,&current_space);
463:       reallocs++;
464:     }

466:     /* copy data into free space, then initialize lnk */
467:     PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);

469:     /* add the k-th row into il and jl */
470:     if (nzk-1 > 0) {
471:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
472:       jl[k] = jl[i]; jl[i] = k;
473:       il[k] = ui[k] + 1;
474:     }
475:     ui_ptr[k] = current_space->array;

477:     current_space->array           += nzk;
478:     current_space->local_used      += nzk;
479:     current_space->local_remaining -= nzk;

481:     ui[k+1] = ui[k] + nzk;
482:   }

484:   ISRestoreIndices(perm,&rip);
485:   PetscFree4(ui_ptr,il,jl,cols);

487:   /* destroy list of free space and other temporary array(s) */
488:   PetscMalloc1(ui[mbs]+1,&uj);
489:   PetscFreeSpaceContiguous(&free_space,uj);
490:   PetscLLDestroy(lnk,lnkbt);

492:   /* put together the new matrix in MATSEQSBAIJ format */
493:   MatSeqSBAIJSetPreallocation(fact,bs,MAT_SKIP_ALLOCATION,NULL);

495:   b               = (Mat_SeqSBAIJ*)fact->data;
496:   b->singlemalloc = PETSC_FALSE;
497:   b->free_a       = PETSC_TRUE;
498:   b->free_ij      = PETSC_TRUE;

500:   PetscMalloc1(ui[mbs]+1,&b->a);

502:   b->j    = uj;
503:   b->i    = ui;
504:   b->diag = NULL;
505:   b->ilen = NULL;
506:   b->imax = NULL;
507:   b->row  = perm;

509:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

511:   PetscObjectReference((PetscObject)perm);
512:   b->icol  = perm;
513:   PetscObjectReference((PetscObject)perm);
514:   PetscMalloc1(mbs+1,&b->solve_work);
515:   PetscLogObjectMemory((PetscObject)fact,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
516:   b->maxnz = b->nz = ui[mbs];

518:   fact->info.factor_mallocs   = reallocs;
519:   fact->info.fill_ratio_given = fill;
520:   if (ai[mbs] != 0) {
521:     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs])/ai[mbs];
522:   } else {
523:     fact->info.fill_ratio_needed = 0.0;
524:   }
525: #if defined(PETSC_USE_INFO)
526:   if (ai[mbs] != 0) {
527:     PetscReal af = fact->info.fill_ratio_needed;
528:     PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
529:     PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
530:     PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
531:   } else {
532:     PetscInfo(A,"Empty matrix\n");
533:   }
534: #endif
535:   MatSeqSBAIJSetNumericFactorization_inplace(fact,perm_identity);
536:   return 0;
537: }

539: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C,Mat A,const MatFactorInfo *info)
540: {
541:   Mat_SeqSBAIJ   *a   = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
542:   IS             perm = b->row;
543:   const PetscInt *ai,*aj,*perm_ptr,mbs=a->mbs,*bi=b->i,*bj=b->j;
544:   PetscInt       i,j;
545:   PetscInt       *a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
546:   PetscInt       bs  =A->rmap->bs,bs2 = a->bs2;
547:   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
548:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
549:   MatScalar      *work;
550:   PetscInt       *pivots;
551:   PetscBool      allowzeropivot,zeropivotdetected;

553:   /* initialization */
554:   PetscCalloc1(bs2*mbs,&rtmp);
555:   PetscMalloc2(mbs,&il,mbs,&jl);
556:   allowzeropivot = PetscNot(A->erroriffailure);

558:   il[0] = 0;
559:   for (i=0; i<mbs; i++) jl[i] = mbs;

561:   PetscMalloc3(bs2,&dk,bs2,&uik,bs,&work);
562:   PetscMalloc1(bs,&pivots);

564:   ISGetIndices(perm,&perm_ptr);

566:   /* check permutation */
567:   if (!a->permute) {
568:     ai = a->i; aj = a->j; aa = a->a;
569:   } else {
570:     ai   = a->inew; aj = a->jnew;
571:     PetscMalloc1(bs2*ai[mbs],&aa);
572:     PetscArraycpy(aa,a->a,bs2*ai[mbs]);
573:     PetscMalloc1(ai[mbs],&a2anew);
574:     PetscArraycpy(a2anew,a->a2anew,ai[mbs]);

576:     for (i=0; i<mbs; i++) {
577:       jmin = ai[i]; jmax = ai[i+1];
578:       for (j=jmin; j<jmax; j++) {
579:         while (a2anew[j] != j) {
580:           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
581:           for (k1=0; k1<bs2; k1++) {
582:             dk[k1]       = aa[k*bs2+k1];
583:             aa[k*bs2+k1] = aa[j*bs2+k1];
584:             aa[j*bs2+k1] = dk[k1];
585:           }
586:         }
587:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
588:         if (i > aj[j]) {
589:           ap = aa + j*bs2;                     /* ptr to the beginning of j-th block of aa */
590:           for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
591:           for (k=0; k<bs; k++) {               /* j-th block of aa <- dk^T */
592:             for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
593:           }
594:         }
595:       }
596:     }
597:     PetscFree(a2anew);
598:   }

600:   /* for each row k */
601:   for (k = 0; k<mbs; k++) {

603:     /*initialize k-th row with elements nonzero in row perm(k) of A */
604:     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];

606:     ap = aa + jmin*bs2;
607:     for (j = jmin; j < jmax; j++) {
608:       vj       = perm_ptr[aj[j]];   /* block col. index */
609:       rtmp_ptr = rtmp + vj*bs2;
610:       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
611:     }

613:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
614:     PetscArraycpy(dk,rtmp+k*bs2,bs2);
615:     i    = jl[k]; /* first row to be added to k_th row  */

617:     while (i < k) {
618:       nexti = jl[i]; /* next row to be added to k_th row */

620:       /* compute multiplier */
621:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

623:       /* uik = -inv(Di)*U_bar(i,k) */
624:       diag = ba + i*bs2;
625:       u    = ba + ili*bs2;
626:       PetscArrayzero(uik,bs2);
627:       PetscKernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);

629:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
630:       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
631:       PetscLogFlops(4.0*bs*bs2);

633:       /* update -U(i,k) */
634:       PetscArraycpy(ba+ili*bs2,uik,bs2);

636:       /* add multiple of row i to k-th row ... */
637:       jmin = ili + 1; jmax = bi[i+1];
638:       if (jmin < jmax) {
639:         for (j=jmin; j<jmax; j++) {
640:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
641:           rtmp_ptr = rtmp + bj[j]*bs2;
642:           u        = ba + j*bs2;
643:           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
644:         }
645:         PetscLogFlops(2.0*bs*bs2*(jmax-jmin));

647:         /* ... add i to row list for next nonzero entry */
648:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
649:         j     = bj[jmin];
650:         jl[i] = jl[j]; jl[j] = i; /* update jl */
651:       }
652:       i = nexti;
653:     }

655:     /* save nonzero entries in k-th row of U ... */

657:     /* invert diagonal block */
658:     diag = ba+k*bs2;
659:     PetscArraycpy(diag,dk,bs2);

661:     PetscKernel_A_gets_inverse_A(bs,diag,pivots,work,allowzeropivot,&zeropivotdetected);
662:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

664:     jmin = bi[k]; jmax = bi[k+1];
665:     if (jmin < jmax) {
666:       for (j=jmin; j<jmax; j++) {
667:         vj       = bj[j];      /* block col. index of U */
668:         u        = ba + j*bs2;
669:         rtmp_ptr = rtmp + vj*bs2;
670:         for (k1=0; k1<bs2; k1++) {
671:           *u++        = *rtmp_ptr;
672:           *rtmp_ptr++ = 0.0;
673:         }
674:       }

676:       /* ... add k to row list for first nonzero entry in k-th row */
677:       il[k] = jmin;
678:       i     = bj[jmin];
679:       jl[k] = jl[i]; jl[i] = k;
680:     }
681:   }

683:   PetscFree(rtmp);
684:   PetscFree2(il,jl);
685:   PetscFree3(dk,uik,work);
686:   PetscFree(pivots);
687:   if (a->permute) {
688:     PetscFree(aa);
689:   }

691:   ISRestoreIndices(perm,&perm_ptr);

693:   C->ops->solve          = MatSolve_SeqSBAIJ_N_inplace;
694:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
695:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_inplace;
696:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_inplace;

698:   C->assembled    = PETSC_TRUE;
699:   C->preallocated = PETSC_TRUE;

701:   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
702:   return 0;
703: }

705: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
706: {
707:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
708:   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
709:   PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
710:   PetscInt       bs  =A->rmap->bs,bs2 = a->bs2;
711:   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
712:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
713:   MatScalar      *work;
714:   PetscInt       *pivots;
715:   PetscBool      allowzeropivot,zeropivotdetected;

717:   PetscCalloc1(bs2*mbs,&rtmp);
718:   PetscMalloc2(mbs,&il,mbs,&jl);
719:   il[0] = 0;
720:   for (i=0; i<mbs; i++) jl[i] = mbs;

722:   PetscMalloc3(bs2,&dk,bs2,&uik,bs,&work);
723:   PetscMalloc1(bs,&pivots);
724:   allowzeropivot = PetscNot(A->erroriffailure);

726:   ai = a->i; aj = a->j; aa = a->a;

728:   /* for each row k */
729:   for (k = 0; k<mbs; k++) {

731:     /*initialize k-th row with elements nonzero in row k of A */
732:     jmin = ai[k]; jmax = ai[k+1];
733:     ap   = aa + jmin*bs2;
734:     for (j = jmin; j < jmax; j++) {
735:       vj       = aj[j];   /* block col. index */
736:       rtmp_ptr = rtmp + vj*bs2;
737:       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
738:     }

740:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
741:     PetscArraycpy(dk,rtmp+k*bs2,bs2);
742:     i    = jl[k]; /* first row to be added to k_th row  */

744:     while (i < k) {
745:       nexti = jl[i]; /* next row to be added to k_th row */

747:       /* compute multiplier */
748:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

750:       /* uik = -inv(Di)*U_bar(i,k) */
751:       diag = ba + i*bs2;
752:       u    = ba + ili*bs2;
753:       PetscArrayzero(uik,bs2);
754:       PetscKernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);

756:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
757:       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
758:       PetscLogFlops(2.0*bs*bs2);

760:       /* update -U(i,k) */
761:       PetscArraycpy(ba+ili*bs2,uik,bs2);

763:       /* add multiple of row i to k-th row ... */
764:       jmin = ili + 1; jmax = bi[i+1];
765:       if (jmin < jmax) {
766:         for (j=jmin; j<jmax; j++) {
767:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
768:           rtmp_ptr = rtmp + bj[j]*bs2;
769:           u        = ba + j*bs2;
770:           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
771:         }
772:         PetscLogFlops(2.0*bs*bs2*(jmax-jmin));

774:         /* ... add i to row list for next nonzero entry */
775:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
776:         j     = bj[jmin];
777:         jl[i] = jl[j]; jl[j] = i; /* update jl */
778:       }
779:       i = nexti;
780:     }

782:     /* save nonzero entries in k-th row of U ... */

784:     /* invert diagonal block */
785:     diag = ba+k*bs2;
786:     PetscArraycpy(diag,dk,bs2);

788:     PetscKernel_A_gets_inverse_A(bs,diag,pivots,work,allowzeropivot,&zeropivotdetected);
789:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

791:     jmin = bi[k]; jmax = bi[k+1];
792:     if (jmin < jmax) {
793:       for (j=jmin; j<jmax; j++) {
794:         vj       = bj[j];      /* block col. index of U */
795:         u        = ba + j*bs2;
796:         rtmp_ptr = rtmp + vj*bs2;
797:         for (k1=0; k1<bs2; k1++) {
798:           *u++        = *rtmp_ptr;
799:           *rtmp_ptr++ = 0.0;
800:         }
801:       }

803:       /* ... add k to row list for first nonzero entry in k-th row */
804:       il[k] = jmin;
805:       i     = bj[jmin];
806:       jl[k] = jl[i]; jl[i] = k;
807:     }
808:   }

810:   PetscFree(rtmp);
811:   PetscFree2(il,jl);
812:   PetscFree3(dk,uik,work);
813:   PetscFree(pivots);

815:   C->ops->solve          = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
816:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
817:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
818:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
819:   C->assembled           = PETSC_TRUE;
820:   C->preallocated        = PETSC_TRUE;

822:   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
823:   return 0;
824: }

826: /*
827:     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
828:     Version for blocks 2 by 2.
829: */
830: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C,Mat A,const MatFactorInfo *info)
831: {
832:   Mat_SeqSBAIJ   *a   = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
833:   IS             perm = b->row;
834:   const PetscInt *ai,*aj,*perm_ptr;
835:   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
836:   PetscInt       *a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
837:   MatScalar      *ba = b->a,*aa,*ap;
838:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr,dk[4],uik[4];
839:   PetscReal      shift = info->shiftamount;
840:   PetscBool      allowzeropivot,zeropivotdetected;

842:   allowzeropivot = PetscNot(A->erroriffailure);

844:   /* initialization */
845:   /* il and jl record the first nonzero element in each row of the accessing
846:      window U(0:k, k:mbs-1).
847:      jl:    list of rows to be added to uneliminated rows
848:             i>= k: jl(i) is the first row to be added to row i
849:             i<  k: jl(i) is the row following row i in some list of rows
850:             jl(i) = mbs indicates the end of a list
851:      il(i): points to the first nonzero element in columns k,...,mbs-1 of
852:             row i of U */
853:   PetscCalloc1(4*mbs,&rtmp);
854:   PetscMalloc2(mbs,&il,mbs,&jl);
855:   il[0] = 0;
856:   for (i=0; i<mbs; i++) jl[i] = mbs;

858:   ISGetIndices(perm,&perm_ptr);

860:   /* check permutation */
861:   if (!a->permute) {
862:     ai = a->i; aj = a->j; aa = a->a;
863:   } else {
864:     ai   = a->inew; aj = a->jnew;
865:     PetscMalloc1(4*ai[mbs],&aa);
866:     PetscArraycpy(aa,a->a,4*ai[mbs]);
867:     PetscMalloc1(ai[mbs],&a2anew);
868:     PetscArraycpy(a2anew,a->a2anew,ai[mbs]);

870:     for (i=0; i<mbs; i++) {
871:       jmin = ai[i]; jmax = ai[i+1];
872:       for (j=jmin; j<jmax; j++) {
873:         while (a2anew[j] != j) {
874:           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
875:           for (k1=0; k1<4; k1++) {
876:             dk[k1]     = aa[k*4+k1];
877:             aa[k*4+k1] = aa[j*4+k1];
878:             aa[j*4+k1] = dk[k1];
879:           }
880:         }
881:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
882:         if (i > aj[j]) {
883:           ap    = aa + j*4;  /* ptr to the beginning of the block */
884:           dk[1] = ap[1];     /* swap ap[1] and ap[2] */
885:           ap[1] = ap[2];
886:           ap[2] = dk[1];
887:         }
888:       }
889:     }
890:     PetscFree(a2anew);
891:   }

893:   /* for each row k */
894:   for (k = 0; k<mbs; k++) {

896:     /*initialize k-th row with elements nonzero in row perm(k) of A */
897:     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
898:     ap   = aa + jmin*4;
899:     for (j = jmin; j < jmax; j++) {
900:       vj       = perm_ptr[aj[j]];   /* block col. index */
901:       rtmp_ptr = rtmp + vj*4;
902:       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
903:     }

905:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
906:     PetscArraycpy(dk,rtmp+k*4,4);
907:     i    = jl[k]; /* first row to be added to k_th row  */

909:     while (i < k) {
910:       nexti = jl[i]; /* next row to be added to k_th row */

912:       /* compute multiplier */
913:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

915:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
916:       diag   = ba + i*4;
917:       u      = ba + ili*4;
918:       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
919:       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
920:       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
921:       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);

923:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
924:       dk[0] += uik[0]*u[0] + uik[1]*u[1];
925:       dk[1] += uik[2]*u[0] + uik[3]*u[1];
926:       dk[2] += uik[0]*u[2] + uik[1]*u[3];
927:       dk[3] += uik[2]*u[2] + uik[3]*u[3];

929:       PetscLogFlops(16.0*2.0);

931:       /* update -U(i,k): ba[ili] = uik */
932:       PetscArraycpy(ba+ili*4,uik,4);

934:       /* add multiple of row i to k-th row ... */
935:       jmin = ili + 1; jmax = bi[i+1];
936:       if (jmin < jmax) {
937:         for (j=jmin; j<jmax; j++) {
938:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
939:           rtmp_ptr     = rtmp + bj[j]*4;
940:           u            = ba + j*4;
941:           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
942:           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
943:           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
944:           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
945:         }
946:         PetscLogFlops(16.0*(jmax-jmin));

948:         /* ... add i to row list for next nonzero entry */
949:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
950:         j     = bj[jmin];
951:         jl[i] = jl[j]; jl[j] = i; /* update jl */
952:       }
953:       i = nexti;
954:     }

956:     /* save nonzero entries in k-th row of U ... */

958:     /* invert diagonal block */
959:     diag = ba+k*4;
960:     PetscArraycpy(diag,dk,4);
961:     PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);
962:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

964:     jmin = bi[k]; jmax = bi[k+1];
965:     if (jmin < jmax) {
966:       for (j=jmin; j<jmax; j++) {
967:         vj       = bj[j];      /* block col. index of U */
968:         u        = ba + j*4;
969:         rtmp_ptr = rtmp + vj*4;
970:         for (k1=0; k1<4; k1++) {
971:           *u++        = *rtmp_ptr;
972:           *rtmp_ptr++ = 0.0;
973:         }
974:       }

976:       /* ... add k to row list for first nonzero entry in k-th row */
977:       il[k] = jmin;
978:       i     = bj[jmin];
979:       jl[k] = jl[i]; jl[i] = k;
980:     }
981:   }

983:   PetscFree(rtmp);
984:   PetscFree2(il,jl);
985:   if (a->permute) {
986:     PetscFree(aa);
987:   }
988:   ISRestoreIndices(perm,&perm_ptr);

990:   C->ops->solve          = MatSolve_SeqSBAIJ_2_inplace;
991:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
992:   C->assembled           = PETSC_TRUE;
993:   C->preallocated        = PETSC_TRUE;

995:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
996:   return 0;
997: }

999: /*
1000:       Version for when blocks are 2 by 2 Using natural ordering
1001: */
1002: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
1003: {
1004:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
1005:   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
1006:   PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
1007:   MatScalar      *ba = b->a,*aa,*ap,dk[8],uik[8];
1008:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
1009:   PetscReal      shift = info->shiftamount;
1010:   PetscBool      allowzeropivot,zeropivotdetected;

1012:   allowzeropivot = PetscNot(A->erroriffailure);

1014:   /* initialization */
1015:   /* il and jl record the first nonzero element in each row of the accessing
1016:      window U(0:k, k:mbs-1).
1017:      jl:    list of rows to be added to uneliminated rows
1018:             i>= k: jl(i) is the first row to be added to row i
1019:             i<  k: jl(i) is the row following row i in some list of rows
1020:             jl(i) = mbs indicates the end of a list
1021:      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1022:             row i of U */
1023:   PetscCalloc1(4*mbs,&rtmp);
1024:   PetscMalloc2(mbs,&il,mbs,&jl);
1025:   il[0] = 0;
1026:   for (i=0; i<mbs; i++) jl[i] = mbs;

1028:   ai = a->i; aj = a->j; aa = a->a;

1030:   /* for each row k */
1031:   for (k = 0; k<mbs; k++) {

1033:     /*initialize k-th row with elements nonzero in row k of A */
1034:     jmin = ai[k]; jmax = ai[k+1];
1035:     ap   = aa + jmin*4;
1036:     for (j = jmin; j < jmax; j++) {
1037:       vj       = aj[j];   /* block col. index */
1038:       rtmp_ptr = rtmp + vj*4;
1039:       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
1040:     }

1042:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1043:     PetscArraycpy(dk,rtmp+k*4,4);
1044:     i    = jl[k]; /* first row to be added to k_th row  */

1046:     while (i < k) {
1047:       nexti = jl[i]; /* next row to be added to k_th row */

1049:       /* compute multiplier */
1050:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

1052:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1053:       diag   = ba + i*4;
1054:       u      = ba + ili*4;
1055:       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
1056:       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
1057:       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
1058:       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);

1060:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1061:       dk[0] += uik[0]*u[0] + uik[1]*u[1];
1062:       dk[1] += uik[2]*u[0] + uik[3]*u[1];
1063:       dk[2] += uik[0]*u[2] + uik[1]*u[3];
1064:       dk[3] += uik[2]*u[2] + uik[3]*u[3];

1066:       PetscLogFlops(16.0*2.0);

1068:       /* update -U(i,k): ba[ili] = uik */
1069:       PetscArraycpy(ba+ili*4,uik,4);

1071:       /* add multiple of row i to k-th row ... */
1072:       jmin = ili + 1; jmax = bi[i+1];
1073:       if (jmin < jmax) {
1074:         for (j=jmin; j<jmax; j++) {
1075:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1076:           rtmp_ptr     = rtmp + bj[j]*4;
1077:           u            = ba + j*4;
1078:           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
1079:           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
1080:           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
1081:           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
1082:         }
1083:         PetscLogFlops(16.0*(jmax-jmin));

1085:         /* ... add i to row list for next nonzero entry */
1086:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1087:         j     = bj[jmin];
1088:         jl[i] = jl[j]; jl[j] = i; /* update jl */
1089:       }
1090:       i = nexti;
1091:     }

1093:     /* save nonzero entries in k-th row of U ... */

1095:     /* invert diagonal block */
1096:     diag = ba+k*4;
1097:     PetscArraycpy(diag,dk,4);
1098:     PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);
1099:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

1101:     jmin = bi[k]; jmax = bi[k+1];
1102:     if (jmin < jmax) {
1103:       for (j=jmin; j<jmax; j++) {
1104:         vj       = bj[j];      /* block col. index of U */
1105:         u        = ba + j*4;
1106:         rtmp_ptr = rtmp + vj*4;
1107:         for (k1=0; k1<4; k1++) {
1108:           *u++        = *rtmp_ptr;
1109:           *rtmp_ptr++ = 0.0;
1110:         }
1111:       }

1113:       /* ... add k to row list for first nonzero entry in k-th row */
1114:       il[k] = jmin;
1115:       i     = bj[jmin];
1116:       jl[k] = jl[i]; jl[i] = k;
1117:     }
1118:   }

1120:   PetscFree(rtmp);
1121:   PetscFree2(il,jl);

1123:   C->ops->solve          = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1124:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1125:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1126:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1127:   C->assembled           = PETSC_TRUE;
1128:   C->preallocated        = PETSC_TRUE;

1130:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1131:   return 0;
1132: }

1134: /*
1135:     Numeric U^T*D*U factorization for SBAIJ format.
1136:     Version for blocks are 1 by 1.
1137: */
1138: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C,Mat A,const MatFactorInfo *info)
1139: {
1140:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ*)C->data;
1141:   IS             ip=b->row;
1142:   const PetscInt *ai,*aj,*rip;
1143:   PetscInt       *a2anew,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j,*bcol;
1144:   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1145:   MatScalar      *rtmp,*ba=b->a,*bval,*aa,dk,uikdi;
1146:   PetscReal      rs;
1147:   FactorShiftCtx sctx;

1149:   /* MatPivotSetUp(): initialize shift context sctx */
1150:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

1152:   ISGetIndices(ip,&rip);
1153:   if (!a->permute) {
1154:     ai = a->i; aj = a->j; aa = a->a;
1155:   } else {
1156:     ai     = a->inew; aj = a->jnew;
1157:     nz     = ai[mbs];
1158:     PetscMalloc1(nz,&aa);
1159:     a2anew = a->a2anew;
1160:     bval   = a->a;
1161:     for (j=0; j<nz; j++) {
1162:       aa[a2anew[j]] = *(bval++);
1163:     }
1164:   }

1166:   /* initialization */
1167:   /* il and jl record the first nonzero element in each row of the accessing
1168:      window U(0:k, k:mbs-1).
1169:      jl:    list of rows to be added to uneliminated rows
1170:             i>= k: jl(i) is the first row to be added to row i
1171:             i<  k: jl(i) is the row following row i in some list of rows
1172:             jl(i) = mbs indicates the end of a list
1173:      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1174:             row i of U */
1175:   PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);

1177:   do {
1178:     sctx.newshift = PETSC_FALSE;
1179:     il[0] = 0;
1180:     for (i=0; i<mbs; i++) {
1181:       rtmp[i] = 0.0; jl[i] = mbs;
1182:     }

1184:     for (k = 0; k<mbs; k++) {
1185:       /*initialize k-th row by the perm[k]-th row of A */
1186:       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1187:       bval = ba + bi[k];
1188:       for (j = jmin; j < jmax; j++) {
1189:         col       = rip[aj[j]];
1190:         rtmp[col] = aa[j];
1191:         *bval++   = 0.0; /* for in-place factorization */
1192:       }

1194:       /* shift the diagonal of the matrix */
1195:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1197:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1198:       dk = rtmp[k];
1199:       i  = jl[k]; /* first row to be added to k_th row  */

1201:       while (i < k) {
1202:         nexti = jl[i]; /* next row to be added to k_th row */

1204:         /* compute multiplier, update diag(k) and U(i,k) */
1205:         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1206:         uikdi   = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
1207:         dk     += uikdi*ba[ili];
1208:         ba[ili] = uikdi; /* -U(i,k) */

1210:         /* add multiple of row i to k-th row */
1211:         jmin = ili + 1; jmax = bi[i+1];
1212:         if (jmin < jmax) {
1213:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1214:           PetscLogFlops(2.0*(jmax-jmin));

1216:           /* update il and jl for row i */
1217:           il[i] = jmin;
1218:           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1219:         }
1220:         i = nexti;
1221:       }

1223:       /* shift the diagonals when zero pivot is detected */
1224:       /* compute rs=sum of abs(off-diagonal) */
1225:       rs   = 0.0;
1226:       jmin = bi[k]+1;
1227:       nz   = bi[k+1] - jmin;
1228:       if (nz) {
1229:         bcol = bj + jmin;
1230:         while (nz--) {
1231:           rs += PetscAbsScalar(rtmp[*bcol]);
1232:           bcol++;
1233:         }
1234:       }

1236:       sctx.rs = rs;
1237:       sctx.pv = dk;
1238:       MatPivotCheck(C,A,info,&sctx,k);
1239:       if (sctx.newshift) break;    /* sctx.shift_amount is updated */
1240:       dk = sctx.pv;

1242:       /* copy data into U(k,:) */
1243:       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1244:       jmin      = bi[k]+1; jmax = bi[k+1];
1245:       if (jmin < jmax) {
1246:         for (j=jmin; j<jmax; j++) {
1247:           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1248:         }
1249:         /* add the k-th row into il and jl */
1250:         il[k] = jmin;
1251:         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1252:       }
1253:     }
1254:   } while (sctx.newshift);
1255:   PetscFree3(rtmp,il,jl);
1256:   if (a->permute) PetscFree(aa);

1258:   ISRestoreIndices(ip,&rip);

1260:   C->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
1261:   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1262:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1263:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
1264:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
1265:   C->assembled           = PETSC_TRUE;
1266:   C->preallocated        = PETSC_TRUE;

1268:   PetscLogFlops(C->rmap->N);
1269:   if (sctx.nshift) {
1270:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1271:       PetscInfo(A,"number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
1272:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1273:       PetscInfo(A,"number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
1274:     }
1275:   }
1276:   return 0;
1277: }

1279: /*
1280:   Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1281:   Modified from MatCholeskyFactorNumeric_SeqAIJ()
1282: */
1283: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info)
1284: {
1285:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1286:   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)B->data;
1287:   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
1288:   PetscInt       *ai=a->i,*aj=a->j,*ajtmp;
1289:   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
1290:   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1291:   FactorShiftCtx sctx;
1292:   PetscReal      rs;
1293:   MatScalar      d,*v;

1295:   PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);

1297:   /* MatPivotSetUp(): initialize shift context sctx */
1298:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

1300:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1301:     sctx.shift_top = info->zeropivot;

1303:     PetscArrayzero(rtmp,mbs);

1305:     for (i=0; i<mbs; i++) {
1306:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1307:       d        = (aa)[a->diag[i]];
1308:       rtmp[i] += -PetscRealPart(d);  /* diagonal entry */
1309:       ajtmp    = aj + ai[i] + 1;     /* exclude diagonal */
1310:       v        = aa + ai[i] + 1;
1311:       nz       = ai[i+1] - ai[i] - 1;
1312:       for (j=0; j<nz; j++) {
1313:         rtmp[i]        += PetscAbsScalar(v[j]);
1314:         rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1315:       }
1316:       if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1317:     }
1318:     sctx.shift_top *= 1.1;
1319:     sctx.nshift_max = 5;
1320:     sctx.shift_lo   = 0.;
1321:     sctx.shift_hi   = 1.;
1322:   }

1324:   /* allocate working arrays
1325:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1326:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1327:   */
1328:   do {
1329:     sctx.newshift = PETSC_FALSE;

1331:     for (i=0; i<mbs; i++) c2r[i] = mbs;
1332:     if (mbs) il[0] = 0;

1334:     for (k = 0; k<mbs; k++) {
1335:       /* zero rtmp */
1336:       nz    = bi[k+1] - bi[k];
1337:       bjtmp = bj + bi[k];
1338:       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;

1340:       /* load in initial unfactored row */
1341:       bval = ba + bi[k];
1342:       jmin = ai[k]; jmax = ai[k+1];
1343:       for (j = jmin; j < jmax; j++) {
1344:         col       = aj[j];
1345:         rtmp[col] = aa[j];
1346:         *bval++   = 0.0; /* for in-place factorization */
1347:       }
1348:       /* shift the diagonal of the matrix: ZeropivotApply() */
1349:       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */

1351:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1352:       dk = rtmp[k];
1353:       i  = c2r[k]; /* first row to be added to k_th row  */

1355:       while (i < k) {
1356:         nexti = c2r[i]; /* next row to be added to k_th row */

1358:         /* compute multiplier, update diag(k) and U(i,k) */
1359:         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1360:         uikdi   = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
1361:         dk     += uikdi*ba[ili]; /* update diag[k] */
1362:         ba[ili] = uikdi; /* -U(i,k) */

1364:         /* add multiple of row i to k-th row */
1365:         jmin = ili + 1; jmax = bi[i+1];
1366:         if (jmin < jmax) {
1367:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1368:           /* update il and c2r for row i */
1369:           il[i] = jmin;
1370:           j     = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
1371:         }
1372:         i = nexti;
1373:       }

1375:       /* copy data into U(k,:) */
1376:       rs   = 0.0;
1377:       jmin = bi[k]; jmax = bi[k+1]-1;
1378:       if (jmin < jmax) {
1379:         for (j=jmin; j<jmax; j++) {
1380:           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
1381:         }
1382:         /* add the k-th row into il and c2r */
1383:         il[k] = jmin;
1384:         i     = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
1385:       }

1387:       sctx.rs = rs;
1388:       sctx.pv = dk;
1389:       MatPivotCheck(B,A,info,&sctx,k);
1390:       if (sctx.newshift) break;
1391:       dk = sctx.pv;

1393:       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
1394:     }
1395:   } while (sctx.newshift);

1397:   PetscFree3(rtmp,il,c2r);

1399:   B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1400:   B->ops->solves         = MatSolves_SeqSBAIJ_1;
1401:   B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1402:   B->ops->matsolve       = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1403:   B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1404:   B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;

1406:   B->assembled    = PETSC_TRUE;
1407:   B->preallocated = PETSC_TRUE;

1409:   PetscLogFlops(B->rmap->n);

1411:   /* MatPivotView() */
1412:   if (sctx.nshift) {
1413:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1414:       PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
1415:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1416:       PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
1417:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1418:       PetscInfo(A,"number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
1419:     }
1420:   }
1421:   return 0;
1422: }

1424: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C,Mat A,const MatFactorInfo *info)
1425: {
1426:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ*)C->data;
1427:   PetscInt       i,j,mbs = a->mbs;
1428:   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1429:   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz;
1430:   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1431:   PetscReal      rs;
1432:   FactorShiftCtx sctx;

1434:   /* MatPivotSetUp(): initialize shift context sctx */
1435:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

1437:   /* initialization */
1438:   /* il and jl record the first nonzero element in each row of the accessing
1439:      window U(0:k, k:mbs-1).
1440:      jl:    list of rows to be added to uneliminated rows
1441:             i>= k: jl(i) is the first row to be added to row i
1442:             i<  k: jl(i) is the row following row i in some list of rows
1443:             jl(i) = mbs indicates the end of a list
1444:      il(i): points to the first nonzero element in U(i,k:mbs-1)
1445:   */
1446:   PetscMalloc1(mbs,&rtmp);
1447:   PetscMalloc2(mbs,&il,mbs,&jl);

1449:   do {
1450:     sctx.newshift = PETSC_FALSE;
1451:     il[0] = 0;
1452:     for (i=0; i<mbs; i++) {
1453:       rtmp[i] = 0.0; jl[i] = mbs;
1454:     }

1456:     for (k = 0; k<mbs; k++) {
1457:       /*initialize k-th row with elements nonzero in row perm(k) of A */
1458:       nz   = ai[k+1] - ai[k];
1459:       acol = aj + ai[k];
1460:       aval = aa + ai[k];
1461:       bval = ba + bi[k];
1462:       while (nz--) {
1463:         rtmp[*acol++] = *aval++;
1464:         *bval++       = 0.0; /* for in-place factorization */
1465:       }

1467:       /* shift the diagonal of the matrix */
1468:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1470:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1471:       dk = rtmp[k];
1472:       i  = jl[k]; /* first row to be added to k_th row  */

1474:       while (i < k) {
1475:         nexti = jl[i]; /* next row to be added to k_th row */
1476:         /* compute multiplier, update D(k) and U(i,k) */
1477:         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1478:         uikdi   = -ba[ili]*ba[bi[i]];
1479:         dk     += uikdi*ba[ili];
1480:         ba[ili] = uikdi; /* -U(i,k) */

1482:         /* add multiple of row i to k-th row ... */
1483:         jmin = ili + 1;
1484:         nz   = bi[i+1] - jmin;
1485:         if (nz > 0) {
1486:           bcol = bj + jmin;
1487:           bval = ba + jmin;
1488:           PetscLogFlops(2.0*nz);
1489:           while (nz--) rtmp[*bcol++] += uikdi*(*bval++);

1491:           /* update il and jl for i-th row */
1492:           il[i] = jmin;
1493:           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1494:         }
1495:         i = nexti;
1496:       }

1498:       /* shift the diagonals when zero pivot is detected */
1499:       /* compute rs=sum of abs(off-diagonal) */
1500:       rs   = 0.0;
1501:       jmin = bi[k]+1;
1502:       nz   = bi[k+1] - jmin;
1503:       if (nz) {
1504:         bcol = bj + jmin;
1505:         while (nz--) {
1506:           rs += PetscAbsScalar(rtmp[*bcol]);
1507:           bcol++;
1508:         }
1509:       }

1511:       sctx.rs = rs;
1512:       sctx.pv = dk;
1513:       MatPivotCheck(C,A,info,&sctx,k);
1514:       if (sctx.newshift) break;    /* sctx.shift_amount is updated */
1515:       dk = sctx.pv;

1517:       /* copy data into U(k,:) */
1518:       ba[bi[k]] = 1.0/dk;
1519:       jmin      = bi[k]+1;
1520:       nz        = bi[k+1] - jmin;
1521:       if (nz) {
1522:         bcol = bj + jmin;
1523:         bval = ba + jmin;
1524:         while (nz--) {
1525:           *bval++       = rtmp[*bcol];
1526:           rtmp[*bcol++] = 0.0;
1527:         }
1528:         /* add k-th row into il and jl */
1529:         il[k] = jmin;
1530:         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1531:       }
1532:     } /* end of for (k = 0; k<mbs; k++) */
1533:   } while (sctx.newshift);
1534:   PetscFree(rtmp);
1535:   PetscFree2(il,jl);

1537:   C->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1538:   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1539:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1540:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1541:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;

1543:   C->assembled    = PETSC_TRUE;
1544:   C->preallocated = PETSC_TRUE;

1546:   PetscLogFlops(C->rmap->N);
1547:   if (sctx.nshift) {
1548:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1549:       PetscInfo(A,"number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
1550:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1551:       PetscInfo(A,"number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
1552:     }
1553:   }
1554:   return 0;
1555: }

1557: PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,const MatFactorInfo *info)
1558: {
1559:   Mat            C;

1561:   MatGetFactor(A,"petsc",MAT_FACTOR_CHOLESKY,&C);
1562:   MatCholeskyFactorSymbolic(C,A,perm,info);
1563:   MatCholeskyFactorNumeric(C,A,info);

1565:   A->ops->solve          = C->ops->solve;
1566:   A->ops->solvetranspose = C->ops->solvetranspose;

1568:   MatHeaderMerge(A,&C);
1569:   return 0;
1570: }