Actual source code: aijfact.c


  2: #include <../src/mat/impls/aij/seq/aij.h>
  3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  4: #include <petscbt.h>
  5: #include <../src/mat/utils/freespace.h>

  7: /*
  8:       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix

 10:       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
 11: */
 12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
 13: {
 14:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
 15:   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
 16:   const PetscInt    *ai = a->i, *aj = a->j;
 17:   const PetscScalar *aa = a->a;
 18:   PetscBool         *done;
 19:   PetscReal         best,past = 0,future;

 21:   /* pick initial row */
 22:   best = -1;
 23:   for (i=0; i<n; i++) {
 24:     future = 0.0;
 25:     for (j=ai[i]; j<ai[i+1]; j++) {
 26:       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
 27:       else              past  = PetscAbsScalar(aa[j]);
 28:     }
 29:     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 30:     if (past/future > best) {
 31:       best    = past/future;
 32:       current = i;
 33:     }
 34:   }

 36:   PetscMalloc1(n,&done);
 37:   PetscArrayzero(done,n);
 38:   PetscMalloc1(n,&order);
 39:   order[0] = current;
 40:   for (i=0; i<n-1; i++) {
 41:     done[current] = PETSC_TRUE;
 42:     best          = -1;
 43:     /* loop over all neighbors of current pivot */
 44:     for (j=ai[current]; j<ai[current+1]; j++) {
 45:       jj = aj[j];
 46:       if (done[jj]) continue;
 47:       /* loop over columns of potential next row computing weights for below and above diagonal */
 48:       past = future = 0.0;
 49:       for (k=ai[jj]; k<ai[jj+1]; k++) {
 50:         kk = aj[k];
 51:         if (done[kk]) past += PetscAbsScalar(aa[k]);
 52:         else if (kk != jj) future += PetscAbsScalar(aa[k]);
 53:       }
 54:       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 55:       if (past/future > best) {
 56:         best       = past/future;
 57:         newcurrent = jj;
 58:       }
 59:     }
 60:     if (best == -1) { /* no neighbors to select from so select best of all that remain */
 61:       best = -1;
 62:       for (k=0; k<n; k++) {
 63:         if (done[k]) continue;
 64:         future = 0.0;
 65:         past   = 0.0;
 66:         for (j=ai[k]; j<ai[k+1]; j++) {
 67:           kk = aj[j];
 68:           if (done[kk])       past += PetscAbsScalar(aa[j]);
 69:           else if (kk != k) future += PetscAbsScalar(aa[j]);
 70:         }
 71:         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 72:         if (past/future > best) {
 73:           best       = past/future;
 74:           newcurrent = k;
 75:         }
 76:       }
 77:     }
 79:     current    = newcurrent;
 80:     order[i+1] = current;
 81:   }
 82:   ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);
 83:   *icol = *irow;
 84:   PetscObjectReference((PetscObject)*irow);
 85:   PetscFree(done);
 86:   PetscFree(order);
 87:   return 0;
 88: }

 90: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A,MatSolverType *type)
 91: {
 92:   *type = MATSOLVERPETSC;
 93:   return 0;
 94: }

 96: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
 97: {
 98:   PetscInt       n = A->rmap->n;

100: #if defined(PETSC_USE_COMPLEX)
102: #endif
103:   MatCreate(PetscObjectComm((PetscObject)A),B);
104:   MatSetSizes(*B,n,n,n,n);
105:   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
106:     MatSetType(*B,MATSEQAIJ);

108:     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
109:     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;

111:     MatSetBlockSizesFromMats(*B,A,A);
112:     PetscStrallocpy(MATORDERINGND,(char**)&(*B)->preferredordering[MAT_FACTOR_LU]);
113:     PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ILU]);
114:     PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ILUDT]);
115:   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
116:     MatSetType(*B,MATSEQSBAIJ);
117:     MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);

119:     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
120:     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
121:     PetscStrallocpy(MATORDERINGND,(char**)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]);
122:     PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ICC]);
123:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
124:   (*B)->factortype = ftype;

126:   PetscFree((*B)->solvertype);
127:   PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);
128:   (*B)->canuseordering = PETSC_TRUE;
129:   PetscObjectComposeFunction((PetscObject)*B,"MatFactorGetSolverType_C",MatFactorGetSolverType_petsc);
130:   return 0;
131: }

133: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
134: {
135:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
136:   IS                 isicol;
137:   const PetscInt     *r,*ic;
138:   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
139:   PetscInt           *bi,*bj,*ajtmp;
140:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
141:   PetscReal          f;
142:   PetscInt           nlnk,*lnk,k,**bi_ptr;
143:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
144:   PetscBT            lnkbt;
145:   PetscBool          missing;

148:   MatMissingDiagonal(A,&missing,&i);

151:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
152:   ISGetIndices(isrow,&r);
153:   ISGetIndices(isicol,&ic);

155:   /* get new row pointers */
156:   PetscMalloc1(n+1,&bi);
157:   bi[0] = 0;

159:   /* bdiag is location of diagonal in factor */
160:   PetscMalloc1(n+1,&bdiag);
161:   bdiag[0] = 0;

163:   /* linked list for storing column indices of the active row */
164:   nlnk = n + 1;
165:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

167:   PetscMalloc2(n+1,&bi_ptr,n+1,&im);

169:   /* initial FreeSpace size is f*(ai[n]+1) */
170:   f             = info->fill;
171:   if (n==1)   f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
172:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
173:   current_space = free_space;

175:   for (i=0; i<n; i++) {
176:     /* copy previous fill into linked list */
177:     nzi = 0;
178:     nnz = ai[r[i]+1] - ai[r[i]];
179:     ajtmp = aj + ai[r[i]];
180:     PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt);
181:     nzi  += nlnk;

183:     /* add pivot rows into linked list */
184:     row = lnk[n];
185:     while (row < i) {
186:       nzbd  = bdiag[row] - bi[row] + 1;   /* num of entries in the row with column index <= row */
187:       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
188:       PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im);
189:       nzi  += nlnk;
190:       row   = lnk[row];
191:     }
192:     bi[i+1] = bi[i] + nzi;
193:     im[i]   = nzi;

195:     /* mark bdiag */
196:     nzbd = 0;
197:     nnz  = nzi;
198:     k    = lnk[n];
199:     while (nnz-- && k < i) {
200:       nzbd++;
201:       k = lnk[k];
202:     }
203:     bdiag[i] = bi[i] + nzbd;

205:     /* if free space is not available, make more free space */
206:     if (current_space->local_remaining<nzi) {
207:       nnz  = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
208:       PetscFreeSpaceGet(nnz,&current_space);
209:       reallocs++;
210:     }

212:     /* copy data into free space, then initialize lnk */
213:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);

215:     bi_ptr[i]                       = current_space->array;
216:     current_space->array           += nzi;
217:     current_space->local_used      += nzi;
218:     current_space->local_remaining -= nzi;
219:   }
220: #if defined(PETSC_USE_INFO)
221:   if (ai[n] != 0) {
222:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
223:     PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
224:     PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
225:     PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af);
226:     PetscInfo(A,"for best performance.\n");
227:   } else {
228:     PetscInfo(A,"Empty matrix\n");
229:   }
230: #endif

232:   ISRestoreIndices(isrow,&r);
233:   ISRestoreIndices(isicol,&ic);

235:   /* destroy list of free space and other temporary array(s) */
236:   PetscMalloc1(bi[n]+1,&bj);
237:   PetscFreeSpaceContiguous(&free_space,bj);
238:   PetscLLDestroy(lnk,lnkbt);
239:   PetscFree2(bi_ptr,im);

241:   /* put together the new matrix */
242:   MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
243:   PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
244:   b    = (Mat_SeqAIJ*)(B)->data;

246:   b->free_a       = PETSC_TRUE;
247:   b->free_ij      = PETSC_TRUE;
248:   b->singlemalloc = PETSC_FALSE;

250:   PetscMalloc1(bi[n]+1,&b->a);
251:   b->j    = bj;
252:   b->i    = bi;
253:   b->diag = bdiag;
254:   b->ilen = NULL;
255:   b->imax = NULL;
256:   b->row  = isrow;
257:   b->col  = iscol;
258:   PetscObjectReference((PetscObject)isrow);
259:   PetscObjectReference((PetscObject)iscol);
260:   b->icol = isicol;
261:   PetscMalloc1(n+1,&b->solve_work);

263:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
264:   PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
265:   b->maxnz = b->nz = bi[n];

267:   (B)->factortype            = MAT_FACTOR_LU;
268:   (B)->info.factor_mallocs   = reallocs;
269:   (B)->info.fill_ratio_given = f;

271:   if (ai[n]) {
272:     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
273:   } else {
274:     (B)->info.fill_ratio_needed = 0.0;
275:   }
276:   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
277:   if (a->inode.size) {
278:     (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
279:   }
280:   return 0;
281: }

283: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
284: {
285:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
286:   IS                 isicol;
287:   const PetscInt     *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
288:   PetscInt           i,n=A->rmap->n;
289:   PetscInt           *bi,*bj;
290:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
291:   PetscReal          f;
292:   PetscInt           nlnk,*lnk,k,**bi_ptr;
293:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
294:   PetscBT            lnkbt;
295:   PetscBool          missing;

298:   MatMissingDiagonal(A,&missing,&i);

301:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
302:   ISGetIndices(isrow,&r);
303:   ISGetIndices(isicol,&ic);

305:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
306:   PetscMalloc1(n+1,&bi);
307:   PetscMalloc1(n+1,&bdiag);
308:   bi[0] = bdiag[0] = 0;

310:   /* linked list for storing column indices of the active row */
311:   nlnk = n + 1;
312:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

314:   PetscMalloc2(n+1,&bi_ptr,n+1,&im);

316:   /* initial FreeSpace size is f*(ai[n]+1) */
317:   f             = info->fill;
318:   if (n==1)   f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
319:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
320:   current_space = free_space;

322:   for (i=0; i<n; i++) {
323:     /* copy previous fill into linked list */
324:     nzi = 0;
325:     nnz = ai[r[i]+1] - ai[r[i]];
326:     ajtmp = aj + ai[r[i]];
327:     PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt);
328:     nzi  += nlnk;

330:     /* add pivot rows into linked list */
331:     row = lnk[n];
332:     while (row < i) {
333:       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
334:       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
335:       PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im);
336:       nzi  += nlnk;
337:       row   = lnk[row];
338:     }
339:     bi[i+1] = bi[i] + nzi;
340:     im[i]   = nzi;

342:     /* mark bdiag */
343:     nzbd = 0;
344:     nnz  = nzi;
345:     k    = lnk[n];
346:     while (nnz-- && k < i) {
347:       nzbd++;
348:       k = lnk[k];
349:     }
350:     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

352:     /* if free space is not available, make more free space */
353:     if (current_space->local_remaining<nzi) {
354:       /* estimated additional space needed */
355:       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi));
356:       PetscFreeSpaceGet(nnz,&current_space);
357:       reallocs++;
358:     }

360:     /* copy data into free space, then initialize lnk */
361:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);

363:     bi_ptr[i]                       = current_space->array;
364:     current_space->array           += nzi;
365:     current_space->local_used      += nzi;
366:     current_space->local_remaining -= nzi;
367:   }

369:   ISRestoreIndices(isrow,&r);
370:   ISRestoreIndices(isicol,&ic);

372:   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
373:   PetscMalloc1(bi[n]+1,&bj);
374:   PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
375:   PetscLLDestroy(lnk,lnkbt);
376:   PetscFree2(bi_ptr,im);

378:   /* put together the new matrix */
379:   MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
380:   PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
381:   b    = (Mat_SeqAIJ*)(B)->data;

383:   b->free_a       = PETSC_TRUE;
384:   b->free_ij      = PETSC_TRUE;
385:   b->singlemalloc = PETSC_FALSE;

387:   PetscMalloc1(bdiag[0]+1,&b->a);

389:   b->j    = bj;
390:   b->i    = bi;
391:   b->diag = bdiag;
392:   b->ilen = NULL;
393:   b->imax = NULL;
394:   b->row  = isrow;
395:   b->col  = iscol;
396:   PetscObjectReference((PetscObject)isrow);
397:   PetscObjectReference((PetscObject)iscol);
398:   b->icol = isicol;
399:   PetscMalloc1(n+1,&b->solve_work);

401:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
402:   PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
403:   b->maxnz = b->nz = bdiag[0]+1;

405:   B->factortype            = MAT_FACTOR_LU;
406:   B->info.factor_mallocs   = reallocs;
407:   B->info.fill_ratio_given = f;

409:   if (ai[n]) {
410:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
411:   } else {
412:     B->info.fill_ratio_needed = 0.0;
413:   }
414: #if defined(PETSC_USE_INFO)
415:   if (ai[n] != 0) {
416:     PetscReal af = B->info.fill_ratio_needed;
417:     PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
418:     PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
419:     PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af);
420:     PetscInfo(A,"for best performance.\n");
421:   } else {
422:     PetscInfo(A,"Empty matrix\n");
423:   }
424: #endif
425:   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
426:   if (a->inode.size) {
427:     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
428:   }
429:   MatSeqAIJCheckInode_FactorLU(B);
430:   return 0;
431: }

433: /*
434:     Trouble in factorization, should we dump the original matrix?
435: */
436: PetscErrorCode MatFactorDumpMatrix(Mat A)
437: {
438:   PetscBool      flg = PETSC_FALSE;

440:   PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);
441:   if (flg) {
442:     PetscViewer viewer;
443:     char        filename[PETSC_MAX_PATH_LEN];

445:     PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
446:     PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);
447:     MatView(A,viewer);
448:     PetscViewerDestroy(&viewer);
449:   }
450:   return 0;
451: }

453: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
454: {
455:   Mat             C     =B;
456:   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
457:   IS              isrow = b->row,isicol = b->icol;
458:   const PetscInt  *r,*ic,*ics;
459:   const PetscInt  n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
460:   PetscInt        i,j,k,nz,nzL,row,*pj;
461:   const PetscInt  *ajtmp,*bjtmp;
462:   MatScalar       *rtmp,*pc,multiplier,*pv;
463:   const MatScalar *aa=a->a,*v;
464:   PetscBool       row_identity,col_identity;
465:   FactorShiftCtx  sctx;
466:   const PetscInt  *ddiag;
467:   PetscReal       rs;
468:   MatScalar       d;

470:   /* MatPivotSetUp(): initialize shift context sctx */
471:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

473:   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
474:     ddiag          = a->diag;
475:     sctx.shift_top = info->zeropivot;
476:     for (i=0; i<n; i++) {
477:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
478:       d  = (aa)[ddiag[i]];
479:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
480:       v  = aa+ai[i];
481:       nz = ai[i+1] - ai[i];
482:       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
483:       if (rs>sctx.shift_top) sctx.shift_top = rs;
484:     }
485:     sctx.shift_top *= 1.1;
486:     sctx.nshift_max = 5;
487:     sctx.shift_lo   = 0.;
488:     sctx.shift_hi   = 1.;
489:   }

491:   ISGetIndices(isrow,&r);
492:   ISGetIndices(isicol,&ic);
493:   PetscMalloc1(n+1,&rtmp);
494:   ics  = ic;

496:   do {
497:     sctx.newshift = PETSC_FALSE;
498:     for (i=0; i<n; i++) {
499:       /* zero rtmp */
500:       /* L part */
501:       nz    = bi[i+1] - bi[i];
502:       bjtmp = bj + bi[i];
503:       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;

505:       /* U part */
506:       nz    = bdiag[i]-bdiag[i+1];
507:       bjtmp = bj + bdiag[i+1]+1;
508:       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;

510:       /* load in initial (unfactored row) */
511:       nz    = ai[r[i]+1] - ai[r[i]];
512:       ajtmp = aj + ai[r[i]];
513:       v     = aa + ai[r[i]];
514:       for (j=0; j<nz; j++) {
515:         rtmp[ics[ajtmp[j]]] = v[j];
516:       }
517:       /* ZeropivotApply() */
518:       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */

520:       /* elimination */
521:       bjtmp = bj + bi[i];
522:       row   = *bjtmp++;
523:       nzL   = bi[i+1] - bi[i];
524:       for (k=0; k < nzL; k++) {
525:         pc = rtmp + row;
526:         if (*pc != 0.0) {
527:           pv         = b->a + bdiag[row];
528:           multiplier = *pc * (*pv);
529:           *pc        = multiplier;

531:           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
532:           pv = b->a + bdiag[row+1]+1;
533:           nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */

535:           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
536:           PetscLogFlops(1+2.0*nz);
537:         }
538:         row = *bjtmp++;
539:       }

541:       /* finished row so stick it into b->a */
542:       rs = 0.0;
543:       /* L part */
544:       pv = b->a + bi[i];
545:       pj = b->j + bi[i];
546:       nz = bi[i+1] - bi[i];
547:       for (j=0; j<nz; j++) {
548:         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
549:       }

551:       /* U part */
552:       pv = b->a + bdiag[i+1]+1;
553:       pj = b->j + bdiag[i+1]+1;
554:       nz = bdiag[i] - bdiag[i+1]-1;
555:       for (j=0; j<nz; j++) {
556:         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
557:       }

559:       sctx.rs = rs;
560:       sctx.pv = rtmp[i];
561:       MatPivotCheck(B,A,info,&sctx,i);
562:       if (sctx.newshift) break; /* break for-loop */
563:       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */

565:       /* Mark diagonal and invert diagonal for simpler triangular solves */
566:       pv  = b->a + bdiag[i];
567:       *pv = 1.0/rtmp[i];

569:     } /* endof for (i=0; i<n; i++) { */

571:     /* MatPivotRefine() */
572:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
573:       /*
574:        * if no shift in this attempt & shifting & started shifting & can refine,
575:        * then try lower shift
576:        */
577:       sctx.shift_hi       = sctx.shift_fraction;
578:       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
579:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
580:       sctx.newshift       = PETSC_TRUE;
581:       sctx.nshift++;
582:     }
583:   } while (sctx.newshift);

585:   PetscFree(rtmp);
586:   ISRestoreIndices(isicol,&ic);
587:   ISRestoreIndices(isrow,&r);

589:   ISIdentity(isrow,&row_identity);
590:   ISIdentity(isicol,&col_identity);
591:   if (b->inode.size) {
592:     C->ops->solve = MatSolve_SeqAIJ_Inode;
593:   } else if (row_identity && col_identity) {
594:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
595:   } else {
596:     C->ops->solve = MatSolve_SeqAIJ;
597:   }
598:   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
599:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
600:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
601:   C->ops->matsolve          = MatMatSolve_SeqAIJ;
602:   C->assembled              = PETSC_TRUE;
603:   C->preallocated           = PETSC_TRUE;

605:   PetscLogFlops(C->cmap->n);

607:   /* MatShiftView(A,info,&sctx) */
608:   if (sctx.nshift) {
609:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
610:       PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
611:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
612:       PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
613:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
614:       PetscInfo(A,"number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
615:     }
616:   }
617:   return 0;
618: }

620: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
621: {
622:   Mat             C     =B;
623:   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
624:   IS              isrow = b->row,isicol = b->icol;
625:   const PetscInt  *r,*ic,*ics;
626:   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
627:   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
628:   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
629:   MatScalar       *pv,*rtmp,*pc,multiplier,d;
630:   const MatScalar *v,*aa=a->a;
631:   PetscReal       rs=0.0;
632:   FactorShiftCtx  sctx;
633:   const PetscInt  *ddiag;
634:   PetscBool       row_identity, col_identity;

636:   /* MatPivotSetUp(): initialize shift context sctx */
637:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

639:   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
640:     ddiag          = a->diag;
641:     sctx.shift_top = info->zeropivot;
642:     for (i=0; i<n; i++) {
643:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
644:       d  = (aa)[ddiag[i]];
645:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
646:       v  = aa+ai[i];
647:       nz = ai[i+1] - ai[i];
648:       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
649:       if (rs>sctx.shift_top) sctx.shift_top = rs;
650:     }
651:     sctx.shift_top *= 1.1;
652:     sctx.nshift_max = 5;
653:     sctx.shift_lo   = 0.;
654:     sctx.shift_hi   = 1.;
655:   }

657:   ISGetIndices(isrow,&r);
658:   ISGetIndices(isicol,&ic);
659:   PetscMalloc1(n+1,&rtmp);
660:   ics  = ic;

662:   do {
663:     sctx.newshift = PETSC_FALSE;
664:     for (i=0; i<n; i++) {
665:       nz    = bi[i+1] - bi[i];
666:       bjtmp = bj + bi[i];
667:       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;

669:       /* load in initial (unfactored row) */
670:       nz    = ai[r[i]+1] - ai[r[i]];
671:       ajtmp = aj + ai[r[i]];
672:       v     = aa + ai[r[i]];
673:       for (j=0; j<nz; j++) {
674:         rtmp[ics[ajtmp[j]]] = v[j];
675:       }
676:       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */

678:       row = *bjtmp++;
679:       while  (row < i) {
680:         pc = rtmp + row;
681:         if (*pc != 0.0) {
682:           pv         = b->a + diag_offset[row];
683:           pj         = b->j + diag_offset[row] + 1;
684:           multiplier = *pc / *pv++;
685:           *pc        = multiplier;
686:           nz         = bi[row+1] - diag_offset[row] - 1;
687:           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
688:           PetscLogFlops(1+2.0*nz);
689:         }
690:         row = *bjtmp++;
691:       }
692:       /* finished row so stick it into b->a */
693:       pv   = b->a + bi[i];
694:       pj   = b->j + bi[i];
695:       nz   = bi[i+1] - bi[i];
696:       diag = diag_offset[i] - bi[i];
697:       rs   = 0.0;
698:       for (j=0; j<nz; j++) {
699:         pv[j] = rtmp[pj[j]];
700:         rs   += PetscAbsScalar(pv[j]);
701:       }
702:       rs -= PetscAbsScalar(pv[diag]);

704:       sctx.rs = rs;
705:       sctx.pv = pv[diag];
706:       MatPivotCheck(B,A,info,&sctx,i);
707:       if (sctx.newshift) break;
708:       pv[diag] = sctx.pv;
709:     }

711:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
712:       /*
713:        * if no shift in this attempt & shifting & started shifting & can refine,
714:        * then try lower shift
715:        */
716:       sctx.shift_hi       = sctx.shift_fraction;
717:       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
718:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
719:       sctx.newshift       = PETSC_TRUE;
720:       sctx.nshift++;
721:     }
722:   } while (sctx.newshift);

724:   /* invert diagonal entries for simpler triangular solves */
725:   for (i=0; i<n; i++) {
726:     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
727:   }
728:   PetscFree(rtmp);
729:   ISRestoreIndices(isicol,&ic);
730:   ISRestoreIndices(isrow,&r);

732:   ISIdentity(isrow,&row_identity);
733:   ISIdentity(isicol,&col_identity);
734:   if (row_identity && col_identity) {
735:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
736:   } else {
737:     C->ops->solve = MatSolve_SeqAIJ_inplace;
738:   }
739:   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
740:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
741:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
742:   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;

744:   C->assembled    = PETSC_TRUE;
745:   C->preallocated = PETSC_TRUE;

747:   PetscLogFlops(C->cmap->n);
748:   if (sctx.nshift) {
749:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
750:       PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
751:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
752:       PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
753:     }
754:   }
755:   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
756:   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;

758:   MatSeqAIJCheckInode(C);
759:   return 0;
760: }

762: /*
763:    This routine implements inplace ILU(0) with row or/and column permutations.
764:    Input:
765:      A - original matrix
766:    Output;
767:      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
768:          a->j (col index) is permuted by the inverse of colperm, then sorted
769:          a->a reordered accordingly with a->j
770:          a->diag (ptr to diagonal elements) is updated.
771: */
772: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
773: {
774:   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data;
775:   IS              isrow = a->row,isicol = a->icol;
776:   const PetscInt  *r,*ic,*ics;
777:   PetscInt        i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
778:   PetscInt        *ajtmp,nz,row;
779:   PetscInt        *diag = a->diag,nbdiag,*pj;
780:   PetscScalar     *rtmp,*pc,multiplier,d;
781:   MatScalar       *pv,*v;
782:   PetscReal       rs;
783:   FactorShiftCtx  sctx;
784:   const MatScalar *aa=a->a,*vtmp;


788:   /* MatPivotSetUp(): initialize shift context sctx */
789:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

791:   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
792:     const PetscInt *ddiag = a->diag;
793:     sctx.shift_top = info->zeropivot;
794:     for (i=0; i<n; i++) {
795:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
796:       d    = (aa)[ddiag[i]];
797:       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
798:       vtmp = aa+ai[i];
799:       nz   = ai[i+1] - ai[i];
800:       for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
801:       if (rs>sctx.shift_top) sctx.shift_top = rs;
802:     }
803:     sctx.shift_top *= 1.1;
804:     sctx.nshift_max = 5;
805:     sctx.shift_lo   = 0.;
806:     sctx.shift_hi   = 1.;
807:   }

809:   ISGetIndices(isrow,&r);
810:   ISGetIndices(isicol,&ic);
811:   PetscMalloc1(n+1,&rtmp);
812:   PetscArrayzero(rtmp,n+1);
813:   ics  = ic;

815: #if defined(MV)
816:   sctx.shift_top      = 0.;
817:   sctx.nshift_max     = 0;
818:   sctx.shift_lo       = 0.;
819:   sctx.shift_hi       = 0.;
820:   sctx.shift_fraction = 0.;

822:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
823:     sctx.shift_top = 0.;
824:     for (i=0; i<n; i++) {
825:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
826:       d  = (a->a)[diag[i]];
827:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
828:       v  = a->a+ai[i];
829:       nz = ai[i+1] - ai[i];
830:       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
831:       if (rs>sctx.shift_top) sctx.shift_top = rs;
832:     }
833:     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
834:     sctx.shift_top *= 1.1;
835:     sctx.nshift_max = 5;
836:     sctx.shift_lo   = 0.;
837:     sctx.shift_hi   = 1.;
838:   }

840:   sctx.shift_amount = 0.;
841:   sctx.nshift       = 0;
842: #endif

844:   do {
845:     sctx.newshift = PETSC_FALSE;
846:     for (i=0; i<n; i++) {
847:       /* load in initial unfactored row */
848:       nz    = ai[r[i]+1] - ai[r[i]];
849:       ajtmp = aj + ai[r[i]];
850:       v     = a->a + ai[r[i]];
851:       /* sort permuted ajtmp and values v accordingly */
852:       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
853:       PetscSortIntWithScalarArray(nz,ajtmp,v);

855:       diag[r[i]] = ai[r[i]];
856:       for (j=0; j<nz; j++) {
857:         rtmp[ajtmp[j]] = v[j];
858:         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
859:       }
860:       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */

862:       row = *ajtmp++;
863:       while  (row < i) {
864:         pc = rtmp + row;
865:         if (*pc != 0.0) {
866:           pv = a->a + diag[r[row]];
867:           pj = aj + diag[r[row]] + 1;

869:           multiplier = *pc / *pv++;
870:           *pc        = multiplier;
871:           nz         = ai[r[row]+1] - diag[r[row]] - 1;
872:           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
873:           PetscLogFlops(1+2.0*nz);
874:         }
875:         row = *ajtmp++;
876:       }
877:       /* finished row so overwrite it onto a->a */
878:       pv     = a->a + ai[r[i]];
879:       pj     = aj + ai[r[i]];
880:       nz     = ai[r[i]+1] - ai[r[i]];
881:       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */

883:       rs = 0.0;
884:       for (j=0; j<nz; j++) {
885:         pv[j] = rtmp[pj[j]];
886:         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
887:       }

889:       sctx.rs = rs;
890:       sctx.pv = pv[nbdiag];
891:       MatPivotCheck(B,A,info,&sctx,i);
892:       if (sctx.newshift) break;
893:       pv[nbdiag] = sctx.pv;
894:     }

896:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
897:       /*
898:        * if no shift in this attempt & shifting & started shifting & can refine,
899:        * then try lower shift
900:        */
901:       sctx.shift_hi       = sctx.shift_fraction;
902:       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
903:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
904:       sctx.newshift       = PETSC_TRUE;
905:       sctx.nshift++;
906:     }
907:   } while (sctx.newshift);

909:   /* invert diagonal entries for simpler triangular solves */
910:   for (i=0; i<n; i++) {
911:     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
912:   }

914:   PetscFree(rtmp);
915:   ISRestoreIndices(isicol,&ic);
916:   ISRestoreIndices(isrow,&r);

918:   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
919:   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
920:   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
921:   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;

923:   A->assembled    = PETSC_TRUE;
924:   A->preallocated = PETSC_TRUE;

926:   PetscLogFlops(A->cmap->n);
927:   if (sctx.nshift) {
928:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
929:       PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
930:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
931:       PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
932:     }
933:   }
934:   return 0;
935: }

937: /* ----------------------------------------------------------- */
938: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
939: {
940:   Mat            C;

942:   MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);
943:   MatLUFactorSymbolic(C,A,row,col,info);
944:   MatLUFactorNumeric(C,A,info);

946:   A->ops->solve          = C->ops->solve;
947:   A->ops->solvetranspose = C->ops->solvetranspose;

949:   MatHeaderMerge(A,&C);
950:   PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);
951:   return 0;
952: }
953: /* ----------------------------------------------------------- */

955: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
956: {
957:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
958:   IS                iscol = a->col,isrow = a->row;
959:   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
960:   PetscInt          nz;
961:   const PetscInt    *rout,*cout,*r,*c;
962:   PetscScalar       *x,*tmp,*tmps,sum;
963:   const PetscScalar *b;
964:   const MatScalar   *aa = a->a,*v;

966:   if (!n) return 0;

968:   VecGetArrayRead(bb,&b);
969:   VecGetArrayWrite(xx,&x);
970:   tmp  = a->solve_work;

972:   ISGetIndices(isrow,&rout); r = rout;
973:   ISGetIndices(iscol,&cout)); c = cout + (n-1;

975:   /* forward solve the lower triangular */
976:   tmp[0] = b[*r++];
977:   tmps   = tmp;
978:   for (i=1; i<n; i++) {
979:     v   = aa + ai[i];
980:     vi  = aj + ai[i];
981:     nz  = a->diag[i] - ai[i];
982:     sum = b[*r++];
983:     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
984:     tmp[i] = sum;
985:   }

987:   /* backward solve the upper triangular */
988:   for (i=n-1; i>=0; i--) {
989:     v   = aa + a->diag[i] + 1;
990:     vi  = aj + a->diag[i] + 1;
991:     nz  = ai[i+1] - a->diag[i] - 1;
992:     sum = tmp[i];
993:     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
994:     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
995:   }

997:   ISRestoreIndices(isrow,&rout);
998:   ISRestoreIndices(iscol,&cout);
999:   VecRestoreArrayRead(bb,&b);
1000:   VecRestoreArrayWrite(xx,&x);
1001:   PetscLogFlops(2.0*a->nz - A->cmap->n);
1002:   return 0;
1003: }

1005: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1006: {
1007:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1008:   IS                iscol = a->col,isrow = a->row;
1009:   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1010:   PetscInt          nz,neq,ldb,ldx;
1011:   const PetscInt    *rout,*cout,*r,*c;
1012:   PetscScalar       *x,*tmp = a->solve_work,*tmps,sum;
1013:   const PetscScalar *b,*aa = a->a,*v;
1014:   PetscBool         isdense;

1016:   if (!n) return 0;
1017:   PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense);
1019:   if (X != B) {
1020:     PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense);
1022:   }
1023:   MatDenseGetArrayRead(B,&b);
1024:   MatDenseGetLDA(B,&ldb);
1025:   MatDenseGetArray(X,&x);
1026:   MatDenseGetLDA(X,&ldx);
1027:   ISGetIndices(isrow,&rout); r = rout;
1028:   ISGetIndices(iscol,&cout); c = cout;
1029:   for (neq=0; neq<B->cmap->n; neq++) {
1030:     /* forward solve the lower triangular */
1031:     tmp[0] = b[r[0]];
1032:     tmps   = tmp;
1033:     for (i=1; i<n; i++) {
1034:       v   = aa + ai[i];
1035:       vi  = aj + ai[i];
1036:       nz  = a->diag[i] - ai[i];
1037:       sum = b[r[i]];
1038:       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1039:       tmp[i] = sum;
1040:     }
1041:     /* backward solve the upper triangular */
1042:     for (i=n-1; i>=0; i--) {
1043:       v   = aa + a->diag[i] + 1;
1044:       vi  = aj + a->diag[i] + 1;
1045:       nz  = ai[i+1] - a->diag[i] - 1;
1046:       sum = tmp[i];
1047:       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1048:       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1049:     }
1050:     b += ldb;
1051:     x += ldx;
1052:   }
1053:   ISRestoreIndices(isrow,&rout);
1054:   ISRestoreIndices(iscol,&cout);
1055:   MatDenseRestoreArrayRead(B,&b);
1056:   MatDenseRestoreArray(X,&x);
1057:   PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1058:   return 0;
1059: }

1061: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1062: {
1063:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1064:   IS                iscol = a->col,isrow = a->row;
1065:   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1066:   PetscInt          nz,neq,ldb,ldx;
1067:   const PetscInt    *rout,*cout,*r,*c;
1068:   PetscScalar       *x,*tmp = a->solve_work,sum;
1069:   const PetscScalar *b,*aa = a->a,*v;
1070:   PetscBool         isdense;

1072:   if (!n) return 0;
1073:   PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense);
1075:   if (X != B) {
1076:     PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense);
1078:   }
1079:   MatDenseGetArrayRead(B,&b);
1080:   MatDenseGetLDA(B,&ldb);
1081:   MatDenseGetArray(X,&x);
1082:   MatDenseGetLDA(X,&ldx);
1083:   ISGetIndices(isrow,&rout); r = rout;
1084:   ISGetIndices(iscol,&cout); c = cout;
1085:   for (neq=0; neq<B->cmap->n; neq++) {
1086:     /* forward solve the lower triangular */
1087:     tmp[0] = b[r[0]];
1088:     v      = aa;
1089:     vi     = aj;
1090:     for (i=1; i<n; i++) {
1091:       nz  = ai[i+1] - ai[i];
1092:       sum = b[r[i]];
1093:       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1094:       tmp[i] = sum;
1095:       v     += nz; vi += nz;
1096:     }
1097:     /* backward solve the upper triangular */
1098:     for (i=n-1; i>=0; i--) {
1099:       v   = aa + adiag[i+1]+1;
1100:       vi  = aj + adiag[i+1]+1;
1101:       nz  = adiag[i]-adiag[i+1]-1;
1102:       sum = tmp[i];
1103:       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1104:       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1105:     }
1106:     b += ldb;
1107:     x += ldx;
1108:   }
1109:   ISRestoreIndices(isrow,&rout);
1110:   ISRestoreIndices(iscol,&cout);
1111:   MatDenseRestoreArrayRead(B,&b);
1112:   MatDenseRestoreArray(X,&x);
1113:   PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1114:   return 0;
1115: }

1117: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1118: {
1119:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1120:   IS                iscol = a->col,isrow = a->row;
1121:   const PetscInt    *r,*c,*rout,*cout;
1122:   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1123:   PetscInt          nz,row;
1124:   PetscScalar       *x,*tmp,*tmps,sum;
1125:   const PetscScalar *b;
1126:   const MatScalar   *aa = a->a,*v;

1128:   if (!n) return 0;

1130:   VecGetArrayRead(bb,&b);
1131:   VecGetArrayWrite(xx,&x);
1132:   tmp  = a->solve_work;

1134:   ISGetIndices(isrow,&rout); r = rout;
1135:   ISGetIndices(iscol,&cout)); c = cout + (n-1;

1137:   /* forward solve the lower triangular */
1138:   tmp[0] = b[*r++];
1139:   tmps   = tmp;
1140:   for (row=1; row<n; row++) {
1141:     i   = rout[row]; /* permuted row */
1142:     v   = aa + ai[i];
1143:     vi  = aj + ai[i];
1144:     nz  = a->diag[i] - ai[i];
1145:     sum = b[*r++];
1146:     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1147:     tmp[row] = sum;
1148:   }

1150:   /* backward solve the upper triangular */
1151:   for (row=n-1; row>=0; row--) {
1152:     i   = rout[row]; /* permuted row */
1153:     v   = aa + a->diag[i] + 1;
1154:     vi  = aj + a->diag[i] + 1;
1155:     nz  = ai[i+1] - a->diag[i] - 1;
1156:     sum = tmp[row];
1157:     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1158:     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1159:   }

1161:   ISRestoreIndices(isrow,&rout);
1162:   ISRestoreIndices(iscol,&cout);
1163:   VecRestoreArrayRead(bb,&b);
1164:   VecRestoreArrayWrite(xx,&x);
1165:   PetscLogFlops(2.0*a->nz - A->cmap->n);
1166:   return 0;
1167: }

1169: /* ----------------------------------------------------------- */
1170: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1171: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1172: {
1173:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1174:   PetscInt          n   = A->rmap->n;
1175:   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1176:   PetscScalar       *x;
1177:   const PetscScalar *b;
1178:   const MatScalar   *aa = a->a;
1179: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1180:   PetscInt        adiag_i,i,nz,ai_i;
1181:   const PetscInt  *vi;
1182:   const MatScalar *v;
1183:   PetscScalar     sum;
1184: #endif

1186:   if (!n) return 0;

1188:   VecGetArrayRead(bb,&b);
1189:   VecGetArrayWrite(xx,&x);

1191: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1192:   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1193: #else
1194:   /* forward solve the lower triangular */
1195:   x[0] = b[0];
1196:   for (i=1; i<n; i++) {
1197:     ai_i = ai[i];
1198:     v    = aa + ai_i;
1199:     vi   = aj + ai_i;
1200:     nz   = adiag[i] - ai_i;
1201:     sum  = b[i];
1202:     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1203:     x[i] = sum;
1204:   }

1206:   /* backward solve the upper triangular */
1207:   for (i=n-1; i>=0; i--) {
1208:     adiag_i = adiag[i];
1209:     v       = aa + adiag_i + 1;
1210:     vi      = aj + adiag_i + 1;
1211:     nz      = ai[i+1] - adiag_i - 1;
1212:     sum     = x[i];
1213:     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1214:     x[i] = sum*aa[adiag_i];
1215:   }
1216: #endif
1217:   PetscLogFlops(2.0*a->nz - A->cmap->n);
1218:   VecRestoreArrayRead(bb,&b);
1219:   VecRestoreArrayWrite(xx,&x);
1220:   return 0;
1221: }

1223: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1224: {
1225:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1226:   IS                iscol = a->col,isrow = a->row;
1227:   PetscInt          i, n = A->rmap->n,j;
1228:   PetscInt          nz;
1229:   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1230:   PetscScalar       *x,*tmp,sum;
1231:   const PetscScalar *b;
1232:   const MatScalar   *aa = a->a,*v;

1234:   if (yy != xx) VecCopy(yy,xx);

1236:   VecGetArrayRead(bb,&b);
1237:   VecGetArray(xx,&x);
1238:   tmp  = a->solve_work;

1240:   ISGetIndices(isrow,&rout); r = rout;
1241:   ISGetIndices(iscol,&cout)); c = cout + (n-1;

1243:   /* forward solve the lower triangular */
1244:   tmp[0] = b[*r++];
1245:   for (i=1; i<n; i++) {
1246:     v   = aa + ai[i];
1247:     vi  = aj + ai[i];
1248:     nz  = a->diag[i] - ai[i];
1249:     sum = b[*r++];
1250:     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1251:     tmp[i] = sum;
1252:   }

1254:   /* backward solve the upper triangular */
1255:   for (i=n-1; i>=0; i--) {
1256:     v   = aa + a->diag[i] + 1;
1257:     vi  = aj + a->diag[i] + 1;
1258:     nz  = ai[i+1] - a->diag[i] - 1;
1259:     sum = tmp[i];
1260:     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1261:     tmp[i]   = sum*aa[a->diag[i]];
1262:     x[*c--] += tmp[i];
1263:   }

1265:   ISRestoreIndices(isrow,&rout);
1266:   ISRestoreIndices(iscol,&cout);
1267:   VecRestoreArrayRead(bb,&b);
1268:   VecRestoreArray(xx,&x);
1269:   PetscLogFlops(2.0*a->nz);
1270:   return 0;
1271: }

1273: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1274: {
1275:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1276:   IS                iscol = a->col,isrow = a->row;
1277:   PetscInt          i, n = A->rmap->n,j;
1278:   PetscInt          nz;
1279:   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1280:   PetscScalar       *x,*tmp,sum;
1281:   const PetscScalar *b;
1282:   const MatScalar   *aa = a->a,*v;

1284:   if (yy != xx) VecCopy(yy,xx);

1286:   VecGetArrayRead(bb,&b);
1287:   VecGetArray(xx,&x);
1288:   tmp  = a->solve_work;

1290:   ISGetIndices(isrow,&rout); r = rout;
1291:   ISGetIndices(iscol,&cout); c = cout;

1293:   /* forward solve the lower triangular */
1294:   tmp[0] = b[r[0]];
1295:   v      = aa;
1296:   vi     = aj;
1297:   for (i=1; i<n; i++) {
1298:     nz  = ai[i+1] - ai[i];
1299:     sum = b[r[i]];
1300:     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1301:     tmp[i] = sum;
1302:     v     += nz;
1303:     vi    += nz;
1304:   }

1306:   /* backward solve the upper triangular */
1307:   v  = aa + adiag[n-1];
1308:   vi = aj + adiag[n-1];
1309:   for (i=n-1; i>=0; i--) {
1310:     nz  = adiag[i] - adiag[i+1] - 1;
1311:     sum = tmp[i];
1312:     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1313:     tmp[i]   = sum*v[nz];
1314:     x[c[i]] += tmp[i];
1315:     v       += nz+1; vi += nz+1;
1316:   }

1318:   ISRestoreIndices(isrow,&rout);
1319:   ISRestoreIndices(iscol,&cout);
1320:   VecRestoreArrayRead(bb,&b);
1321:   VecRestoreArray(xx,&x);
1322:   PetscLogFlops(2.0*a->nz);
1323:   return 0;
1324: }

1326: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1327: {
1328:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1329:   IS                iscol = a->col,isrow = a->row;
1330:   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1331:   PetscInt          i,n = A->rmap->n,j;
1332:   PetscInt          nz;
1333:   PetscScalar       *x,*tmp,s1;
1334:   const MatScalar   *aa = a->a,*v;
1335:   const PetscScalar *b;

1337:   VecGetArrayRead(bb,&b);
1338:   VecGetArrayWrite(xx,&x);
1339:   tmp  = a->solve_work;

1341:   ISGetIndices(isrow,&rout); r = rout;
1342:   ISGetIndices(iscol,&cout); c = cout;

1344:   /* copy the b into temp work space according to permutation */
1345:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

1347:   /* forward solve the U^T */
1348:   for (i=0; i<n; i++) {
1349:     v   = aa + diag[i];
1350:     vi  = aj + diag[i] + 1;
1351:     nz  = ai[i+1] - diag[i] - 1;
1352:     s1  = tmp[i];
1353:     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1354:     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1355:     tmp[i] = s1;
1356:   }

1358:   /* backward solve the L^T */
1359:   for (i=n-1; i>=0; i--) {
1360:     v  = aa + diag[i] - 1;
1361:     vi = aj + diag[i] - 1;
1362:     nz = diag[i] - ai[i];
1363:     s1 = tmp[i];
1364:     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1365:   }

1367:   /* copy tmp into x according to permutation */
1368:   for (i=0; i<n; i++) x[r[i]] = tmp[i];

1370:   ISRestoreIndices(isrow,&rout);
1371:   ISRestoreIndices(iscol,&cout);
1372:   VecRestoreArrayRead(bb,&b);
1373:   VecRestoreArrayWrite(xx,&x);

1375:   PetscLogFlops(2.0*a->nz-A->cmap->n);
1376:   return 0;
1377: }

1379: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1380: {
1381:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1382:   IS                iscol = a->col,isrow = a->row;
1383:   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1384:   PetscInt          i,n = A->rmap->n,j;
1385:   PetscInt          nz;
1386:   PetscScalar       *x,*tmp,s1;
1387:   const MatScalar   *aa = a->a,*v;
1388:   const PetscScalar *b;

1390:   VecGetArrayRead(bb,&b);
1391:   VecGetArrayWrite(xx,&x);
1392:   tmp  = a->solve_work;

1394:   ISGetIndices(isrow,&rout); r = rout;
1395:   ISGetIndices(iscol,&cout); c = cout;

1397:   /* copy the b into temp work space according to permutation */
1398:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

1400:   /* forward solve the U^T */
1401:   for (i=0; i<n; i++) {
1402:     v   = aa + adiag[i+1] + 1;
1403:     vi  = aj + adiag[i+1] + 1;
1404:     nz  = adiag[i] - adiag[i+1] - 1;
1405:     s1  = tmp[i];
1406:     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1407:     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1408:     tmp[i] = s1;
1409:   }

1411:   /* backward solve the L^T */
1412:   for (i=n-1; i>=0; i--) {
1413:     v  = aa + ai[i];
1414:     vi = aj + ai[i];
1415:     nz = ai[i+1] - ai[i];
1416:     s1 = tmp[i];
1417:     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1418:   }

1420:   /* copy tmp into x according to permutation */
1421:   for (i=0; i<n; i++) x[r[i]] = tmp[i];

1423:   ISRestoreIndices(isrow,&rout);
1424:   ISRestoreIndices(iscol,&cout);
1425:   VecRestoreArrayRead(bb,&b);
1426:   VecRestoreArrayWrite(xx,&x);

1428:   PetscLogFlops(2.0*a->nz-A->cmap->n);
1429:   return 0;
1430: }

1432: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1433: {
1434:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1435:   IS                iscol = a->col,isrow = a->row;
1436:   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1437:   PetscInt          i,n = A->rmap->n,j;
1438:   PetscInt          nz;
1439:   PetscScalar       *x,*tmp,s1;
1440:   const MatScalar   *aa = a->a,*v;
1441:   const PetscScalar *b;

1443:   if (zz != xx) VecCopy(zz,xx);
1444:   VecGetArrayRead(bb,&b);
1445:   VecGetArray(xx,&x);
1446:   tmp  = a->solve_work;

1448:   ISGetIndices(isrow,&rout); r = rout;
1449:   ISGetIndices(iscol,&cout); c = cout;

1451:   /* copy the b into temp work space according to permutation */
1452:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

1454:   /* forward solve the U^T */
1455:   for (i=0; i<n; i++) {
1456:     v   = aa + diag[i];
1457:     vi  = aj + diag[i] + 1;
1458:     nz  = ai[i+1] - diag[i] - 1;
1459:     s1  = tmp[i];
1460:     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1461:     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1462:     tmp[i] = s1;
1463:   }

1465:   /* backward solve the L^T */
1466:   for (i=n-1; i>=0; i--) {
1467:     v  = aa + diag[i] - 1;
1468:     vi = aj + diag[i] - 1;
1469:     nz = diag[i] - ai[i];
1470:     s1 = tmp[i];
1471:     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1472:   }

1474:   /* copy tmp into x according to permutation */
1475:   for (i=0; i<n; i++) x[r[i]] += tmp[i];

1477:   ISRestoreIndices(isrow,&rout);
1478:   ISRestoreIndices(iscol,&cout);
1479:   VecRestoreArrayRead(bb,&b);
1480:   VecRestoreArray(xx,&x);

1482:   PetscLogFlops(2.0*a->nz-A->cmap->n);
1483:   return 0;
1484: }

1486: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1487: {
1488:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1489:   IS                iscol = a->col,isrow = a->row;
1490:   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1491:   PetscInt          i,n = A->rmap->n,j;
1492:   PetscInt          nz;
1493:   PetscScalar       *x,*tmp,s1;
1494:   const MatScalar   *aa = a->a,*v;
1495:   const PetscScalar *b;

1497:   if (zz != xx) VecCopy(zz,xx);
1498:   VecGetArrayRead(bb,&b);
1499:   VecGetArray(xx,&x);
1500:   tmp  = a->solve_work;

1502:   ISGetIndices(isrow,&rout); r = rout;
1503:   ISGetIndices(iscol,&cout); c = cout;

1505:   /* copy the b into temp work space according to permutation */
1506:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

1508:   /* forward solve the U^T */
1509:   for (i=0; i<n; i++) {
1510:     v   = aa + adiag[i+1] + 1;
1511:     vi  = aj + adiag[i+1] + 1;
1512:     nz  = adiag[i] - adiag[i+1] - 1;
1513:     s1  = tmp[i];
1514:     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1515:     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1516:     tmp[i] = s1;
1517:   }

1519:   /* backward solve the L^T */
1520:   for (i=n-1; i>=0; i--) {
1521:     v  = aa + ai[i];
1522:     vi = aj + ai[i];
1523:     nz = ai[i+1] - ai[i];
1524:     s1 = tmp[i];
1525:     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1526:   }

1528:   /* copy tmp into x according to permutation */
1529:   for (i=0; i<n; i++) x[r[i]] += tmp[i];

1531:   ISRestoreIndices(isrow,&rout);
1532:   ISRestoreIndices(iscol,&cout);
1533:   VecRestoreArrayRead(bb,&b);
1534:   VecRestoreArray(xx,&x);

1536:   PetscLogFlops(2.0*a->nz-A->cmap->n);
1537:   return 0;
1538: }

1540: /* ----------------------------------------------------------------*/

1542: /*
1543:    ilu() under revised new data structure.
1544:    Factored arrays bj and ba are stored as
1545:      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)

1547:    bi=fact->i is an array of size n+1, in which
1548:    bi+
1549:      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1550:      bi[n]:  points to L(n-1,n-1)+1

1552:   bdiag=fact->diag is an array of size n+1,in which
1553:      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1554:      bdiag[n]: points to entry of U(n-1,0)-1

1556:    U(i,:) contains bdiag[i] as its last entry, i.e.,
1557:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1558: */
1559: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1560: {
1561:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
1562:   const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1563:   PetscInt       i,j,k=0,nz,*bi,*bj,*bdiag;
1564:   IS             isicol;

1566:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1567:   MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);
1568:   b    = (Mat_SeqAIJ*)(fact)->data;

1570:   /* allocate matrix arrays for new data structure */
1571:   PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);
1572:   PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));

1574:   b->singlemalloc = PETSC_TRUE;
1575:   if (!b->diag) {
1576:     PetscMalloc1(n+1,&b->diag);
1577:     PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));
1578:   }
1579:   bdiag = b->diag;

1581:   if (n > 0) {
1582:     PetscArrayzero(b->a,ai[n]);
1583:   }

1585:   /* set bi and bj with new data structure */
1586:   bi = b->i;
1587:   bj = b->j;

1589:   /* L part */
1590:   bi[0] = 0;
1591:   for (i=0; i<n; i++) {
1592:     nz      = adiag[i] - ai[i];
1593:     bi[i+1] = bi[i] + nz;
1594:     aj      = a->j + ai[i];
1595:     for (j=0; j<nz; j++) {
1596:       /*   *bj = aj[j]; bj++; */
1597:       bj[k++] = aj[j];
1598:     }
1599:   }

1601:   /* U part */
1602:   bdiag[n] = bi[n]-1;
1603:   for (i=n-1; i>=0; i--) {
1604:     nz = ai[i+1] - adiag[i] - 1;
1605:     aj = a->j + adiag[i] + 1;
1606:     for (j=0; j<nz; j++) {
1607:       /*      *bj = aj[j]; bj++; */
1608:       bj[k++] = aj[j];
1609:     }
1610:     /* diag[i] */
1611:     /*    *bj = i; bj++; */
1612:     bj[k++]  = i;
1613:     bdiag[i] = bdiag[i+1] + nz + 1;
1614:   }

1616:   fact->factortype             = MAT_FACTOR_ILU;
1617:   fact->info.factor_mallocs    = 0;
1618:   fact->info.fill_ratio_given  = info->fill;
1619:   fact->info.fill_ratio_needed = 1.0;
1620:   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1621:   MatSeqAIJCheckInode_FactorLU(fact);

1623:   b       = (Mat_SeqAIJ*)(fact)->data;
1624:   b->row  = isrow;
1625:   b->col  = iscol;
1626:   b->icol = isicol;
1627:   PetscMalloc1(fact->rmap->n+1,&b->solve_work);
1628:   PetscObjectReference((PetscObject)isrow);
1629:   PetscObjectReference((PetscObject)iscol);
1630:   return 0;
1631: }

1633: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1634: {
1635:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1636:   IS                 isicol;
1637:   const PetscInt     *r,*ic;
1638:   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1639:   PetscInt           *bi,*cols,nnz,*cols_lvl;
1640:   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1641:   PetscInt           i,levels,diagonal_fill;
1642:   PetscBool          col_identity,row_identity,missing;
1643:   PetscReal          f;
1644:   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1645:   PetscBT            lnkbt;
1646:   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1647:   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1648:   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;

1651:   MatMissingDiagonal(A,&missing,&i);

1654:   levels = (PetscInt)info->levels;
1655:   ISIdentity(isrow,&row_identity);
1656:   ISIdentity(iscol,&col_identity);
1657:   if (!levels && row_identity && col_identity) {
1658:     /* special case: ilu(0) with natural ordering */
1659:     MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);
1660:     if (a->inode.size) {
1661:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1662:     }
1663:     return 0;
1664:   }

1666:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1667:   ISGetIndices(isrow,&r);
1668:   ISGetIndices(isicol,&ic);

1670:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1671:   PetscMalloc1(n+1,&bi);
1672:   PetscMalloc1(n+1,&bdiag);
1673:   bi[0] = bdiag[0] = 0;
1674:   PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);

1676:   /* create a linked list for storing column indices of the active row */
1677:   nlnk = n + 1;
1678:   PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);

1680:   /* initial FreeSpace size is f*(ai[n]+1) */
1681:   f                 = info->fill;
1682:   diagonal_fill     = (PetscInt)info->diagonal_fill;
1683:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
1684:   current_space     = free_space;
1685:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);
1686:   current_space_lvl = free_space_lvl;
1687:   for (i=0; i<n; i++) {
1688:     nzi = 0;
1689:     /* copy current row into linked list */
1690:     nnz = ai[r[i]+1] - ai[r[i]];
1692:     cols   = aj + ai[r[i]];
1693:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1694:     PetscIncompleteLLInit(nnz,cols,n,ic,&nlnk,lnk,lnk_lvl,lnkbt);
1695:     nzi   += nlnk;

1697:     /* make sure diagonal entry is included */
1698:     if (diagonal_fill && lnk[i] == -1) {
1699:       fm = n;
1700:       while (lnk[fm] < i) fm = lnk[fm];
1701:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1702:       lnk[fm]    = i;
1703:       lnk_lvl[i] = 0;
1704:       nzi++; dcount++;
1705:     }

1707:     /* add pivot rows into the active row */
1708:     nzbd = 0;
1709:     prow = lnk[n];
1710:     while (prow < i) {
1711:       nnz      = bdiag[prow];
1712:       cols     = bj_ptr[prow] + nnz + 1;
1713:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1714:       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1715:       PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,&nlnk,lnk,lnk_lvl,lnkbt,prow);
1716:       nzi     += nlnk;
1717:       prow     = lnk[prow];
1718:       nzbd++;
1719:     }
1720:     bdiag[i] = nzbd;
1721:     bi[i+1]  = bi[i] + nzi;
1722:     /* if free space is not available, make more free space */
1723:     if (current_space->local_remaining<nzi) {
1724:       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */
1725:       PetscFreeSpaceGet(nnz,&current_space);
1726:       PetscFreeSpaceGet(nnz,&current_space_lvl);
1727:       reallocs++;
1728:     }

1730:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1731:     PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1732:     bj_ptr[i]    = current_space->array;
1733:     bjlvl_ptr[i] = current_space_lvl->array;

1735:     /* make sure the active row i has diagonal entry */

1738:     current_space->array               += nzi;
1739:     current_space->local_used          += nzi;
1740:     current_space->local_remaining     -= nzi;
1741:     current_space_lvl->array           += nzi;
1742:     current_space_lvl->local_used      += nzi;
1743:     current_space_lvl->local_remaining -= nzi;
1744:   }

1746:   ISRestoreIndices(isrow,&r);
1747:   ISRestoreIndices(isicol,&ic);
1748:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1749:   PetscMalloc1(bi[n]+1,&bj);
1750:   PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);

1752:   PetscIncompleteLLDestroy(lnk,lnkbt);
1753:   PetscFreeSpaceDestroy(free_space_lvl);
1754:   PetscFree2(bj_ptr,bjlvl_ptr);

1756: #if defined(PETSC_USE_INFO)
1757:   {
1758:     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1759:     PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
1760:     PetscInfo(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
1761:     PetscInfo(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
1762:     PetscInfo(A,"for best performance.\n");
1763:     if (diagonal_fill) {
1764:       PetscInfo(A,"Detected and replaced %" PetscInt_FMT " missing diagonals\n",dcount);
1765:     }
1766:   }
1767: #endif
1768:   /* put together the new matrix */
1769:   MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
1770:   PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
1771:   b    = (Mat_SeqAIJ*)(fact)->data;

1773:   b->free_a       = PETSC_TRUE;
1774:   b->free_ij      = PETSC_TRUE;
1775:   b->singlemalloc = PETSC_FALSE;

1777:   PetscMalloc1(bdiag[0]+1,&b->a);

1779:   b->j    = bj;
1780:   b->i    = bi;
1781:   b->diag = bdiag;
1782:   b->ilen = NULL;
1783:   b->imax = NULL;
1784:   b->row  = isrow;
1785:   b->col  = iscol;
1786:   PetscObjectReference((PetscObject)isrow);
1787:   PetscObjectReference((PetscObject)iscol);
1788:   b->icol = isicol;

1790:   PetscMalloc1(n+1,&b->solve_work);
1791:   /* In b structure:  Free imax, ilen, old a, old j.
1792:      Allocate bdiag, solve_work, new a, new j */
1793:   PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
1794:   b->maxnz = b->nz = bdiag[0]+1;

1796:   (fact)->info.factor_mallocs    = reallocs;
1797:   (fact)->info.fill_ratio_given  = f;
1798:   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1799:   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1800:   if (a->inode.size) {
1801:     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1802:   }
1803:   MatSeqAIJCheckInode_FactorLU(fact);
1804:   return 0;
1805: }

1807: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1808: {
1809:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1810:   IS                 isicol;
1811:   const PetscInt     *r,*ic;
1812:   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1813:   PetscInt           *bi,*cols,nnz,*cols_lvl;
1814:   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1815:   PetscInt           i,levels,diagonal_fill;
1816:   PetscBool          col_identity,row_identity;
1817:   PetscReal          f;
1818:   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1819:   PetscBT            lnkbt;
1820:   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1821:   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1822:   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1823:   PetscBool          missing;

1826:   MatMissingDiagonal(A,&missing,&i);

1829:   f             = info->fill;
1830:   levels        = (PetscInt)info->levels;
1831:   diagonal_fill = (PetscInt)info->diagonal_fill;

1833:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);

1835:   ISIdentity(isrow,&row_identity);
1836:   ISIdentity(iscol,&col_identity);
1837:   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1838:     MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);

1840:     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1841:     if (a->inode.size) {
1842:       (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1843:     }
1844:     fact->factortype               = MAT_FACTOR_ILU;
1845:     (fact)->info.factor_mallocs    = 0;
1846:     (fact)->info.fill_ratio_given  = info->fill;
1847:     (fact)->info.fill_ratio_needed = 1.0;

1849:     b    = (Mat_SeqAIJ*)(fact)->data;
1850:     b->row  = isrow;
1851:     b->col  = iscol;
1852:     b->icol = isicol;
1853:     PetscMalloc1((fact)->rmap->n+1,&b->solve_work);
1854:     PetscObjectReference((PetscObject)isrow);
1855:     PetscObjectReference((PetscObject)iscol);
1856:     return 0;
1857:   }

1859:   ISGetIndices(isrow,&r);
1860:   ISGetIndices(isicol,&ic);

1862:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1863:   PetscMalloc1(n+1,&bi);
1864:   PetscMalloc1(n+1,&bdiag);
1865:   bi[0] = bdiag[0] = 0;

1867:   PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);

1869:   /* create a linked list for storing column indices of the active row */
1870:   nlnk = n + 1;
1871:   PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);

1873:   /* initial FreeSpace size is f*(ai[n]+1) */
1874:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
1875:   current_space     = free_space;
1876:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);
1877:   current_space_lvl = free_space_lvl;

1879:   for (i=0; i<n; i++) {
1880:     nzi = 0;
1881:     /* copy current row into linked list */
1882:     nnz = ai[r[i]+1] - ai[r[i]];
1884:     cols   = aj + ai[r[i]];
1885:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1886:     PetscIncompleteLLInit(nnz,cols,n,ic,&nlnk,lnk,lnk_lvl,lnkbt);
1887:     nzi   += nlnk;

1889:     /* make sure diagonal entry is included */
1890:     if (diagonal_fill && lnk[i] == -1) {
1891:       fm = n;
1892:       while (lnk[fm] < i) fm = lnk[fm];
1893:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1894:       lnk[fm]    = i;
1895:       lnk_lvl[i] = 0;
1896:       nzi++; dcount++;
1897:     }

1899:     /* add pivot rows into the active row */
1900:     nzbd = 0;
1901:     prow = lnk[n];
1902:     while (prow < i) {
1903:       nnz      = bdiag[prow];
1904:       cols     = bj_ptr[prow] + nnz + 1;
1905:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1906:       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1907:       PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,&nlnk,lnk,lnk_lvl,lnkbt,prow);
1908:       nzi     += nlnk;
1909:       prow     = lnk[prow];
1910:       nzbd++;
1911:     }
1912:     bdiag[i] = nzbd;
1913:     bi[i+1]  = bi[i] + nzi;

1915:     /* if free space is not available, make more free space */
1916:     if (current_space->local_remaining<nzi) {
1917:       nnz  = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */
1918:       PetscFreeSpaceGet(nnz,&current_space);
1919:       PetscFreeSpaceGet(nnz,&current_space_lvl);
1920:       reallocs++;
1921:     }

1923:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1924:     PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1925:     bj_ptr[i]    = current_space->array;
1926:     bjlvl_ptr[i] = current_space_lvl->array;

1928:     /* make sure the active row i has diagonal entry */

1931:     current_space->array               += nzi;
1932:     current_space->local_used          += nzi;
1933:     current_space->local_remaining     -= nzi;
1934:     current_space_lvl->array           += nzi;
1935:     current_space_lvl->local_used      += nzi;
1936:     current_space_lvl->local_remaining -= nzi;
1937:   }

1939:   ISRestoreIndices(isrow,&r);
1940:   ISRestoreIndices(isicol,&ic);

1942:   /* destroy list of free space and other temporary arrays */
1943:   PetscMalloc1(bi[n]+1,&bj);
1944:   PetscFreeSpaceContiguous(&free_space,bj); /* copy free_space -> bj */
1945:   PetscIncompleteLLDestroy(lnk,lnkbt);
1946:   PetscFreeSpaceDestroy(free_space_lvl);
1947:   PetscFree2(bj_ptr,bjlvl_ptr);

1949: #if defined(PETSC_USE_INFO)
1950:   {
1951:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1952:     PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
1953:     PetscInfo(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
1954:     PetscInfo(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
1955:     PetscInfo(A,"for best performance.\n");
1956:     if (diagonal_fill) {
1957:       PetscInfo(A,"Detected and replaced %" PetscInt_FMT " missing diagonals\n",dcount);
1958:     }
1959:   }
1960: #endif

1962:   /* put together the new matrix */
1963:   MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
1964:   PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
1965:   b    = (Mat_SeqAIJ*)(fact)->data;

1967:   b->free_a       = PETSC_TRUE;
1968:   b->free_ij      = PETSC_TRUE;
1969:   b->singlemalloc = PETSC_FALSE;

1971:   PetscMalloc1(bi[n],&b->a);
1972:   b->j = bj;
1973:   b->i = bi;
1974:   for (i=0; i<n; i++) bdiag[i] += bi[i];
1975:   b->diag = bdiag;
1976:   b->ilen = NULL;
1977:   b->imax = NULL;
1978:   b->row  = isrow;
1979:   b->col  = iscol;
1980:   PetscObjectReference((PetscObject)isrow);
1981:   PetscObjectReference((PetscObject)iscol);
1982:   b->icol = isicol;
1983:   PetscMalloc1(n+1,&b->solve_work);
1984:   /* In b structure:  Free imax, ilen, old a, old j.
1985:      Allocate bdiag, solve_work, new a, new j */
1986:   PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1987:   b->maxnz = b->nz = bi[n];

1989:   (fact)->info.factor_mallocs    = reallocs;
1990:   (fact)->info.fill_ratio_given  = f;
1991:   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1992:   (fact)->ops->lufactornumeric   =  MatLUFactorNumeric_SeqAIJ_inplace;
1993:   if (a->inode.size) {
1994:     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1995:   }
1996:   return 0;
1997: }

1999: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2000: {
2001:   Mat            C = B;
2002:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2003:   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2004:   IS             ip=b->row,iip = b->icol;
2005:   const PetscInt *rip,*riip;
2006:   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2007:   PetscInt       *ai=a->i,*aj=a->j;
2008:   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2009:   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2010:   PetscBool      perm_identity;
2011:   FactorShiftCtx sctx;
2012:   PetscReal      rs;
2013:   MatScalar      d,*v;

2015:   /* MatPivotSetUp(): initialize shift context sctx */
2016:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

2018:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2019:     sctx.shift_top = info->zeropivot;
2020:     for (i=0; i<mbs; i++) {
2021:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2022:       d  = (aa)[a->diag[i]];
2023:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2024:       v  = aa+ai[i];
2025:       nz = ai[i+1] - ai[i];
2026:       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2027:       if (rs>sctx.shift_top) sctx.shift_top = rs;
2028:     }
2029:     sctx.shift_top *= 1.1;
2030:     sctx.nshift_max = 5;
2031:     sctx.shift_lo   = 0.;
2032:     sctx.shift_hi   = 1.;
2033:   }

2035:   ISGetIndices(ip,&rip);
2036:   ISGetIndices(iip,&riip);

2038:   /* allocate working arrays
2039:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2040:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2041:   */
2042:   PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);

2044:   do {
2045:     sctx.newshift = PETSC_FALSE;

2047:     for (i=0; i<mbs; i++) c2r[i] = mbs;
2048:     if (mbs) il[0] = 0;

2050:     for (k = 0; k<mbs; k++) {
2051:       /* zero rtmp */
2052:       nz    = bi[k+1] - bi[k];
2053:       bjtmp = bj + bi[k];
2054:       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;

2056:       /* load in initial unfactored row */
2057:       bval = ba + bi[k];
2058:       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2059:       for (j = jmin; j < jmax; j++) {
2060:         col = riip[aj[j]];
2061:         if (col >= k) { /* only take upper triangular entry */
2062:           rtmp[col] = aa[j];
2063:           *bval++   = 0.0; /* for in-place factorization */
2064:         }
2065:       }
2066:       /* shift the diagonal of the matrix: ZeropivotApply() */
2067:       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */

2069:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2070:       dk = rtmp[k];
2071:       i  = c2r[k]; /* first row to be added to k_th row  */

2073:       while (i < k) {
2074:         nexti = c2r[i]; /* next row to be added to k_th row */

2076:         /* compute multiplier, update diag(k) and U(i,k) */
2077:         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2078:         uikdi   = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2079:         dk     += uikdi*ba[ili]; /* update diag[k] */
2080:         ba[ili] = uikdi; /* -U(i,k) */

2082:         /* add multiple of row i to k-th row */
2083:         jmin = ili + 1; jmax = bi[i+1];
2084:         if (jmin < jmax) {
2085:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2086:           /* update il and c2r for row i */
2087:           il[i] = jmin;
2088:           j     = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2089:         }
2090:         i = nexti;
2091:       }

2093:       /* copy data into U(k,:) */
2094:       rs   = 0.0;
2095:       jmin = bi[k]; jmax = bi[k+1]-1;
2096:       if (jmin < jmax) {
2097:         for (j=jmin; j<jmax; j++) {
2098:           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2099:         }
2100:         /* add the k-th row into il and c2r */
2101:         il[k] = jmin;
2102:         i     = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2103:       }

2105:       /* MatPivotCheck() */
2106:       sctx.rs = rs;
2107:       sctx.pv = dk;
2108:       MatPivotCheck(B,A,info,&sctx,i);
2109:       if (sctx.newshift) break;
2110:       dk = sctx.pv;

2112:       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2113:     }
2114:   } while (sctx.newshift);

2116:   PetscFree3(rtmp,il,c2r);
2117:   ISRestoreIndices(ip,&rip);
2118:   ISRestoreIndices(iip,&riip);

2120:   ISIdentity(ip,&perm_identity);
2121:   if (perm_identity) {
2122:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2123:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2124:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2125:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2126:   } else {
2127:     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2128:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2129:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2130:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2131:   }

2133:   C->assembled    = PETSC_TRUE;
2134:   C->preallocated = PETSC_TRUE;

2136:   PetscLogFlops(C->rmap->n);

2138:   /* MatPivotView() */
2139:   if (sctx.nshift) {
2140:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2141:       PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
2142:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2143:       PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2144:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2145:       PetscInfo(A,"number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
2146:     }
2147:   }
2148:   return 0;
2149: }

2151: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2152: {
2153:   Mat            C = B;
2154:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2155:   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2156:   IS             ip=b->row,iip = b->icol;
2157:   const PetscInt *rip,*riip;
2158:   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2159:   PetscInt       *ai=a->i,*aj=a->j;
2160:   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2161:   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2162:   PetscBool      perm_identity;
2163:   FactorShiftCtx sctx;
2164:   PetscReal      rs;
2165:   MatScalar      d,*v;

2167:   /* MatPivotSetUp(): initialize shift context sctx */
2168:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

2170:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2171:     sctx.shift_top = info->zeropivot;
2172:     for (i=0; i<mbs; i++) {
2173:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2174:       d  = (aa)[a->diag[i]];
2175:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2176:       v  = aa+ai[i];
2177:       nz = ai[i+1] - ai[i];
2178:       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2179:       if (rs>sctx.shift_top) sctx.shift_top = rs;
2180:     }
2181:     sctx.shift_top *= 1.1;
2182:     sctx.nshift_max = 5;
2183:     sctx.shift_lo   = 0.;
2184:     sctx.shift_hi   = 1.;
2185:   }

2187:   ISGetIndices(ip,&rip);
2188:   ISGetIndices(iip,&riip);

2190:   /* initialization */
2191:   PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);

2193:   do {
2194:     sctx.newshift = PETSC_FALSE;

2196:     for (i=0; i<mbs; i++) jl[i] = mbs;
2197:     il[0] = 0;

2199:     for (k = 0; k<mbs; k++) {
2200:       /* zero rtmp */
2201:       nz    = bi[k+1] - bi[k];
2202:       bjtmp = bj + bi[k];
2203:       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;

2205:       bval = ba + bi[k];
2206:       /* initialize k-th row by the perm[k]-th row of A */
2207:       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2208:       for (j = jmin; j < jmax; j++) {
2209:         col = riip[aj[j]];
2210:         if (col >= k) { /* only take upper triangular entry */
2211:           rtmp[col] = aa[j];
2212:           *bval++   = 0.0; /* for in-place factorization */
2213:         }
2214:       }
2215:       /* shift the diagonal of the matrix */
2216:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

2218:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2219:       dk = rtmp[k];
2220:       i  = jl[k]; /* first row to be added to k_th row  */

2222:       while (i < k) {
2223:         nexti = jl[i]; /* next row to be added to k_th row */

2225:         /* compute multiplier, update diag(k) and U(i,k) */
2226:         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2227:         uikdi   = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2228:         dk     += uikdi*ba[ili];
2229:         ba[ili] = uikdi; /* -U(i,k) */

2231:         /* add multiple of row i to k-th row */
2232:         jmin = ili + 1; jmax = bi[i+1];
2233:         if (jmin < jmax) {
2234:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2235:           /* update il and jl for row i */
2236:           il[i] = jmin;
2237:           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2238:         }
2239:         i = nexti;
2240:       }

2242:       /* shift the diagonals when zero pivot is detected */
2243:       /* compute rs=sum of abs(off-diagonal) */
2244:       rs   = 0.0;
2245:       jmin = bi[k]+1;
2246:       nz   = bi[k+1] - jmin;
2247:       bcol = bj + jmin;
2248:       for (j=0; j<nz; j++) {
2249:         rs += PetscAbsScalar(rtmp[bcol[j]]);
2250:       }

2252:       sctx.rs = rs;
2253:       sctx.pv = dk;
2254:       MatPivotCheck(B,A,info,&sctx,k);
2255:       if (sctx.newshift) break;
2256:       dk = sctx.pv;

2258:       /* copy data into U(k,:) */
2259:       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2260:       jmin      = bi[k]+1; jmax = bi[k+1];
2261:       if (jmin < jmax) {
2262:         for (j=jmin; j<jmax; j++) {
2263:           col = bj[j]; ba[j] = rtmp[col];
2264:         }
2265:         /* add the k-th row into il and jl */
2266:         il[k] = jmin;
2267:         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2268:       }
2269:     }
2270:   } while (sctx.newshift);

2272:   PetscFree3(rtmp,il,jl);
2273:   ISRestoreIndices(ip,&rip);
2274:   ISRestoreIndices(iip,&riip);

2276:   ISIdentity(ip,&perm_identity);
2277:   if (perm_identity) {
2278:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2279:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2280:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2281:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2282:   } else {
2283:     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2284:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2285:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2286:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2287:   }

2289:   C->assembled    = PETSC_TRUE;
2290:   C->preallocated = PETSC_TRUE;

2292:   PetscLogFlops(C->rmap->n);
2293:   if (sctx.nshift) {
2294:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2295:       PetscInfo(A,"number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2296:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2297:       PetscInfo(A,"number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2298:     }
2299:   }
2300:   return 0;
2301: }

2303: /*
2304:    icc() under revised new data structure.
2305:    Factored arrays bj and ba are stored as
2306:      U(0,:),...,U(i,:),U(n-1,:)

2308:    ui=fact->i is an array of size n+1, in which
2309:    ui+
2310:      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2311:      ui[n]:  points to U(n-1,n-1)+1

2313:   udiag=fact->diag is an array of size n,in which
2314:      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1

2316:    U(i,:) contains udiag[i] as its last entry, i.e.,
2317:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2318: */

2320: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2321: {
2322:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2323:   Mat_SeqSBAIJ       *b;
2324:   PetscBool          perm_identity,missing;
2325:   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2326:   const PetscInt     *rip,*riip;
2327:   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2328:   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2329:   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2330:   PetscReal          fill          =info->fill,levels=info->levels;
2331:   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2332:   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2333:   PetscBT            lnkbt;
2334:   IS                 iperm;

2337:   MatMissingDiagonal(A,&missing,&d);
2339:   ISIdentity(perm,&perm_identity);
2340:   ISInvertPermutation(perm,PETSC_DECIDE,&iperm);

2342:   PetscMalloc1(am+1,&ui);
2343:   PetscMalloc1(am+1,&udiag);
2344:   ui[0] = 0;

2346:   /* ICC(0) without matrix ordering: simply rearrange column indices */
2347:   if (!levels && perm_identity) {
2348:     for (i=0; i<am; i++) {
2349:       ncols    = ai[i+1] - a->diag[i];
2350:       ui[i+1]  = ui[i] + ncols;
2351:       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2352:     }
2353:     PetscMalloc1(ui[am]+1,&uj);
2354:     cols = uj;
2355:     for (i=0; i<am; i++) {
2356:       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2357:       ncols = ai[i+1] - a->diag[i] -1;
2358:       for (j=0; j<ncols; j++) *cols++ = aj[j];
2359:       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2360:     }
2361:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2362:     ISGetIndices(iperm,&riip);
2363:     ISGetIndices(perm,&rip);

2365:     /* initialization */
2366:     PetscMalloc1(am+1,&ajtmp);

2368:     /* jl: linked list for storing indices of the pivot rows
2369:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2370:     PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2371:     for (i=0; i<am; i++) {
2372:       jl[i] = am; il[i] = 0;
2373:     }

2375:     /* create and initialize a linked list for storing column indices of the active row k */
2376:     nlnk = am + 1;
2377:     PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);

2379:     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2380:     PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);
2381:     current_space     = free_space;
2382:     PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);
2383:     current_space_lvl = free_space_lvl;

2385:     for (k=0; k<am; k++) {  /* for each active row k */
2386:       /* initialize lnk by the column indices of row rip[k] of A */
2387:       nzk   = 0;
2388:       ncols = ai[rip[k]+1] - ai[rip[k]];
2390:       ncols_upper = 0;
2391:       for (j=0; j<ncols; j++) {
2392:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2393:         if (riip[i] >= k) { /* only take upper triangular entry */
2394:           ajtmp[ncols_upper] = i;
2395:           ncols_upper++;
2396:         }
2397:       }
2398:       PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,&nlnk,lnk,lnk_lvl,lnkbt);
2399:       nzk += nlnk;

2401:       /* update lnk by computing fill-in for each pivot row to be merged in */
2402:       prow = jl[k]; /* 1st pivot row */

2404:       while (prow < k) {
2405:         nextprow = jl[prow];

2407:         /* merge prow into k-th row */
2408:         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2409:         jmax  = ui[prow+1];
2410:         ncols = jmax-jmin;
2411:         i     = jmin - ui[prow];
2412:         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2413:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2414:         j     = *(uj - 1);
2415:         PetscICCLLAddSorted(ncols,cols,levels,uj,am,&nlnk,lnk,lnk_lvl,lnkbt,j);
2416:         nzk  += nlnk;

2418:         /* update il and jl for prow */
2419:         if (jmin < jmax) {
2420:           il[prow] = jmin;
2421:           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2422:         }
2423:         prow = nextprow;
2424:       }

2426:       /* if free space is not available, make more free space */
2427:       if (current_space->local_remaining<nzk) {
2428:         i    = am - k + 1; /* num of unfactored rows */
2429:         i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2430:         PetscFreeSpaceGet(i,&current_space);
2431:         PetscFreeSpaceGet(i,&current_space_lvl);
2432:         reallocs++;
2433:       }

2435:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2437:       PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);

2439:       /* add the k-th row into il and jl */
2440:       if (nzk > 1) {
2441:         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2442:         jl[k] = jl[i]; jl[i] = k;
2443:         il[k] = ui[k] + 1;
2444:       }
2445:       uj_ptr[k]     = current_space->array;
2446:       uj_lvl_ptr[k] = current_space_lvl->array;

2448:       current_space->array           += nzk;
2449:       current_space->local_used      += nzk;
2450:       current_space->local_remaining -= nzk;

2452:       current_space_lvl->array           += nzk;
2453:       current_space_lvl->local_used      += nzk;
2454:       current_space_lvl->local_remaining -= nzk;

2456:       ui[k+1] = ui[k] + nzk;
2457:     }

2459:     ISRestoreIndices(perm,&rip);
2460:     ISRestoreIndices(iperm,&riip);
2461:     PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2462:     PetscFree(ajtmp);

2464:     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2465:     PetscMalloc1(ui[am]+1,&uj);
2466:     PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor  */
2467:     PetscIncompleteLLDestroy(lnk,lnkbt);
2468:     PetscFreeSpaceDestroy(free_space_lvl);

2470:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2472:   /* put together the new matrix in MATSEQSBAIJ format */
2473:   b               = (Mat_SeqSBAIJ*)(fact)->data;
2474:   b->singlemalloc = PETSC_FALSE;

2476:   PetscMalloc1(ui[am]+1,&b->a);

2478:   b->j             = uj;
2479:   b->i             = ui;
2480:   b->diag          = udiag;
2481:   b->free_diag     = PETSC_TRUE;
2482:   b->ilen          = NULL;
2483:   b->imax          = NULL;
2484:   b->row           = perm;
2485:   b->col           = perm;
2486:   PetscObjectReference((PetscObject)perm);
2487:   PetscObjectReference((PetscObject)perm);
2488:   b->icol          = iperm;
2489:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2491:   PetscMalloc1(am+1,&b->solve_work);
2492:   PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));

2494:   b->maxnz   = b->nz = ui[am];
2495:   b->free_a  = PETSC_TRUE;
2496:   b->free_ij = PETSC_TRUE;

2498:   fact->info.factor_mallocs   = reallocs;
2499:   fact->info.fill_ratio_given = fill;
2500:   if (ai[am] != 0) {
2501:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2502:     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2503:   } else {
2504:     fact->info.fill_ratio_needed = 0.0;
2505:   }
2506: #if defined(PETSC_USE_INFO)
2507:   if (ai[am] != 0) {
2508:     PetscReal af = fact->info.fill_ratio_needed;
2509:     PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2510:     PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2511:     PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2512:   } else {
2513:     PetscInfo(A,"Empty matrix\n");
2514:   }
2515: #endif
2516:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2517:   return 0;
2518: }

2520: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2521: {
2522:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2523:   Mat_SeqSBAIJ       *b;
2524:   PetscBool          perm_identity,missing;
2525:   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2526:   const PetscInt     *rip,*riip;
2527:   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2528:   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2529:   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2530:   PetscReal          fill          =info->fill,levels=info->levels;
2531:   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2532:   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2533:   PetscBT            lnkbt;
2534:   IS                 iperm;

2537:   MatMissingDiagonal(A,&missing,&d);
2539:   ISIdentity(perm,&perm_identity);
2540:   ISInvertPermutation(perm,PETSC_DECIDE,&iperm);

2542:   PetscMalloc1(am+1,&ui);
2543:   PetscMalloc1(am+1,&udiag);
2544:   ui[0] = 0;

2546:   /* ICC(0) without matrix ordering: simply copies fill pattern */
2547:   if (!levels && perm_identity) {

2549:     for (i=0; i<am; i++) {
2550:       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2551:       udiag[i] = ui[i];
2552:     }
2553:     PetscMalloc1(ui[am]+1,&uj);
2554:     cols = uj;
2555:     for (i=0; i<am; i++) {
2556:       aj    = a->j + a->diag[i];
2557:       ncols = ui[i+1] - ui[i];
2558:       for (j=0; j<ncols; j++) *cols++ = *aj++;
2559:     }
2560:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2561:     ISGetIndices(iperm,&riip);
2562:     ISGetIndices(perm,&rip);

2564:     /* initialization */
2565:     PetscMalloc1(am+1,&ajtmp);

2567:     /* jl: linked list for storing indices of the pivot rows
2568:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2569:     PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2570:     for (i=0; i<am; i++) {
2571:       jl[i] = am; il[i] = 0;
2572:     }

2574:     /* create and initialize a linked list for storing column indices of the active row k */
2575:     nlnk = am + 1;
2576:     PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);

2578:     /* initial FreeSpace size is fill*(ai[am]+1) */
2579:     PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);
2580:     current_space     = free_space;
2581:     PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);
2582:     current_space_lvl = free_space_lvl;

2584:     for (k=0; k<am; k++) {  /* for each active row k */
2585:       /* initialize lnk by the column indices of row rip[k] of A */
2586:       nzk   = 0;
2587:       ncols = ai[rip[k]+1] - ai[rip[k]];
2589:       ncols_upper = 0;
2590:       for (j=0; j<ncols; j++) {
2591:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2592:         if (riip[i] >= k) { /* only take upper triangular entry */
2593:           ajtmp[ncols_upper] = i;
2594:           ncols_upper++;
2595:         }
2596:       }
2597:       PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,&nlnk,lnk,lnk_lvl,lnkbt);
2598:       nzk += nlnk;

2600:       /* update lnk by computing fill-in for each pivot row to be merged in */
2601:       prow = jl[k]; /* 1st pivot row */

2603:       while (prow < k) {
2604:         nextprow = jl[prow];

2606:         /* merge prow into k-th row */
2607:         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2608:         jmax  = ui[prow+1];
2609:         ncols = jmax-jmin;
2610:         i     = jmin - ui[prow];
2611:         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2612:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2613:         j     = *(uj - 1);
2614:         PetscICCLLAddSorted(ncols,cols,levels,uj,am,&nlnk,lnk,lnk_lvl,lnkbt,j);
2615:         nzk  += nlnk;

2617:         /* update il and jl for prow */
2618:         if (jmin < jmax) {
2619:           il[prow] = jmin;
2620:           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2621:         }
2622:         prow = nextprow;
2623:       }

2625:       /* if free space is not available, make more free space */
2626:       if (current_space->local_remaining<nzk) {
2627:         i    = am - k + 1; /* num of unfactored rows */
2628:         i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2629:         PetscFreeSpaceGet(i,&current_space);
2630:         PetscFreeSpaceGet(i,&current_space_lvl);
2631:         reallocs++;
2632:       }

2634:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2636:       PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);

2638:       /* add the k-th row into il and jl */
2639:       if (nzk > 1) {
2640:         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2641:         jl[k] = jl[i]; jl[i] = k;
2642:         il[k] = ui[k] + 1;
2643:       }
2644:       uj_ptr[k]     = current_space->array;
2645:       uj_lvl_ptr[k] = current_space_lvl->array;

2647:       current_space->array           += nzk;
2648:       current_space->local_used      += nzk;
2649:       current_space->local_remaining -= nzk;

2651:       current_space_lvl->array           += nzk;
2652:       current_space_lvl->local_used      += nzk;
2653:       current_space_lvl->local_remaining -= nzk;

2655:       ui[k+1] = ui[k] + nzk;
2656:     }

2658: #if defined(PETSC_USE_INFO)
2659:     if (ai[am] != 0) {
2660:       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2661:       PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2662:       PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2663:       PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2664:     } else {
2665:       PetscInfo(A,"Empty matrix\n");
2666:     }
2667: #endif

2669:     ISRestoreIndices(perm,&rip);
2670:     ISRestoreIndices(iperm,&riip);
2671:     PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2672:     PetscFree(ajtmp);

2674:     /* destroy list of free space and other temporary array(s) */
2675:     PetscMalloc1(ui[am]+1,&uj);
2676:     PetscFreeSpaceContiguous(&free_space,uj);
2677:     PetscIncompleteLLDestroy(lnk,lnkbt);
2678:     PetscFreeSpaceDestroy(free_space_lvl);

2680:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2682:   /* put together the new matrix in MATSEQSBAIJ format */

2684:   b               = (Mat_SeqSBAIJ*)fact->data;
2685:   b->singlemalloc = PETSC_FALSE;

2687:   PetscMalloc1(ui[am]+1,&b->a);

2689:   b->j         = uj;
2690:   b->i         = ui;
2691:   b->diag      = udiag;
2692:   b->free_diag = PETSC_TRUE;
2693:   b->ilen      = NULL;
2694:   b->imax      = NULL;
2695:   b->row       = perm;
2696:   b->col       = perm;

2698:   PetscObjectReference((PetscObject)perm);
2699:   PetscObjectReference((PetscObject)perm);

2701:   b->icol          = iperm;
2702:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2703:   PetscMalloc1(am+1,&b->solve_work);
2704:   PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
2705:   b->maxnz         = b->nz = ui[am];
2706:   b->free_a        = PETSC_TRUE;
2707:   b->free_ij       = PETSC_TRUE;

2709:   fact->info.factor_mallocs   = reallocs;
2710:   fact->info.fill_ratio_given = fill;
2711:   if (ai[am] != 0) {
2712:     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2713:   } else {
2714:     fact->info.fill_ratio_needed = 0.0;
2715:   }
2716:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2717:   return 0;
2718: }

2720: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2721: {
2722:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2723:   Mat_SeqSBAIJ       *b;
2724:   PetscBool          perm_identity,missing;
2725:   PetscReal          fill = info->fill;
2726:   const PetscInt     *rip,*riip;
2727:   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2728:   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2729:   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2730:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2731:   PetscBT            lnkbt;
2732:   IS                 iperm;

2735:   MatMissingDiagonal(A,&missing,&i);

2738:   /* check whether perm is the identity mapping */
2739:   ISIdentity(perm,&perm_identity);
2740:   ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2741:   ISGetIndices(iperm,&riip);
2742:   ISGetIndices(perm,&rip);

2744:   /* initialization */
2745:   PetscMalloc1(am+1,&ui);
2746:   PetscMalloc1(am+1,&udiag);
2747:   ui[0] = 0;

2749:   /* jl: linked list for storing indices of the pivot rows
2750:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2751:   PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2752:   for (i=0; i<am; i++) {
2753:     jl[i] = am; il[i] = 0;
2754:   }

2756:   /* create and initialize a linked list for storing column indices of the active row k */
2757:   nlnk = am + 1;
2758:   PetscLLCreate(am,am,nlnk,lnk,lnkbt);

2760:   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2761:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);
2762:   current_space = free_space;

2764:   for (k=0; k<am; k++) {  /* for each active row k */
2765:     /* initialize lnk by the column indices of row rip[k] of A */
2766:     nzk   = 0;
2767:     ncols = ai[rip[k]+1] - ai[rip[k]];
2769:     ncols_upper = 0;
2770:     for (j=0; j<ncols; j++) {
2771:       i = riip[*(aj + ai[rip[k]] + j)];
2772:       if (i >= k) { /* only take upper triangular entry */
2773:         cols[ncols_upper] = i;
2774:         ncols_upper++;
2775:       }
2776:     }
2777:     PetscLLAdd(ncols_upper,cols,am,&nlnk,lnk,lnkbt);
2778:     nzk += nlnk;

2780:     /* update lnk by computing fill-in for each pivot row to be merged in */
2781:     prow = jl[k]; /* 1st pivot row */

2783:     while (prow < k) {
2784:       nextprow = jl[prow];
2785:       /* merge prow into k-th row */
2786:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2787:       jmax   = ui[prow+1];
2788:       ncols  = jmax-jmin;
2789:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2790:       PetscLLAddSorted(ncols,uj_ptr,am,&nlnk,lnk,lnkbt);
2791:       nzk   += nlnk;

2793:       /* update il and jl for prow */
2794:       if (jmin < jmax) {
2795:         il[prow] = jmin;
2796:         j        = *uj_ptr;
2797:         jl[prow] = jl[j];
2798:         jl[j]    = prow;
2799:       }
2800:       prow = nextprow;
2801:     }

2803:     /* if free space is not available, make more free space */
2804:     if (current_space->local_remaining<nzk) {
2805:       i    = am - k + 1; /* num of unfactored rows */
2806:       i    = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2807:       PetscFreeSpaceGet(i,&current_space);
2808:       reallocs++;
2809:     }

2811:     /* copy data into free space, then initialize lnk */
2812:     PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);

2814:     /* add the k-th row into il and jl */
2815:     if (nzk > 1) {
2816:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2817:       jl[k] = jl[i]; jl[i] = k;
2818:       il[k] = ui[k] + 1;
2819:     }
2820:     ui_ptr[k] = current_space->array;

2822:     current_space->array           += nzk;
2823:     current_space->local_used      += nzk;
2824:     current_space->local_remaining -= nzk;

2826:     ui[k+1] = ui[k] + nzk;
2827:   }

2829:   ISRestoreIndices(perm,&rip);
2830:   ISRestoreIndices(iperm,&riip);
2831:   PetscFree4(ui_ptr,jl,il,cols);

2833:   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2834:   PetscMalloc1(ui[am]+1,&uj);
2835:   PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2836:   PetscLLDestroy(lnk,lnkbt);

2838:   /* put together the new matrix in MATSEQSBAIJ format */

2840:   b               = (Mat_SeqSBAIJ*)fact->data;
2841:   b->singlemalloc = PETSC_FALSE;
2842:   b->free_a       = PETSC_TRUE;
2843:   b->free_ij      = PETSC_TRUE;

2845:   PetscMalloc1(ui[am]+1,&b->a);

2847:   b->j         = uj;
2848:   b->i         = ui;
2849:   b->diag      = udiag;
2850:   b->free_diag = PETSC_TRUE;
2851:   b->ilen      = NULL;
2852:   b->imax      = NULL;
2853:   b->row       = perm;
2854:   b->col       = perm;

2856:   PetscObjectReference((PetscObject)perm);
2857:   PetscObjectReference((PetscObject)perm);

2859:   b->icol          = iperm;
2860:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2862:   PetscMalloc1(am+1,&b->solve_work);
2863:   PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));

2865:   b->maxnz = b->nz = ui[am];

2867:   fact->info.factor_mallocs   = reallocs;
2868:   fact->info.fill_ratio_given = fill;
2869:   if (ai[am] != 0) {
2870:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2871:     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2872:   } else {
2873:     fact->info.fill_ratio_needed = 0.0;
2874:   }
2875: #if defined(PETSC_USE_INFO)
2876:   if (ai[am] != 0) {
2877:     PetscReal af = fact->info.fill_ratio_needed;
2878:     PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2879:     PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2880:     PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2881:   } else {
2882:     PetscInfo(A,"Empty matrix\n");
2883:   }
2884: #endif
2885:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2886:   return 0;
2887: }

2889: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2890: {
2891:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2892:   Mat_SeqSBAIJ       *b;
2893:   PetscBool          perm_identity,missing;
2894:   PetscReal          fill = info->fill;
2895:   const PetscInt     *rip,*riip;
2896:   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2897:   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2898:   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2899:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2900:   PetscBT            lnkbt;
2901:   IS                 iperm;

2904:   MatMissingDiagonal(A,&missing,&i);

2907:   /* check whether perm is the identity mapping */
2908:   ISIdentity(perm,&perm_identity);
2909:   ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2910:   ISGetIndices(iperm,&riip);
2911:   ISGetIndices(perm,&rip);

2913:   /* initialization */
2914:   PetscMalloc1(am+1,&ui);
2915:   ui[0] = 0;

2917:   /* jl: linked list for storing indices of the pivot rows
2918:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2919:   PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2920:   for (i=0; i<am; i++) {
2921:     jl[i] = am; il[i] = 0;
2922:   }

2924:   /* create and initialize a linked list for storing column indices of the active row k */
2925:   nlnk = am + 1;
2926:   PetscLLCreate(am,am,nlnk,lnk,lnkbt);

2928:   /* initial FreeSpace size is fill*(ai[am]+1) */
2929:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);
2930:   current_space = free_space;

2932:   for (k=0; k<am; k++) {  /* for each active row k */
2933:     /* initialize lnk by the column indices of row rip[k] of A */
2934:     nzk   = 0;
2935:     ncols = ai[rip[k]+1] - ai[rip[k]];
2937:     ncols_upper = 0;
2938:     for (j=0; j<ncols; j++) {
2939:       i = riip[*(aj + ai[rip[k]] + j)];
2940:       if (i >= k) { /* only take upper triangular entry */
2941:         cols[ncols_upper] = i;
2942:         ncols_upper++;
2943:       }
2944:     }
2945:     PetscLLAdd(ncols_upper,cols,am,&nlnk,lnk,lnkbt);
2946:     nzk += nlnk;

2948:     /* update lnk by computing fill-in for each pivot row to be merged in */
2949:     prow = jl[k]; /* 1st pivot row */

2951:     while (prow < k) {
2952:       nextprow = jl[prow];
2953:       /* merge prow into k-th row */
2954:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2955:       jmax   = ui[prow+1];
2956:       ncols  = jmax-jmin;
2957:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2958:       PetscLLAddSorted(ncols,uj_ptr,am,&nlnk,lnk,lnkbt);
2959:       nzk   += nlnk;

2961:       /* update il and jl for prow */
2962:       if (jmin < jmax) {
2963:         il[prow] = jmin;
2964:         j        = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2965:       }
2966:       prow = nextprow;
2967:     }

2969:     /* if free space is not available, make more free space */
2970:     if (current_space->local_remaining<nzk) {
2971:       i    = am - k + 1; /* num of unfactored rows */
2972:       i    = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2973:       PetscFreeSpaceGet(i,&current_space);
2974:       reallocs++;
2975:     }

2977:     /* copy data into free space, then initialize lnk */
2978:     PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);

2980:     /* add the k-th row into il and jl */
2981:     if (nzk-1 > 0) {
2982:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2983:       jl[k] = jl[i]; jl[i] = k;
2984:       il[k] = ui[k] + 1;
2985:     }
2986:     ui_ptr[k] = current_space->array;

2988:     current_space->array           += nzk;
2989:     current_space->local_used      += nzk;
2990:     current_space->local_remaining -= nzk;

2992:     ui[k+1] = ui[k] + nzk;
2993:   }

2995: #if defined(PETSC_USE_INFO)
2996:   if (ai[am] != 0) {
2997:     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
2998:     PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2999:     PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
3000:     PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
3001:   } else {
3002:     PetscInfo(A,"Empty matrix\n");
3003:   }
3004: #endif

3006:   ISRestoreIndices(perm,&rip);
3007:   ISRestoreIndices(iperm,&riip);
3008:   PetscFree4(ui_ptr,jl,il,cols);

3010:   /* destroy list of free space and other temporary array(s) */
3011:   PetscMalloc1(ui[am]+1,&uj);
3012:   PetscFreeSpaceContiguous(&free_space,uj);
3013:   PetscLLDestroy(lnk,lnkbt);

3015:   /* put together the new matrix in MATSEQSBAIJ format */

3017:   b               = (Mat_SeqSBAIJ*)fact->data;
3018:   b->singlemalloc = PETSC_FALSE;
3019:   b->free_a       = PETSC_TRUE;
3020:   b->free_ij      = PETSC_TRUE;

3022:   PetscMalloc1(ui[am]+1,&b->a);

3024:   b->j    = uj;
3025:   b->i    = ui;
3026:   b->diag = NULL;
3027:   b->ilen = NULL;
3028:   b->imax = NULL;
3029:   b->row  = perm;
3030:   b->col  = perm;

3032:   PetscObjectReference((PetscObject)perm);
3033:   PetscObjectReference((PetscObject)perm);

3035:   b->icol          = iperm;
3036:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

3038:   PetscMalloc1(am+1,&b->solve_work);
3039:   PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
3040:   b->maxnz = b->nz = ui[am];

3042:   fact->info.factor_mallocs   = reallocs;
3043:   fact->info.fill_ratio_given = fill;
3044:   if (ai[am] != 0) {
3045:     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3046:   } else {
3047:     fact->info.fill_ratio_needed = 0.0;
3048:   }
3049:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3050:   return 0;
3051: }

3053: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3054: {
3055:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3056:   PetscInt          n   = A->rmap->n;
3057:   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3058:   PetscScalar       *x,sum;
3059:   const PetscScalar *b;
3060:   const MatScalar   *aa = a->a,*v;
3061:   PetscInt          i,nz;

3063:   if (!n) return 0;

3065:   VecGetArrayRead(bb,&b);
3066:   VecGetArrayWrite(xx,&x);

3068:   /* forward solve the lower triangular */
3069:   x[0] = b[0];
3070:   v    = aa;
3071:   vi   = aj;
3072:   for (i=1; i<n; i++) {
3073:     nz  = ai[i+1] - ai[i];
3074:     sum = b[i];
3075:     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3076:     v   += nz;
3077:     vi  += nz;
3078:     x[i] = sum;
3079:   }

3081:   /* backward solve the upper triangular */
3082:   for (i=n-1; i>=0; i--) {
3083:     v   = aa + adiag[i+1] + 1;
3084:     vi  = aj + adiag[i+1] + 1;
3085:     nz  = adiag[i] - adiag[i+1]-1;
3086:     sum = x[i];
3087:     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3088:     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3089:   }

3091:   PetscLogFlops(2.0*a->nz - A->cmap->n);
3092:   VecRestoreArrayRead(bb,&b);
3093:   VecRestoreArrayWrite(xx,&x);
3094:   return 0;
3095: }

3097: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3098: {
3099:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
3100:   IS                iscol = a->col,isrow = a->row;
3101:   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3102:   const PetscInt    *rout,*cout,*r,*c;
3103:   PetscScalar       *x,*tmp,sum;
3104:   const PetscScalar *b;
3105:   const MatScalar   *aa = a->a,*v;

3107:   if (!n) return 0;

3109:   VecGetArrayRead(bb,&b);
3110:   VecGetArrayWrite(xx,&x);
3111:   tmp  = a->solve_work;

3113:   ISGetIndices(isrow,&rout); r = rout;
3114:   ISGetIndices(iscol,&cout); c = cout;

3116:   /* forward solve the lower triangular */
3117:   tmp[0] = b[r[0]];
3118:   v      = aa;
3119:   vi     = aj;
3120:   for (i=1; i<n; i++) {
3121:     nz  = ai[i+1] - ai[i];
3122:     sum = b[r[i]];
3123:     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3124:     tmp[i] = sum;
3125:     v     += nz; vi += nz;
3126:   }

3128:   /* backward solve the upper triangular */
3129:   for (i=n-1; i>=0; i--) {
3130:     v   = aa + adiag[i+1]+1;
3131:     vi  = aj + adiag[i+1]+1;
3132:     nz  = adiag[i]-adiag[i+1]-1;
3133:     sum = tmp[i];
3134:     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3135:     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3136:   }

3138:   ISRestoreIndices(isrow,&rout);
3139:   ISRestoreIndices(iscol,&cout);
3140:   VecRestoreArrayRead(bb,&b);
3141:   VecRestoreArrayWrite(xx,&x);
3142:   PetscLogFlops(2.0*a->nz - A->cmap->n);
3143:   return 0;
3144: }

3146: /*
3147:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3148: */
3149: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3150: {
3151:   Mat            B = *fact;
3152:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b;
3153:   IS             isicol;
3154:   const PetscInt *r,*ic;
3155:   PetscInt       i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3156:   PetscInt       *bi,*bj,*bdiag,*bdiag_rev;
3157:   PetscInt       row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3158:   PetscInt       nlnk,*lnk;
3159:   PetscBT        lnkbt;
3160:   PetscBool      row_identity,icol_identity;
3161:   MatScalar      *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3162:   const PetscInt *ics;
3163:   PetscInt       j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3164:   PetscReal      dt     =info->dt,shift=info->shiftamount;
3165:   PetscInt       dtcount=(PetscInt)info->dtcount,nnz_max;
3166:   PetscBool      missing;

3168:   if (dt      == PETSC_DEFAULT) dt = 0.005;
3169:   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);

3171:   /* ------- symbolic factorization, can be reused ---------*/
3172:   MatMissingDiagonal(A,&missing,&i);
3174:   adiag=a->diag;

3176:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);

3178:   /* bdiag is location of diagonal in factor */
3179:   PetscMalloc1(n+1,&bdiag);     /* becomes b->diag */
3180:   PetscMalloc1(n+1,&bdiag_rev); /* temporary */

3182:   /* allocate row pointers bi */
3183:   PetscMalloc1(2*n+2,&bi);

3185:   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3186:   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3187:   nnz_max = ai[n]+2*n*dtcount+2;

3189:   PetscMalloc1(nnz_max+1,&bj);
3190:   PetscMalloc1(nnz_max+1,&ba);

3192:   /* put together the new matrix */
3193:   MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
3194:   PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
3195:   b    = (Mat_SeqAIJ*)B->data;

3197:   b->free_a       = PETSC_TRUE;
3198:   b->free_ij      = PETSC_TRUE;
3199:   b->singlemalloc = PETSC_FALSE;

3201:   b->a    = ba;
3202:   b->j    = bj;
3203:   b->i    = bi;
3204:   b->diag = bdiag;
3205:   b->ilen = NULL;
3206:   b->imax = NULL;
3207:   b->row  = isrow;
3208:   b->col  = iscol;
3209:   PetscObjectReference((PetscObject)isrow);
3210:   PetscObjectReference((PetscObject)iscol);
3211:   b->icol = isicol;

3213:   PetscMalloc1(n+1,&b->solve_work);
3214:   PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
3215:   b->maxnz = nnz_max;

3217:   B->factortype            = MAT_FACTOR_ILUDT;
3218:   B->info.factor_mallocs   = 0;
3219:   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3220:   /* ------- end of symbolic factorization ---------*/

3222:   ISGetIndices(isrow,&r);
3223:   ISGetIndices(isicol,&ic);
3224:   ics  = ic;

3226:   /* linked list for storing column indices of the active row */
3227:   nlnk = n + 1;
3228:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

3230:   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3231:   PetscMalloc2(n,&im,n,&jtmp);
3232:   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3233:   PetscMalloc2(n,&rtmp,n,&vtmp);
3234:   PetscArrayzero(rtmp,n);

3236:   bi[0]        = 0;
3237:   bdiag[0]     = nnz_max-1; /* location of diag[0] in factor B */
3238:   bdiag_rev[n] = bdiag[0];
3239:   bi[2*n+1]    = bdiag[0]+1; /* endof bj and ba array */
3240:   for (i=0; i<n; i++) {
3241:     /* copy initial fill into linked list */
3242:     nzi = ai[r[i]+1] - ai[r[i]];
3244:     nzi_al = adiag[r[i]] - ai[r[i]];
3245:     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3246:     ajtmp  = aj + ai[r[i]];
3247:     PetscLLAddPerm(nzi,ajtmp,ic,n,&nlnk,lnk,lnkbt);

3249:     /* load in initial (unfactored row) */
3250:     aatmp = a->a + ai[r[i]];
3251:     for (j=0; j<nzi; j++) {
3252:       rtmp[ics[*ajtmp++]] = *aatmp++;
3253:     }

3255:     /* add pivot rows into linked list */
3256:     row = lnk[n];
3257:     while (row < i) {
3258:       nzi_bl = bi[row+1] - bi[row] + 1;
3259:       bjtmp  = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3260:       PetscLLAddSortedLU(bjtmp,row,&nlnk,lnk,lnkbt,i,nzi_bl,im);
3261:       nzi   += nlnk;
3262:       row    = lnk[row];
3263:     }

3265:     /* copy data from lnk into jtmp, then initialize lnk */
3266:     PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);

3268:     /* numerical factorization */
3269:     bjtmp = jtmp;
3270:     row   = *bjtmp++; /* 1st pivot row */
3271:     while (row < i) {
3272:       pc         = rtmp + row;
3273:       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3274:       multiplier = (*pc) * (*pv);
3275:       *pc        = multiplier;
3276:       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3277:         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3278:         pv = ba + bdiag[row+1] + 1;
3279:         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3280:         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3281:         PetscLogFlops(1+2.0*nz);
3282:       }
3283:       row = *bjtmp++;
3284:     }

3286:     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3287:     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3288:     nzi_bl   = 0; j = 0;
3289:     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3290:       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3291:       nzi_bl++; j++;
3292:     }
3293:     nzi_bu = nzi - nzi_bl -1;
3294:     while (j < nzi) {
3295:       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3296:       j++;
3297:     }

3299:     bjtmp = bj + bi[i];
3300:     batmp = ba + bi[i];
3301:     /* apply level dropping rule to L part */
3302:     ncut = nzi_al + dtcount;
3303:     if (ncut < nzi_bl) {
3304:       PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);
3305:       PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
3306:     } else {
3307:       ncut = nzi_bl;
3308:     }
3309:     for (j=0; j<ncut; j++) {
3310:       bjtmp[j] = jtmp[j];
3311:       batmp[j] = vtmp[j];
3312:     }
3313:     bi[i+1] = bi[i] + ncut;
3314:     nzi     = ncut + 1;

3316:     /* apply level dropping rule to U part */
3317:     ncut = nzi_au + dtcount;
3318:     if (ncut < nzi_bu) {
3319:       PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);
3320:       PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
3321:     } else {
3322:       ncut = nzi_bu;
3323:     }
3324:     nzi += ncut;

3326:     /* mark bdiagonal */
3327:     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3328:     bdiag_rev[n-i-1] = bdiag[i+1];
3329:     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3330:     bjtmp            = bj + bdiag[i];
3331:     batmp            = ba + bdiag[i];
3332:     *bjtmp           = i;
3333:     *batmp           = diag_tmp; /* rtmp[i]; */
3334:     if (*batmp == 0.0) {
3335:       *batmp = dt+shift;
3336:     }
3337:     *batmp = 1.0/(*batmp); /* invert diagonal entries for simpler triangular solves */

3339:     bjtmp = bj + bdiag[i+1]+1;
3340:     batmp = ba + bdiag[i+1]+1;
3341:     for (k=0; k<ncut; k++) {
3342:       bjtmp[k] = jtmp[nzi_bl+1+k];
3343:       batmp[k] = vtmp[nzi_bl+1+k];
3344:     }

3346:     im[i] = nzi;   /* used by PetscLLAddSortedLU() */
3347:   } /* for (i=0; i<n; i++) */

3350:   ISRestoreIndices(isrow,&r);
3351:   ISRestoreIndices(isicol,&ic);

3353:   PetscLLDestroy(lnk,lnkbt);
3354:   PetscFree2(im,jtmp);
3355:   PetscFree2(rtmp,vtmp);
3356:   PetscFree(bdiag_rev);

3358:   PetscLogFlops(B->cmap->n);
3359:   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];

3361:   ISIdentity(isrow,&row_identity);
3362:   ISIdentity(isicol,&icol_identity);
3363:   if (row_identity && icol_identity) {
3364:     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3365:   } else {
3366:     B->ops->solve = MatSolve_SeqAIJ;
3367:   }

3369:   B->ops->solveadd          = NULL;
3370:   B->ops->solvetranspose    = NULL;
3371:   B->ops->solvetransposeadd = NULL;
3372:   B->ops->matsolve          = NULL;
3373:   B->assembled              = PETSC_TRUE;
3374:   B->preallocated           = PETSC_TRUE;
3375:   return 0;
3376: }

3378: /* a wraper of MatILUDTFactor_SeqAIJ() */
3379: /*
3380:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3381: */

3383: PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3384: {
3385:   MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);
3386:   return 0;
3387: }

3389: /*
3390:    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3391:    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3392: */
3393: /*
3394:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3395: */

3397: PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3398: {
3399:   Mat            C     =fact;
3400:   Mat_SeqAIJ     *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3401:   IS             isrow = b->row,isicol = b->icol;
3402:   const PetscInt *r,*ic,*ics;
3403:   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3404:   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3405:   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3406:   PetscReal      dt=info->dt,shift=info->shiftamount;
3407:   PetscBool      row_identity, col_identity;

3409:   ISGetIndices(isrow,&r);
3410:   ISGetIndices(isicol,&ic);
3411:   PetscMalloc1(n+1,&rtmp);
3412:   ics  = ic;

3414:   for (i=0; i<n; i++) {
3415:     /* initialize rtmp array */
3416:     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3417:     bjtmp = bj + bi[i];
3418:     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3419:     rtmp[i] = 0.0;
3420:     nzu     = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3421:     bjtmp   = bj + bdiag[i+1] + 1;
3422:     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;

3424:     /* load in initial unfactored row of A */
3425:     nz    = ai[r[i]+1] - ai[r[i]];
3426:     ajtmp = aj + ai[r[i]];
3427:     v     = aa + ai[r[i]];
3428:     for (j=0; j<nz; j++) {
3429:       rtmp[ics[*ajtmp++]] = v[j];
3430:     }

3432:     /* numerical factorization */
3433:     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3434:     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3435:     k     = 0;
3436:     while (k < nzl) {
3437:       row        = *bjtmp++;
3438:       pc         = rtmp + row;
3439:       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3440:       multiplier = (*pc) * (*pv);
3441:       *pc        = multiplier;
3442:       if (PetscAbsScalar(multiplier) > dt) {
3443:         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3444:         pv = b->a + bdiag[row+1] + 1;
3445:         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3446:         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3447:         PetscLogFlops(1+2.0*nz);
3448:       }
3449:       k++;
3450:     }

3452:     /* finished row so stick it into b->a */
3453:     /* L-part */
3454:     pv  = b->a + bi[i];
3455:     pj  = bj + bi[i];
3456:     nzl = bi[i+1] - bi[i];
3457:     for (j=0; j<nzl; j++) {
3458:       pv[j] = rtmp[pj[j]];
3459:     }

3461:     /* diagonal: invert diagonal entries for simpler triangular solves */
3462:     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3463:     b->a[bdiag[i]] = 1.0/rtmp[i];

3465:     /* U-part */
3466:     pv  = b->a + bdiag[i+1] + 1;
3467:     pj  = bj + bdiag[i+1] + 1;
3468:     nzu = bdiag[i] - bdiag[i+1] - 1;
3469:     for (j=0; j<nzu; j++) {
3470:       pv[j] = rtmp[pj[j]];
3471:     }
3472:   }

3474:   PetscFree(rtmp);
3475:   ISRestoreIndices(isicol,&ic);
3476:   ISRestoreIndices(isrow,&r);

3478:   ISIdentity(isrow,&row_identity);
3479:   ISIdentity(isicol,&col_identity);
3480:   if (row_identity && col_identity) {
3481:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3482:   } else {
3483:     C->ops->solve = MatSolve_SeqAIJ;
3484:   }
3485:   C->ops->solveadd          = NULL;
3486:   C->ops->solvetranspose    = NULL;
3487:   C->ops->solvetransposeadd = NULL;
3488:   C->ops->matsolve          = NULL;
3489:   C->assembled              = PETSC_TRUE;
3490:   C->preallocated           = PETSC_TRUE;

3492:   PetscLogFlops(C->cmap->n);
3493:   return 0;
3494: }