Actual source code: aijfact.c

petsc-3.7.3 2016-08-01
Report Typos and Errors
  2: #include <../src/mat/impls/aij/seq/aij.h>
  3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  4: #include <petscbt.h>
  5: #include <../src/mat/utils/freespace.h>

  9: /*
 10:       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix

 12:       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
 13: */
 14: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
 15: {
 16:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
 17:   PetscErrorCode    ierr;
 18:   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
 19:   const PetscInt    *ai = a->i, *aj = a->j;
 20:   const PetscScalar *aa = a->a;
 21:   PetscBool         *done;
 22:   PetscReal         best,past = 0,future;

 25:   /* pick initial row */
 26:   best = -1;
 27:   for (i=0; i<n; i++) {
 28:     future = 0.0;
 29:     for (j=ai[i]; j<ai[i+1]; j++) {
 30:       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
 31:       else              past  = PetscAbsScalar(aa[j]);
 32:     }
 33:     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 34:     if (past/future > best) {
 35:       best    = past/future;
 36:       current = i;
 37:     }
 38:   }

 40:   PetscMalloc1(n,&done);
 41:   PetscMemzero(done,n*sizeof(PetscBool));
 42:   PetscMalloc1(n,&order);
 43:   order[0] = current;
 44:   for (i=0; i<n-1; i++) {
 45:     done[current] = PETSC_TRUE;
 46:     best          = -1;
 47:     /* loop over all neighbors of current pivot */
 48:     for (j=ai[current]; j<ai[current+1]; j++) {
 49:       jj = aj[j];
 50:       if (done[jj]) continue;
 51:       /* loop over columns of potential next row computing weights for below and above diagonal */
 52:       past = future = 0.0;
 53:       for (k=ai[jj]; k<ai[jj+1]; k++) {
 54:         kk = aj[k];
 55:         if (done[kk]) past += PetscAbsScalar(aa[k]);
 56:         else if (kk != jj) future += PetscAbsScalar(aa[k]);
 57:       }
 58:       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 59:       if (past/future > best) {
 60:         best       = past/future;
 61:         newcurrent = jj;
 62:       }
 63:     }
 64:     if (best == -1) { /* no neighbors to select from so select best of all that remain */
 65:       best = -1;
 66:       for (k=0; k<n; k++) {
 67:         if (done[k]) continue;
 68:         future = 0.0;
 69:         past   = 0.0;
 70:         for (j=ai[k]; j<ai[k+1]; j++) {
 71:           kk = aj[j];
 72:           if (done[kk])       past += PetscAbsScalar(aa[j]);
 73:           else if (kk != k) future += PetscAbsScalar(aa[j]);
 74:         }
 75:         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 76:         if (past/future > best) {
 77:           best       = past/future;
 78:           newcurrent = k;
 79:         }
 80:       }
 81:     }
 82:     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
 83:     current    = newcurrent;
 84:     order[i+1] = current;
 85:   }
 86:   ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);
 87:   *icol = *irow;
 88:   PetscObjectReference((PetscObject)*irow);
 89:   PetscFree(done);
 90:   PetscFree(order);
 91:   return(0);
 92: }

 96: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
 97: {
 98:   PetscInt       n = A->rmap->n;

102: #if defined(PETSC_USE_COMPLEX)
103:   if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
104: #endif
105:   MatCreate(PetscObjectComm((PetscObject)A),B);
106:   MatSetSizes(*B,n,n,n,n);
107:   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
108:     MatSetType(*B,MATSEQAIJ);

110:     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
111:     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;

113:     MatSetBlockSizesFromMats(*B,A,A);
114:   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
115:     MatSetType(*B,MATSEQSBAIJ);
116:     MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);

118:     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
119:     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
120:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
121:   (*B)->factortype = ftype;
122: 
123:   PetscFree((*B)->solvertype);
124:   PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);
125:   return(0);
126: }

130: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
131: {
132:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
133:   IS                 isicol;
134:   PetscErrorCode     ierr;
135:   const PetscInt     *r,*ic;
136:   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
137:   PetscInt           *bi,*bj,*ajtmp;
138:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
139:   PetscReal          f;
140:   PetscInt           nlnk,*lnk,k,**bi_ptr;
141:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
142:   PetscBT            lnkbt;
143:   PetscBool          missing;

146:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
147:   MatMissingDiagonal(A,&missing,&i);
148:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);

150:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
151:   ISGetIndices(isrow,&r);
152:   ISGetIndices(isicol,&ic);

154:   /* get new row pointers */
155:   PetscMalloc1(n+1,&bi);
156:   bi[0] = 0;

158:   /* bdiag is location of diagonal in factor */
159:   PetscMalloc1(n+1,&bdiag);
160:   bdiag[0] = 0;

162:   /* linked list for storing column indices of the active row */
163:   nlnk = n + 1;
164:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

166:   PetscMalloc2(n+1,&bi_ptr,n+1,&im);

168:   /* initial FreeSpace size is f*(ai[n]+1) */
169:   f             = info->fill;
170:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
171:   current_space = free_space;

173:   for (i=0; i<n; i++) {
174:     /* copy previous fill into linked list */
175:     nzi = 0;
176:     nnz = ai[r[i]+1] - ai[r[i]];
177:     ajtmp = aj + ai[r[i]];
178:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
179:     nzi  += nlnk;

181:     /* add pivot rows into linked list */
182:     row = lnk[n];
183:     while (row < i) {
184:       nzbd  = bdiag[row] - bi[row] + 1;   /* num of entries in the row with column index <= row */
185:       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
186:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
187:       nzi  += nlnk;
188:       row   = lnk[row];
189:     }
190:     bi[i+1] = bi[i] + nzi;
191:     im[i]   = nzi;

193:     /* mark bdiag */
194:     nzbd = 0;
195:     nnz  = nzi;
196:     k    = lnk[n];
197:     while (nnz-- && k < i) {
198:       nzbd++;
199:       k = lnk[k];
200:     }
201:     bdiag[i] = bi[i] + nzbd;

203:     /* if free space is not available, make more free space */
204:     if (current_space->local_remaining<nzi) {
205:       nnz  = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
206:       PetscFreeSpaceGet(nnz,&current_space);
207:       reallocs++;
208:     }

210:     /* copy data into free space, then initialize lnk */
211:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);

213:     bi_ptr[i]                       = current_space->array;
214:     current_space->array           += nzi;
215:     current_space->local_used      += nzi;
216:     current_space->local_remaining -= nzi;
217:   }
218: #if defined(PETSC_USE_INFO)
219:   if (ai[n] != 0) {
220:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
221:     PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
222:     PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
223:     PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
224:     PetscInfo(A,"for best performance.\n");
225:   } else {
226:     PetscInfo(A,"Empty matrix\n");
227:   }
228: #endif

230:   ISRestoreIndices(isrow,&r);
231:   ISRestoreIndices(isicol,&ic);

233:   /* destroy list of free space and other temporary array(s) */
234:   PetscMalloc1(bi[n]+1,&bj);
235:   PetscFreeSpaceContiguous(&free_space,bj);
236:   PetscLLDestroy(lnk,lnkbt);
237:   PetscFree2(bi_ptr,im);

239:   /* put together the new matrix */
240:   MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
241:   PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
242:   b    = (Mat_SeqAIJ*)(B)->data;

244:   b->free_a       = PETSC_TRUE;
245:   b->free_ij      = PETSC_TRUE;
246:   b->singlemalloc = PETSC_FALSE;

248:   PetscMalloc1(bi[n]+1,&b->a);
249:   b->j    = bj;
250:   b->i    = bi;
251:   b->diag = bdiag;
252:   b->ilen = 0;
253:   b->imax = 0;
254:   b->row  = isrow;
255:   b->col  = iscol;
256:   PetscObjectReference((PetscObject)isrow);
257:   PetscObjectReference((PetscObject)iscol);
258:   b->icol = isicol;
259:   PetscMalloc1(n+1,&b->solve_work);

261:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
262:   PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
263:   b->maxnz = b->nz = bi[n];

265:   (B)->factortype            = MAT_FACTOR_LU;
266:   (B)->info.factor_mallocs   = reallocs;
267:   (B)->info.fill_ratio_given = f;

269:   if (ai[n]) {
270:     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
271:   } else {
272:     (B)->info.fill_ratio_needed = 0.0;
273:   }
274:   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
275:   if (a->inode.size) {
276:     (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
277:   }
278:   return(0);
279: }

283: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
284: {
285:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
286:   IS                 isicol;
287:   PetscErrorCode     ierr;
288:   const PetscInt     *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
289:   PetscInt           i,n=A->rmap->n;
290:   PetscInt           *bi,*bj;
291:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
292:   PetscReal          f;
293:   PetscInt           nlnk,*lnk,k,**bi_ptr;
294:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
295:   PetscBT            lnkbt;
296:   PetscBool          missing;

299:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
300:   MatMissingDiagonal(A,&missing,&i);
301:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
302: 
303:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
304:   ISGetIndices(isrow,&r);
305:   ISGetIndices(isicol,&ic);

307:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
308:   PetscMalloc1(n+1,&bi);
309:   PetscMalloc1(n+1,&bdiag);
310:   bi[0] = bdiag[0] = 0;

312:   /* linked list for storing column indices of the active row */
313:   nlnk = n + 1;
314:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

316:   PetscMalloc2(n+1,&bi_ptr,n+1,&im);

318:   /* initial FreeSpace size is f*(ai[n]+1) */
319:   f             = info->fill;
320:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
321:   current_space = free_space;

323:   for (i=0; i<n; i++) {
324:     /* copy previous fill into linked list */
325:     nzi = 0;
326:     nnz = ai[r[i]+1] - ai[r[i]];
327:     ajtmp = aj + ai[r[i]];
328:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
329:     nzi  += nlnk;

331:     /* add pivot rows into linked list */
332:     row = lnk[n];
333:     while (row < i) {
334:       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
335:       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
336:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
337:       nzi  += nlnk;
338:       row   = lnk[row];
339:     }
340:     bi[i+1] = bi[i] + nzi;
341:     im[i]   = nzi;

343:     /* mark bdiag */
344:     nzbd = 0;
345:     nnz  = nzi;
346:     k    = lnk[n];
347:     while (nnz-- && k < i) {
348:       nzbd++;
349:       k = lnk[k];
350:     }
351:     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

353:     /* if free space is not available, make more free space */
354:     if (current_space->local_remaining<nzi) {
355:       /* estimated additional space needed */
356:       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi));
357:       PetscFreeSpaceGet(nnz,&current_space);
358:       reallocs++;
359:     }

361:     /* copy data into free space, then initialize lnk */
362:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);

364:     bi_ptr[i]                       = current_space->array;
365:     current_space->array           += nzi;
366:     current_space->local_used      += nzi;
367:     current_space->local_remaining -= nzi;
368:   }

370:   ISRestoreIndices(isrow,&r);
371:   ISRestoreIndices(isicol,&ic);

373:   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
374:   PetscMalloc1(bi[n]+1,&bj);
375:   PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
376:   PetscLLDestroy(lnk,lnkbt);
377:   PetscFree2(bi_ptr,im);

379:   /* put together the new matrix */
380:   MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
381:   PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
382:   b    = (Mat_SeqAIJ*)(B)->data;

384:   b->free_a       = PETSC_TRUE;
385:   b->free_ij      = PETSC_TRUE;
386:   b->singlemalloc = PETSC_FALSE;

388:   PetscMalloc1(bdiag[0]+1,&b->a);

390:   b->j    = bj;
391:   b->i    = bi;
392:   b->diag = bdiag;
393:   b->ilen = 0;
394:   b->imax = 0;
395:   b->row  = isrow;
396:   b->col  = iscol;
397:   PetscObjectReference((PetscObject)isrow);
398:   PetscObjectReference((PetscObject)iscol);
399:   b->icol = isicol;
400:   PetscMalloc1(n+1,&b->solve_work);

402:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
403:   PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
404:   b->maxnz = b->nz = bdiag[0]+1;

406:   B->factortype            = MAT_FACTOR_LU;
407:   B->info.factor_mallocs   = reallocs;
408:   B->info.fill_ratio_given = f;

410:   if (ai[n]) {
411:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
412:   } else {
413:     B->info.fill_ratio_needed = 0.0;
414:   }
415: #if defined(PETSC_USE_INFO)
416:   if (ai[n] != 0) {
417:     PetscReal af = B->info.fill_ratio_needed;
418:     PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
419:     PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
420:     PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
421:     PetscInfo(A,"for best performance.\n");
422:   } else {
423:     PetscInfo(A,"Empty matrix\n");
424:   }
425: #endif
426:   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
427:   if (a->inode.size) {
428:     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
429:   }
430:   MatSeqAIJCheckInode_FactorLU(B);
431:   return(0);
432: }

434: /*
435:     Trouble in factorization, should we dump the original matrix?
436: */
439: PetscErrorCode MatFactorDumpMatrix(Mat A)
440: {
442:   PetscBool      flg = PETSC_FALSE;

445:   PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);
446:   if (flg) {
447:     PetscViewer viewer;
448:     char        filename[PETSC_MAX_PATH_LEN];

450:     PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
451:     PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);
452:     MatView(A,viewer);
453:     PetscViewerDestroy(&viewer);
454:   }
455:   return(0);
456: }

460: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
461: {
462:   Mat             C     =B;
463:   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
464:   IS              isrow = b->row,isicol = b->icol;
465:   PetscErrorCode  ierr;
466:   const PetscInt  *r,*ic,*ics;
467:   const PetscInt  n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
468:   PetscInt        i,j,k,nz,nzL,row,*pj;
469:   const PetscInt  *ajtmp,*bjtmp;
470:   MatScalar       *rtmp,*pc,multiplier,*pv;
471:   const MatScalar *aa=a->a,*v;
472:   PetscBool       row_identity,col_identity;
473:   FactorShiftCtx  sctx;
474:   const PetscInt  *ddiag;
475:   PetscReal       rs;
476:   MatScalar       d;

479:   /* MatPivotSetUp(): initialize shift context sctx */
480:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

482:   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
483:     ddiag          = a->diag;
484:     sctx.shift_top = info->zeropivot;
485:     for (i=0; i<n; i++) {
486:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
487:       d  = (aa)[ddiag[i]];
488:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
489:       v  = aa+ai[i];
490:       nz = ai[i+1] - ai[i];
491:       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
492:       if (rs>sctx.shift_top) sctx.shift_top = rs;
493:     }
494:     sctx.shift_top *= 1.1;
495:     sctx.nshift_max = 5;
496:     sctx.shift_lo   = 0.;
497:     sctx.shift_hi   = 1.;
498:   }

500:   ISGetIndices(isrow,&r);
501:   ISGetIndices(isicol,&ic);
502:   PetscMalloc1(n+1,&rtmp);
503:   ics  = ic;

505:   do {
506:     sctx.newshift = PETSC_FALSE;
507:     for (i=0; i<n; i++) {
508:       /* zero rtmp */
509:       /* L part */
510:       nz    = bi[i+1] - bi[i];
511:       bjtmp = bj + bi[i];
512:       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;

514:       /* U part */
515:       nz    = bdiag[i]-bdiag[i+1];
516:       bjtmp = bj + bdiag[i+1]+1;
517:       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;

519:       /* load in initial (unfactored row) */
520:       nz    = ai[r[i]+1] - ai[r[i]];
521:       ajtmp = aj + ai[r[i]];
522:       v     = aa + ai[r[i]];
523:       for (j=0; j<nz; j++) {
524:         rtmp[ics[ajtmp[j]]] = v[j];
525:       }
526:       /* ZeropivotApply() */
527:       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */

529:       /* elimination */
530:       bjtmp = bj + bi[i];
531:       row   = *bjtmp++;
532:       nzL   = bi[i+1] - bi[i];
533:       for (k=0; k < nzL; k++) {
534:         pc = rtmp + row;
535:         if (*pc != 0.0) {
536:           pv         = b->a + bdiag[row];
537:           multiplier = *pc * (*pv);
538:           *pc        = multiplier;

540:           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
541:           pv = b->a + bdiag[row+1]+1;
542:           nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */

544:           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
545:           PetscLogFlops(1+2*nz);
546:         }
547:         row = *bjtmp++;
548:       }

550:       /* finished row so stick it into b->a */
551:       rs = 0.0;
552:       /* L part */
553:       pv = b->a + bi[i];
554:       pj = b->j + bi[i];
555:       nz = bi[i+1] - bi[i];
556:       for (j=0; j<nz; j++) {
557:         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
558:       }

560:       /* U part */
561:       pv = b->a + bdiag[i+1]+1;
562:       pj = b->j + bdiag[i+1]+1;
563:       nz = bdiag[i] - bdiag[i+1]-1;
564:       for (j=0; j<nz; j++) {
565:         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
566:       }

568:       sctx.rs = rs;
569:       sctx.pv = rtmp[i];
570:       MatPivotCheck(B,A,info,&sctx,i);
571:       if (sctx.newshift) break; /* break for-loop */
572:       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */

574:       /* Mark diagonal and invert diagonal for simplier triangular solves */
575:       pv  = b->a + bdiag[i];
576:       *pv = 1.0/rtmp[i];

578:     } /* endof for (i=0; i<n; i++) { */

580:     /* MatPivotRefine() */
581:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
582:       /*
583:        * if no shift in this attempt & shifting & started shifting & can refine,
584:        * then try lower shift
585:        */
586:       sctx.shift_hi       = sctx.shift_fraction;
587:       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
588:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
589:       sctx.newshift       = PETSC_TRUE;
590:       sctx.nshift++;
591:     }
592:   } while (sctx.newshift);

594:   PetscFree(rtmp);
595:   ISRestoreIndices(isicol,&ic);
596:   ISRestoreIndices(isrow,&r);

598:   ISIdentity(isrow,&row_identity);
599:   ISIdentity(isicol,&col_identity);
600:   if (b->inode.size) {
601:     C->ops->solve = MatSolve_SeqAIJ_Inode;
602:   } else if (row_identity && col_identity) {
603:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
604:   } else {
605:     C->ops->solve = MatSolve_SeqAIJ;
606:   }
607:   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
608:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
609:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
610:   C->ops->matsolve          = MatMatSolve_SeqAIJ;
611:   C->assembled              = PETSC_TRUE;
612:   C->preallocated           = PETSC_TRUE;

614:   PetscLogFlops(C->cmap->n);

616:   /* MatShiftView(A,info,&sctx) */
617:   if (sctx.nshift) {
618:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
619:       PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
620:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
621:       PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
622:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
623:       PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
624:     }
625:   }
626:   return(0);
627: }

631: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
632: {
633:   Mat             C     =B;
634:   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
635:   IS              isrow = b->row,isicol = b->icol;
636:   PetscErrorCode  ierr;
637:   const PetscInt  *r,*ic,*ics;
638:   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
639:   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
640:   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
641:   MatScalar       *pv,*rtmp,*pc,multiplier,d;
642:   const MatScalar *v,*aa=a->a;
643:   PetscReal       rs=0.0;
644:   FactorShiftCtx  sctx;
645:   const PetscInt  *ddiag;
646:   PetscBool       row_identity, col_identity;

649:   /* MatPivotSetUp(): initialize shift context sctx */
650:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

652:   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
653:     ddiag          = a->diag;
654:     sctx.shift_top = info->zeropivot;
655:     for (i=0; i<n; i++) {
656:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
657:       d  = (aa)[ddiag[i]];
658:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
659:       v  = aa+ai[i];
660:       nz = ai[i+1] - ai[i];
661:       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
662:       if (rs>sctx.shift_top) sctx.shift_top = rs;
663:     }
664:     sctx.shift_top *= 1.1;
665:     sctx.nshift_max = 5;
666:     sctx.shift_lo   = 0.;
667:     sctx.shift_hi   = 1.;
668:   }

670:   ISGetIndices(isrow,&r);
671:   ISGetIndices(isicol,&ic);
672:   PetscMalloc1(n+1,&rtmp);
673:   ics  = ic;

675:   do {
676:     sctx.newshift = PETSC_FALSE;
677:     for (i=0; i<n; i++) {
678:       nz    = bi[i+1] - bi[i];
679:       bjtmp = bj + bi[i];
680:       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;

682:       /* load in initial (unfactored row) */
683:       nz    = ai[r[i]+1] - ai[r[i]];
684:       ajtmp = aj + ai[r[i]];
685:       v     = aa + ai[r[i]];
686:       for (j=0; j<nz; j++) {
687:         rtmp[ics[ajtmp[j]]] = v[j];
688:       }
689:       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */

691:       row = *bjtmp++;
692:       while  (row < i) {
693:         pc = rtmp + row;
694:         if (*pc != 0.0) {
695:           pv         = b->a + diag_offset[row];
696:           pj         = b->j + diag_offset[row] + 1;
697:           multiplier = *pc / *pv++;
698:           *pc        = multiplier;
699:           nz         = bi[row+1] - diag_offset[row] - 1;
700:           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
701:           PetscLogFlops(1+2*nz);
702:         }
703:         row = *bjtmp++;
704:       }
705:       /* finished row so stick it into b->a */
706:       pv   = b->a + bi[i];
707:       pj   = b->j + bi[i];
708:       nz   = bi[i+1] - bi[i];
709:       diag = diag_offset[i] - bi[i];
710:       rs   = 0.0;
711:       for (j=0; j<nz; j++) {
712:         pv[j] = rtmp[pj[j]];
713:         rs   += PetscAbsScalar(pv[j]);
714:       }
715:       rs -= PetscAbsScalar(pv[diag]);

717:       sctx.rs = rs;
718:       sctx.pv = pv[diag];
719:       MatPivotCheck(B,A,info,&sctx,i);
720:       if (sctx.newshift) break;
721:       pv[diag] = sctx.pv;
722:     }

724:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
725:       /*
726:        * if no shift in this attempt & shifting & started shifting & can refine,
727:        * then try lower shift
728:        */
729:       sctx.shift_hi       = sctx.shift_fraction;
730:       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
731:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
732:       sctx.newshift       = PETSC_TRUE;
733:       sctx.nshift++;
734:     }
735:   } while (sctx.newshift);

737:   /* invert diagonal entries for simplier triangular solves */
738:   for (i=0; i<n; i++) {
739:     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
740:   }
741:   PetscFree(rtmp);
742:   ISRestoreIndices(isicol,&ic);
743:   ISRestoreIndices(isrow,&r);

745:   ISIdentity(isrow,&row_identity);
746:   ISIdentity(isicol,&col_identity);
747:   if (row_identity && col_identity) {
748:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
749:   } else {
750:     C->ops->solve = MatSolve_SeqAIJ_inplace;
751:   }
752:   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
753:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
754:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
755:   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;

757:   C->assembled    = PETSC_TRUE;
758:   C->preallocated = PETSC_TRUE;

760:   PetscLogFlops(C->cmap->n);
761:   if (sctx.nshift) {
762:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
763:       PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
764:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
765:       PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
766:     }
767:   }
768:   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
769:   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;

771:   MatSeqAIJCheckInode(C);
772:   return(0);
773: }

775: /*
776:    This routine implements inplace ILU(0) with row or/and column permutations.
777:    Input:
778:      A - original matrix
779:    Output;
780:      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
781:          a->j (col index) is permuted by the inverse of colperm, then sorted
782:          a->a reordered accordingly with a->j
783:          a->diag (ptr to diagonal elements) is updated.
784: */
787: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
788: {
789:   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data;
790:   IS              isrow = a->row,isicol = a->icol;
791:   PetscErrorCode  ierr;
792:   const PetscInt  *r,*ic,*ics;
793:   PetscInt        i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
794:   PetscInt        *ajtmp,nz,row;
795:   PetscInt        *diag = a->diag,nbdiag,*pj;
796:   PetscScalar     *rtmp,*pc,multiplier,d;
797:   MatScalar       *pv,*v;
798:   PetscReal       rs;
799:   FactorShiftCtx  sctx;
800:   const MatScalar *aa=a->a,*vtmp;

803:   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");

805:   /* MatPivotSetUp(): initialize shift context sctx */
806:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

808:   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
809:     const PetscInt *ddiag = a->diag;
810:     sctx.shift_top = info->zeropivot;
811:     for (i=0; i<n; i++) {
812:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
813:       d    = (aa)[ddiag[i]];
814:       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
815:       vtmp = aa+ai[i];
816:       nz   = ai[i+1] - ai[i];
817:       for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
818:       if (rs>sctx.shift_top) sctx.shift_top = rs;
819:     }
820:     sctx.shift_top *= 1.1;
821:     sctx.nshift_max = 5;
822:     sctx.shift_lo   = 0.;
823:     sctx.shift_hi   = 1.;
824:   }

826:   ISGetIndices(isrow,&r);
827:   ISGetIndices(isicol,&ic);
828:   PetscMalloc1(n+1,&rtmp);
829:   PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
830:   ics  = ic;

832: #if defined(MV)
833:   sctx.shift_top      = 0.;
834:   sctx.nshift_max     = 0;
835:   sctx.shift_lo       = 0.;
836:   sctx.shift_hi       = 0.;
837:   sctx.shift_fraction = 0.;

839:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
840:     sctx.shift_top = 0.;
841:     for (i=0; i<n; i++) {
842:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
843:       d  = (a->a)[diag[i]];
844:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
845:       v  = a->a+ai[i];
846:       nz = ai[i+1] - ai[i];
847:       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
848:       if (rs>sctx.shift_top) sctx.shift_top = rs;
849:     }
850:     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
851:     sctx.shift_top *= 1.1;
852:     sctx.nshift_max = 5;
853:     sctx.shift_lo   = 0.;
854:     sctx.shift_hi   = 1.;
855:   }

857:   sctx.shift_amount = 0.;
858:   sctx.nshift       = 0;
859: #endif

861:   do {
862:     sctx.newshift = PETSC_FALSE;
863:     for (i=0; i<n; i++) {
864:       /* load in initial unfactored row */
865:       nz    = ai[r[i]+1] - ai[r[i]];
866:       ajtmp = aj + ai[r[i]];
867:       v     = a->a + ai[r[i]];
868:       /* sort permuted ajtmp and values v accordingly */
869:       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
870:       PetscSortIntWithScalarArray(nz,ajtmp,v);

872:       diag[r[i]] = ai[r[i]];
873:       for (j=0; j<nz; j++) {
874:         rtmp[ajtmp[j]] = v[j];
875:         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
876:       }
877:       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */

879:       row = *ajtmp++;
880:       while  (row < i) {
881:         pc = rtmp + row;
882:         if (*pc != 0.0) {
883:           pv = a->a + diag[r[row]];
884:           pj = aj + diag[r[row]] + 1;

886:           multiplier = *pc / *pv++;
887:           *pc        = multiplier;
888:           nz         = ai[r[row]+1] - diag[r[row]] - 1;
889:           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
890:           PetscLogFlops(1+2*nz);
891:         }
892:         row = *ajtmp++;
893:       }
894:       /* finished row so overwrite it onto a->a */
895:       pv     = a->a + ai[r[i]];
896:       pj     = aj + ai[r[i]];
897:       nz     = ai[r[i]+1] - ai[r[i]];
898:       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */

900:       rs = 0.0;
901:       for (j=0; j<nz; j++) {
902:         pv[j] = rtmp[pj[j]];
903:         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
904:       }

906:       sctx.rs = rs;
907:       sctx.pv = pv[nbdiag];
908:       MatPivotCheck(B,A,info,&sctx,i);
909:       if (sctx.newshift) break;
910:       pv[nbdiag] = sctx.pv;
911:     }

913:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
914:       /*
915:        * if no shift in this attempt & shifting & started shifting & can refine,
916:        * then try lower shift
917:        */
918:       sctx.shift_hi       = sctx.shift_fraction;
919:       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
920:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
921:       sctx.newshift       = PETSC_TRUE;
922:       sctx.nshift++;
923:     }
924:   } while (sctx.newshift);

926:   /* invert diagonal entries for simplier triangular solves */
927:   for (i=0; i<n; i++) {
928:     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
929:   }

931:   PetscFree(rtmp);
932:   ISRestoreIndices(isicol,&ic);
933:   ISRestoreIndices(isrow,&r);

935:   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
936:   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
937:   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
938:   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;

940:   A->assembled    = PETSC_TRUE;
941:   A->preallocated = PETSC_TRUE;

943:   PetscLogFlops(A->cmap->n);
944:   if (sctx.nshift) {
945:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
946:       PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
947:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
948:       PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
949:     }
950:   }
951:   return(0);
952: }

954: /* ----------------------------------------------------------- */
957: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
958: {
960:   Mat            C;

963:   MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);
964:   MatLUFactorSymbolic(C,A,row,col,info);
965:   MatLUFactorNumeric(C,A,info);

967:   A->ops->solve          = C->ops->solve;
968:   A->ops->solvetranspose = C->ops->solvetranspose;

970:   MatHeaderMerge(A,&C);
971:   PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);
972:   return(0);
973: }
974: /* ----------------------------------------------------------- */


979: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
980: {
981:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
982:   IS                iscol = a->col,isrow = a->row;
983:   PetscErrorCode    ierr;
984:   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
985:   PetscInt          nz;
986:   const PetscInt    *rout,*cout,*r,*c;
987:   PetscScalar       *x,*tmp,*tmps,sum;
988:   const PetscScalar *b;
989:   const MatScalar   *aa = a->a,*v;

992:   if (!n) return(0);

994:   VecGetArrayRead(bb,&b);
995:   VecGetArray(xx,&x);
996:   tmp  = a->solve_work;

998:   ISGetIndices(isrow,&rout); r = rout;
999:   ISGetIndices(iscol,&cout); c = cout + (n-1);

1001:   /* forward solve the lower triangular */
1002:   tmp[0] = b[*r++];
1003:   tmps   = tmp;
1004:   for (i=1; i<n; i++) {
1005:     v   = aa + ai[i];
1006:     vi  = aj + ai[i];
1007:     nz  = a->diag[i] - ai[i];
1008:     sum = b[*r++];
1009:     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1010:     tmp[i] = sum;
1011:   }

1013:   /* backward solve the upper triangular */
1014:   for (i=n-1; i>=0; i--) {
1015:     v   = aa + a->diag[i] + 1;
1016:     vi  = aj + a->diag[i] + 1;
1017:     nz  = ai[i+1] - a->diag[i] - 1;
1018:     sum = tmp[i];
1019:     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1020:     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1021:   }

1023:   ISRestoreIndices(isrow,&rout);
1024:   ISRestoreIndices(iscol,&cout);
1025:   VecRestoreArrayRead(bb,&b);
1026:   VecRestoreArray(xx,&x);
1027:   PetscLogFlops(2.0*a->nz - A->cmap->n);
1028:   return(0);
1029: }

1033: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1034: {
1035:   Mat_SeqAIJ      *a    = (Mat_SeqAIJ*)A->data;
1036:   IS              iscol = a->col,isrow = a->row;
1037:   PetscErrorCode  ierr;
1038:   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1039:   PetscInt        nz,neq;
1040:   const PetscInt  *rout,*cout,*r,*c;
1041:   PetscScalar     *x,*b,*tmp,*tmps,sum;
1042:   const MatScalar *aa = a->a,*v;
1043:   PetscBool       bisdense,xisdense;

1046:   if (!n) return(0);

1048:   PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1049:   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1050:   PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1051:   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");

1053:   MatDenseGetArray(B,&b);
1054:   MatDenseGetArray(X,&x);

1056:   tmp  = a->solve_work;
1057:   ISGetIndices(isrow,&rout); r = rout;
1058:   ISGetIndices(iscol,&cout); c = cout;

1060:   for (neq=0; neq<B->cmap->n; neq++) {
1061:     /* forward solve the lower triangular */
1062:     tmp[0] = b[r[0]];
1063:     tmps   = tmp;
1064:     for (i=1; i<n; i++) {
1065:       v   = aa + ai[i];
1066:       vi  = aj + ai[i];
1067:       nz  = a->diag[i] - ai[i];
1068:       sum = b[r[i]];
1069:       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1070:       tmp[i] = sum;
1071:     }
1072:     /* backward solve the upper triangular */
1073:     for (i=n-1; i>=0; i--) {
1074:       v   = aa + a->diag[i] + 1;
1075:       vi  = aj + a->diag[i] + 1;
1076:       nz  = ai[i+1] - a->diag[i] - 1;
1077:       sum = tmp[i];
1078:       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1079:       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1080:     }

1082:     b += n;
1083:     x += n;
1084:   }
1085:   ISRestoreIndices(isrow,&rout);
1086:   ISRestoreIndices(iscol,&cout);
1087:   MatDenseRestoreArray(B,&b);
1088:   MatDenseRestoreArray(X,&x);
1089:   PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1090:   return(0);
1091: }

1095: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1096: {
1097:   Mat_SeqAIJ      *a    = (Mat_SeqAIJ*)A->data;
1098:   IS              iscol = a->col,isrow = a->row;
1099:   PetscErrorCode  ierr;
1100:   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1101:   PetscInt        nz,neq;
1102:   const PetscInt  *rout,*cout,*r,*c;
1103:   PetscScalar     *x,*b,*tmp,sum;
1104:   const MatScalar *aa = a->a,*v;
1105:   PetscBool       bisdense,xisdense;

1108:   if (!n) return(0);

1110:   PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1111:   if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1112:   PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1113:   if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");

1115:   MatDenseGetArray(B,&b);
1116:   MatDenseGetArray(X,&x);

1118:   tmp  = a->solve_work;
1119:   ISGetIndices(isrow,&rout); r = rout;
1120:   ISGetIndices(iscol,&cout); c = cout;

1122:   for (neq=0; neq<B->cmap->n; neq++) {
1123:     /* forward solve the lower triangular */
1124:     tmp[0] = b[r[0]];
1125:     v      = aa;
1126:     vi     = aj;
1127:     for (i=1; i<n; i++) {
1128:       nz  = ai[i+1] - ai[i];
1129:       sum = b[r[i]];
1130:       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1131:       tmp[i] = sum;
1132:       v     += nz; vi += nz;
1133:     }

1135:     /* backward solve the upper triangular */
1136:     for (i=n-1; i>=0; i--) {
1137:       v   = aa + adiag[i+1]+1;
1138:       vi  = aj + adiag[i+1]+1;
1139:       nz  = adiag[i]-adiag[i+1]-1;
1140:       sum = tmp[i];
1141:       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1142:       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1143:     }

1145:     b += n;
1146:     x += n;
1147:   }
1148:   ISRestoreIndices(isrow,&rout);
1149:   ISRestoreIndices(iscol,&cout);
1150:   MatDenseRestoreArray(B,&b);
1151:   MatDenseRestoreArray(X,&x);
1152:   PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1153:   return(0);
1154: }

1158: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1159: {
1160:   Mat_SeqAIJ      *a    = (Mat_SeqAIJ*)A->data;
1161:   IS              iscol = a->col,isrow = a->row;
1162:   PetscErrorCode  ierr;
1163:   const PetscInt  *r,*c,*rout,*cout;
1164:   PetscInt        i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1165:   PetscInt        nz,row;
1166:   PetscScalar     *x,*b,*tmp,*tmps,sum;
1167:   const MatScalar *aa = a->a,*v;

1170:   if (!n) return(0);

1172:   VecGetArray(bb,&b);
1173:   VecGetArray(xx,&x);
1174:   tmp  = a->solve_work;

1176:   ISGetIndices(isrow,&rout); r = rout;
1177:   ISGetIndices(iscol,&cout); c = cout + (n-1);

1179:   /* forward solve the lower triangular */
1180:   tmp[0] = b[*r++];
1181:   tmps   = tmp;
1182:   for (row=1; row<n; row++) {
1183:     i   = rout[row]; /* permuted row */
1184:     v   = aa + ai[i];
1185:     vi  = aj + ai[i];
1186:     nz  = a->diag[i] - ai[i];
1187:     sum = b[*r++];
1188:     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1189:     tmp[row] = sum;
1190:   }

1192:   /* backward solve the upper triangular */
1193:   for (row=n-1; row>=0; row--) {
1194:     i   = rout[row]; /* permuted row */
1195:     v   = aa + a->diag[i] + 1;
1196:     vi  = aj + a->diag[i] + 1;
1197:     nz  = ai[i+1] - a->diag[i] - 1;
1198:     sum = tmp[row];
1199:     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1200:     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1201:   }

1203:   ISRestoreIndices(isrow,&rout);
1204:   ISRestoreIndices(iscol,&cout);
1205:   VecRestoreArray(bb,&b);
1206:   VecRestoreArray(xx,&x);
1207:   PetscLogFlops(2.0*a->nz - A->cmap->n);
1208:   return(0);
1209: }

1211: /* ----------------------------------------------------------- */
1212: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1215: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1216: {
1217:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1218:   PetscErrorCode    ierr;
1219:   PetscInt          n   = A->rmap->n;
1220:   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1221:   PetscScalar       *x;
1222:   const PetscScalar *b;
1223:   const MatScalar   *aa = a->a;
1224: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1225:   PetscInt        adiag_i,i,nz,ai_i;
1226:   const PetscInt  *vi;
1227:   const MatScalar *v;
1228:   PetscScalar     sum;
1229: #endif

1232:   if (!n) return(0);

1234:   VecGetArrayRead(bb,&b);
1235:   VecGetArray(xx,&x);

1237: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1238:   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1239: #else
1240:   /* forward solve the lower triangular */
1241:   x[0] = b[0];
1242:   for (i=1; i<n; i++) {
1243:     ai_i = ai[i];
1244:     v    = aa + ai_i;
1245:     vi   = aj + ai_i;
1246:     nz   = adiag[i] - ai_i;
1247:     sum  = b[i];
1248:     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1249:     x[i] = sum;
1250:   }

1252:   /* backward solve the upper triangular */
1253:   for (i=n-1; i>=0; i--) {
1254:     adiag_i = adiag[i];
1255:     v       = aa + adiag_i + 1;
1256:     vi      = aj + adiag_i + 1;
1257:     nz      = ai[i+1] - adiag_i - 1;
1258:     sum     = x[i];
1259:     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1260:     x[i] = sum*aa[adiag_i];
1261:   }
1262: #endif
1263:   PetscLogFlops(2.0*a->nz - A->cmap->n);
1264:   VecRestoreArrayRead(bb,&b);
1265:   VecRestoreArray(xx,&x);
1266:   return(0);
1267: }

1271: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1272: {
1273:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1274:   IS                iscol = a->col,isrow = a->row;
1275:   PetscErrorCode    ierr;
1276:   PetscInt          i, n = A->rmap->n,j;
1277:   PetscInt          nz;
1278:   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1279:   PetscScalar       *x,*tmp,sum;
1280:   const PetscScalar *b;
1281:   const MatScalar   *aa = a->a,*v;

1284:   if (yy != xx) {VecCopy(yy,xx);}

1286:   VecGetArrayRead(bb,&b);
1287:   VecGetArray(xx,&x);
1288:   tmp  = a->solve_work;

1290:   ISGetIndices(isrow,&rout); r = rout;
1291:   ISGetIndices(iscol,&cout); c = cout + (n-1);

1293:   /* forward solve the lower triangular */
1294:   tmp[0] = b[*r++];
1295:   for (i=1; i<n; i++) {
1296:     v   = aa + ai[i];
1297:     vi  = aj + ai[i];
1298:     nz  = a->diag[i] - ai[i];
1299:     sum = b[*r++];
1300:     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1301:     tmp[i] = sum;
1302:   }

1304:   /* backward solve the upper triangular */
1305:   for (i=n-1; i>=0; i--) {
1306:     v   = aa + a->diag[i] + 1;
1307:     vi  = aj + a->diag[i] + 1;
1308:     nz  = ai[i+1] - a->diag[i] - 1;
1309:     sum = tmp[i];
1310:     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1311:     tmp[i]   = sum*aa[a->diag[i]];
1312:     x[*c--] += tmp[i];
1313:   }

1315:   ISRestoreIndices(isrow,&rout);
1316:   ISRestoreIndices(iscol,&cout);
1317:   VecRestoreArrayRead(bb,&b);
1318:   VecRestoreArray(xx,&x);
1319:   PetscLogFlops(2.0*a->nz);
1320:   return(0);
1321: }

1325: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1326: {
1327:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1328:   IS                iscol = a->col,isrow = a->row;
1329:   PetscErrorCode    ierr;
1330:   PetscInt          i, n = A->rmap->n,j;
1331:   PetscInt          nz;
1332:   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1333:   PetscScalar       *x,*tmp,sum;
1334:   const PetscScalar *b;
1335:   const MatScalar   *aa = a->a,*v;

1338:   if (yy != xx) {VecCopy(yy,xx);}

1340:   VecGetArrayRead(bb,&b);
1341:   VecGetArray(xx,&x);
1342:   tmp  = a->solve_work;

1344:   ISGetIndices(isrow,&rout); r = rout;
1345:   ISGetIndices(iscol,&cout); c = cout;

1347:   /* forward solve the lower triangular */
1348:   tmp[0] = b[r[0]];
1349:   v      = aa;
1350:   vi     = aj;
1351:   for (i=1; i<n; i++) {
1352:     nz  = ai[i+1] - ai[i];
1353:     sum = b[r[i]];
1354:     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1355:     tmp[i] = sum;
1356:     v     += nz;
1357:     vi    += nz;
1358:   }

1360:   /* backward solve the upper triangular */
1361:   v  = aa + adiag[n-1];
1362:   vi = aj + adiag[n-1];
1363:   for (i=n-1; i>=0; i--) {
1364:     nz  = adiag[i] - adiag[i+1] - 1;
1365:     sum = tmp[i];
1366:     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1367:     tmp[i]   = sum*v[nz];
1368:     x[c[i]] += tmp[i];
1369:     v       += nz+1; vi += nz+1;
1370:   }

1372:   ISRestoreIndices(isrow,&rout);
1373:   ISRestoreIndices(iscol,&cout);
1374:   VecRestoreArrayRead(bb,&b);
1375:   VecRestoreArray(xx,&x);
1376:   PetscLogFlops(2.0*a->nz);
1377:   return(0);
1378: }

1382: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1383: {
1384:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1385:   IS                iscol = a->col,isrow = a->row;
1386:   PetscErrorCode    ierr;
1387:   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1388:   PetscInt          i,n = A->rmap->n,j;
1389:   PetscInt          nz;
1390:   PetscScalar       *x,*tmp,s1;
1391:   const MatScalar   *aa = a->a,*v;
1392:   const PetscScalar *b;

1395:   VecGetArrayRead(bb,&b);
1396:   VecGetArray(xx,&x);
1397:   tmp  = a->solve_work;

1399:   ISGetIndices(isrow,&rout); r = rout;
1400:   ISGetIndices(iscol,&cout); c = cout;

1402:   /* copy the b into temp work space according to permutation */
1403:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

1405:   /* forward solve the U^T */
1406:   for (i=0; i<n; i++) {
1407:     v   = aa + diag[i];
1408:     vi  = aj + diag[i] + 1;
1409:     nz  = ai[i+1] - diag[i] - 1;
1410:     s1  = tmp[i];
1411:     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1412:     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1413:     tmp[i] = s1;
1414:   }

1416:   /* backward solve the L^T */
1417:   for (i=n-1; i>=0; i--) {
1418:     v  = aa + diag[i] - 1;
1419:     vi = aj + diag[i] - 1;
1420:     nz = diag[i] - ai[i];
1421:     s1 = tmp[i];
1422:     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1423:   }

1425:   /* copy tmp into x according to permutation */
1426:   for (i=0; i<n; i++) x[r[i]] = tmp[i];

1428:   ISRestoreIndices(isrow,&rout);
1429:   ISRestoreIndices(iscol,&cout);
1430:   VecRestoreArrayRead(bb,&b);
1431:   VecRestoreArray(xx,&x);

1433:   PetscLogFlops(2.0*a->nz-A->cmap->n);
1434:   return(0);
1435: }

1439: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1440: {
1441:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1442:   IS                iscol = a->col,isrow = a->row;
1443:   PetscErrorCode    ierr;
1444:   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1445:   PetscInt          i,n = A->rmap->n,j;
1446:   PetscInt          nz;
1447:   PetscScalar       *x,*tmp,s1;
1448:   const MatScalar   *aa = a->a,*v;
1449:   const PetscScalar *b;

1452:   VecGetArrayRead(bb,&b);
1453:   VecGetArray(xx,&x);
1454:   tmp  = a->solve_work;

1456:   ISGetIndices(isrow,&rout); r = rout;
1457:   ISGetIndices(iscol,&cout); c = cout;

1459:   /* copy the b into temp work space according to permutation */
1460:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

1462:   /* forward solve the U^T */
1463:   for (i=0; i<n; i++) {
1464:     v   = aa + adiag[i+1] + 1;
1465:     vi  = aj + adiag[i+1] + 1;
1466:     nz  = adiag[i] - adiag[i+1] - 1;
1467:     s1  = tmp[i];
1468:     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1469:     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1470:     tmp[i] = s1;
1471:   }

1473:   /* backward solve the L^T */
1474:   for (i=n-1; i>=0; i--) {
1475:     v  = aa + ai[i];
1476:     vi = aj + ai[i];
1477:     nz = ai[i+1] - ai[i];
1478:     s1 = tmp[i];
1479:     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1480:   }

1482:   /* copy tmp into x according to permutation */
1483:   for (i=0; i<n; i++) x[r[i]] = tmp[i];

1485:   ISRestoreIndices(isrow,&rout);
1486:   ISRestoreIndices(iscol,&cout);
1487:   VecRestoreArrayRead(bb,&b);
1488:   VecRestoreArray(xx,&x);

1490:   PetscLogFlops(2.0*a->nz-A->cmap->n);
1491:   return(0);
1492: }

1496: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1497: {
1498:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1499:   IS                iscol = a->col,isrow = a->row;
1500:   PetscErrorCode    ierr;
1501:   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1502:   PetscInt          i,n = A->rmap->n,j;
1503:   PetscInt          nz;
1504:   PetscScalar       *x,*tmp,s1;
1505:   const MatScalar   *aa = a->a,*v;
1506:   const PetscScalar *b;

1509:   if (zz != xx) {VecCopy(zz,xx);}
1510:   VecGetArrayRead(bb,&b);
1511:   VecGetArray(xx,&x);
1512:   tmp  = a->solve_work;

1514:   ISGetIndices(isrow,&rout); r = rout;
1515:   ISGetIndices(iscol,&cout); c = cout;

1517:   /* copy the b into temp work space according to permutation */
1518:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

1520:   /* forward solve the U^T */
1521:   for (i=0; i<n; i++) {
1522:     v   = aa + diag[i];
1523:     vi  = aj + diag[i] + 1;
1524:     nz  = ai[i+1] - diag[i] - 1;
1525:     s1  = tmp[i];
1526:     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1527:     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1528:     tmp[i] = s1;
1529:   }

1531:   /* backward solve the L^T */
1532:   for (i=n-1; i>=0; i--) {
1533:     v  = aa + diag[i] - 1;
1534:     vi = aj + diag[i] - 1;
1535:     nz = diag[i] - ai[i];
1536:     s1 = tmp[i];
1537:     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1538:   }

1540:   /* copy tmp into x according to permutation */
1541:   for (i=0; i<n; i++) x[r[i]] += tmp[i];

1543:   ISRestoreIndices(isrow,&rout);
1544:   ISRestoreIndices(iscol,&cout);
1545:   VecRestoreArrayRead(bb,&b);
1546:   VecRestoreArray(xx,&x);

1548:   PetscLogFlops(2.0*a->nz-A->cmap->n);
1549:   return(0);
1550: }

1554: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1555: {
1556:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1557:   IS                iscol = a->col,isrow = a->row;
1558:   PetscErrorCode    ierr;
1559:   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1560:   PetscInt          i,n = A->rmap->n,j;
1561:   PetscInt          nz;
1562:   PetscScalar       *x,*tmp,s1;
1563:   const MatScalar   *aa = a->a,*v;
1564:   const PetscScalar *b;

1567:   if (zz != xx) {VecCopy(zz,xx);}
1568:   VecGetArrayRead(bb,&b);
1569:   VecGetArray(xx,&x);
1570:   tmp  = a->solve_work;

1572:   ISGetIndices(isrow,&rout); r = rout;
1573:   ISGetIndices(iscol,&cout); c = cout;

1575:   /* copy the b into temp work space according to permutation */
1576:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

1578:   /* forward solve the U^T */
1579:   for (i=0; i<n; i++) {
1580:     v   = aa + adiag[i+1] + 1;
1581:     vi  = aj + adiag[i+1] + 1;
1582:     nz  = adiag[i] - adiag[i+1] - 1;
1583:     s1  = tmp[i];
1584:     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1585:     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1586:     tmp[i] = s1;
1587:   }


1590:   /* backward solve the L^T */
1591:   for (i=n-1; i>=0; i--) {
1592:     v  = aa + ai[i];
1593:     vi = aj + ai[i];
1594:     nz = ai[i+1] - ai[i];
1595:     s1 = tmp[i];
1596:     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1597:   }

1599:   /* copy tmp into x according to permutation */
1600:   for (i=0; i<n; i++) x[r[i]] += tmp[i];

1602:   ISRestoreIndices(isrow,&rout);
1603:   ISRestoreIndices(iscol,&cout);
1604:   VecRestoreArrayRead(bb,&b);
1605:   VecRestoreArray(xx,&x);

1607:   PetscLogFlops(2.0*a->nz-A->cmap->n);
1608:   return(0);
1609: }

1611: /* ----------------------------------------------------------------*/

1613: extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool);

1615: /*
1616:    ilu() under revised new data structure.
1617:    Factored arrays bj and ba are stored as
1618:      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)

1620:    bi=fact->i is an array of size n+1, in which
1621:    bi+
1622:      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1623:      bi[n]:  points to L(n-1,n-1)+1

1625:   bdiag=fact->diag is an array of size n+1,in which
1626:      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1627:      bdiag[n]: points to entry of U(n-1,0)-1

1629:    U(i,:) contains bdiag[i] as its last entry, i.e.,
1630:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1631: */
1634: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1635: {
1636:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
1638:   const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1639:   PetscInt       i,j,k=0,nz,*bi,*bj,*bdiag;
1640:   IS             isicol;

1643:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1644:   MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);
1645:   b    = (Mat_SeqAIJ*)(fact)->data;

1647:   /* allocate matrix arrays for new data structure */
1648:   PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);
1649:   PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));

1651:   b->singlemalloc = PETSC_TRUE;
1652:   if (!b->diag) {
1653:     PetscMalloc1(n+1,&b->diag);
1654:     PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));
1655:   }
1656:   bdiag = b->diag;

1658:   if (n > 0) {
1659:     PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));
1660:   }

1662:   /* set bi and bj with new data structure */
1663:   bi = b->i;
1664:   bj = b->j;

1666:   /* L part */
1667:   bi[0] = 0;
1668:   for (i=0; i<n; i++) {
1669:     nz      = adiag[i] - ai[i];
1670:     bi[i+1] = bi[i] + nz;
1671:     aj      = a->j + ai[i];
1672:     for (j=0; j<nz; j++) {
1673:       /*   *bj = aj[j]; bj++; */
1674:       bj[k++] = aj[j];
1675:     }
1676:   }

1678:   /* U part */
1679:   bdiag[n] = bi[n]-1;
1680:   for (i=n-1; i>=0; i--) {
1681:     nz = ai[i+1] - adiag[i] - 1;
1682:     aj = a->j + adiag[i] + 1;
1683:     for (j=0; j<nz; j++) {
1684:       /*      *bj = aj[j]; bj++; */
1685:       bj[k++] = aj[j];
1686:     }
1687:     /* diag[i] */
1688:     /*    *bj = i; bj++; */
1689:     bj[k++]  = i;
1690:     bdiag[i] = bdiag[i+1] + nz + 1;
1691:   }

1693:   fact->factortype             = MAT_FACTOR_ILU;
1694:   fact->info.factor_mallocs    = 0;
1695:   fact->info.fill_ratio_given  = info->fill;
1696:   fact->info.fill_ratio_needed = 1.0;
1697:   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1698:   MatSeqAIJCheckInode_FactorLU(fact);

1700:   b       = (Mat_SeqAIJ*)(fact)->data;
1701:   b->row  = isrow;
1702:   b->col  = iscol;
1703:   b->icol = isicol;
1704:   PetscMalloc1(fact->rmap->n+1,&b->solve_work);
1705:   PetscObjectReference((PetscObject)isrow);
1706:   PetscObjectReference((PetscObject)iscol);
1707:   return(0);
1708: }

1712: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1713: {
1714:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1715:   IS                 isicol;
1716:   PetscErrorCode     ierr;
1717:   const PetscInt     *r,*ic;
1718:   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1719:   PetscInt           *bi,*cols,nnz,*cols_lvl;
1720:   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1721:   PetscInt           i,levels,diagonal_fill;
1722:   PetscBool          col_identity,row_identity,missing;
1723:   PetscReal          f;
1724:   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1725:   PetscBT            lnkbt;
1726:   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1727:   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1728:   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;

1731:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1732:   MatMissingDiagonal(A,&missing,&i);
1733:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1734: 
1735:   levels = (PetscInt)info->levels;
1736:   ISIdentity(isrow,&row_identity);
1737:   ISIdentity(iscol,&col_identity);
1738:   if (!levels && row_identity && col_identity) {
1739:     /* special case: ilu(0) with natural ordering */
1740:     MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);
1741:     if (a->inode.size) {
1742:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1743:     }
1744:     return(0);
1745:   }

1747:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1748:   ISGetIndices(isrow,&r);
1749:   ISGetIndices(isicol,&ic);

1751:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1752:   PetscMalloc1(n+1,&bi);
1753:   PetscMalloc1(n+1,&bdiag);
1754:   bi[0] = bdiag[0] = 0;
1755:   PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);

1757:   /* create a linked list for storing column indices of the active row */
1758:   nlnk = n + 1;
1759:   PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);

1761:   /* initial FreeSpace size is f*(ai[n]+1) */
1762:   f                 = info->fill;
1763:   diagonal_fill     = (PetscInt)info->diagonal_fill;
1764:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
1765:   current_space     = free_space;
1766:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);
1767:   current_space_lvl = free_space_lvl;
1768:   for (i=0; i<n; i++) {
1769:     nzi = 0;
1770:     /* copy current row into linked list */
1771:     nnz = ai[r[i]+1] - ai[r[i]];
1772:     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1773:     cols   = aj + ai[r[i]];
1774:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1775:     PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1776:     nzi   += nlnk;

1778:     /* make sure diagonal entry is included */
1779:     if (diagonal_fill && lnk[i] == -1) {
1780:       fm = n;
1781:       while (lnk[fm] < i) fm = lnk[fm];
1782:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1783:       lnk[fm]    = i;
1784:       lnk_lvl[i] = 0;
1785:       nzi++; dcount++;
1786:     }

1788:     /* add pivot rows into the active row */
1789:     nzbd = 0;
1790:     prow = lnk[n];
1791:     while (prow < i) {
1792:       nnz      = bdiag[prow];
1793:       cols     = bj_ptr[prow] + nnz + 1;
1794:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1795:       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1796:       PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1797:       nzi     += nlnk;
1798:       prow     = lnk[prow];
1799:       nzbd++;
1800:     }
1801:     bdiag[i] = nzbd;
1802:     bi[i+1]  = bi[i] + nzi;
1803:     /* if free space is not available, make more free space */
1804:     if (current_space->local_remaining<nzi) {
1805:       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */
1806:       PetscFreeSpaceGet(nnz,&current_space);
1807:       PetscFreeSpaceGet(nnz,&current_space_lvl);
1808:       reallocs++;
1809:     }

1811:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1812:     PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1813:     bj_ptr[i]    = current_space->array;
1814:     bjlvl_ptr[i] = current_space_lvl->array;

1816:     /* make sure the active row i has diagonal entry */
1817:     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);

1819:     current_space->array               += nzi;
1820:     current_space->local_used          += nzi;
1821:     current_space->local_remaining     -= nzi;
1822:     current_space_lvl->array           += nzi;
1823:     current_space_lvl->local_used      += nzi;
1824:     current_space_lvl->local_remaining -= nzi;
1825:   }

1827:   ISRestoreIndices(isrow,&r);
1828:   ISRestoreIndices(isicol,&ic);
1829:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1830:   PetscMalloc1(bi[n]+1,&bj);
1831:   PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);

1833:   PetscIncompleteLLDestroy(lnk,lnkbt);
1834:   PetscFreeSpaceDestroy(free_space_lvl);
1835:   PetscFree2(bj_ptr,bjlvl_ptr);

1837: #if defined(PETSC_USE_INFO)
1838:   {
1839:     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1840:     PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
1841:     PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
1842:     PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
1843:     PetscInfo(A,"for best performance.\n");
1844:     if (diagonal_fill) {
1845:       PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);
1846:     }
1847:   }
1848: #endif
1849:   /* put together the new matrix */
1850:   MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
1851:   PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
1852:   b    = (Mat_SeqAIJ*)(fact)->data;

1854:   b->free_a       = PETSC_TRUE;
1855:   b->free_ij      = PETSC_TRUE;
1856:   b->singlemalloc = PETSC_FALSE;

1858:   PetscMalloc1(bdiag[0]+1,&b->a);

1860:   b->j    = bj;
1861:   b->i    = bi;
1862:   b->diag = bdiag;
1863:   b->ilen = 0;
1864:   b->imax = 0;
1865:   b->row  = isrow;
1866:   b->col  = iscol;
1867:   PetscObjectReference((PetscObject)isrow);
1868:   PetscObjectReference((PetscObject)iscol);
1869:   b->icol = isicol;

1871:   PetscMalloc1(n+1,&b->solve_work);
1872:   /* In b structure:  Free imax, ilen, old a, old j.
1873:      Allocate bdiag, solve_work, new a, new j */
1874:   PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
1875:   b->maxnz = b->nz = bdiag[0]+1;

1877:   (fact)->info.factor_mallocs    = reallocs;
1878:   (fact)->info.fill_ratio_given  = f;
1879:   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1880:   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1881:   if (a->inode.size) {
1882:     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1883:   }
1884:   MatSeqAIJCheckInode_FactorLU(fact);
1885:   return(0);
1886: }

1890: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1891: {
1892:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1893:   IS                 isicol;
1894:   PetscErrorCode     ierr;
1895:   const PetscInt     *r,*ic;
1896:   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1897:   PetscInt           *bi,*cols,nnz,*cols_lvl;
1898:   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1899:   PetscInt           i,levels,diagonal_fill;
1900:   PetscBool          col_identity,row_identity;
1901:   PetscReal          f;
1902:   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1903:   PetscBT            lnkbt;
1904:   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1905:   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1906:   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1907:   PetscBool          missing;

1910:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1911:   MatMissingDiagonal(A,&missing,&i);
1912:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);

1914:   f             = info->fill;
1915:   levels        = (PetscInt)info->levels;
1916:   diagonal_fill = (PetscInt)info->diagonal_fill;

1918:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);

1920:   ISIdentity(isrow,&row_identity);
1921:   ISIdentity(iscol,&col_identity);
1922:   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1923:     MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);

1925:     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1926:     if (a->inode.size) {
1927:       (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1928:     }
1929:     fact->factortype               = MAT_FACTOR_ILU;
1930:     (fact)->info.factor_mallocs    = 0;
1931:     (fact)->info.fill_ratio_given  = info->fill;
1932:     (fact)->info.fill_ratio_needed = 1.0;

1934:     b    = (Mat_SeqAIJ*)(fact)->data;
1935:     b->row  = isrow;
1936:     b->col  = iscol;
1937:     b->icol = isicol;
1938:     PetscMalloc1((fact)->rmap->n+1,&b->solve_work);
1939:     PetscObjectReference((PetscObject)isrow);
1940:     PetscObjectReference((PetscObject)iscol);
1941:     return(0);
1942:   }

1944:   ISGetIndices(isrow,&r);
1945:   ISGetIndices(isicol,&ic);

1947:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1948:   PetscMalloc1(n+1,&bi);
1949:   PetscMalloc1(n+1,&bdiag);
1950:   bi[0] = bdiag[0] = 0;

1952:   PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);

1954:   /* create a linked list for storing column indices of the active row */
1955:   nlnk = n + 1;
1956:   PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);

1958:   /* initial FreeSpace size is f*(ai[n]+1) */
1959:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
1960:   current_space     = free_space;
1961:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);
1962:   current_space_lvl = free_space_lvl;

1964:   for (i=0; i<n; i++) {
1965:     nzi = 0;
1966:     /* copy current row into linked list */
1967:     nnz = ai[r[i]+1] - ai[r[i]];
1968:     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1969:     cols   = aj + ai[r[i]];
1970:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1971:     PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1972:     nzi   += nlnk;

1974:     /* make sure diagonal entry is included */
1975:     if (diagonal_fill && lnk[i] == -1) {
1976:       fm = n;
1977:       while (lnk[fm] < i) fm = lnk[fm];
1978:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1979:       lnk[fm]    = i;
1980:       lnk_lvl[i] = 0;
1981:       nzi++; dcount++;
1982:     }

1984:     /* add pivot rows into the active row */
1985:     nzbd = 0;
1986:     prow = lnk[n];
1987:     while (prow < i) {
1988:       nnz      = bdiag[prow];
1989:       cols     = bj_ptr[prow] + nnz + 1;
1990:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1991:       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1992:       PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1993:       nzi     += nlnk;
1994:       prow     = lnk[prow];
1995:       nzbd++;
1996:     }
1997:     bdiag[i] = nzbd;
1998:     bi[i+1]  = bi[i] + nzi;

2000:     /* if free space is not available, make more free space */
2001:     if (current_space->local_remaining<nzi) {
2002:       nnz  = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */
2003:       PetscFreeSpaceGet(nnz,&current_space);
2004:       PetscFreeSpaceGet(nnz,&current_space_lvl);
2005:       reallocs++;
2006:     }

2008:     /* copy data into free_space and free_space_lvl, then initialize lnk */
2009:     PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2010:     bj_ptr[i]    = current_space->array;
2011:     bjlvl_ptr[i] = current_space_lvl->array;

2013:     /* make sure the active row i has diagonal entry */
2014:     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);

2016:     current_space->array               += nzi;
2017:     current_space->local_used          += nzi;
2018:     current_space->local_remaining     -= nzi;
2019:     current_space_lvl->array           += nzi;
2020:     current_space_lvl->local_used      += nzi;
2021:     current_space_lvl->local_remaining -= nzi;
2022:   }

2024:   ISRestoreIndices(isrow,&r);
2025:   ISRestoreIndices(isicol,&ic);

2027:   /* destroy list of free space and other temporary arrays */
2028:   PetscMalloc1(bi[n]+1,&bj);
2029:   PetscFreeSpaceContiguous(&free_space,bj); /* copy free_space -> bj */
2030:   PetscIncompleteLLDestroy(lnk,lnkbt);
2031:   PetscFreeSpaceDestroy(free_space_lvl);
2032:   PetscFree2(bj_ptr,bjlvl_ptr);

2034: #if defined(PETSC_USE_INFO)
2035:   {
2036:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2037:     PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
2038:     PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
2039:     PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
2040:     PetscInfo(A,"for best performance.\n");
2041:     if (diagonal_fill) {
2042:       PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);
2043:     }
2044:   }
2045: #endif

2047:   /* put together the new matrix */
2048:   MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
2049:   PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
2050:   b    = (Mat_SeqAIJ*)(fact)->data;

2052:   b->free_a       = PETSC_TRUE;
2053:   b->free_ij      = PETSC_TRUE;
2054:   b->singlemalloc = PETSC_FALSE;

2056:   PetscMalloc1(bi[n],&b->a);
2057:   b->j = bj;
2058:   b->i = bi;
2059:   for (i=0; i<n; i++) bdiag[i] += bi[i];
2060:   b->diag = bdiag;
2061:   b->ilen = 0;
2062:   b->imax = 0;
2063:   b->row  = isrow;
2064:   b->col  = iscol;
2065:   PetscObjectReference((PetscObject)isrow);
2066:   PetscObjectReference((PetscObject)iscol);
2067:   b->icol = isicol;
2068:   PetscMalloc1(n+1,&b->solve_work);
2069:   /* In b structure:  Free imax, ilen, old a, old j.
2070:      Allocate bdiag, solve_work, new a, new j */
2071:   PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
2072:   b->maxnz = b->nz = bi[n];

2074:   (fact)->info.factor_mallocs    = reallocs;
2075:   (fact)->info.fill_ratio_given  = f;
2076:   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2077:   (fact)->ops->lufactornumeric   =  MatLUFactorNumeric_SeqAIJ_inplace;
2078:   if (a->inode.size) {
2079:     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2080:   }
2081:   return(0);
2082: }

2086: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2087: {
2088:   Mat            C = B;
2089:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2090:   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2091:   IS             ip=b->row,iip = b->icol;
2093:   const PetscInt *rip,*riip;
2094:   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2095:   PetscInt       *ai=a->i,*aj=a->j;
2096:   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2097:   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2098:   PetscBool      perm_identity;
2099:   FactorShiftCtx sctx;
2100:   PetscReal      rs;
2101:   MatScalar      d,*v;

2104:   /* MatPivotSetUp(): initialize shift context sctx */
2105:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

2107:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2108:     sctx.shift_top = info->zeropivot;
2109:     for (i=0; i<mbs; i++) {
2110:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2111:       d  = (aa)[a->diag[i]];
2112:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2113:       v  = aa+ai[i];
2114:       nz = ai[i+1] - ai[i];
2115:       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2116:       if (rs>sctx.shift_top) sctx.shift_top = rs;
2117:     }
2118:     sctx.shift_top *= 1.1;
2119:     sctx.nshift_max = 5;
2120:     sctx.shift_lo   = 0.;
2121:     sctx.shift_hi   = 1.;
2122:   }

2124:   ISGetIndices(ip,&rip);
2125:   ISGetIndices(iip,&riip);

2127:   /* allocate working arrays
2128:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2129:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2130:   */
2131:   PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);

2133:   do {
2134:     sctx.newshift = PETSC_FALSE;

2136:     for (i=0; i<mbs; i++) c2r[i] = mbs;
2137:     if (mbs) il[0] = 0;

2139:     for (k = 0; k<mbs; k++) {
2140:       /* zero rtmp */
2141:       nz    = bi[k+1] - bi[k];
2142:       bjtmp = bj + bi[k];
2143:       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;

2145:       /* load in initial unfactored row */
2146:       bval = ba + bi[k];
2147:       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2148:       for (j = jmin; j < jmax; j++) {
2149:         col = riip[aj[j]];
2150:         if (col >= k) { /* only take upper triangular entry */
2151:           rtmp[col] = aa[j];
2152:           *bval++   = 0.0; /* for in-place factorization */
2153:         }
2154:       }
2155:       /* shift the diagonal of the matrix: ZeropivotApply() */
2156:       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */

2158:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2159:       dk = rtmp[k];
2160:       i  = c2r[k]; /* first row to be added to k_th row  */

2162:       while (i < k) {
2163:         nexti = c2r[i]; /* next row to be added to k_th row */

2165:         /* compute multiplier, update diag(k) and U(i,k) */
2166:         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2167:         uikdi   = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2168:         dk     += uikdi*ba[ili]; /* update diag[k] */
2169:         ba[ili] = uikdi; /* -U(i,k) */

2171:         /* add multiple of row i to k-th row */
2172:         jmin = ili + 1; jmax = bi[i+1];
2173:         if (jmin < jmax) {
2174:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2175:           /* update il and c2r for row i */
2176:           il[i] = jmin;
2177:           j     = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2178:         }
2179:         i = nexti;
2180:       }

2182:       /* copy data into U(k,:) */
2183:       rs   = 0.0;
2184:       jmin = bi[k]; jmax = bi[k+1]-1;
2185:       if (jmin < jmax) {
2186:         for (j=jmin; j<jmax; j++) {
2187:           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2188:         }
2189:         /* add the k-th row into il and c2r */
2190:         il[k] = jmin;
2191:         i     = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2192:       }

2194:       /* MatPivotCheck() */
2195:       sctx.rs = rs;
2196:       sctx.pv = dk;
2197:       MatPivotCheck(B,A,info,&sctx,i);
2198:       if (sctx.newshift) break;
2199:       dk = sctx.pv;

2201:       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2202:     }
2203:   } while (sctx.newshift);

2205:   PetscFree3(rtmp,il,c2r);
2206:   ISRestoreIndices(ip,&rip);
2207:   ISRestoreIndices(iip,&riip);

2209:   ISIdentity(ip,&perm_identity);
2210:   if (perm_identity) {
2211:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2212:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2213:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2214:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2215:   } else {
2216:     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2217:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2218:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2219:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2220:   }

2222:   C->assembled    = PETSC_TRUE;
2223:   C->preallocated = PETSC_TRUE;

2225:   PetscLogFlops(C->rmap->n);

2227:   /* MatPivotView() */
2228:   if (sctx.nshift) {
2229:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2230:       PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
2231:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2232:       PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2233:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2234:       PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
2235:     }
2236:   }
2237:   return(0);
2238: }

2242: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2243: {
2244:   Mat            C = B;
2245:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2246:   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2247:   IS             ip=b->row,iip = b->icol;
2249:   const PetscInt *rip,*riip;
2250:   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2251:   PetscInt       *ai=a->i,*aj=a->j;
2252:   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2253:   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2254:   PetscBool      perm_identity;
2255:   FactorShiftCtx sctx;
2256:   PetscReal      rs;
2257:   MatScalar      d,*v;

2260:   /* MatPivotSetUp(): initialize shift context sctx */
2261:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

2263:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2264:     sctx.shift_top = info->zeropivot;
2265:     for (i=0; i<mbs; i++) {
2266:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2267:       d  = (aa)[a->diag[i]];
2268:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2269:       v  = aa+ai[i];
2270:       nz = ai[i+1] - ai[i];
2271:       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2272:       if (rs>sctx.shift_top) sctx.shift_top = rs;
2273:     }
2274:     sctx.shift_top *= 1.1;
2275:     sctx.nshift_max = 5;
2276:     sctx.shift_lo   = 0.;
2277:     sctx.shift_hi   = 1.;
2278:   }

2280:   ISGetIndices(ip,&rip);
2281:   ISGetIndices(iip,&riip);

2283:   /* initialization */
2284:   PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);

2286:   do {
2287:     sctx.newshift = PETSC_FALSE;

2289:     for (i=0; i<mbs; i++) jl[i] = mbs;
2290:     il[0] = 0;

2292:     for (k = 0; k<mbs; k++) {
2293:       /* zero rtmp */
2294:       nz    = bi[k+1] - bi[k];
2295:       bjtmp = bj + bi[k];
2296:       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;

2298:       bval = ba + bi[k];
2299:       /* initialize k-th row by the perm[k]-th row of A */
2300:       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2301:       for (j = jmin; j < jmax; j++) {
2302:         col = riip[aj[j]];
2303:         if (col >= k) { /* only take upper triangular entry */
2304:           rtmp[col] = aa[j];
2305:           *bval++   = 0.0; /* for in-place factorization */
2306:         }
2307:       }
2308:       /* shift the diagonal of the matrix */
2309:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

2311:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2312:       dk = rtmp[k];
2313:       i  = jl[k]; /* first row to be added to k_th row  */

2315:       while (i < k) {
2316:         nexti = jl[i]; /* next row to be added to k_th row */

2318:         /* compute multiplier, update diag(k) and U(i,k) */
2319:         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2320:         uikdi   = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2321:         dk     += uikdi*ba[ili];
2322:         ba[ili] = uikdi; /* -U(i,k) */

2324:         /* add multiple of row i to k-th row */
2325:         jmin = ili + 1; jmax = bi[i+1];
2326:         if (jmin < jmax) {
2327:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2328:           /* update il and jl for row i */
2329:           il[i] = jmin;
2330:           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2331:         }
2332:         i = nexti;
2333:       }

2335:       /* shift the diagonals when zero pivot is detected */
2336:       /* compute rs=sum of abs(off-diagonal) */
2337:       rs   = 0.0;
2338:       jmin = bi[k]+1;
2339:       nz   = bi[k+1] - jmin;
2340:       bcol = bj + jmin;
2341:       for (j=0; j<nz; j++) {
2342:         rs += PetscAbsScalar(rtmp[bcol[j]]);
2343:       }

2345:       sctx.rs = rs;
2346:       sctx.pv = dk;
2347:       MatPivotCheck(B,A,info,&sctx,k);
2348:       if (sctx.newshift) break;
2349:       dk = sctx.pv;

2351:       /* copy data into U(k,:) */
2352:       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2353:       jmin      = bi[k]+1; jmax = bi[k+1];
2354:       if (jmin < jmax) {
2355:         for (j=jmin; j<jmax; j++) {
2356:           col = bj[j]; ba[j] = rtmp[col];
2357:         }
2358:         /* add the k-th row into il and jl */
2359:         il[k] = jmin;
2360:         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2361:       }
2362:     }
2363:   } while (sctx.newshift);

2365:   PetscFree3(rtmp,il,jl);
2366:   ISRestoreIndices(ip,&rip);
2367:   ISRestoreIndices(iip,&riip);

2369:   ISIdentity(ip,&perm_identity);
2370:   if (perm_identity) {
2371:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2372:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2373:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2374:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2375:   } else {
2376:     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2377:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2378:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2379:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2380:   }

2382:   C->assembled    = PETSC_TRUE;
2383:   C->preallocated = PETSC_TRUE;

2385:   PetscLogFlops(C->rmap->n);
2386:   if (sctx.nshift) {
2387:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2388:       PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2389:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2390:       PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2391:     }
2392:   }
2393:   return(0);
2394: }

2396: /*
2397:    icc() under revised new data structure.
2398:    Factored arrays bj and ba are stored as
2399:      U(0,:),...,U(i,:),U(n-1,:)

2401:    ui=fact->i is an array of size n+1, in which
2402:    ui+
2403:      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2404:      ui[n]:  points to U(n-1,n-1)+1

2406:   udiag=fact->diag is an array of size n,in which
2407:      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1

2409:    U(i,:) contains udiag[i] as its last entry, i.e.,
2410:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2411: */

2415: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2416: {
2417:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2418:   Mat_SeqSBAIJ       *b;
2419:   PetscErrorCode     ierr;
2420:   PetscBool          perm_identity,missing;
2421:   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2422:   const PetscInt     *rip,*riip;
2423:   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2424:   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2425:   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2426:   PetscReal          fill          =info->fill,levels=info->levels;
2427:   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2428:   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2429:   PetscBT            lnkbt;
2430:   IS                 iperm;

2433:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2434:   MatMissingDiagonal(A,&missing,&d);
2435:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2436:   ISIdentity(perm,&perm_identity);
2437:   ISInvertPermutation(perm,PETSC_DECIDE,&iperm);

2439:   PetscMalloc1(am+1,&ui);
2440:   PetscMalloc1(am+1,&udiag);
2441:   ui[0] = 0;

2443:   /* ICC(0) without matrix ordering: simply rearrange column indices */
2444:   if (!levels && perm_identity) {
2445:     for (i=0; i<am; i++) {
2446:       ncols    = ai[i+1] - a->diag[i];
2447:       ui[i+1]  = ui[i] + ncols;
2448:       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2449:     }
2450:     PetscMalloc1(ui[am]+1,&uj);
2451:     cols = uj;
2452:     for (i=0; i<am; i++) {
2453:       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2454:       ncols = ai[i+1] - a->diag[i] -1;
2455:       for (j=0; j<ncols; j++) *cols++ = aj[j];
2456:       *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2457:     }
2458:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2459:     ISGetIndices(iperm,&riip);
2460:     ISGetIndices(perm,&rip);

2462:     /* initialization */
2463:     PetscMalloc1(am+1,&ajtmp);

2465:     /* jl: linked list for storing indices of the pivot rows
2466:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2467:     PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2468:     for (i=0; i<am; i++) {
2469:       jl[i] = am; il[i] = 0;
2470:     }

2472:     /* create and initialize a linked list for storing column indices of the active row k */
2473:     nlnk = am + 1;
2474:     PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);

2476:     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2477:     PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);
2478:     current_space     = free_space;
2479:     PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);
2480:     current_space_lvl = free_space_lvl;

2482:     for (k=0; k<am; k++) {  /* for each active row k */
2483:       /* initialize lnk by the column indices of row rip[k] of A */
2484:       nzk   = 0;
2485:       ncols = ai[rip[k]+1] - ai[rip[k]];
2486:       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2487:       ncols_upper = 0;
2488:       for (j=0; j<ncols; j++) {
2489:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2490:         if (riip[i] >= k) { /* only take upper triangular entry */
2491:           ajtmp[ncols_upper] = i;
2492:           ncols_upper++;
2493:         }
2494:       }
2495:       PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2496:       nzk += nlnk;

2498:       /* update lnk by computing fill-in for each pivot row to be merged in */
2499:       prow = jl[k]; /* 1st pivot row */

2501:       while (prow < k) {
2502:         nextprow = jl[prow];

2504:         /* merge prow into k-th row */
2505:         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2506:         jmax  = ui[prow+1];
2507:         ncols = jmax-jmin;
2508:         i     = jmin - ui[prow];
2509:         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2510:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2511:         j     = *(uj - 1);
2512:         PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2513:         nzk  += nlnk;

2515:         /* update il and jl for prow */
2516:         if (jmin < jmax) {
2517:           il[prow] = jmin;
2518:           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2519:         }
2520:         prow = nextprow;
2521:       }

2523:       /* if free space is not available, make more free space */
2524:       if (current_space->local_remaining<nzk) {
2525:         i    = am - k + 1; /* num of unfactored rows */
2526:         i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2527:         PetscFreeSpaceGet(i,&current_space);
2528:         PetscFreeSpaceGet(i,&current_space_lvl);
2529:         reallocs++;
2530:       }

2532:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2533:       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2534:       PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);

2536:       /* add the k-th row into il and jl */
2537:       if (nzk > 1) {
2538:         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2539:         jl[k] = jl[i]; jl[i] = k;
2540:         il[k] = ui[k] + 1;
2541:       }
2542:       uj_ptr[k]     = current_space->array;
2543:       uj_lvl_ptr[k] = current_space_lvl->array;

2545:       current_space->array           += nzk;
2546:       current_space->local_used      += nzk;
2547:       current_space->local_remaining -= nzk;

2549:       current_space_lvl->array           += nzk;
2550:       current_space_lvl->local_used      += nzk;
2551:       current_space_lvl->local_remaining -= nzk;

2553:       ui[k+1] = ui[k] + nzk;
2554:     }

2556:     ISRestoreIndices(perm,&rip);
2557:     ISRestoreIndices(iperm,&riip);
2558:     PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2559:     PetscFree(ajtmp);

2561:     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2562:     PetscMalloc1(ui[am]+1,&uj);
2563:     PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor  */
2564:     PetscIncompleteLLDestroy(lnk,lnkbt);
2565:     PetscFreeSpaceDestroy(free_space_lvl);

2567:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2569:   /* put together the new matrix in MATSEQSBAIJ format */
2570:   b               = (Mat_SeqSBAIJ*)(fact)->data;
2571:   b->singlemalloc = PETSC_FALSE;

2573:   PetscMalloc1(ui[am]+1,&b->a);

2575:   b->j             = uj;
2576:   b->i             = ui;
2577:   b->diag          = udiag;
2578:   b->free_diag     = PETSC_TRUE;
2579:   b->ilen          = 0;
2580:   b->imax          = 0;
2581:   b->row           = perm;
2582:   b->col           = perm;
2583:   PetscObjectReference((PetscObject)perm);
2584:   PetscObjectReference((PetscObject)perm);
2585:   b->icol          = iperm;
2586:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2588:   PetscMalloc1(am+1,&b->solve_work);
2589:   PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));

2591:   b->maxnz   = b->nz = ui[am];
2592:   b->free_a  = PETSC_TRUE;
2593:   b->free_ij = PETSC_TRUE;

2595:   fact->info.factor_mallocs   = reallocs;
2596:   fact->info.fill_ratio_given = fill;
2597:   if (ai[am] != 0) {
2598:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2599:     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2600:   } else {
2601:     fact->info.fill_ratio_needed = 0.0;
2602:   }
2603: #if defined(PETSC_USE_INFO)
2604:   if (ai[am] != 0) {
2605:     PetscReal af = fact->info.fill_ratio_needed;
2606:     PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2607:     PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2608:     PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2609:   } else {
2610:     PetscInfo(A,"Empty matrix.\n");
2611:   }
2612: #endif
2613:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2614:   return(0);
2615: }

2619: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2620: {
2621:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2622:   Mat_SeqSBAIJ       *b;
2623:   PetscErrorCode     ierr;
2624:   PetscBool          perm_identity,missing;
2625:   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2626:   const PetscInt     *rip,*riip;
2627:   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2628:   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2629:   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2630:   PetscReal          fill          =info->fill,levels=info->levels;
2631:   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2632:   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2633:   PetscBT            lnkbt;
2634:   IS                 iperm;

2637:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2638:   MatMissingDiagonal(A,&missing,&d);
2639:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2640:   ISIdentity(perm,&perm_identity);
2641:   ISInvertPermutation(perm,PETSC_DECIDE,&iperm);

2643:   PetscMalloc1(am+1,&ui);
2644:   PetscMalloc1(am+1,&udiag);
2645:   ui[0] = 0;

2647:   /* ICC(0) without matrix ordering: simply copies fill pattern */
2648:   if (!levels && perm_identity) {

2650:     for (i=0; i<am; i++) {
2651:       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2652:       udiag[i] = ui[i];
2653:     }
2654:     PetscMalloc1(ui[am]+1,&uj);
2655:     cols = uj;
2656:     for (i=0; i<am; i++) {
2657:       aj    = a->j + a->diag[i];
2658:       ncols = ui[i+1] - ui[i];
2659:       for (j=0; j<ncols; j++) *cols++ = *aj++;
2660:     }
2661:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2662:     ISGetIndices(iperm,&riip);
2663:     ISGetIndices(perm,&rip);

2665:     /* initialization */
2666:     PetscMalloc1(am+1,&ajtmp);

2668:     /* jl: linked list for storing indices of the pivot rows
2669:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2670:     PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2671:     for (i=0; i<am; i++) {
2672:       jl[i] = am; il[i] = 0;
2673:     }

2675:     /* create and initialize a linked list for storing column indices of the active row k */
2676:     nlnk = am + 1;
2677:     PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);

2679:     /* initial FreeSpace size is fill*(ai[am]+1) */
2680:     PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);
2681:     current_space     = free_space;
2682:     PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);
2683:     current_space_lvl = free_space_lvl;

2685:     for (k=0; k<am; k++) {  /* for each active row k */
2686:       /* initialize lnk by the column indices of row rip[k] of A */
2687:       nzk   = 0;
2688:       ncols = ai[rip[k]+1] - ai[rip[k]];
2689:       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2690:       ncols_upper = 0;
2691:       for (j=0; j<ncols; j++) {
2692:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2693:         if (riip[i] >= k) { /* only take upper triangular entry */
2694:           ajtmp[ncols_upper] = i;
2695:           ncols_upper++;
2696:         }
2697:       }
2698:       PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2699:       nzk += nlnk;

2701:       /* update lnk by computing fill-in for each pivot row to be merged in */
2702:       prow = jl[k]; /* 1st pivot row */

2704:       while (prow < k) {
2705:         nextprow = jl[prow];

2707:         /* merge prow into k-th row */
2708:         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2709:         jmax  = ui[prow+1];
2710:         ncols = jmax-jmin;
2711:         i     = jmin - ui[prow];
2712:         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2713:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2714:         j     = *(uj - 1);
2715:         PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2716:         nzk  += nlnk;

2718:         /* update il and jl for prow */
2719:         if (jmin < jmax) {
2720:           il[prow] = jmin;
2721:           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2722:         }
2723:         prow = nextprow;
2724:       }

2726:       /* if free space is not available, make more free space */
2727:       if (current_space->local_remaining<nzk) {
2728:         i    = am - k + 1; /* num of unfactored rows */
2729:         i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2730:         PetscFreeSpaceGet(i,&current_space);
2731:         PetscFreeSpaceGet(i,&current_space_lvl);
2732:         reallocs++;
2733:       }

2735:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2736:       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2737:       PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);

2739:       /* add the k-th row into il and jl */
2740:       if (nzk > 1) {
2741:         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2742:         jl[k] = jl[i]; jl[i] = k;
2743:         il[k] = ui[k] + 1;
2744:       }
2745:       uj_ptr[k]     = current_space->array;
2746:       uj_lvl_ptr[k] = current_space_lvl->array;

2748:       current_space->array           += nzk;
2749:       current_space->local_used      += nzk;
2750:       current_space->local_remaining -= nzk;

2752:       current_space_lvl->array           += nzk;
2753:       current_space_lvl->local_used      += nzk;
2754:       current_space_lvl->local_remaining -= nzk;

2756:       ui[k+1] = ui[k] + nzk;
2757:     }

2759: #if defined(PETSC_USE_INFO)
2760:     if (ai[am] != 0) {
2761:       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2762:       PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2763:       PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2764:       PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2765:     } else {
2766:       PetscInfo(A,"Empty matrix.\n");
2767:     }
2768: #endif

2770:     ISRestoreIndices(perm,&rip);
2771:     ISRestoreIndices(iperm,&riip);
2772:     PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2773:     PetscFree(ajtmp);

2775:     /* destroy list of free space and other temporary array(s) */
2776:     PetscMalloc1(ui[am]+1,&uj);
2777:     PetscFreeSpaceContiguous(&free_space,uj);
2778:     PetscIncompleteLLDestroy(lnk,lnkbt);
2779:     PetscFreeSpaceDestroy(free_space_lvl);

2781:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2783:   /* put together the new matrix in MATSEQSBAIJ format */

2785:   b               = (Mat_SeqSBAIJ*)fact->data;
2786:   b->singlemalloc = PETSC_FALSE;

2788:   PetscMalloc1(ui[am]+1,&b->a);

2790:   b->j         = uj;
2791:   b->i         = ui;
2792:   b->diag      = udiag;
2793:   b->free_diag = PETSC_TRUE;
2794:   b->ilen      = 0;
2795:   b->imax      = 0;
2796:   b->row       = perm;
2797:   b->col       = perm;

2799:   PetscObjectReference((PetscObject)perm);
2800:   PetscObjectReference((PetscObject)perm);

2802:   b->icol          = iperm;
2803:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2804:   PetscMalloc1(am+1,&b->solve_work);
2805:   PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
2806:   b->maxnz         = b->nz = ui[am];
2807:   b->free_a        = PETSC_TRUE;
2808:   b->free_ij       = PETSC_TRUE;

2810:   fact->info.factor_mallocs   = reallocs;
2811:   fact->info.fill_ratio_given = fill;
2812:   if (ai[am] != 0) {
2813:     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2814:   } else {
2815:     fact->info.fill_ratio_needed = 0.0;
2816:   }
2817:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2818:   return(0);
2819: }

2823: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2824: {
2825:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2826:   Mat_SeqSBAIJ       *b;
2827:   PetscErrorCode     ierr;
2828:   PetscBool          perm_identity,missing;
2829:   PetscReal          fill = info->fill;
2830:   const PetscInt     *rip,*riip;
2831:   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2832:   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2833:   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2834:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2835:   PetscBT            lnkbt;
2836:   IS                 iperm;

2839:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2840:   MatMissingDiagonal(A,&missing,&i);
2841:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);

2843:   /* check whether perm is the identity mapping */
2844:   ISIdentity(perm,&perm_identity);
2845:   ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2846:   ISGetIndices(iperm,&riip);
2847:   ISGetIndices(perm,&rip);

2849:   /* initialization */
2850:   PetscMalloc1(am+1,&ui);
2851:   PetscMalloc1(am+1,&udiag);
2852:   ui[0] = 0;

2854:   /* jl: linked list for storing indices of the pivot rows
2855:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2856:   PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2857:   for (i=0; i<am; i++) {
2858:     jl[i] = am; il[i] = 0;
2859:   }

2861:   /* create and initialize a linked list for storing column indices of the active row k */
2862:   nlnk = am + 1;
2863:   PetscLLCreate(am,am,nlnk,lnk,lnkbt);

2865:   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2866:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);
2867:   current_space = free_space;

2869:   for (k=0; k<am; k++) {  /* for each active row k */
2870:     /* initialize lnk by the column indices of row rip[k] of A */
2871:     nzk   = 0;
2872:     ncols = ai[rip[k]+1] - ai[rip[k]];
2873:     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2874:     ncols_upper = 0;
2875:     for (j=0; j<ncols; j++) {
2876:       i = riip[*(aj + ai[rip[k]] + j)];
2877:       if (i >= k) { /* only take upper triangular entry */
2878:         cols[ncols_upper] = i;
2879:         ncols_upper++;
2880:       }
2881:     }
2882:     PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2883:     nzk += nlnk;

2885:     /* update lnk by computing fill-in for each pivot row to be merged in */
2886:     prow = jl[k]; /* 1st pivot row */

2888:     while (prow < k) {
2889:       nextprow = jl[prow];
2890:       /* merge prow into k-th row */
2891:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2892:       jmax   = ui[prow+1];
2893:       ncols  = jmax-jmin;
2894:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2895:       PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
2896:       nzk   += nlnk;

2898:       /* update il and jl for prow */
2899:       if (jmin < jmax) {
2900:         il[prow] = jmin;
2901:         j        = *uj_ptr;
2902:         jl[prow] = jl[j];
2903:         jl[j]    = prow;
2904:       }
2905:       prow = nextprow;
2906:     }

2908:     /* if free space is not available, make more free space */
2909:     if (current_space->local_remaining<nzk) {
2910:       i    = am - k + 1; /* num of unfactored rows */
2911:       i    = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2912:       PetscFreeSpaceGet(i,&current_space);
2913:       reallocs++;
2914:     }

2916:     /* copy data into free space, then initialize lnk */
2917:     PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);

2919:     /* add the k-th row into il and jl */
2920:     if (nzk > 1) {
2921:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2922:       jl[k] = jl[i]; jl[i] = k;
2923:       il[k] = ui[k] + 1;
2924:     }
2925:     ui_ptr[k] = current_space->array;

2927:     current_space->array           += nzk;
2928:     current_space->local_used      += nzk;
2929:     current_space->local_remaining -= nzk;

2931:     ui[k+1] = ui[k] + nzk;
2932:   }

2934:   ISRestoreIndices(perm,&rip);
2935:   ISRestoreIndices(iperm,&riip);
2936:   PetscFree4(ui_ptr,jl,il,cols);

2938:   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2939:   PetscMalloc1(ui[am]+1,&uj);
2940:   PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2941:   PetscLLDestroy(lnk,lnkbt);

2943:   /* put together the new matrix in MATSEQSBAIJ format */

2945:   b               = (Mat_SeqSBAIJ*)fact->data;
2946:   b->singlemalloc = PETSC_FALSE;
2947:   b->free_a       = PETSC_TRUE;
2948:   b->free_ij      = PETSC_TRUE;

2950:   PetscMalloc1(ui[am]+1,&b->a);

2952:   b->j         = uj;
2953:   b->i         = ui;
2954:   b->diag      = udiag;
2955:   b->free_diag = PETSC_TRUE;
2956:   b->ilen      = 0;
2957:   b->imax      = 0;
2958:   b->row       = perm;
2959:   b->col       = perm;

2961:   PetscObjectReference((PetscObject)perm);
2962:   PetscObjectReference((PetscObject)perm);

2964:   b->icol          = iperm;
2965:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2967:   PetscMalloc1(am+1,&b->solve_work);
2968:   PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));

2970:   b->maxnz = b->nz = ui[am];

2972:   fact->info.factor_mallocs   = reallocs;
2973:   fact->info.fill_ratio_given = fill;
2974:   if (ai[am] != 0) {
2975:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2976:     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2977:   } else {
2978:     fact->info.fill_ratio_needed = 0.0;
2979:   }
2980: #if defined(PETSC_USE_INFO)
2981:   if (ai[am] != 0) {
2982:     PetscReal af = fact->info.fill_ratio_needed;
2983:     PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2984:     PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2985:     PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2986:   } else {
2987:     PetscInfo(A,"Empty matrix.\n");
2988:   }
2989: #endif
2990:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2991:   return(0);
2992: }

2996: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2997: {
2998:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2999:   Mat_SeqSBAIJ       *b;
3000:   PetscErrorCode     ierr;
3001:   PetscBool          perm_identity,missing;
3002:   PetscReal          fill = info->fill;
3003:   const PetscInt     *rip,*riip;
3004:   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
3005:   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
3006:   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
3007:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
3008:   PetscBT            lnkbt;
3009:   IS                 iperm;

3012:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
3013:   MatMissingDiagonal(A,&missing,&i);
3014:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);

3016:   /* check whether perm is the identity mapping */
3017:   ISIdentity(perm,&perm_identity);
3018:   ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
3019:   ISGetIndices(iperm,&riip);
3020:   ISGetIndices(perm,&rip);

3022:   /* initialization */
3023:   PetscMalloc1(am+1,&ui);
3024:   ui[0] = 0;

3026:   /* jl: linked list for storing indices of the pivot rows
3027:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3028:   PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
3029:   for (i=0; i<am; i++) {
3030:     jl[i] = am; il[i] = 0;
3031:   }

3033:   /* create and initialize a linked list for storing column indices of the active row k */
3034:   nlnk = am + 1;
3035:   PetscLLCreate(am,am,nlnk,lnk,lnkbt);

3037:   /* initial FreeSpace size is fill*(ai[am]+1) */
3038:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);
3039:   current_space = free_space;

3041:   for (k=0; k<am; k++) {  /* for each active row k */
3042:     /* initialize lnk by the column indices of row rip[k] of A */
3043:     nzk   = 0;
3044:     ncols = ai[rip[k]+1] - ai[rip[k]];
3045:     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3046:     ncols_upper = 0;
3047:     for (j=0; j<ncols; j++) {
3048:       i = riip[*(aj + ai[rip[k]] + j)];
3049:       if (i >= k) { /* only take upper triangular entry */
3050:         cols[ncols_upper] = i;
3051:         ncols_upper++;
3052:       }
3053:     }
3054:     PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
3055:     nzk += nlnk;

3057:     /* update lnk by computing fill-in for each pivot row to be merged in */
3058:     prow = jl[k]; /* 1st pivot row */

3060:     while (prow < k) {
3061:       nextprow = jl[prow];
3062:       /* merge prow into k-th row */
3063:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3064:       jmax   = ui[prow+1];
3065:       ncols  = jmax-jmin;
3066:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3067:       PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
3068:       nzk   += nlnk;

3070:       /* update il and jl for prow */
3071:       if (jmin < jmax) {
3072:         il[prow] = jmin;
3073:         j        = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3074:       }
3075:       prow = nextprow;
3076:     }

3078:     /* if free space is not available, make more free space */
3079:     if (current_space->local_remaining<nzk) {
3080:       i    = am - k + 1; /* num of unfactored rows */
3081:       i    = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3082:       PetscFreeSpaceGet(i,&current_space);
3083:       reallocs++;
3084:     }

3086:     /* copy data into free space, then initialize lnk */
3087:     PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);

3089:     /* add the k-th row into il and jl */
3090:     if (nzk-1 > 0) {
3091:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3092:       jl[k] = jl[i]; jl[i] = k;
3093:       il[k] = ui[k] + 1;
3094:     }
3095:     ui_ptr[k] = current_space->array;

3097:     current_space->array           += nzk;
3098:     current_space->local_used      += nzk;
3099:     current_space->local_remaining -= nzk;

3101:     ui[k+1] = ui[k] + nzk;
3102:   }

3104: #if defined(PETSC_USE_INFO)
3105:   if (ai[am] != 0) {
3106:     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3107:     PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
3108:     PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
3109:     PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
3110:   } else {
3111:     PetscInfo(A,"Empty matrix.\n");
3112:   }
3113: #endif

3115:   ISRestoreIndices(perm,&rip);
3116:   ISRestoreIndices(iperm,&riip);
3117:   PetscFree4(ui_ptr,jl,il,cols);

3119:   /* destroy list of free space and other temporary array(s) */
3120:   PetscMalloc1(ui[am]+1,&uj);
3121:   PetscFreeSpaceContiguous(&free_space,uj);
3122:   PetscLLDestroy(lnk,lnkbt);

3124:   /* put together the new matrix in MATSEQSBAIJ format */

3126:   b               = (Mat_SeqSBAIJ*)fact->data;
3127:   b->singlemalloc = PETSC_FALSE;
3128:   b->free_a       = PETSC_TRUE;
3129:   b->free_ij      = PETSC_TRUE;

3131:   PetscMalloc1(ui[am]+1,&b->a);

3133:   b->j    = uj;
3134:   b->i    = ui;
3135:   b->diag = 0;
3136:   b->ilen = 0;
3137:   b->imax = 0;
3138:   b->row  = perm;
3139:   b->col  = perm;

3141:   PetscObjectReference((PetscObject)perm);
3142:   PetscObjectReference((PetscObject)perm);

3144:   b->icol          = iperm;
3145:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

3147:   PetscMalloc1(am+1,&b->solve_work);
3148:   PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
3149:   b->maxnz = b->nz = ui[am];

3151:   fact->info.factor_mallocs   = reallocs;
3152:   fact->info.fill_ratio_given = fill;
3153:   if (ai[am] != 0) {
3154:     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3155:   } else {
3156:     fact->info.fill_ratio_needed = 0.0;
3157:   }
3158:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3159:   return(0);
3160: }

3164: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3165: {
3166:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3167:   PetscErrorCode    ierr;
3168:   PetscInt          n   = A->rmap->n;
3169:   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3170:   PetscScalar       *x,sum;
3171:   const PetscScalar *b;
3172:   const MatScalar   *aa = a->a,*v;
3173:   PetscInt          i,nz;

3176:   if (!n) return(0);

3178:   VecGetArrayRead(bb,&b);
3179:   VecGetArray(xx,&x);

3181:   /* forward solve the lower triangular */
3182:   x[0] = b[0];
3183:   v    = aa;
3184:   vi   = aj;
3185:   for (i=1; i<n; i++) {
3186:     nz  = ai[i+1] - ai[i];
3187:     sum = b[i];
3188:     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3189:     v   += nz;
3190:     vi  += nz;
3191:     x[i] = sum;
3192:   }

3194:   /* backward solve the upper triangular */
3195:   for (i=n-1; i>=0; i--) {
3196:     v   = aa + adiag[i+1] + 1;
3197:     vi  = aj + adiag[i+1] + 1;
3198:     nz  = adiag[i] - adiag[i+1]-1;
3199:     sum = x[i];
3200:     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3201:     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3202:   }

3204:   PetscLogFlops(2.0*a->nz - A->cmap->n);
3205:   VecRestoreArrayRead(bb,&b);
3206:   VecRestoreArray(xx,&x);
3207:   return(0);
3208: }

3212: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3213: {
3214:   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
3215:   IS                iscol = a->col,isrow = a->row;
3216:   PetscErrorCode    ierr;
3217:   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3218:   const PetscInt    *rout,*cout,*r,*c;
3219:   PetscScalar       *x,*tmp,sum;
3220:   const PetscScalar *b;
3221:   const MatScalar   *aa = a->a,*v;

3224:   if (!n) return(0);

3226:   VecGetArrayRead(bb,&b);
3227:   VecGetArray(xx,&x);
3228:   tmp  = a->solve_work;

3230:   ISGetIndices(isrow,&rout); r = rout;
3231:   ISGetIndices(iscol,&cout); c = cout;

3233:   /* forward solve the lower triangular */
3234:   tmp[0] = b[r[0]];
3235:   v      = aa;
3236:   vi     = aj;
3237:   for (i=1; i<n; i++) {
3238:     nz  = ai[i+1] - ai[i];
3239:     sum = b[r[i]];
3240:     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3241:     tmp[i] = sum;
3242:     v     += nz; vi += nz;
3243:   }

3245:   /* backward solve the upper triangular */
3246:   for (i=n-1; i>=0; i--) {
3247:     v   = aa + adiag[i+1]+1;
3248:     vi  = aj + adiag[i+1]+1;
3249:     nz  = adiag[i]-adiag[i+1]-1;
3250:     sum = tmp[i];
3251:     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3252:     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3253:   }

3255:   ISRestoreIndices(isrow,&rout);
3256:   ISRestoreIndices(iscol,&cout);
3257:   VecRestoreArrayRead(bb,&b);
3258:   VecRestoreArray(xx,&x);
3259:   PetscLogFlops(2*a->nz - A->cmap->n);
3260:   return(0);
3261: }

3265: /*
3266:     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3267: */
3268: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3269: {
3270:   Mat            B = *fact;
3271:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b;
3272:   IS             isicol;
3274:   const PetscInt *r,*ic;
3275:   PetscInt       i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3276:   PetscInt       *bi,*bj,*bdiag,*bdiag_rev;
3277:   PetscInt       row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3278:   PetscInt       nlnk,*lnk;
3279:   PetscBT        lnkbt;
3280:   PetscBool      row_identity,icol_identity;
3281:   MatScalar      *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3282:   const PetscInt *ics;
3283:   PetscInt       j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3284:   PetscReal      dt     =info->dt,shift=info->shiftamount;
3285:   PetscInt       dtcount=(PetscInt)info->dtcount,nnz_max;
3286:   PetscBool      missing;

3289:   if (dt      == PETSC_DEFAULT) dt = 0.005;
3290:   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);

3292:   /* ------- symbolic factorization, can be reused ---------*/
3293:   MatMissingDiagonal(A,&missing,&i);
3294:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3295:   adiag=a->diag;

3297:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);

3299:   /* bdiag is location of diagonal in factor */
3300:   PetscMalloc1(n+1,&bdiag);     /* becomes b->diag */
3301:   PetscMalloc1(n+1,&bdiag_rev); /* temporary */

3303:   /* allocate row pointers bi */
3304:   PetscMalloc1(2*n+2,&bi);

3306:   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3307:   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3308:   nnz_max = ai[n]+2*n*dtcount+2;

3310:   PetscMalloc1(nnz_max+1,&bj);
3311:   PetscMalloc1(nnz_max+1,&ba);

3313:   /* put together the new matrix */
3314:   MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
3315:   PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
3316:   b    = (Mat_SeqAIJ*)B->data;

3318:   b->free_a       = PETSC_TRUE;
3319:   b->free_ij      = PETSC_TRUE;
3320:   b->singlemalloc = PETSC_FALSE;

3322:   b->a    = ba;
3323:   b->j    = bj;
3324:   b->i    = bi;
3325:   b->diag = bdiag;
3326:   b->ilen = 0;
3327:   b->imax = 0;
3328:   b->row  = isrow;
3329:   b->col  = iscol;
3330:   PetscObjectReference((PetscObject)isrow);
3331:   PetscObjectReference((PetscObject)iscol);
3332:   b->icol = isicol;

3334:   PetscMalloc1(n+1,&b->solve_work);
3335:   PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
3336:   b->maxnz = nnz_max;

3338:   B->factortype            = MAT_FACTOR_ILUDT;
3339:   B->info.factor_mallocs   = 0;
3340:   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3341:   /* ------- end of symbolic factorization ---------*/

3343:   ISGetIndices(isrow,&r);
3344:   ISGetIndices(isicol,&ic);
3345:   ics  = ic;

3347:   /* linked list for storing column indices of the active row */
3348:   nlnk = n + 1;
3349:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

3351:   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3352:   PetscMalloc2(n,&im,n,&jtmp);
3353:   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3354:   PetscMalloc2(n,&rtmp,n,&vtmp);
3355:   PetscMemzero(rtmp,n*sizeof(MatScalar));

3357:   bi[0]        = 0;
3358:   bdiag[0]     = nnz_max-1; /* location of diag[0] in factor B */
3359:   bdiag_rev[n] = bdiag[0];
3360:   bi[2*n+1]    = bdiag[0]+1; /* endof bj and ba array */
3361:   for (i=0; i<n; i++) {
3362:     /* copy initial fill into linked list */
3363:     nzi = ai[r[i]+1] - ai[r[i]];
3364:     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3365:     nzi_al = adiag[r[i]] - ai[r[i]];
3366:     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3367:     ajtmp  = aj + ai[r[i]];
3368:     PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);

3370:     /* load in initial (unfactored row) */
3371:     aatmp = a->a + ai[r[i]];
3372:     for (j=0; j<nzi; j++) {
3373:       rtmp[ics[*ajtmp++]] = *aatmp++;
3374:     }

3376:     /* add pivot rows into linked list */
3377:     row = lnk[n];
3378:     while (row < i) {
3379:       nzi_bl = bi[row+1] - bi[row] + 1;
3380:       bjtmp  = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3381:       PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);
3382:       nzi   += nlnk;
3383:       row    = lnk[row];
3384:     }

3386:     /* copy data from lnk into jtmp, then initialize lnk */
3387:     PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);

3389:     /* numerical factorization */
3390:     bjtmp = jtmp;
3391:     row   = *bjtmp++; /* 1st pivot row */
3392:     while (row < i) {
3393:       pc         = rtmp + row;
3394:       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3395:       multiplier = (*pc) * (*pv);
3396:       *pc        = multiplier;
3397:       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3398:         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3399:         pv = ba + bdiag[row+1] + 1;
3400:         /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3401:         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3402:         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3403:         PetscLogFlops(1+2*nz);
3404:       }
3405:       row = *bjtmp++;
3406:     }

3408:     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3409:     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3410:     nzi_bl   = 0; j = 0;
3411:     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3412:       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3413:       nzi_bl++; j++;
3414:     }
3415:     nzi_bu = nzi - nzi_bl -1;
3416:     while (j < nzi) {
3417:       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3418:       j++;
3419:     }

3421:     bjtmp = bj + bi[i];
3422:     batmp = ba + bi[i];
3423:     /* apply level dropping rule to L part */
3424:     ncut = nzi_al + dtcount;
3425:     if (ncut < nzi_bl) {
3426:       PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);
3427:       PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
3428:     } else {
3429:       ncut = nzi_bl;
3430:     }
3431:     for (j=0; j<ncut; j++) {
3432:       bjtmp[j] = jtmp[j];
3433:       batmp[j] = vtmp[j];
3434:       /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3435:     }
3436:     bi[i+1] = bi[i] + ncut;
3437:     nzi     = ncut + 1;

3439:     /* apply level dropping rule to U part */
3440:     ncut = nzi_au + dtcount;
3441:     if (ncut < nzi_bu) {
3442:       PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);
3443:       PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
3444:     } else {
3445:       ncut = nzi_bu;
3446:     }
3447:     nzi += ncut;

3449:     /* mark bdiagonal */
3450:     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3451:     bdiag_rev[n-i-1] = bdiag[i+1];
3452:     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3453:     bjtmp            = bj + bdiag[i];
3454:     batmp            = ba + bdiag[i];
3455:     *bjtmp           = i;
3456:     *batmp           = diag_tmp; /* rtmp[i]; */
3457:     if (*batmp == 0.0) {
3458:       *batmp = dt+shift;
3459:       /* printf(" row %d add shift %g\n",i,shift); */
3460:     }
3461:     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3462:     /* printf(" (%d,%g),",*bjtmp,*batmp); */

3464:     bjtmp = bj + bdiag[i+1]+1;
3465:     batmp = ba + bdiag[i+1]+1;
3466:     for (k=0; k<ncut; k++) {
3467:       bjtmp[k] = jtmp[nzi_bl+1+k];
3468:       batmp[k] = vtmp[nzi_bl+1+k];
3469:       /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3470:     }
3471:     /* printf("\n"); */

3473:     im[i] = nzi;   /* used by PetscLLAddSortedLU() */
3474:     /*
3475:     printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3476:     printf(" ----------------------------\n");
3477:     */
3478:   } /* for (i=0; i<n; i++) */
3479:     /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3480:   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);

3482:   ISRestoreIndices(isrow,&r);
3483:   ISRestoreIndices(isicol,&ic);

3485:   PetscLLDestroy(lnk,lnkbt);
3486:   PetscFree2(im,jtmp);
3487:   PetscFree2(rtmp,vtmp);
3488:   PetscFree(bdiag_rev);

3490:   PetscLogFlops(B->cmap->n);
3491:   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];

3493:   ISIdentity(isrow,&row_identity);
3494:   ISIdentity(isicol,&icol_identity);
3495:   if (row_identity && icol_identity) {
3496:     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3497:   } else {
3498:     B->ops->solve = MatSolve_SeqAIJ;
3499:   }

3501:   B->ops->solveadd          = 0;
3502:   B->ops->solvetranspose    = 0;
3503:   B->ops->solvetransposeadd = 0;
3504:   B->ops->matsolve          = 0;
3505:   B->assembled              = PETSC_TRUE;
3506:   B->preallocated           = PETSC_TRUE;
3507:   return(0);
3508: }

3510: /* a wraper of MatILUDTFactor_SeqAIJ() */
3513: /*
3514:     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3515: */

3517: PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3518: {

3522:   MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);
3523:   return(0);
3524: }

3526: /*
3527:    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3528:    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3529: */
3532: /*
3533:     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3534: */

3536: PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3537: {
3538:   Mat            C     =fact;
3539:   Mat_SeqAIJ     *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3540:   IS             isrow = b->row,isicol = b->icol;
3542:   const PetscInt *r,*ic,*ics;
3543:   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3544:   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3545:   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3546:   PetscReal      dt=info->dt,shift=info->shiftamount;
3547:   PetscBool      row_identity, col_identity;

3550:   ISGetIndices(isrow,&r);
3551:   ISGetIndices(isicol,&ic);
3552:   PetscMalloc1(n+1,&rtmp);
3553:   ics  = ic;

3555:   for (i=0; i<n; i++) {
3556:     /* initialize rtmp array */
3557:     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3558:     bjtmp = bj + bi[i];
3559:     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3560:     rtmp[i] = 0.0;
3561:     nzu     = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3562:     bjtmp   = bj + bdiag[i+1] + 1;
3563:     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;

3565:     /* load in initial unfactored row of A */
3566:     /* printf("row %d\n",i); */
3567:     nz    = ai[r[i]+1] - ai[r[i]];
3568:     ajtmp = aj + ai[r[i]];
3569:     v     = aa + ai[r[i]];
3570:     for (j=0; j<nz; j++) {
3571:       rtmp[ics[*ajtmp++]] = v[j];
3572:       /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3573:     }
3574:     /* printf("\n"); */

3576:     /* numerical factorization */
3577:     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3578:     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3579:     k     = 0;
3580:     while (k < nzl) {
3581:       row = *bjtmp++;
3582:       /* printf("  prow %d\n",row); */
3583:       pc         = rtmp + row;
3584:       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3585:       multiplier = (*pc) * (*pv);
3586:       *pc        = multiplier;
3587:       if (PetscAbsScalar(multiplier) > dt) {
3588:         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3589:         pv = b->a + bdiag[row+1] + 1;
3590:         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3591:         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3592:         PetscLogFlops(1+2*nz);
3593:       }
3594:       k++;
3595:     }

3597:     /* finished row so stick it into b->a */
3598:     /* L-part */
3599:     pv  = b->a + bi[i];
3600:     pj  = bj + bi[i];
3601:     nzl = bi[i+1] - bi[i];
3602:     for (j=0; j<nzl; j++) {
3603:       pv[j] = rtmp[pj[j]];
3604:       /* printf(" (%d,%g),",pj[j],pv[j]); */
3605:     }

3607:     /* diagonal: invert diagonal entries for simplier triangular solves */
3608:     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3609:     b->a[bdiag[i]] = 1.0/rtmp[i];
3610:     /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */

3612:     /* U-part */
3613:     pv  = b->a + bdiag[i+1] + 1;
3614:     pj  = bj + bdiag[i+1] + 1;
3615:     nzu = bdiag[i] - bdiag[i+1] - 1;
3616:     for (j=0; j<nzu; j++) {
3617:       pv[j] = rtmp[pj[j]];
3618:       /* printf(" (%d,%g),",pj[j],pv[j]); */
3619:     }
3620:     /* printf("\n"); */
3621:   }

3623:   PetscFree(rtmp);
3624:   ISRestoreIndices(isicol,&ic);
3625:   ISRestoreIndices(isrow,&r);

3627:   ISIdentity(isrow,&row_identity);
3628:   ISIdentity(isicol,&col_identity);
3629:   if (row_identity && col_identity) {
3630:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3631:   } else {
3632:     C->ops->solve = MatSolve_SeqAIJ;
3633:   }
3634:   C->ops->solveadd          = 0;
3635:   C->ops->solvetranspose    = 0;
3636:   C->ops->solvetransposeadd = 0;
3637:   C->ops->matsolve          = 0;
3638:   C->assembled              = PETSC_TRUE;
3639:   C->preallocated           = PETSC_TRUE;

3641:   PetscLogFlops(C->cmap->n);
3642:   return(0);
3643: }