Actual source code: aijfact.c
petsc-3.6.1 2015-08-06
2: #include <../src/mat/impls/aij/seq/aij.h>
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <petscbt.h>
5: #include <../src/mat/utils/freespace.h>
9: /*
10: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
12: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
13: */
14: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
15: {
16: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data;
17: PetscErrorCode ierr;
18: PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
19: const PetscInt *ai = a->i, *aj = a->j;
20: const PetscScalar *aa = a->a;
21: PetscBool *done;
22: PetscReal best,past = 0,future;
25: /* pick initial row */
26: best = -1;
27: for (i=0; i<n; i++) {
28: future = 0.0;
29: for (j=ai[i]; j<ai[i+1]; j++) {
30: if (aj[j] != i) future += PetscAbsScalar(aa[j]);
31: else past = PetscAbsScalar(aa[j]);
32: }
33: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34: if (past/future > best) {
35: best = past/future;
36: current = i;
37: }
38: }
40: PetscMalloc1(n,&done);
41: PetscMemzero(done,n*sizeof(PetscBool));
42: PetscMalloc1(n,&order);
43: order[0] = current;
44: for (i=0; i<n-1; i++) {
45: done[current] = PETSC_TRUE;
46: best = -1;
47: /* loop over all neighbors of current pivot */
48: for (j=ai[current]; j<ai[current+1]; j++) {
49: jj = aj[j];
50: if (done[jj]) continue;
51: /* loop over columns of potential next row computing weights for below and above diagonal */
52: past = future = 0.0;
53: for (k=ai[jj]; k<ai[jj+1]; k++) {
54: kk = aj[k];
55: if (done[kk]) past += PetscAbsScalar(aa[k]);
56: else if (kk != jj) future += PetscAbsScalar(aa[k]);
57: }
58: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59: if (past/future > best) {
60: best = past/future;
61: newcurrent = jj;
62: }
63: }
64: if (best == -1) { /* no neighbors to select from so select best of all that remain */
65: best = -1;
66: for (k=0; k<n; k++) {
67: if (done[k]) continue;
68: future = 0.0;
69: past = 0.0;
70: for (j=ai[k]; j<ai[k+1]; j++) {
71: kk = aj[j];
72: if (done[kk]) past += PetscAbsScalar(aa[j]);
73: else if (kk != k) future += PetscAbsScalar(aa[j]);
74: }
75: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76: if (past/future > best) {
77: best = past/future;
78: newcurrent = k;
79: }
80: }
81: }
82: if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
83: current = newcurrent;
84: order[i+1] = current;
85: }
86: ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);
87: *icol = *irow;
88: PetscObjectReference((PetscObject)*irow);
89: PetscFree(done);
90: PetscFree(order);
91: return(0);
92: }
96: PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
97: {
98: PetscInt n = A->rmap->n;
102: #if defined(PETSC_USE_COMPLEX)
103: if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
104: #endif
105: MatCreate(PetscObjectComm((PetscObject)A),B);
106: MatSetSizes(*B,n,n,n,n);
107: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
108: MatSetType(*B,MATSEQAIJ);
110: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
111: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
113: MatSetBlockSizesFromMats(*B,A,A);
114: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
115: MatSetType(*B,MATSEQSBAIJ);
116: MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);
118: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
119: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
120: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
121: (*B)->factortype = ftype;
122: return(0);
123: }
127: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
128: {
129: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
130: IS isicol;
131: PetscErrorCode ierr;
132: const PetscInt *r,*ic;
133: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
134: PetscInt *bi,*bj,*ajtmp;
135: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
136: PetscReal f;
137: PetscInt nlnk,*lnk,k,**bi_ptr;
138: PetscFreeSpaceList free_space=NULL,current_space=NULL;
139: PetscBT lnkbt;
140: PetscBool missing;
143: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
144: MatMissingDiagonal(A,&missing,&i);
145: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
147: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
148: ISGetIndices(isrow,&r);
149: ISGetIndices(isicol,&ic);
151: /* get new row pointers */
152: PetscMalloc1(n+1,&bi);
153: bi[0] = 0;
155: /* bdiag is location of diagonal in factor */
156: PetscMalloc1(n+1,&bdiag);
157: bdiag[0] = 0;
159: /* linked list for storing column indices of the active row */
160: nlnk = n + 1;
161: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
163: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
165: /* initial FreeSpace size is f*(ai[n]+1) */
166: f = info->fill;
167: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
168: current_space = free_space;
170: for (i=0; i<n; i++) {
171: /* copy previous fill into linked list */
172: nzi = 0;
173: nnz = ai[r[i]+1] - ai[r[i]];
174: ajtmp = aj + ai[r[i]];
175: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
176: nzi += nlnk;
178: /* add pivot rows into linked list */
179: row = lnk[n];
180: while (row < i) {
181: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
182: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
183: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
184: nzi += nlnk;
185: row = lnk[row];
186: }
187: bi[i+1] = bi[i] + nzi;
188: im[i] = nzi;
190: /* mark bdiag */
191: nzbd = 0;
192: nnz = nzi;
193: k = lnk[n];
194: while (nnz-- && k < i) {
195: nzbd++;
196: k = lnk[k];
197: }
198: bdiag[i] = bi[i] + nzbd;
200: /* if free space is not available, make more free space */
201: if (current_space->local_remaining<nzi) {
202: nnz = (n - i)*nzi; /* estimated and max additional space needed */
203: PetscFreeSpaceGet(nnz,¤t_space);
204: reallocs++;
205: }
207: /* copy data into free space, then initialize lnk */
208: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
210: bi_ptr[i] = current_space->array;
211: current_space->array += nzi;
212: current_space->local_used += nzi;
213: current_space->local_remaining -= nzi;
214: }
215: #if defined(PETSC_USE_INFO)
216: if (ai[n] != 0) {
217: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
218: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
219: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
220: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
221: PetscInfo(A,"for best performance.\n");
222: } else {
223: PetscInfo(A,"Empty matrix\n");
224: }
225: #endif
227: ISRestoreIndices(isrow,&r);
228: ISRestoreIndices(isicol,&ic);
230: /* destroy list of free space and other temporary array(s) */
231: PetscMalloc1(bi[n]+1,&bj);
232: PetscFreeSpaceContiguous(&free_space,bj);
233: PetscLLDestroy(lnk,lnkbt);
234: PetscFree2(bi_ptr,im);
236: /* put together the new matrix */
237: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
238: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
239: b = (Mat_SeqAIJ*)(B)->data;
241: b->free_a = PETSC_TRUE;
242: b->free_ij = PETSC_TRUE;
243: b->singlemalloc = PETSC_FALSE;
245: PetscMalloc1(bi[n]+1,&b->a);
246: b->j = bj;
247: b->i = bi;
248: b->diag = bdiag;
249: b->ilen = 0;
250: b->imax = 0;
251: b->row = isrow;
252: b->col = iscol;
253: PetscObjectReference((PetscObject)isrow);
254: PetscObjectReference((PetscObject)iscol);
255: b->icol = isicol;
256: PetscMalloc1(n+1,&b->solve_work);
258: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
259: PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
260: b->maxnz = b->nz = bi[n];
262: (B)->factortype = MAT_FACTOR_LU;
263: (B)->info.factor_mallocs = reallocs;
264: (B)->info.fill_ratio_given = f;
266: if (ai[n]) {
267: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
268: } else {
269: (B)->info.fill_ratio_needed = 0.0;
270: }
271: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
272: if (a->inode.size) {
273: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
274: }
275: return(0);
276: }
280: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
281: {
282: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
283: IS isicol;
284: PetscErrorCode ierr;
285: const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
286: PetscInt i,n=A->rmap->n;
287: PetscInt *bi,*bj;
288: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
289: PetscReal f;
290: PetscInt nlnk,*lnk,k,**bi_ptr;
291: PetscFreeSpaceList free_space=NULL,current_space=NULL;
292: PetscBT lnkbt;
293: PetscBool missing;
296: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
297: MatMissingDiagonal(A,&missing,&i);
298: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
299:
300: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
301: ISGetIndices(isrow,&r);
302: ISGetIndices(isicol,&ic);
304: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
305: PetscMalloc1(n+1,&bi);
306: PetscMalloc1(n+1,&bdiag);
307: bi[0] = bdiag[0] = 0;
309: /* linked list for storing column indices of the active row */
310: nlnk = n + 1;
311: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
313: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
315: /* initial FreeSpace size is f*(ai[n]+1) */
316: f = info->fill;
317: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
318: current_space = free_space;
320: for (i=0; i<n; i++) {
321: /* copy previous fill into linked list */
322: nzi = 0;
323: nnz = ai[r[i]+1] - ai[r[i]];
324: ajtmp = aj + ai[r[i]];
325: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
326: nzi += nlnk;
328: /* add pivot rows into linked list */
329: row = lnk[n];
330: while (row < i) {
331: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
332: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
333: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
334: nzi += nlnk;
335: row = lnk[row];
336: }
337: bi[i+1] = bi[i] + nzi;
338: im[i] = nzi;
340: /* mark bdiag */
341: nzbd = 0;
342: nnz = nzi;
343: k = lnk[n];
344: while (nnz-- && k < i) {
345: nzbd++;
346: k = lnk[k];
347: }
348: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
350: /* if free space is not available, make more free space */
351: if (current_space->local_remaining<nzi) {
352: nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
353: PetscFreeSpaceGet(nnz,¤t_space);
354: reallocs++;
355: }
357: /* copy data into free space, then initialize lnk */
358: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
360: bi_ptr[i] = current_space->array;
361: current_space->array += nzi;
362: current_space->local_used += nzi;
363: current_space->local_remaining -= nzi;
364: }
366: ISRestoreIndices(isrow,&r);
367: ISRestoreIndices(isicol,&ic);
369: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
370: PetscMalloc1(bi[n]+1,&bj);
371: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
372: PetscLLDestroy(lnk,lnkbt);
373: PetscFree2(bi_ptr,im);
375: /* put together the new matrix */
376: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
377: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
378: b = (Mat_SeqAIJ*)(B)->data;
380: b->free_a = PETSC_TRUE;
381: b->free_ij = PETSC_TRUE;
382: b->singlemalloc = PETSC_FALSE;
384: PetscMalloc1(bdiag[0]+1,&b->a);
386: b->j = bj;
387: b->i = bi;
388: b->diag = bdiag;
389: b->ilen = 0;
390: b->imax = 0;
391: b->row = isrow;
392: b->col = iscol;
393: PetscObjectReference((PetscObject)isrow);
394: PetscObjectReference((PetscObject)iscol);
395: b->icol = isicol;
396: PetscMalloc1(n+1,&b->solve_work);
398: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
399: PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
400: b->maxnz = b->nz = bdiag[0]+1;
402: B->factortype = MAT_FACTOR_LU;
403: B->info.factor_mallocs = reallocs;
404: B->info.fill_ratio_given = f;
406: if (ai[n]) {
407: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
408: } else {
409: B->info.fill_ratio_needed = 0.0;
410: }
411: #if defined(PETSC_USE_INFO)
412: if (ai[n] != 0) {
413: PetscReal af = B->info.fill_ratio_needed;
414: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
415: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
416: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
417: PetscInfo(A,"for best performance.\n");
418: } else {
419: PetscInfo(A,"Empty matrix\n");
420: }
421: #endif
422: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
423: if (a->inode.size) {
424: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
425: }
426: MatSeqAIJCheckInode_FactorLU(B);
427: return(0);
428: }
430: /*
431: Trouble in factorization, should we dump the original matrix?
432: */
435: PetscErrorCode MatFactorDumpMatrix(Mat A)
436: {
438: PetscBool flg = PETSC_FALSE;
441: PetscOptionsGetBool(NULL,"-mat_factor_dump_on_error",&flg,NULL);
442: if (flg) {
443: PetscViewer viewer;
444: char filename[PETSC_MAX_PATH_LEN];
446: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
447: PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);
448: MatView(A,viewer);
449: PetscViewerDestroy(&viewer);
450: }
451: return(0);
452: }
456: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
457: {
458: Mat C =B;
459: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
460: IS isrow = b->row,isicol = b->icol;
461: PetscErrorCode ierr;
462: const PetscInt *r,*ic,*ics;
463: const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
464: PetscInt i,j,k,nz,nzL,row,*pj;
465: const PetscInt *ajtmp,*bjtmp;
466: MatScalar *rtmp,*pc,multiplier,*pv;
467: const MatScalar *aa=a->a,*v;
468: PetscBool row_identity,col_identity;
469: FactorShiftCtx sctx;
470: const PetscInt *ddiag;
471: PetscReal rs;
472: MatScalar d;
475: /* MatPivotSetUp(): initialize shift context sctx */
476: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
478: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
479: ddiag = a->diag;
480: sctx.shift_top = info->zeropivot;
481: for (i=0; i<n; i++) {
482: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
483: d = (aa)[ddiag[i]];
484: rs = -PetscAbsScalar(d) - PetscRealPart(d);
485: v = aa+ai[i];
486: nz = ai[i+1] - ai[i];
487: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
488: if (rs>sctx.shift_top) sctx.shift_top = rs;
489: }
490: sctx.shift_top *= 1.1;
491: sctx.nshift_max = 5;
492: sctx.shift_lo = 0.;
493: sctx.shift_hi = 1.;
494: }
496: ISGetIndices(isrow,&r);
497: ISGetIndices(isicol,&ic);
498: PetscMalloc1(n+1,&rtmp);
499: ics = ic;
501: do {
502: sctx.newshift = PETSC_FALSE;
503: for (i=0; i<n; i++) {
504: /* zero rtmp */
505: /* L part */
506: nz = bi[i+1] - bi[i];
507: bjtmp = bj + bi[i];
508: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
510: /* U part */
511: nz = bdiag[i]-bdiag[i+1];
512: bjtmp = bj + bdiag[i+1]+1;
513: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
515: /* load in initial (unfactored row) */
516: nz = ai[r[i]+1] - ai[r[i]];
517: ajtmp = aj + ai[r[i]];
518: v = aa + ai[r[i]];
519: for (j=0; j<nz; j++) {
520: rtmp[ics[ajtmp[j]]] = v[j];
521: }
522: /* ZeropivotApply() */
523: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
525: /* elimination */
526: bjtmp = bj + bi[i];
527: row = *bjtmp++;
528: nzL = bi[i+1] - bi[i];
529: for (k=0; k < nzL; k++) {
530: pc = rtmp + row;
531: if (*pc != 0.0) {
532: pv = b->a + bdiag[row];
533: multiplier = *pc * (*pv);
534: *pc = multiplier;
536: pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
537: pv = b->a + bdiag[row+1]+1;
538: nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
540: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
541: PetscLogFlops(1+2*nz);
542: }
543: row = *bjtmp++;
544: }
546: /* finished row so stick it into b->a */
547: rs = 0.0;
548: /* L part */
549: pv = b->a + bi[i];
550: pj = b->j + bi[i];
551: nz = bi[i+1] - bi[i];
552: for (j=0; j<nz; j++) {
553: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
554: }
556: /* U part */
557: pv = b->a + bdiag[i+1]+1;
558: pj = b->j + bdiag[i+1]+1;
559: nz = bdiag[i] - bdiag[i+1]-1;
560: for (j=0; j<nz; j++) {
561: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
562: }
564: sctx.rs = rs;
565: sctx.pv = rtmp[i];
566: MatPivotCheck(A,info,&sctx,i);
567: if (sctx.newshift) break; /* break for-loop */
568: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
570: /* Mark diagonal and invert diagonal for simplier triangular solves */
571: pv = b->a + bdiag[i];
572: *pv = 1.0/rtmp[i];
574: } /* endof for (i=0; i<n; i++) { */
576: /* MatPivotRefine() */
577: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
578: /*
579: * if no shift in this attempt & shifting & started shifting & can refine,
580: * then try lower shift
581: */
582: sctx.shift_hi = sctx.shift_fraction;
583: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
584: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
585: sctx.newshift = PETSC_TRUE;
586: sctx.nshift++;
587: }
588: } while (sctx.newshift);
590: PetscFree(rtmp);
591: ISRestoreIndices(isicol,&ic);
592: ISRestoreIndices(isrow,&r);
594: ISIdentity(isrow,&row_identity);
595: ISIdentity(isicol,&col_identity);
596: if (b->inode.size) {
597: C->ops->solve = MatSolve_SeqAIJ_Inode;
598: } else if (row_identity && col_identity) {
599: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
600: } else {
601: C->ops->solve = MatSolve_SeqAIJ;
602: }
603: C->ops->solveadd = MatSolveAdd_SeqAIJ;
604: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
605: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
606: C->ops->matsolve = MatMatSolve_SeqAIJ;
607: C->assembled = PETSC_TRUE;
608: C->preallocated = PETSC_TRUE;
610: PetscLogFlops(C->cmap->n);
612: /* MatShiftView(A,info,&sctx) */
613: if (sctx.nshift) {
614: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
615: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
616: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
617: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
618: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
619: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
620: }
621: }
622: return(0);
623: }
627: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
628: {
629: Mat C =B;
630: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
631: IS isrow = b->row,isicol = b->icol;
632: PetscErrorCode ierr;
633: const PetscInt *r,*ic,*ics;
634: PetscInt nz,row,i,j,n=A->rmap->n,diag;
635: const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
636: const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
637: MatScalar *pv,*rtmp,*pc,multiplier,d;
638: const MatScalar *v,*aa=a->a;
639: PetscReal rs=0.0;
640: FactorShiftCtx sctx;
641: const PetscInt *ddiag;
642: PetscBool row_identity, col_identity;
645: /* MatPivotSetUp(): initialize shift context sctx */
646: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
648: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
649: ddiag = a->diag;
650: sctx.shift_top = info->zeropivot;
651: for (i=0; i<n; i++) {
652: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
653: d = (aa)[ddiag[i]];
654: rs = -PetscAbsScalar(d) - PetscRealPart(d);
655: v = aa+ai[i];
656: nz = ai[i+1] - ai[i];
657: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
658: if (rs>sctx.shift_top) sctx.shift_top = rs;
659: }
660: sctx.shift_top *= 1.1;
661: sctx.nshift_max = 5;
662: sctx.shift_lo = 0.;
663: sctx.shift_hi = 1.;
664: }
666: ISGetIndices(isrow,&r);
667: ISGetIndices(isicol,&ic);
668: PetscMalloc1(n+1,&rtmp);
669: ics = ic;
671: do {
672: sctx.newshift = PETSC_FALSE;
673: for (i=0; i<n; i++) {
674: nz = bi[i+1] - bi[i];
675: bjtmp = bj + bi[i];
676: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
678: /* load in initial (unfactored row) */
679: nz = ai[r[i]+1] - ai[r[i]];
680: ajtmp = aj + ai[r[i]];
681: v = aa + ai[r[i]];
682: for (j=0; j<nz; j++) {
683: rtmp[ics[ajtmp[j]]] = v[j];
684: }
685: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
687: row = *bjtmp++;
688: while (row < i) {
689: pc = rtmp + row;
690: if (*pc != 0.0) {
691: pv = b->a + diag_offset[row];
692: pj = b->j + diag_offset[row] + 1;
693: multiplier = *pc / *pv++;
694: *pc = multiplier;
695: nz = bi[row+1] - diag_offset[row] - 1;
696: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
697: PetscLogFlops(1+2*nz);
698: }
699: row = *bjtmp++;
700: }
701: /* finished row so stick it into b->a */
702: pv = b->a + bi[i];
703: pj = b->j + bi[i];
704: nz = bi[i+1] - bi[i];
705: diag = diag_offset[i] - bi[i];
706: rs = 0.0;
707: for (j=0; j<nz; j++) {
708: pv[j] = rtmp[pj[j]];
709: rs += PetscAbsScalar(pv[j]);
710: }
711: rs -= PetscAbsScalar(pv[diag]);
713: sctx.rs = rs;
714: sctx.pv = pv[diag];
715: MatPivotCheck(A,info,&sctx,i);
716: if (sctx.newshift) break;
717: pv[diag] = sctx.pv;
718: }
720: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
721: /*
722: * if no shift in this attempt & shifting & started shifting & can refine,
723: * then try lower shift
724: */
725: sctx.shift_hi = sctx.shift_fraction;
726: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
727: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
728: sctx.newshift = PETSC_TRUE;
729: sctx.nshift++;
730: }
731: } while (sctx.newshift);
733: /* invert diagonal entries for simplier triangular solves */
734: for (i=0; i<n; i++) {
735: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
736: }
737: PetscFree(rtmp);
738: ISRestoreIndices(isicol,&ic);
739: ISRestoreIndices(isrow,&r);
741: ISIdentity(isrow,&row_identity);
742: ISIdentity(isicol,&col_identity);
743: if (row_identity && col_identity) {
744: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
745: } else {
746: C->ops->solve = MatSolve_SeqAIJ_inplace;
747: }
748: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
749: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
750: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
751: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
753: C->assembled = PETSC_TRUE;
754: C->preallocated = PETSC_TRUE;
756: PetscLogFlops(C->cmap->n);
757: if (sctx.nshift) {
758: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
759: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
760: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
761: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
762: }
763: }
764: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
765: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
767: MatSeqAIJCheckInode(C);
768: return(0);
769: }
771: /*
772: This routine implements inplace ILU(0) with row or/and column permutations.
773: Input:
774: A - original matrix
775: Output;
776: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
777: a->j (col index) is permuted by the inverse of colperm, then sorted
778: a->a reordered accordingly with a->j
779: a->diag (ptr to diagonal elements) is updated.
780: */
783: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
784: {
785: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data;
786: IS isrow = a->row,isicol = a->icol;
787: PetscErrorCode ierr;
788: const PetscInt *r,*ic,*ics;
789: PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
790: PetscInt *ajtmp,nz,row;
791: PetscInt *diag = a->diag,nbdiag,*pj;
792: PetscScalar *rtmp,*pc,multiplier,d;
793: MatScalar *pv,*v;
794: PetscReal rs;
795: FactorShiftCtx sctx;
796: const MatScalar *aa=a->a,*vtmp;
799: if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
801: /* MatPivotSetUp(): initialize shift context sctx */
802: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
804: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
805: const PetscInt *ddiag = a->diag;
806: sctx.shift_top = info->zeropivot;
807: for (i=0; i<n; i++) {
808: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
809: d = (aa)[ddiag[i]];
810: rs = -PetscAbsScalar(d) - PetscRealPart(d);
811: vtmp = aa+ai[i];
812: nz = ai[i+1] - ai[i];
813: for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
814: if (rs>sctx.shift_top) sctx.shift_top = rs;
815: }
816: sctx.shift_top *= 1.1;
817: sctx.nshift_max = 5;
818: sctx.shift_lo = 0.;
819: sctx.shift_hi = 1.;
820: }
822: ISGetIndices(isrow,&r);
823: ISGetIndices(isicol,&ic);
824: PetscMalloc1(n+1,&rtmp);
825: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
826: ics = ic;
828: #if defined(MV)
829: sctx.shift_top = 0.;
830: sctx.nshift_max = 0;
831: sctx.shift_lo = 0.;
832: sctx.shift_hi = 0.;
833: sctx.shift_fraction = 0.;
835: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
836: sctx.shift_top = 0.;
837: for (i=0; i<n; i++) {
838: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
839: d = (a->a)[diag[i]];
840: rs = -PetscAbsScalar(d) - PetscRealPart(d);
841: v = a->a+ai[i];
842: nz = ai[i+1] - ai[i];
843: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
844: if (rs>sctx.shift_top) sctx.shift_top = rs;
845: }
846: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
847: sctx.shift_top *= 1.1;
848: sctx.nshift_max = 5;
849: sctx.shift_lo = 0.;
850: sctx.shift_hi = 1.;
851: }
853: sctx.shift_amount = 0.;
854: sctx.nshift = 0;
855: #endif
857: do {
858: sctx.newshift = PETSC_FALSE;
859: for (i=0; i<n; i++) {
860: /* load in initial unfactored row */
861: nz = ai[r[i]+1] - ai[r[i]];
862: ajtmp = aj + ai[r[i]];
863: v = a->a + ai[r[i]];
864: /* sort permuted ajtmp and values v accordingly */
865: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
866: PetscSortIntWithScalarArray(nz,ajtmp,v);
868: diag[r[i]] = ai[r[i]];
869: for (j=0; j<nz; j++) {
870: rtmp[ajtmp[j]] = v[j];
871: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
872: }
873: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
875: row = *ajtmp++;
876: while (row < i) {
877: pc = rtmp + row;
878: if (*pc != 0.0) {
879: pv = a->a + diag[r[row]];
880: pj = aj + diag[r[row]] + 1;
882: multiplier = *pc / *pv++;
883: *pc = multiplier;
884: nz = ai[r[row]+1] - diag[r[row]] - 1;
885: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
886: PetscLogFlops(1+2*nz);
887: }
888: row = *ajtmp++;
889: }
890: /* finished row so overwrite it onto a->a */
891: pv = a->a + ai[r[i]];
892: pj = aj + ai[r[i]];
893: nz = ai[r[i]+1] - ai[r[i]];
894: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
896: rs = 0.0;
897: for (j=0; j<nz; j++) {
898: pv[j] = rtmp[pj[j]];
899: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
900: }
902: sctx.rs = rs;
903: sctx.pv = pv[nbdiag];
904: MatPivotCheck(A,info,&sctx,i);
905: if (sctx.newshift) break;
906: pv[nbdiag] = sctx.pv;
907: }
909: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
910: /*
911: * if no shift in this attempt & shifting & started shifting & can refine,
912: * then try lower shift
913: */
914: sctx.shift_hi = sctx.shift_fraction;
915: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
916: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
917: sctx.newshift = PETSC_TRUE;
918: sctx.nshift++;
919: }
920: } while (sctx.newshift);
922: /* invert diagonal entries for simplier triangular solves */
923: for (i=0; i<n; i++) {
924: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
925: }
927: PetscFree(rtmp);
928: ISRestoreIndices(isicol,&ic);
929: ISRestoreIndices(isrow,&r);
931: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
932: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
933: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
934: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
936: A->assembled = PETSC_TRUE;
937: A->preallocated = PETSC_TRUE;
939: PetscLogFlops(A->cmap->n);
940: if (sctx.nshift) {
941: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
942: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
943: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
944: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
945: }
946: }
947: return(0);
948: }
950: /* ----------------------------------------------------------- */
953: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
954: {
956: Mat C;
959: MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);
960: MatLUFactorSymbolic(C,A,row,col,info);
961: MatLUFactorNumeric(C,A,info);
963: A->ops->solve = C->ops->solve;
964: A->ops->solvetranspose = C->ops->solvetranspose;
966: MatHeaderMerge(A,C);
967: PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);
968: return(0);
969: }
970: /* ----------------------------------------------------------- */
975: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
976: {
977: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
978: IS iscol = a->col,isrow = a->row;
979: PetscErrorCode ierr;
980: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
981: PetscInt nz;
982: const PetscInt *rout,*cout,*r,*c;
983: PetscScalar *x,*tmp,*tmps,sum;
984: const PetscScalar *b;
985: const MatScalar *aa = a->a,*v;
988: if (!n) return(0);
990: VecGetArrayRead(bb,&b);
991: VecGetArray(xx,&x);
992: tmp = a->solve_work;
994: ISGetIndices(isrow,&rout); r = rout;
995: ISGetIndices(iscol,&cout); c = cout + (n-1);
997: /* forward solve the lower triangular */
998: tmp[0] = b[*r++];
999: tmps = tmp;
1000: for (i=1; i<n; i++) {
1001: v = aa + ai[i];
1002: vi = aj + ai[i];
1003: nz = a->diag[i] - ai[i];
1004: sum = b[*r++];
1005: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1006: tmp[i] = sum;
1007: }
1009: /* backward solve the upper triangular */
1010: for (i=n-1; i>=0; i--) {
1011: v = aa + a->diag[i] + 1;
1012: vi = aj + a->diag[i] + 1;
1013: nz = ai[i+1] - a->diag[i] - 1;
1014: sum = tmp[i];
1015: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1016: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1017: }
1019: ISRestoreIndices(isrow,&rout);
1020: ISRestoreIndices(iscol,&cout);
1021: VecRestoreArrayRead(bb,&b);
1022: VecRestoreArray(xx,&x);
1023: PetscLogFlops(2.0*a->nz - A->cmap->n);
1024: return(0);
1025: }
1029: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1030: {
1031: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1032: IS iscol = a->col,isrow = a->row;
1033: PetscErrorCode ierr;
1034: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1035: PetscInt nz,neq;
1036: const PetscInt *rout,*cout,*r,*c;
1037: PetscScalar *x,*b,*tmp,*tmps,sum;
1038: const MatScalar *aa = a->a,*v;
1039: PetscBool bisdense,xisdense;
1042: if (!n) return(0);
1044: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1045: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1046: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1047: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1049: MatDenseGetArray(B,&b);
1050: MatDenseGetArray(X,&x);
1052: tmp = a->solve_work;
1053: ISGetIndices(isrow,&rout); r = rout;
1054: ISGetIndices(iscol,&cout); c = cout;
1056: for (neq=0; neq<B->cmap->n; neq++) {
1057: /* forward solve the lower triangular */
1058: tmp[0] = b[r[0]];
1059: tmps = tmp;
1060: for (i=1; i<n; i++) {
1061: v = aa + ai[i];
1062: vi = aj + ai[i];
1063: nz = a->diag[i] - ai[i];
1064: sum = b[r[i]];
1065: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1066: tmp[i] = sum;
1067: }
1068: /* backward solve the upper triangular */
1069: for (i=n-1; i>=0; i--) {
1070: v = aa + a->diag[i] + 1;
1071: vi = aj + a->diag[i] + 1;
1072: nz = ai[i+1] - a->diag[i] - 1;
1073: sum = tmp[i];
1074: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1075: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1076: }
1078: b += n;
1079: x += n;
1080: }
1081: ISRestoreIndices(isrow,&rout);
1082: ISRestoreIndices(iscol,&cout);
1083: MatDenseRestoreArray(B,&b);
1084: MatDenseRestoreArray(X,&x);
1085: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1086: return(0);
1087: }
1091: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1092: {
1093: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1094: IS iscol = a->col,isrow = a->row;
1095: PetscErrorCode ierr;
1096: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1097: PetscInt nz,neq;
1098: const PetscInt *rout,*cout,*r,*c;
1099: PetscScalar *x,*b,*tmp,sum;
1100: const MatScalar *aa = a->a,*v;
1101: PetscBool bisdense,xisdense;
1104: if (!n) return(0);
1106: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1107: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1108: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1109: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1111: MatDenseGetArray(B,&b);
1112: MatDenseGetArray(X,&x);
1114: tmp = a->solve_work;
1115: ISGetIndices(isrow,&rout); r = rout;
1116: ISGetIndices(iscol,&cout); c = cout;
1118: for (neq=0; neq<B->cmap->n; neq++) {
1119: /* forward solve the lower triangular */
1120: tmp[0] = b[r[0]];
1121: v = aa;
1122: vi = aj;
1123: for (i=1; i<n; i++) {
1124: nz = ai[i+1] - ai[i];
1125: sum = b[r[i]];
1126: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1127: tmp[i] = sum;
1128: v += nz; vi += nz;
1129: }
1131: /* backward solve the upper triangular */
1132: for (i=n-1; i>=0; i--) {
1133: v = aa + adiag[i+1]+1;
1134: vi = aj + adiag[i+1]+1;
1135: nz = adiag[i]-adiag[i+1]-1;
1136: sum = tmp[i];
1137: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1138: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1139: }
1141: b += n;
1142: x += n;
1143: }
1144: ISRestoreIndices(isrow,&rout);
1145: ISRestoreIndices(iscol,&cout);
1146: MatDenseRestoreArray(B,&b);
1147: MatDenseRestoreArray(X,&x);
1148: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1149: return(0);
1150: }
1154: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1155: {
1156: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1157: IS iscol = a->col,isrow = a->row;
1158: PetscErrorCode ierr;
1159: const PetscInt *r,*c,*rout,*cout;
1160: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1161: PetscInt nz,row;
1162: PetscScalar *x,*b,*tmp,*tmps,sum;
1163: const MatScalar *aa = a->a,*v;
1166: if (!n) return(0);
1168: VecGetArray(bb,&b);
1169: VecGetArray(xx,&x);
1170: tmp = a->solve_work;
1172: ISGetIndices(isrow,&rout); r = rout;
1173: ISGetIndices(iscol,&cout); c = cout + (n-1);
1175: /* forward solve the lower triangular */
1176: tmp[0] = b[*r++];
1177: tmps = tmp;
1178: for (row=1; row<n; row++) {
1179: i = rout[row]; /* permuted row */
1180: v = aa + ai[i];
1181: vi = aj + ai[i];
1182: nz = a->diag[i] - ai[i];
1183: sum = b[*r++];
1184: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1185: tmp[row] = sum;
1186: }
1188: /* backward solve the upper triangular */
1189: for (row=n-1; row>=0; row--) {
1190: i = rout[row]; /* permuted row */
1191: v = aa + a->diag[i] + 1;
1192: vi = aj + a->diag[i] + 1;
1193: nz = ai[i+1] - a->diag[i] - 1;
1194: sum = tmp[row];
1195: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1196: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1197: }
1199: ISRestoreIndices(isrow,&rout);
1200: ISRestoreIndices(iscol,&cout);
1201: VecRestoreArray(bb,&b);
1202: VecRestoreArray(xx,&x);
1203: PetscLogFlops(2.0*a->nz - A->cmap->n);
1204: return(0);
1205: }
1207: /* ----------------------------------------------------------- */
1208: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1211: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1212: {
1213: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1214: PetscErrorCode ierr;
1215: PetscInt n = A->rmap->n;
1216: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag;
1217: PetscScalar *x;
1218: const PetscScalar *b;
1219: const MatScalar *aa = a->a;
1220: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1221: PetscInt adiag_i,i,nz,ai_i;
1222: const PetscInt *vi;
1223: const MatScalar *v;
1224: PetscScalar sum;
1225: #endif
1228: if (!n) return(0);
1230: VecGetArrayRead(bb,&b);
1231: VecGetArray(xx,&x);
1233: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1234: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1235: #else
1236: /* forward solve the lower triangular */
1237: x[0] = b[0];
1238: for (i=1; i<n; i++) {
1239: ai_i = ai[i];
1240: v = aa + ai_i;
1241: vi = aj + ai_i;
1242: nz = adiag[i] - ai_i;
1243: sum = b[i];
1244: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1245: x[i] = sum;
1246: }
1248: /* backward solve the upper triangular */
1249: for (i=n-1; i>=0; i--) {
1250: adiag_i = adiag[i];
1251: v = aa + adiag_i + 1;
1252: vi = aj + adiag_i + 1;
1253: nz = ai[i+1] - adiag_i - 1;
1254: sum = x[i];
1255: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1256: x[i] = sum*aa[adiag_i];
1257: }
1258: #endif
1259: PetscLogFlops(2.0*a->nz - A->cmap->n);
1260: VecRestoreArrayRead(bb,&b);
1261: VecRestoreArray(xx,&x);
1262: return(0);
1263: }
1267: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1268: {
1269: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1270: IS iscol = a->col,isrow = a->row;
1271: PetscErrorCode ierr;
1272: PetscInt i, n = A->rmap->n,j;
1273: PetscInt nz;
1274: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1275: PetscScalar *x,*tmp,sum;
1276: const PetscScalar *b;
1277: const MatScalar *aa = a->a,*v;
1280: if (yy != xx) {VecCopy(yy,xx);}
1282: VecGetArrayRead(bb,&b);
1283: VecGetArray(xx,&x);
1284: tmp = a->solve_work;
1286: ISGetIndices(isrow,&rout); r = rout;
1287: ISGetIndices(iscol,&cout); c = cout + (n-1);
1289: /* forward solve the lower triangular */
1290: tmp[0] = b[*r++];
1291: for (i=1; i<n; i++) {
1292: v = aa + ai[i];
1293: vi = aj + ai[i];
1294: nz = a->diag[i] - ai[i];
1295: sum = b[*r++];
1296: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1297: tmp[i] = sum;
1298: }
1300: /* backward solve the upper triangular */
1301: for (i=n-1; i>=0; i--) {
1302: v = aa + a->diag[i] + 1;
1303: vi = aj + a->diag[i] + 1;
1304: nz = ai[i+1] - a->diag[i] - 1;
1305: sum = tmp[i];
1306: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1307: tmp[i] = sum*aa[a->diag[i]];
1308: x[*c--] += tmp[i];
1309: }
1311: ISRestoreIndices(isrow,&rout);
1312: ISRestoreIndices(iscol,&cout);
1313: VecRestoreArrayRead(bb,&b);
1314: VecRestoreArray(xx,&x);
1315: PetscLogFlops(2.0*a->nz);
1316: return(0);
1317: }
1321: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1322: {
1323: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1324: IS iscol = a->col,isrow = a->row;
1325: PetscErrorCode ierr;
1326: PetscInt i, n = A->rmap->n,j;
1327: PetscInt nz;
1328: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1329: PetscScalar *x,*tmp,sum;
1330: const PetscScalar *b;
1331: const MatScalar *aa = a->a,*v;
1334: if (yy != xx) {VecCopy(yy,xx);}
1336: VecGetArrayRead(bb,&b);
1337: VecGetArray(xx,&x);
1338: tmp = a->solve_work;
1340: ISGetIndices(isrow,&rout); r = rout;
1341: ISGetIndices(iscol,&cout); c = cout;
1343: /* forward solve the lower triangular */
1344: tmp[0] = b[r[0]];
1345: v = aa;
1346: vi = aj;
1347: for (i=1; i<n; i++) {
1348: nz = ai[i+1] - ai[i];
1349: sum = b[r[i]];
1350: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1351: tmp[i] = sum;
1352: v += nz;
1353: vi += nz;
1354: }
1356: /* backward solve the upper triangular */
1357: v = aa + adiag[n-1];
1358: vi = aj + adiag[n-1];
1359: for (i=n-1; i>=0; i--) {
1360: nz = adiag[i] - adiag[i+1] - 1;
1361: sum = tmp[i];
1362: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1363: tmp[i] = sum*v[nz];
1364: x[c[i]] += tmp[i];
1365: v += nz+1; vi += nz+1;
1366: }
1368: ISRestoreIndices(isrow,&rout);
1369: ISRestoreIndices(iscol,&cout);
1370: VecRestoreArrayRead(bb,&b);
1371: VecRestoreArray(xx,&x);
1372: PetscLogFlops(2.0*a->nz);
1373: return(0);
1374: }
1378: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1379: {
1380: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1381: IS iscol = a->col,isrow = a->row;
1382: PetscErrorCode ierr;
1383: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1384: PetscInt i,n = A->rmap->n,j;
1385: PetscInt nz;
1386: PetscScalar *x,*tmp,s1;
1387: const MatScalar *aa = a->a,*v;
1388: const PetscScalar *b;
1391: VecGetArrayRead(bb,&b);
1392: VecGetArray(xx,&x);
1393: tmp = a->solve_work;
1395: ISGetIndices(isrow,&rout); r = rout;
1396: ISGetIndices(iscol,&cout); c = cout;
1398: /* copy the b into temp work space according to permutation */
1399: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1401: /* forward solve the U^T */
1402: for (i=0; i<n; i++) {
1403: v = aa + diag[i];
1404: vi = aj + diag[i] + 1;
1405: nz = ai[i+1] - diag[i] - 1;
1406: s1 = tmp[i];
1407: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1408: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1409: tmp[i] = s1;
1410: }
1412: /* backward solve the L^T */
1413: for (i=n-1; i>=0; i--) {
1414: v = aa + diag[i] - 1;
1415: vi = aj + diag[i] - 1;
1416: nz = diag[i] - ai[i];
1417: s1 = tmp[i];
1418: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1419: }
1421: /* copy tmp into x according to permutation */
1422: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1424: ISRestoreIndices(isrow,&rout);
1425: ISRestoreIndices(iscol,&cout);
1426: VecRestoreArrayRead(bb,&b);
1427: VecRestoreArray(xx,&x);
1429: PetscLogFlops(2.0*a->nz-A->cmap->n);
1430: return(0);
1431: }
1435: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1436: {
1437: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1438: IS iscol = a->col,isrow = a->row;
1439: PetscErrorCode ierr;
1440: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1441: PetscInt i,n = A->rmap->n,j;
1442: PetscInt nz;
1443: PetscScalar *x,*tmp,s1;
1444: const MatScalar *aa = a->a,*v;
1445: const PetscScalar *b;
1448: VecGetArrayRead(bb,&b);
1449: VecGetArray(xx,&x);
1450: tmp = a->solve_work;
1452: ISGetIndices(isrow,&rout); r = rout;
1453: ISGetIndices(iscol,&cout); c = cout;
1455: /* copy the b into temp work space according to permutation */
1456: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1458: /* forward solve the U^T */
1459: for (i=0; i<n; i++) {
1460: v = aa + adiag[i+1] + 1;
1461: vi = aj + adiag[i+1] + 1;
1462: nz = adiag[i] - adiag[i+1] - 1;
1463: s1 = tmp[i];
1464: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1465: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1466: tmp[i] = s1;
1467: }
1469: /* backward solve the L^T */
1470: for (i=n-1; i>=0; i--) {
1471: v = aa + ai[i];
1472: vi = aj + ai[i];
1473: nz = ai[i+1] - ai[i];
1474: s1 = tmp[i];
1475: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1476: }
1478: /* copy tmp into x according to permutation */
1479: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1481: ISRestoreIndices(isrow,&rout);
1482: ISRestoreIndices(iscol,&cout);
1483: VecRestoreArrayRead(bb,&b);
1484: VecRestoreArray(xx,&x);
1486: PetscLogFlops(2.0*a->nz-A->cmap->n);
1487: return(0);
1488: }
1492: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1493: {
1494: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1495: IS iscol = a->col,isrow = a->row;
1496: PetscErrorCode ierr;
1497: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1498: PetscInt i,n = A->rmap->n,j;
1499: PetscInt nz;
1500: PetscScalar *x,*tmp,s1;
1501: const MatScalar *aa = a->a,*v;
1502: const PetscScalar *b;
1505: if (zz != xx) {VecCopy(zz,xx);}
1506: VecGetArrayRead(bb,&b);
1507: VecGetArray(xx,&x);
1508: tmp = a->solve_work;
1510: ISGetIndices(isrow,&rout); r = rout;
1511: ISGetIndices(iscol,&cout); c = cout;
1513: /* copy the b into temp work space according to permutation */
1514: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1516: /* forward solve the U^T */
1517: for (i=0; i<n; i++) {
1518: v = aa + diag[i];
1519: vi = aj + diag[i] + 1;
1520: nz = ai[i+1] - diag[i] - 1;
1521: s1 = tmp[i];
1522: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1523: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1524: tmp[i] = s1;
1525: }
1527: /* backward solve the L^T */
1528: for (i=n-1; i>=0; i--) {
1529: v = aa + diag[i] - 1;
1530: vi = aj + diag[i] - 1;
1531: nz = diag[i] - ai[i];
1532: s1 = tmp[i];
1533: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1534: }
1536: /* copy tmp into x according to permutation */
1537: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1539: ISRestoreIndices(isrow,&rout);
1540: ISRestoreIndices(iscol,&cout);
1541: VecRestoreArrayRead(bb,&b);
1542: VecRestoreArray(xx,&x);
1544: PetscLogFlops(2.0*a->nz-A->cmap->n);
1545: return(0);
1546: }
1550: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1551: {
1552: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1553: IS iscol = a->col,isrow = a->row;
1554: PetscErrorCode ierr;
1555: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1556: PetscInt i,n = A->rmap->n,j;
1557: PetscInt nz;
1558: PetscScalar *x,*tmp,s1;
1559: const MatScalar *aa = a->a,*v;
1560: const PetscScalar *b;
1563: if (zz != xx) {VecCopy(zz,xx);}
1564: VecGetArrayRead(bb,&b);
1565: VecGetArray(xx,&x);
1566: tmp = a->solve_work;
1568: ISGetIndices(isrow,&rout); r = rout;
1569: ISGetIndices(iscol,&cout); c = cout;
1571: /* copy the b into temp work space according to permutation */
1572: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1574: /* forward solve the U^T */
1575: for (i=0; i<n; i++) {
1576: v = aa + adiag[i+1] + 1;
1577: vi = aj + adiag[i+1] + 1;
1578: nz = adiag[i] - adiag[i+1] - 1;
1579: s1 = tmp[i];
1580: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1581: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1582: tmp[i] = s1;
1583: }
1586: /* backward solve the L^T */
1587: for (i=n-1; i>=0; i--) {
1588: v = aa + ai[i];
1589: vi = aj + ai[i];
1590: nz = ai[i+1] - ai[i];
1591: s1 = tmp[i];
1592: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1593: }
1595: /* copy tmp into x according to permutation */
1596: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1598: ISRestoreIndices(isrow,&rout);
1599: ISRestoreIndices(iscol,&cout);
1600: VecRestoreArrayRead(bb,&b);
1601: VecRestoreArray(xx,&x);
1603: PetscLogFlops(2.0*a->nz-A->cmap->n);
1604: return(0);
1605: }
1607: /* ----------------------------------------------------------------*/
1609: extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool);
1611: /*
1612: ilu() under revised new data structure.
1613: Factored arrays bj and ba are stored as
1614: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1616: bi=fact->i is an array of size n+1, in which
1617: bi+
1618: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1619: bi[n]: points to L(n-1,n-1)+1
1621: bdiag=fact->diag is an array of size n+1,in which
1622: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1623: bdiag[n]: points to entry of U(n-1,0)-1
1625: U(i,:) contains bdiag[i] as its last entry, i.e.,
1626: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1627: */
1630: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1631: {
1632: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1634: const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1635: PetscInt i,j,k=0,nz,*bi,*bj,*bdiag;
1636: IS isicol;
1639: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1640: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);
1641: b = (Mat_SeqAIJ*)(fact)->data;
1643: /* allocate matrix arrays for new data structure */
1644: PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);
1645: PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));
1647: b->singlemalloc = PETSC_TRUE;
1648: if (!b->diag) {
1649: PetscMalloc1(n+1,&b->diag);
1650: PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));
1651: }
1652: bdiag = b->diag;
1654: if (n > 0) {
1655: PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));
1656: }
1658: /* set bi and bj with new data structure */
1659: bi = b->i;
1660: bj = b->j;
1662: /* L part */
1663: bi[0] = 0;
1664: for (i=0; i<n; i++) {
1665: nz = adiag[i] - ai[i];
1666: bi[i+1] = bi[i] + nz;
1667: aj = a->j + ai[i];
1668: for (j=0; j<nz; j++) {
1669: /* *bj = aj[j]; bj++; */
1670: bj[k++] = aj[j];
1671: }
1672: }
1674: /* U part */
1675: bdiag[n] = bi[n]-1;
1676: for (i=n-1; i>=0; i--) {
1677: nz = ai[i+1] - adiag[i] - 1;
1678: aj = a->j + adiag[i] + 1;
1679: for (j=0; j<nz; j++) {
1680: /* *bj = aj[j]; bj++; */
1681: bj[k++] = aj[j];
1682: }
1683: /* diag[i] */
1684: /* *bj = i; bj++; */
1685: bj[k++] = i;
1686: bdiag[i] = bdiag[i+1] + nz + 1;
1687: }
1689: fact->factortype = MAT_FACTOR_ILU;
1690: fact->info.factor_mallocs = 0;
1691: fact->info.fill_ratio_given = info->fill;
1692: fact->info.fill_ratio_needed = 1.0;
1693: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1694: MatSeqAIJCheckInode_FactorLU(fact);
1696: b = (Mat_SeqAIJ*)(fact)->data;
1697: b->row = isrow;
1698: b->col = iscol;
1699: b->icol = isicol;
1700: PetscMalloc1(fact->rmap->n+1,&b->solve_work);
1701: PetscObjectReference((PetscObject)isrow);
1702: PetscObjectReference((PetscObject)iscol);
1703: return(0);
1704: }
1708: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1709: {
1710: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1711: IS isicol;
1712: PetscErrorCode ierr;
1713: const PetscInt *r,*ic;
1714: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1715: PetscInt *bi,*cols,nnz,*cols_lvl;
1716: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1717: PetscInt i,levels,diagonal_fill;
1718: PetscBool col_identity,row_identity,missing;
1719: PetscReal f;
1720: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1721: PetscBT lnkbt;
1722: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1723: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1724: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1727: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1728: MatMissingDiagonal(A,&missing,&i);
1729: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1730:
1731: levels = (PetscInt)info->levels;
1732: ISIdentity(isrow,&row_identity);
1733: ISIdentity(iscol,&col_identity);
1734: if (!levels && row_identity && col_identity) {
1735: /* special case: ilu(0) with natural ordering */
1736: MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);
1737: if (a->inode.size) {
1738: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1739: }
1740: return(0);
1741: }
1743: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1744: ISGetIndices(isrow,&r);
1745: ISGetIndices(isicol,&ic);
1747: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1748: PetscMalloc1(n+1,&bi);
1749: PetscMalloc1(n+1,&bdiag);
1750: bi[0] = bdiag[0] = 0;
1751: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1753: /* create a linked list for storing column indices of the active row */
1754: nlnk = n + 1;
1755: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1757: /* initial FreeSpace size is f*(ai[n]+1) */
1758: f = info->fill;
1759: diagonal_fill = (PetscInt)info->diagonal_fill;
1760: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1761: current_space = free_space;
1762: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1763: current_space_lvl = free_space_lvl;
1764: for (i=0; i<n; i++) {
1765: nzi = 0;
1766: /* copy current row into linked list */
1767: nnz = ai[r[i]+1] - ai[r[i]];
1768: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1769: cols = aj + ai[r[i]];
1770: lnk[i] = -1; /* marker to indicate if diagonal exists */
1771: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1772: nzi += nlnk;
1774: /* make sure diagonal entry is included */
1775: if (diagonal_fill && lnk[i] == -1) {
1776: fm = n;
1777: while (lnk[fm] < i) fm = lnk[fm];
1778: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1779: lnk[fm] = i;
1780: lnk_lvl[i] = 0;
1781: nzi++; dcount++;
1782: }
1784: /* add pivot rows into the active row */
1785: nzbd = 0;
1786: prow = lnk[n];
1787: while (prow < i) {
1788: nnz = bdiag[prow];
1789: cols = bj_ptr[prow] + nnz + 1;
1790: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1791: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1792: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1793: nzi += nlnk;
1794: prow = lnk[prow];
1795: nzbd++;
1796: }
1797: bdiag[i] = nzbd;
1798: bi[i+1] = bi[i] + nzi;
1799: /* if free space is not available, make more free space */
1800: if (current_space->local_remaining<nzi) {
1801: nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1802: PetscFreeSpaceGet(nnz,¤t_space);
1803: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1804: reallocs++;
1805: }
1807: /* copy data into free_space and free_space_lvl, then initialize lnk */
1808: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1809: bj_ptr[i] = current_space->array;
1810: bjlvl_ptr[i] = current_space_lvl->array;
1812: /* make sure the active row i has diagonal entry */
1813: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1815: current_space->array += nzi;
1816: current_space->local_used += nzi;
1817: current_space->local_remaining -= nzi;
1818: current_space_lvl->array += nzi;
1819: current_space_lvl->local_used += nzi;
1820: current_space_lvl->local_remaining -= nzi;
1821: }
1823: ISRestoreIndices(isrow,&r);
1824: ISRestoreIndices(isicol,&ic);
1825: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1826: PetscMalloc1(bi[n]+1,&bj);
1827: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
1829: PetscIncompleteLLDestroy(lnk,lnkbt);
1830: PetscFreeSpaceDestroy(free_space_lvl);
1831: PetscFree2(bj_ptr,bjlvl_ptr);
1833: #if defined(PETSC_USE_INFO)
1834: {
1835: PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1836: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
1837: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
1838: PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
1839: PetscInfo(A,"for best performance.\n");
1840: if (diagonal_fill) {
1841: PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);
1842: }
1843: }
1844: #endif
1845: /* put together the new matrix */
1846: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
1847: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
1848: b = (Mat_SeqAIJ*)(fact)->data;
1850: b->free_a = PETSC_TRUE;
1851: b->free_ij = PETSC_TRUE;
1852: b->singlemalloc = PETSC_FALSE;
1854: PetscMalloc1(bdiag[0]+1,&b->a);
1856: b->j = bj;
1857: b->i = bi;
1858: b->diag = bdiag;
1859: b->ilen = 0;
1860: b->imax = 0;
1861: b->row = isrow;
1862: b->col = iscol;
1863: PetscObjectReference((PetscObject)isrow);
1864: PetscObjectReference((PetscObject)iscol);
1865: b->icol = isicol;
1867: PetscMalloc1(n+1,&b->solve_work);
1868: /* In b structure: Free imax, ilen, old a, old j.
1869: Allocate bdiag, solve_work, new a, new j */
1870: PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
1871: b->maxnz = b->nz = bdiag[0]+1;
1873: (fact)->info.factor_mallocs = reallocs;
1874: (fact)->info.fill_ratio_given = f;
1875: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1876: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1877: if (a->inode.size) {
1878: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1879: }
1880: MatSeqAIJCheckInode_FactorLU(fact);
1881: return(0);
1882: }
1886: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1887: {
1888: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1889: IS isicol;
1890: PetscErrorCode ierr;
1891: const PetscInt *r,*ic;
1892: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1893: PetscInt *bi,*cols,nnz,*cols_lvl;
1894: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1895: PetscInt i,levels,diagonal_fill;
1896: PetscBool col_identity,row_identity;
1897: PetscReal f;
1898: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1899: PetscBT lnkbt;
1900: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1901: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1902: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1903: PetscBool missing;
1906: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1907: MatMissingDiagonal(A,&missing,&i);
1908: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1910: f = info->fill;
1911: levels = (PetscInt)info->levels;
1912: diagonal_fill = (PetscInt)info->diagonal_fill;
1914: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1916: ISIdentity(isrow,&row_identity);
1917: ISIdentity(iscol,&col_identity);
1918: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1919: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);
1921: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1922: if (a->inode.size) {
1923: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1924: }
1925: fact->factortype = MAT_FACTOR_ILU;
1926: (fact)->info.factor_mallocs = 0;
1927: (fact)->info.fill_ratio_given = info->fill;
1928: (fact)->info.fill_ratio_needed = 1.0;
1930: b = (Mat_SeqAIJ*)(fact)->data;
1931: b->row = isrow;
1932: b->col = iscol;
1933: b->icol = isicol;
1934: PetscMalloc1((fact)->rmap->n+1,&b->solve_work);
1935: PetscObjectReference((PetscObject)isrow);
1936: PetscObjectReference((PetscObject)iscol);
1937: return(0);
1938: }
1940: ISGetIndices(isrow,&r);
1941: ISGetIndices(isicol,&ic);
1943: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1944: PetscMalloc1(n+1,&bi);
1945: PetscMalloc1(n+1,&bdiag);
1946: bi[0] = bdiag[0] = 0;
1948: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1950: /* create a linked list for storing column indices of the active row */
1951: nlnk = n + 1;
1952: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1954: /* initial FreeSpace size is f*(ai[n]+1) */
1955: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1956: current_space = free_space;
1957: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1958: current_space_lvl = free_space_lvl;
1960: for (i=0; i<n; i++) {
1961: nzi = 0;
1962: /* copy current row into linked list */
1963: nnz = ai[r[i]+1] - ai[r[i]];
1964: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1965: cols = aj + ai[r[i]];
1966: lnk[i] = -1; /* marker to indicate if diagonal exists */
1967: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1968: nzi += nlnk;
1970: /* make sure diagonal entry is included */
1971: if (diagonal_fill && lnk[i] == -1) {
1972: fm = n;
1973: while (lnk[fm] < i) fm = lnk[fm];
1974: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1975: lnk[fm] = i;
1976: lnk_lvl[i] = 0;
1977: nzi++; dcount++;
1978: }
1980: /* add pivot rows into the active row */
1981: nzbd = 0;
1982: prow = lnk[n];
1983: while (prow < i) {
1984: nnz = bdiag[prow];
1985: cols = bj_ptr[prow] + nnz + 1;
1986: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1987: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1988: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1989: nzi += nlnk;
1990: prow = lnk[prow];
1991: nzbd++;
1992: }
1993: bdiag[i] = nzbd;
1994: bi[i+1] = bi[i] + nzi;
1996: /* if free space is not available, make more free space */
1997: if (current_space->local_remaining<nzi) {
1998: nnz = nzi*(n - i); /* estimated and max additional space needed */
1999: PetscFreeSpaceGet(nnz,¤t_space);
2000: PetscFreeSpaceGet(nnz,¤t_space_lvl);
2001: reallocs++;
2002: }
2004: /* copy data into free_space and free_space_lvl, then initialize lnk */
2005: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2006: bj_ptr[i] = current_space->array;
2007: bjlvl_ptr[i] = current_space_lvl->array;
2009: /* make sure the active row i has diagonal entry */
2010: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2012: current_space->array += nzi;
2013: current_space->local_used += nzi;
2014: current_space->local_remaining -= nzi;
2015: current_space_lvl->array += nzi;
2016: current_space_lvl->local_used += nzi;
2017: current_space_lvl->local_remaining -= nzi;
2018: }
2020: ISRestoreIndices(isrow,&r);
2021: ISRestoreIndices(isicol,&ic);
2023: /* destroy list of free space and other temporary arrays */
2024: PetscMalloc1(bi[n]+1,&bj);
2025: PetscFreeSpaceContiguous(&free_space,bj); /* copy free_space -> bj */
2026: PetscIncompleteLLDestroy(lnk,lnkbt);
2027: PetscFreeSpaceDestroy(free_space_lvl);
2028: PetscFree2(bj_ptr,bjlvl_ptr);
2030: #if defined(PETSC_USE_INFO)
2031: {
2032: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2033: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
2034: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
2035: PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
2036: PetscInfo(A,"for best performance.\n");
2037: if (diagonal_fill) {
2038: PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);
2039: }
2040: }
2041: #endif
2043: /* put together the new matrix */
2044: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
2045: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
2046: b = (Mat_SeqAIJ*)(fact)->data;
2048: b->free_a = PETSC_TRUE;
2049: b->free_ij = PETSC_TRUE;
2050: b->singlemalloc = PETSC_FALSE;
2052: PetscMalloc1(bi[n],&b->a);
2053: b->j = bj;
2054: b->i = bi;
2055: for (i=0; i<n; i++) bdiag[i] += bi[i];
2056: b->diag = bdiag;
2057: b->ilen = 0;
2058: b->imax = 0;
2059: b->row = isrow;
2060: b->col = iscol;
2061: PetscObjectReference((PetscObject)isrow);
2062: PetscObjectReference((PetscObject)iscol);
2063: b->icol = isicol;
2064: PetscMalloc1(n+1,&b->solve_work);
2065: /* In b structure: Free imax, ilen, old a, old j.
2066: Allocate bdiag, solve_work, new a, new j */
2067: PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
2068: b->maxnz = b->nz = bi[n];
2070: (fact)->info.factor_mallocs = reallocs;
2071: (fact)->info.fill_ratio_given = f;
2072: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2073: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2074: if (a->inode.size) {
2075: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2076: }
2077: return(0);
2078: }
2082: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2083: {
2084: Mat C = B;
2085: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2086: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2087: IS ip=b->row,iip = b->icol;
2089: const PetscInt *rip,*riip;
2090: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2091: PetscInt *ai=a->i,*aj=a->j;
2092: PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2093: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2094: PetscBool perm_identity;
2095: FactorShiftCtx sctx;
2096: PetscReal rs;
2097: MatScalar d,*v;
2100: /* MatPivotSetUp(): initialize shift context sctx */
2101: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2103: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2104: sctx.shift_top = info->zeropivot;
2105: for (i=0; i<mbs; i++) {
2106: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2107: d = (aa)[a->diag[i]];
2108: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2109: v = aa+ai[i];
2110: nz = ai[i+1] - ai[i];
2111: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2112: if (rs>sctx.shift_top) sctx.shift_top = rs;
2113: }
2114: sctx.shift_top *= 1.1;
2115: sctx.nshift_max = 5;
2116: sctx.shift_lo = 0.;
2117: sctx.shift_hi = 1.;
2118: }
2120: ISGetIndices(ip,&rip);
2121: ISGetIndices(iip,&riip);
2123: /* allocate working arrays
2124: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2125: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2126: */
2127: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);
2129: do {
2130: sctx.newshift = PETSC_FALSE;
2132: for (i=0; i<mbs; i++) c2r[i] = mbs;
2133: if (mbs) il[0] = 0;
2135: for (k = 0; k<mbs; k++) {
2136: /* zero rtmp */
2137: nz = bi[k+1] - bi[k];
2138: bjtmp = bj + bi[k];
2139: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2141: /* load in initial unfactored row */
2142: bval = ba + bi[k];
2143: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2144: for (j = jmin; j < jmax; j++) {
2145: col = riip[aj[j]];
2146: if (col >= k) { /* only take upper triangular entry */
2147: rtmp[col] = aa[j];
2148: *bval++ = 0.0; /* for in-place factorization */
2149: }
2150: }
2151: /* shift the diagonal of the matrix: ZeropivotApply() */
2152: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2154: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2155: dk = rtmp[k];
2156: i = c2r[k]; /* first row to be added to k_th row */
2158: while (i < k) {
2159: nexti = c2r[i]; /* next row to be added to k_th row */
2161: /* compute multiplier, update diag(k) and U(i,k) */
2162: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2163: uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2164: dk += uikdi*ba[ili]; /* update diag[k] */
2165: ba[ili] = uikdi; /* -U(i,k) */
2167: /* add multiple of row i to k-th row */
2168: jmin = ili + 1; jmax = bi[i+1];
2169: if (jmin < jmax) {
2170: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2171: /* update il and c2r for row i */
2172: il[i] = jmin;
2173: j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2174: }
2175: i = nexti;
2176: }
2178: /* copy data into U(k,:) */
2179: rs = 0.0;
2180: jmin = bi[k]; jmax = bi[k+1]-1;
2181: if (jmin < jmax) {
2182: for (j=jmin; j<jmax; j++) {
2183: col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2184: }
2185: /* add the k-th row into il and c2r */
2186: il[k] = jmin;
2187: i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2188: }
2190: /* MatPivotCheck() */
2191: sctx.rs = rs;
2192: sctx.pv = dk;
2193: MatPivotCheck(A,info,&sctx,i);
2194: if (sctx.newshift) break;
2195: dk = sctx.pv;
2197: ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2198: }
2199: } while (sctx.newshift);
2201: PetscFree3(rtmp,il,c2r);
2202: ISRestoreIndices(ip,&rip);
2203: ISRestoreIndices(iip,&riip);
2205: ISIdentity(ip,&perm_identity);
2206: if (perm_identity) {
2207: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2208: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2209: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2210: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2211: } else {
2212: B->ops->solve = MatSolve_SeqSBAIJ_1;
2213: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2214: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2215: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2216: }
2218: C->assembled = PETSC_TRUE;
2219: C->preallocated = PETSC_TRUE;
2221: PetscLogFlops(C->rmap->n);
2223: /* MatPivotView() */
2224: if (sctx.nshift) {
2225: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2226: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
2227: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2228: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2229: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2230: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
2231: }
2232: }
2233: return(0);
2234: }
2238: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2239: {
2240: Mat C = B;
2241: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2242: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2243: IS ip=b->row,iip = b->icol;
2245: const PetscInt *rip,*riip;
2246: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2247: PetscInt *ai=a->i,*aj=a->j;
2248: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2249: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2250: PetscBool perm_identity;
2251: FactorShiftCtx sctx;
2252: PetscReal rs;
2253: MatScalar d,*v;
2256: /* MatPivotSetUp(): initialize shift context sctx */
2257: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2259: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2260: sctx.shift_top = info->zeropivot;
2261: for (i=0; i<mbs; i++) {
2262: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2263: d = (aa)[a->diag[i]];
2264: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2265: v = aa+ai[i];
2266: nz = ai[i+1] - ai[i];
2267: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2268: if (rs>sctx.shift_top) sctx.shift_top = rs;
2269: }
2270: sctx.shift_top *= 1.1;
2271: sctx.nshift_max = 5;
2272: sctx.shift_lo = 0.;
2273: sctx.shift_hi = 1.;
2274: }
2276: ISGetIndices(ip,&rip);
2277: ISGetIndices(iip,&riip);
2279: /* initialization */
2280: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);
2282: do {
2283: sctx.newshift = PETSC_FALSE;
2285: for (i=0; i<mbs; i++) jl[i] = mbs;
2286: il[0] = 0;
2288: for (k = 0; k<mbs; k++) {
2289: /* zero rtmp */
2290: nz = bi[k+1] - bi[k];
2291: bjtmp = bj + bi[k];
2292: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2294: bval = ba + bi[k];
2295: /* initialize k-th row by the perm[k]-th row of A */
2296: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2297: for (j = jmin; j < jmax; j++) {
2298: col = riip[aj[j]];
2299: if (col >= k) { /* only take upper triangular entry */
2300: rtmp[col] = aa[j];
2301: *bval++ = 0.0; /* for in-place factorization */
2302: }
2303: }
2304: /* shift the diagonal of the matrix */
2305: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2307: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2308: dk = rtmp[k];
2309: i = jl[k]; /* first row to be added to k_th row */
2311: while (i < k) {
2312: nexti = jl[i]; /* next row to be added to k_th row */
2314: /* compute multiplier, update diag(k) and U(i,k) */
2315: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2316: uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2317: dk += uikdi*ba[ili];
2318: ba[ili] = uikdi; /* -U(i,k) */
2320: /* add multiple of row i to k-th row */
2321: jmin = ili + 1; jmax = bi[i+1];
2322: if (jmin < jmax) {
2323: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2324: /* update il and jl for row i */
2325: il[i] = jmin;
2326: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2327: }
2328: i = nexti;
2329: }
2331: /* shift the diagonals when zero pivot is detected */
2332: /* compute rs=sum of abs(off-diagonal) */
2333: rs = 0.0;
2334: jmin = bi[k]+1;
2335: nz = bi[k+1] - jmin;
2336: bcol = bj + jmin;
2337: for (j=0; j<nz; j++) {
2338: rs += PetscAbsScalar(rtmp[bcol[j]]);
2339: }
2341: sctx.rs = rs;
2342: sctx.pv = dk;
2343: MatPivotCheck(A,info,&sctx,k);
2344: if (sctx.newshift) break;
2345: dk = sctx.pv;
2347: /* copy data into U(k,:) */
2348: ba[bi[k]] = 1.0/dk; /* U(k,k) */
2349: jmin = bi[k]+1; jmax = bi[k+1];
2350: if (jmin < jmax) {
2351: for (j=jmin; j<jmax; j++) {
2352: col = bj[j]; ba[j] = rtmp[col];
2353: }
2354: /* add the k-th row into il and jl */
2355: il[k] = jmin;
2356: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2357: }
2358: }
2359: } while (sctx.newshift);
2361: PetscFree3(rtmp,il,jl);
2362: ISRestoreIndices(ip,&rip);
2363: ISRestoreIndices(iip,&riip);
2365: ISIdentity(ip,&perm_identity);
2366: if (perm_identity) {
2367: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2368: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2369: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2370: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2371: } else {
2372: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2373: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2374: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2375: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2376: }
2378: C->assembled = PETSC_TRUE;
2379: C->preallocated = PETSC_TRUE;
2381: PetscLogFlops(C->rmap->n);
2382: if (sctx.nshift) {
2383: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2384: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2385: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2386: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2387: }
2388: }
2389: return(0);
2390: }
2392: /*
2393: icc() under revised new data structure.
2394: Factored arrays bj and ba are stored as
2395: U(0,:),...,U(i,:),U(n-1,:)
2397: ui=fact->i is an array of size n+1, in which
2398: ui+
2399: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2400: ui[n]: points to U(n-1,n-1)+1
2402: udiag=fact->diag is an array of size n,in which
2403: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2405: U(i,:) contains udiag[i] as its last entry, i.e.,
2406: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2407: */
2411: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2412: {
2413: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2414: Mat_SeqSBAIJ *b;
2415: PetscErrorCode ierr;
2416: PetscBool perm_identity,missing;
2417: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2418: const PetscInt *rip,*riip;
2419: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2420: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2421: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2422: PetscReal fill =info->fill,levels=info->levels;
2423: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2424: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2425: PetscBT lnkbt;
2426: IS iperm;
2429: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2430: MatMissingDiagonal(A,&missing,&d);
2431: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2432: ISIdentity(perm,&perm_identity);
2433: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2435: PetscMalloc1(am+1,&ui);
2436: PetscMalloc1(am+1,&udiag);
2437: ui[0] = 0;
2439: /* ICC(0) without matrix ordering: simply rearrange column indices */
2440: if (!levels && perm_identity) {
2441: for (i=0; i<am; i++) {
2442: ncols = ai[i+1] - a->diag[i];
2443: ui[i+1] = ui[i] + ncols;
2444: udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2445: }
2446: PetscMalloc1(ui[am]+1,&uj);
2447: cols = uj;
2448: for (i=0; i<am; i++) {
2449: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2450: ncols = ai[i+1] - a->diag[i] -1;
2451: for (j=0; j<ncols; j++) *cols++ = aj[j];
2452: *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2453: }
2454: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2455: ISGetIndices(iperm,&riip);
2456: ISGetIndices(perm,&rip);
2458: /* initialization */
2459: PetscMalloc1(am+1,&ajtmp);
2461: /* jl: linked list for storing indices of the pivot rows
2462: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2463: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2464: for (i=0; i<am; i++) {
2465: jl[i] = am; il[i] = 0;
2466: }
2468: /* create and initialize a linked list for storing column indices of the active row k */
2469: nlnk = am + 1;
2470: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2472: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2473: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);
2474: current_space = free_space;
2475: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);
2476: current_space_lvl = free_space_lvl;
2478: for (k=0; k<am; k++) { /* for each active row k */
2479: /* initialize lnk by the column indices of row rip[k] of A */
2480: nzk = 0;
2481: ncols = ai[rip[k]+1] - ai[rip[k]];
2482: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2483: ncols_upper = 0;
2484: for (j=0; j<ncols; j++) {
2485: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2486: if (riip[i] >= k) { /* only take upper triangular entry */
2487: ajtmp[ncols_upper] = i;
2488: ncols_upper++;
2489: }
2490: }
2491: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2492: nzk += nlnk;
2494: /* update lnk by computing fill-in for each pivot row to be merged in */
2495: prow = jl[k]; /* 1st pivot row */
2497: while (prow < k) {
2498: nextprow = jl[prow];
2500: /* merge prow into k-th row */
2501: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2502: jmax = ui[prow+1];
2503: ncols = jmax-jmin;
2504: i = jmin - ui[prow];
2505: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2506: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2507: j = *(uj - 1);
2508: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2509: nzk += nlnk;
2511: /* update il and jl for prow */
2512: if (jmin < jmax) {
2513: il[prow] = jmin;
2514: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2515: }
2516: prow = nextprow;
2517: }
2519: /* if free space is not available, make more free space */
2520: if (current_space->local_remaining<nzk) {
2521: i = am - k + 1; /* num of unfactored rows */
2522: i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2523: PetscFreeSpaceGet(i,¤t_space);
2524: PetscFreeSpaceGet(i,¤t_space_lvl);
2525: reallocs++;
2526: }
2528: /* copy data into free_space and free_space_lvl, then initialize lnk */
2529: if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2530: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2532: /* add the k-th row into il and jl */
2533: if (nzk > 1) {
2534: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2535: jl[k] = jl[i]; jl[i] = k;
2536: il[k] = ui[k] + 1;
2537: }
2538: uj_ptr[k] = current_space->array;
2539: uj_lvl_ptr[k] = current_space_lvl->array;
2541: current_space->array += nzk;
2542: current_space->local_used += nzk;
2543: current_space->local_remaining -= nzk;
2545: current_space_lvl->array += nzk;
2546: current_space_lvl->local_used += nzk;
2547: current_space_lvl->local_remaining -= nzk;
2549: ui[k+1] = ui[k] + nzk;
2550: }
2552: ISRestoreIndices(perm,&rip);
2553: ISRestoreIndices(iperm,&riip);
2554: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2555: PetscFree(ajtmp);
2557: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2558: PetscMalloc1(ui[am]+1,&uj);
2559: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2560: PetscIncompleteLLDestroy(lnk,lnkbt);
2561: PetscFreeSpaceDestroy(free_space_lvl);
2563: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2565: /* put together the new matrix in MATSEQSBAIJ format */
2566: b = (Mat_SeqSBAIJ*)(fact)->data;
2567: b->singlemalloc = PETSC_FALSE;
2569: PetscMalloc1(ui[am]+1,&b->a);
2571: b->j = uj;
2572: b->i = ui;
2573: b->diag = udiag;
2574: b->free_diag = PETSC_TRUE;
2575: b->ilen = 0;
2576: b->imax = 0;
2577: b->row = perm;
2578: b->col = perm;
2579: PetscObjectReference((PetscObject)perm);
2580: PetscObjectReference((PetscObject)perm);
2581: b->icol = iperm;
2582: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2584: PetscMalloc1(am+1,&b->solve_work);
2585: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2587: b->maxnz = b->nz = ui[am];
2588: b->free_a = PETSC_TRUE;
2589: b->free_ij = PETSC_TRUE;
2591: fact->info.factor_mallocs = reallocs;
2592: fact->info.fill_ratio_given = fill;
2593: if (ai[am] != 0) {
2594: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2595: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2596: } else {
2597: fact->info.fill_ratio_needed = 0.0;
2598: }
2599: #if defined(PETSC_USE_INFO)
2600: if (ai[am] != 0) {
2601: PetscReal af = fact->info.fill_ratio_needed;
2602: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2603: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2604: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2605: } else {
2606: PetscInfo(A,"Empty matrix.\n");
2607: }
2608: #endif
2609: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2610: return(0);
2611: }
2615: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2616: {
2617: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2618: Mat_SeqSBAIJ *b;
2619: PetscErrorCode ierr;
2620: PetscBool perm_identity,missing;
2621: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2622: const PetscInt *rip,*riip;
2623: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2624: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2625: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2626: PetscReal fill =info->fill,levels=info->levels;
2627: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2628: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2629: PetscBT lnkbt;
2630: IS iperm;
2633: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2634: MatMissingDiagonal(A,&missing,&d);
2635: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2636: ISIdentity(perm,&perm_identity);
2637: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2639: PetscMalloc1(am+1,&ui);
2640: PetscMalloc1(am+1,&udiag);
2641: ui[0] = 0;
2643: /* ICC(0) without matrix ordering: simply copies fill pattern */
2644: if (!levels && perm_identity) {
2646: for (i=0; i<am; i++) {
2647: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
2648: udiag[i] = ui[i];
2649: }
2650: PetscMalloc1(ui[am]+1,&uj);
2651: cols = uj;
2652: for (i=0; i<am; i++) {
2653: aj = a->j + a->diag[i];
2654: ncols = ui[i+1] - ui[i];
2655: for (j=0; j<ncols; j++) *cols++ = *aj++;
2656: }
2657: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2658: ISGetIndices(iperm,&riip);
2659: ISGetIndices(perm,&rip);
2661: /* initialization */
2662: PetscMalloc1(am+1,&ajtmp);
2664: /* jl: linked list for storing indices of the pivot rows
2665: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2666: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2667: for (i=0; i<am; i++) {
2668: jl[i] = am; il[i] = 0;
2669: }
2671: /* create and initialize a linked list for storing column indices of the active row k */
2672: nlnk = am + 1;
2673: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2675: /* initial FreeSpace size is fill*(ai[am]+1) */
2676: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
2677: current_space = free_space;
2678: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
2679: current_space_lvl = free_space_lvl;
2681: for (k=0; k<am; k++) { /* for each active row k */
2682: /* initialize lnk by the column indices of row rip[k] of A */
2683: nzk = 0;
2684: ncols = ai[rip[k]+1] - ai[rip[k]];
2685: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2686: ncols_upper = 0;
2687: for (j=0; j<ncols; j++) {
2688: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2689: if (riip[i] >= k) { /* only take upper triangular entry */
2690: ajtmp[ncols_upper] = i;
2691: ncols_upper++;
2692: }
2693: }
2694: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2695: nzk += nlnk;
2697: /* update lnk by computing fill-in for each pivot row to be merged in */
2698: prow = jl[k]; /* 1st pivot row */
2700: while (prow < k) {
2701: nextprow = jl[prow];
2703: /* merge prow into k-th row */
2704: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2705: jmax = ui[prow+1];
2706: ncols = jmax-jmin;
2707: i = jmin - ui[prow];
2708: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2709: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2710: j = *(uj - 1);
2711: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2712: nzk += nlnk;
2714: /* update il and jl for prow */
2715: if (jmin < jmax) {
2716: il[prow] = jmin;
2717: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2718: }
2719: prow = nextprow;
2720: }
2722: /* if free space is not available, make more free space */
2723: if (current_space->local_remaining<nzk) {
2724: i = am - k + 1; /* num of unfactored rows */
2725: i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2726: PetscFreeSpaceGet(i,¤t_space);
2727: PetscFreeSpaceGet(i,¤t_space_lvl);
2728: reallocs++;
2729: }
2731: /* copy data into free_space and free_space_lvl, then initialize lnk */
2732: if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2733: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2735: /* add the k-th row into il and jl */
2736: if (nzk > 1) {
2737: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2738: jl[k] = jl[i]; jl[i] = k;
2739: il[k] = ui[k] + 1;
2740: }
2741: uj_ptr[k] = current_space->array;
2742: uj_lvl_ptr[k] = current_space_lvl->array;
2744: current_space->array += nzk;
2745: current_space->local_used += nzk;
2746: current_space->local_remaining -= nzk;
2748: current_space_lvl->array += nzk;
2749: current_space_lvl->local_used += nzk;
2750: current_space_lvl->local_remaining -= nzk;
2752: ui[k+1] = ui[k] + nzk;
2753: }
2755: #if defined(PETSC_USE_INFO)
2756: if (ai[am] != 0) {
2757: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2758: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2759: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2760: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2761: } else {
2762: PetscInfo(A,"Empty matrix.\n");
2763: }
2764: #endif
2766: ISRestoreIndices(perm,&rip);
2767: ISRestoreIndices(iperm,&riip);
2768: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2769: PetscFree(ajtmp);
2771: /* destroy list of free space and other temporary array(s) */
2772: PetscMalloc1(ui[am]+1,&uj);
2773: PetscFreeSpaceContiguous(&free_space,uj);
2774: PetscIncompleteLLDestroy(lnk,lnkbt);
2775: PetscFreeSpaceDestroy(free_space_lvl);
2777: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2779: /* put together the new matrix in MATSEQSBAIJ format */
2781: b = (Mat_SeqSBAIJ*)fact->data;
2782: b->singlemalloc = PETSC_FALSE;
2784: PetscMalloc1(ui[am]+1,&b->a);
2786: b->j = uj;
2787: b->i = ui;
2788: b->diag = udiag;
2789: b->free_diag = PETSC_TRUE;
2790: b->ilen = 0;
2791: b->imax = 0;
2792: b->row = perm;
2793: b->col = perm;
2795: PetscObjectReference((PetscObject)perm);
2796: PetscObjectReference((PetscObject)perm);
2798: b->icol = iperm;
2799: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2800: PetscMalloc1(am+1,&b->solve_work);
2801: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
2802: b->maxnz = b->nz = ui[am];
2803: b->free_a = PETSC_TRUE;
2804: b->free_ij = PETSC_TRUE;
2806: fact->info.factor_mallocs = reallocs;
2807: fact->info.fill_ratio_given = fill;
2808: if (ai[am] != 0) {
2809: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2810: } else {
2811: fact->info.fill_ratio_needed = 0.0;
2812: }
2813: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2814: return(0);
2815: }
2819: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2820: {
2821: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2822: Mat_SeqSBAIJ *b;
2823: PetscErrorCode ierr;
2824: PetscBool perm_identity,missing;
2825: PetscReal fill = info->fill;
2826: const PetscInt *rip,*riip;
2827: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2828: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2829: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2830: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2831: PetscBT lnkbt;
2832: IS iperm;
2835: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2836: MatMissingDiagonal(A,&missing,&i);
2837: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2839: /* check whether perm is the identity mapping */
2840: ISIdentity(perm,&perm_identity);
2841: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2842: ISGetIndices(iperm,&riip);
2843: ISGetIndices(perm,&rip);
2845: /* initialization */
2846: PetscMalloc1(am+1,&ui);
2847: PetscMalloc1(am+1,&udiag);
2848: ui[0] = 0;
2850: /* jl: linked list for storing indices of the pivot rows
2851: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2852: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2853: for (i=0; i<am; i++) {
2854: jl[i] = am; il[i] = 0;
2855: }
2857: /* create and initialize a linked list for storing column indices of the active row k */
2858: nlnk = am + 1;
2859: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2861: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2862: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);
2863: current_space = free_space;
2865: for (k=0; k<am; k++) { /* for each active row k */
2866: /* initialize lnk by the column indices of row rip[k] of A */
2867: nzk = 0;
2868: ncols = ai[rip[k]+1] - ai[rip[k]];
2869: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2870: ncols_upper = 0;
2871: for (j=0; j<ncols; j++) {
2872: i = riip[*(aj + ai[rip[k]] + j)];
2873: if (i >= k) { /* only take upper triangular entry */
2874: cols[ncols_upper] = i;
2875: ncols_upper++;
2876: }
2877: }
2878: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2879: nzk += nlnk;
2881: /* update lnk by computing fill-in for each pivot row to be merged in */
2882: prow = jl[k]; /* 1st pivot row */
2884: while (prow < k) {
2885: nextprow = jl[prow];
2886: /* merge prow into k-th row */
2887: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2888: jmax = ui[prow+1];
2889: ncols = jmax-jmin;
2890: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2891: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
2892: nzk += nlnk;
2894: /* update il and jl for prow */
2895: if (jmin < jmax) {
2896: il[prow] = jmin;
2897: j = *uj_ptr;
2898: jl[prow] = jl[j];
2899: jl[j] = prow;
2900: }
2901: prow = nextprow;
2902: }
2904: /* if free space is not available, make more free space */
2905: if (current_space->local_remaining<nzk) {
2906: i = am - k + 1; /* num of unfactored rows */
2907: i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2908: PetscFreeSpaceGet(i,¤t_space);
2909: reallocs++;
2910: }
2912: /* copy data into free space, then initialize lnk */
2913: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
2915: /* add the k-th row into il and jl */
2916: if (nzk > 1) {
2917: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2918: jl[k] = jl[i]; jl[i] = k;
2919: il[k] = ui[k] + 1;
2920: }
2921: ui_ptr[k] = current_space->array;
2923: current_space->array += nzk;
2924: current_space->local_used += nzk;
2925: current_space->local_remaining -= nzk;
2927: ui[k+1] = ui[k] + nzk;
2928: }
2930: ISRestoreIndices(perm,&rip);
2931: ISRestoreIndices(iperm,&riip);
2932: PetscFree4(ui_ptr,jl,il,cols);
2934: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2935: PetscMalloc1(ui[am]+1,&uj);
2936: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2937: PetscLLDestroy(lnk,lnkbt);
2939: /* put together the new matrix in MATSEQSBAIJ format */
2941: b = (Mat_SeqSBAIJ*)fact->data;
2942: b->singlemalloc = PETSC_FALSE;
2943: b->free_a = PETSC_TRUE;
2944: b->free_ij = PETSC_TRUE;
2946: PetscMalloc1(ui[am]+1,&b->a);
2948: b->j = uj;
2949: b->i = ui;
2950: b->diag = udiag;
2951: b->free_diag = PETSC_TRUE;
2952: b->ilen = 0;
2953: b->imax = 0;
2954: b->row = perm;
2955: b->col = perm;
2957: PetscObjectReference((PetscObject)perm);
2958: PetscObjectReference((PetscObject)perm);
2960: b->icol = iperm;
2961: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2963: PetscMalloc1(am+1,&b->solve_work);
2964: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2966: b->maxnz = b->nz = ui[am];
2968: fact->info.factor_mallocs = reallocs;
2969: fact->info.fill_ratio_given = fill;
2970: if (ai[am] != 0) {
2971: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2972: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2973: } else {
2974: fact->info.fill_ratio_needed = 0.0;
2975: }
2976: #if defined(PETSC_USE_INFO)
2977: if (ai[am] != 0) {
2978: PetscReal af = fact->info.fill_ratio_needed;
2979: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2980: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2981: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2982: } else {
2983: PetscInfo(A,"Empty matrix.\n");
2984: }
2985: #endif
2986: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2987: return(0);
2988: }
2992: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2993: {
2994: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2995: Mat_SeqSBAIJ *b;
2996: PetscErrorCode ierr;
2997: PetscBool perm_identity,missing;
2998: PetscReal fill = info->fill;
2999: const PetscInt *rip,*riip;
3000: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
3001: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
3002: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
3003: PetscFreeSpaceList free_space=NULL,current_space=NULL;
3004: PetscBT lnkbt;
3005: IS iperm;
3008: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
3009: MatMissingDiagonal(A,&missing,&i);
3010: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3012: /* check whether perm is the identity mapping */
3013: ISIdentity(perm,&perm_identity);
3014: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
3015: ISGetIndices(iperm,&riip);
3016: ISGetIndices(perm,&rip);
3018: /* initialization */
3019: PetscMalloc1(am+1,&ui);
3020: ui[0] = 0;
3022: /* jl: linked list for storing indices of the pivot rows
3023: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3024: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
3025: for (i=0; i<am; i++) {
3026: jl[i] = am; il[i] = 0;
3027: }
3029: /* create and initialize a linked list for storing column indices of the active row k */
3030: nlnk = am + 1;
3031: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
3033: /* initial FreeSpace size is fill*(ai[am]+1) */
3034: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
3035: current_space = free_space;
3037: for (k=0; k<am; k++) { /* for each active row k */
3038: /* initialize lnk by the column indices of row rip[k] of A */
3039: nzk = 0;
3040: ncols = ai[rip[k]+1] - ai[rip[k]];
3041: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3042: ncols_upper = 0;
3043: for (j=0; j<ncols; j++) {
3044: i = riip[*(aj + ai[rip[k]] + j)];
3045: if (i >= k) { /* only take upper triangular entry */
3046: cols[ncols_upper] = i;
3047: ncols_upper++;
3048: }
3049: }
3050: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
3051: nzk += nlnk;
3053: /* update lnk by computing fill-in for each pivot row to be merged in */
3054: prow = jl[k]; /* 1st pivot row */
3056: while (prow < k) {
3057: nextprow = jl[prow];
3058: /* merge prow into k-th row */
3059: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3060: jmax = ui[prow+1];
3061: ncols = jmax-jmin;
3062: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3063: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
3064: nzk += nlnk;
3066: /* update il and jl for prow */
3067: if (jmin < jmax) {
3068: il[prow] = jmin;
3069: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3070: }
3071: prow = nextprow;
3072: }
3074: /* if free space is not available, make more free space */
3075: if (current_space->local_remaining<nzk) {
3076: i = am - k + 1; /* num of unfactored rows */
3077: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3078: PetscFreeSpaceGet(i,¤t_space);
3079: reallocs++;
3080: }
3082: /* copy data into free space, then initialize lnk */
3083: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
3085: /* add the k-th row into il and jl */
3086: if (nzk-1 > 0) {
3087: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3088: jl[k] = jl[i]; jl[i] = k;
3089: il[k] = ui[k] + 1;
3090: }
3091: ui_ptr[k] = current_space->array;
3093: current_space->array += nzk;
3094: current_space->local_used += nzk;
3095: current_space->local_remaining -= nzk;
3097: ui[k+1] = ui[k] + nzk;
3098: }
3100: #if defined(PETSC_USE_INFO)
3101: if (ai[am] != 0) {
3102: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3103: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
3104: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
3105: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
3106: } else {
3107: PetscInfo(A,"Empty matrix.\n");
3108: }
3109: #endif
3111: ISRestoreIndices(perm,&rip);
3112: ISRestoreIndices(iperm,&riip);
3113: PetscFree4(ui_ptr,jl,il,cols);
3115: /* destroy list of free space and other temporary array(s) */
3116: PetscMalloc1(ui[am]+1,&uj);
3117: PetscFreeSpaceContiguous(&free_space,uj);
3118: PetscLLDestroy(lnk,lnkbt);
3120: /* put together the new matrix in MATSEQSBAIJ format */
3122: b = (Mat_SeqSBAIJ*)fact->data;
3123: b->singlemalloc = PETSC_FALSE;
3124: b->free_a = PETSC_TRUE;
3125: b->free_ij = PETSC_TRUE;
3127: PetscMalloc1(ui[am]+1,&b->a);
3129: b->j = uj;
3130: b->i = ui;
3131: b->diag = 0;
3132: b->ilen = 0;
3133: b->imax = 0;
3134: b->row = perm;
3135: b->col = perm;
3137: PetscObjectReference((PetscObject)perm);
3138: PetscObjectReference((PetscObject)perm);
3140: b->icol = iperm;
3141: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3143: PetscMalloc1(am+1,&b->solve_work);
3144: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
3145: b->maxnz = b->nz = ui[am];
3147: fact->info.factor_mallocs = reallocs;
3148: fact->info.fill_ratio_given = fill;
3149: if (ai[am] != 0) {
3150: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3151: } else {
3152: fact->info.fill_ratio_needed = 0.0;
3153: }
3154: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3155: return(0);
3156: }
3160: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3161: {
3162: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3163: PetscErrorCode ierr;
3164: PetscInt n = A->rmap->n;
3165: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3166: PetscScalar *x,sum;
3167: const PetscScalar *b;
3168: const MatScalar *aa = a->a,*v;
3169: PetscInt i,nz;
3172: if (!n) return(0);
3174: VecGetArrayRead(bb,&b);
3175: VecGetArray(xx,&x);
3177: /* forward solve the lower triangular */
3178: x[0] = b[0];
3179: v = aa;
3180: vi = aj;
3181: for (i=1; i<n; i++) {
3182: nz = ai[i+1] - ai[i];
3183: sum = b[i];
3184: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3185: v += nz;
3186: vi += nz;
3187: x[i] = sum;
3188: }
3190: /* backward solve the upper triangular */
3191: for (i=n-1; i>=0; i--) {
3192: v = aa + adiag[i+1] + 1;
3193: vi = aj + adiag[i+1] + 1;
3194: nz = adiag[i] - adiag[i+1]-1;
3195: sum = x[i];
3196: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3197: x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3198: }
3200: PetscLogFlops(2.0*a->nz - A->cmap->n);
3201: VecRestoreArrayRead(bb,&b);
3202: VecRestoreArray(xx,&x);
3203: return(0);
3204: }
3208: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3209: {
3210: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3211: IS iscol = a->col,isrow = a->row;
3212: PetscErrorCode ierr;
3213: PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3214: const PetscInt *rout,*cout,*r,*c;
3215: PetscScalar *x,*tmp,sum;
3216: const PetscScalar *b;
3217: const MatScalar *aa = a->a,*v;
3220: if (!n) return(0);
3222: VecGetArrayRead(bb,&b);
3223: VecGetArray(xx,&x);
3224: tmp = a->solve_work;
3226: ISGetIndices(isrow,&rout); r = rout;
3227: ISGetIndices(iscol,&cout); c = cout;
3229: /* forward solve the lower triangular */
3230: tmp[0] = b[r[0]];
3231: v = aa;
3232: vi = aj;
3233: for (i=1; i<n; i++) {
3234: nz = ai[i+1] - ai[i];
3235: sum = b[r[i]];
3236: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3237: tmp[i] = sum;
3238: v += nz; vi += nz;
3239: }
3241: /* backward solve the upper triangular */
3242: for (i=n-1; i>=0; i--) {
3243: v = aa + adiag[i+1]+1;
3244: vi = aj + adiag[i+1]+1;
3245: nz = adiag[i]-adiag[i+1]-1;
3246: sum = tmp[i];
3247: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3248: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3249: }
3251: ISRestoreIndices(isrow,&rout);
3252: ISRestoreIndices(iscol,&cout);
3253: VecRestoreArrayRead(bb,&b);
3254: VecRestoreArray(xx,&x);
3255: PetscLogFlops(2*a->nz - A->cmap->n);
3256: return(0);
3257: }
3261: /*
3262: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3263: */
3264: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3265: {
3266: Mat B = *fact;
3267: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b;
3268: IS isicol;
3270: const PetscInt *r,*ic;
3271: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3272: PetscInt *bi,*bj,*bdiag,*bdiag_rev;
3273: PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3274: PetscInt nlnk,*lnk;
3275: PetscBT lnkbt;
3276: PetscBool row_identity,icol_identity;
3277: MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3278: const PetscInt *ics;
3279: PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3280: PetscReal dt =info->dt,shift=info->shiftamount;
3281: PetscInt dtcount=(PetscInt)info->dtcount,nnz_max;
3282: PetscBool missing;
3285: if (dt == PETSC_DEFAULT) dt = 0.005;
3286: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3288: /* ------- symbolic factorization, can be reused ---------*/
3289: MatMissingDiagonal(A,&missing,&i);
3290: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3291: adiag=a->diag;
3293: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
3295: /* bdiag is location of diagonal in factor */
3296: PetscMalloc1(n+1,&bdiag); /* becomes b->diag */
3297: PetscMalloc1(n+1,&bdiag_rev); /* temporary */
3299: /* allocate row pointers bi */
3300: PetscMalloc1(2*n+2,&bi);
3302: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3303: if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3304: nnz_max = ai[n]+2*n*dtcount+2;
3306: PetscMalloc1(nnz_max+1,&bj);
3307: PetscMalloc1(nnz_max+1,&ba);
3309: /* put together the new matrix */
3310: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
3311: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
3312: b = (Mat_SeqAIJ*)B->data;
3314: b->free_a = PETSC_TRUE;
3315: b->free_ij = PETSC_TRUE;
3316: b->singlemalloc = PETSC_FALSE;
3318: b->a = ba;
3319: b->j = bj;
3320: b->i = bi;
3321: b->diag = bdiag;
3322: b->ilen = 0;
3323: b->imax = 0;
3324: b->row = isrow;
3325: b->col = iscol;
3326: PetscObjectReference((PetscObject)isrow);
3327: PetscObjectReference((PetscObject)iscol);
3328: b->icol = isicol;
3330: PetscMalloc1(n+1,&b->solve_work);
3331: PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
3332: b->maxnz = nnz_max;
3334: B->factortype = MAT_FACTOR_ILUDT;
3335: B->info.factor_mallocs = 0;
3336: B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3337: /* ------- end of symbolic factorization ---------*/
3339: ISGetIndices(isrow,&r);
3340: ISGetIndices(isicol,&ic);
3341: ics = ic;
3343: /* linked list for storing column indices of the active row */
3344: nlnk = n + 1;
3345: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
3347: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3348: PetscMalloc2(n,&im,n,&jtmp);
3349: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3350: PetscMalloc2(n,&rtmp,n,&vtmp);
3351: PetscMemzero(rtmp,n*sizeof(MatScalar));
3353: bi[0] = 0;
3354: bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3355: bdiag_rev[n] = bdiag[0];
3356: bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3357: for (i=0; i<n; i++) {
3358: /* copy initial fill into linked list */
3359: nzi = ai[r[i]+1] - ai[r[i]];
3360: if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3361: nzi_al = adiag[r[i]] - ai[r[i]];
3362: nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3363: ajtmp = aj + ai[r[i]];
3364: PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);
3366: /* load in initial (unfactored row) */
3367: aatmp = a->a + ai[r[i]];
3368: for (j=0; j<nzi; j++) {
3369: rtmp[ics[*ajtmp++]] = *aatmp++;
3370: }
3372: /* add pivot rows into linked list */
3373: row = lnk[n];
3374: while (row < i) {
3375: nzi_bl = bi[row+1] - bi[row] + 1;
3376: bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3377: PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);
3378: nzi += nlnk;
3379: row = lnk[row];
3380: }
3382: /* copy data from lnk into jtmp, then initialize lnk */
3383: PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);
3385: /* numerical factorization */
3386: bjtmp = jtmp;
3387: row = *bjtmp++; /* 1st pivot row */
3388: while (row < i) {
3389: pc = rtmp + row;
3390: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3391: multiplier = (*pc) * (*pv);
3392: *pc = multiplier;
3393: if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3394: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3395: pv = ba + bdiag[row+1] + 1;
3396: /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3397: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3398: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3399: PetscLogFlops(1+2*nz);
3400: }
3401: row = *bjtmp++;
3402: }
3404: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3405: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3406: nzi_bl = 0; j = 0;
3407: while (jtmp[j] < i) { /* Note: jtmp is sorted */
3408: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3409: nzi_bl++; j++;
3410: }
3411: nzi_bu = nzi - nzi_bl -1;
3412: while (j < nzi) {
3413: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3414: j++;
3415: }
3417: bjtmp = bj + bi[i];
3418: batmp = ba + bi[i];
3419: /* apply level dropping rule to L part */
3420: ncut = nzi_al + dtcount;
3421: if (ncut < nzi_bl) {
3422: PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);
3423: PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
3424: } else {
3425: ncut = nzi_bl;
3426: }
3427: for (j=0; j<ncut; j++) {
3428: bjtmp[j] = jtmp[j];
3429: batmp[j] = vtmp[j];
3430: /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3431: }
3432: bi[i+1] = bi[i] + ncut;
3433: nzi = ncut + 1;
3435: /* apply level dropping rule to U part */
3436: ncut = nzi_au + dtcount;
3437: if (ncut < nzi_bu) {
3438: PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);
3439: PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
3440: } else {
3441: ncut = nzi_bu;
3442: }
3443: nzi += ncut;
3445: /* mark bdiagonal */
3446: bdiag[i+1] = bdiag[i] - (ncut + 1);
3447: bdiag_rev[n-i-1] = bdiag[i+1];
3448: bi[2*n - i] = bi[2*n - i +1] - (ncut + 1);
3449: bjtmp = bj + bdiag[i];
3450: batmp = ba + bdiag[i];
3451: *bjtmp = i;
3452: *batmp = diag_tmp; /* rtmp[i]; */
3453: if (*batmp == 0.0) {
3454: *batmp = dt+shift;
3455: /* printf(" row %d add shift %g\n",i,shift); */
3456: }
3457: *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3458: /* printf(" (%d,%g),",*bjtmp,*batmp); */
3460: bjtmp = bj + bdiag[i+1]+1;
3461: batmp = ba + bdiag[i+1]+1;
3462: for (k=0; k<ncut; k++) {
3463: bjtmp[k] = jtmp[nzi_bl+1+k];
3464: batmp[k] = vtmp[nzi_bl+1+k];
3465: /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3466: }
3467: /* printf("\n"); */
3469: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3470: /*
3471: printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3472: printf(" ----------------------------\n");
3473: */
3474: } /* for (i=0; i<n; i++) */
3475: /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3476: if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3478: ISRestoreIndices(isrow,&r);
3479: ISRestoreIndices(isicol,&ic);
3481: PetscLLDestroy(lnk,lnkbt);
3482: PetscFree2(im,jtmp);
3483: PetscFree2(rtmp,vtmp);
3484: PetscFree(bdiag_rev);
3486: PetscLogFlops(B->cmap->n);
3487: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3489: ISIdentity(isrow,&row_identity);
3490: ISIdentity(isicol,&icol_identity);
3491: if (row_identity && icol_identity) {
3492: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3493: } else {
3494: B->ops->solve = MatSolve_SeqAIJ;
3495: }
3497: B->ops->solveadd = 0;
3498: B->ops->solvetranspose = 0;
3499: B->ops->solvetransposeadd = 0;
3500: B->ops->matsolve = 0;
3501: B->assembled = PETSC_TRUE;
3502: B->preallocated = PETSC_TRUE;
3503: return(0);
3504: }
3506: /* a wraper of MatILUDTFactor_SeqAIJ() */
3509: /*
3510: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3511: */
3513: PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3514: {
3518: MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);
3519: return(0);
3520: }
3522: /*
3523: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3524: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3525: */
3528: /*
3529: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3530: */
3532: PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3533: {
3534: Mat C =fact;
3535: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3536: IS isrow = b->row,isicol = b->icol;
3538: const PetscInt *r,*ic,*ics;
3539: PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3540: PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3541: MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3542: PetscReal dt=info->dt,shift=info->shiftamount;
3543: PetscBool row_identity, col_identity;
3546: ISGetIndices(isrow,&r);
3547: ISGetIndices(isicol,&ic);
3548: PetscMalloc1(n+1,&rtmp);
3549: ics = ic;
3551: for (i=0; i<n; i++) {
3552: /* initialize rtmp array */
3553: nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */
3554: bjtmp = bj + bi[i];
3555: for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3556: rtmp[i] = 0.0;
3557: nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3558: bjtmp = bj + bdiag[i+1] + 1;
3559: for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3561: /* load in initial unfactored row of A */
3562: /* printf("row %d\n",i); */
3563: nz = ai[r[i]+1] - ai[r[i]];
3564: ajtmp = aj + ai[r[i]];
3565: v = aa + ai[r[i]];
3566: for (j=0; j<nz; j++) {
3567: rtmp[ics[*ajtmp++]] = v[j];
3568: /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3569: }
3570: /* printf("\n"); */
3572: /* numerical factorization */
3573: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3574: nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3575: k = 0;
3576: while (k < nzl) {
3577: row = *bjtmp++;
3578: /* printf(" prow %d\n",row); */
3579: pc = rtmp + row;
3580: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3581: multiplier = (*pc) * (*pv);
3582: *pc = multiplier;
3583: if (PetscAbsScalar(multiplier) > dt) {
3584: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3585: pv = b->a + bdiag[row+1] + 1;
3586: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3587: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3588: PetscLogFlops(1+2*nz);
3589: }
3590: k++;
3591: }
3593: /* finished row so stick it into b->a */
3594: /* L-part */
3595: pv = b->a + bi[i];
3596: pj = bj + bi[i];
3597: nzl = bi[i+1] - bi[i];
3598: for (j=0; j<nzl; j++) {
3599: pv[j] = rtmp[pj[j]];
3600: /* printf(" (%d,%g),",pj[j],pv[j]); */
3601: }
3603: /* diagonal: invert diagonal entries for simplier triangular solves */
3604: if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3605: b->a[bdiag[i]] = 1.0/rtmp[i];
3606: /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3608: /* U-part */
3609: pv = b->a + bdiag[i+1] + 1;
3610: pj = bj + bdiag[i+1] + 1;
3611: nzu = bdiag[i] - bdiag[i+1] - 1;
3612: for (j=0; j<nzu; j++) {
3613: pv[j] = rtmp[pj[j]];
3614: /* printf(" (%d,%g),",pj[j],pv[j]); */
3615: }
3616: /* printf("\n"); */
3617: }
3619: PetscFree(rtmp);
3620: ISRestoreIndices(isicol,&ic);
3621: ISRestoreIndices(isrow,&r);
3623: ISIdentity(isrow,&row_identity);
3624: ISIdentity(isicol,&col_identity);
3625: if (row_identity && col_identity) {
3626: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3627: } else {
3628: C->ops->solve = MatSolve_SeqAIJ;
3629: }
3630: C->ops->solveadd = 0;
3631: C->ops->solvetranspose = 0;
3632: C->ops->solvetransposeadd = 0;
3633: C->ops->matsolve = 0;
3634: C->assembled = PETSC_TRUE;
3635: C->preallocated = PETSC_TRUE;
3637: PetscLogFlops(C->cmap->n);
3638: return(0);
3639: }