Actual source code: aijfact.c
petsc-3.7.3 2016-08-01
2: #include <../src/mat/impls/aij/seq/aij.h>
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <petscbt.h>
5: #include <../src/mat/utils/freespace.h>
9: /*
10: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
12: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
13: */
14: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
15: {
16: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data;
17: PetscErrorCode ierr;
18: PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
19: const PetscInt *ai = a->i, *aj = a->j;
20: const PetscScalar *aa = a->a;
21: PetscBool *done;
22: PetscReal best,past = 0,future;
25: /* pick initial row */
26: best = -1;
27: for (i=0; i<n; i++) {
28: future = 0.0;
29: for (j=ai[i]; j<ai[i+1]; j++) {
30: if (aj[j] != i) future += PetscAbsScalar(aa[j]);
31: else past = PetscAbsScalar(aa[j]);
32: }
33: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34: if (past/future > best) {
35: best = past/future;
36: current = i;
37: }
38: }
40: PetscMalloc1(n,&done);
41: PetscMemzero(done,n*sizeof(PetscBool));
42: PetscMalloc1(n,&order);
43: order[0] = current;
44: for (i=0; i<n-1; i++) {
45: done[current] = PETSC_TRUE;
46: best = -1;
47: /* loop over all neighbors of current pivot */
48: for (j=ai[current]; j<ai[current+1]; j++) {
49: jj = aj[j];
50: if (done[jj]) continue;
51: /* loop over columns of potential next row computing weights for below and above diagonal */
52: past = future = 0.0;
53: for (k=ai[jj]; k<ai[jj+1]; k++) {
54: kk = aj[k];
55: if (done[kk]) past += PetscAbsScalar(aa[k]);
56: else if (kk != jj) future += PetscAbsScalar(aa[k]);
57: }
58: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59: if (past/future > best) {
60: best = past/future;
61: newcurrent = jj;
62: }
63: }
64: if (best == -1) { /* no neighbors to select from so select best of all that remain */
65: best = -1;
66: for (k=0; k<n; k++) {
67: if (done[k]) continue;
68: future = 0.0;
69: past = 0.0;
70: for (j=ai[k]; j<ai[k+1]; j++) {
71: kk = aj[j];
72: if (done[kk]) past += PetscAbsScalar(aa[j]);
73: else if (kk != k) future += PetscAbsScalar(aa[j]);
74: }
75: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76: if (past/future > best) {
77: best = past/future;
78: newcurrent = k;
79: }
80: }
81: }
82: if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
83: current = newcurrent;
84: order[i+1] = current;
85: }
86: ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);
87: *icol = *irow;
88: PetscObjectReference((PetscObject)*irow);
89: PetscFree(done);
90: PetscFree(order);
91: return(0);
92: }
96: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
97: {
98: PetscInt n = A->rmap->n;
102: #if defined(PETSC_USE_COMPLEX)
103: if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
104: #endif
105: MatCreate(PetscObjectComm((PetscObject)A),B);
106: MatSetSizes(*B,n,n,n,n);
107: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
108: MatSetType(*B,MATSEQAIJ);
110: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
111: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
113: MatSetBlockSizesFromMats(*B,A,A);
114: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
115: MatSetType(*B,MATSEQSBAIJ);
116: MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);
118: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
119: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
120: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
121: (*B)->factortype = ftype;
122:
123: PetscFree((*B)->solvertype);
124: PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);
125: return(0);
126: }
130: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
131: {
132: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
133: IS isicol;
134: PetscErrorCode ierr;
135: const PetscInt *r,*ic;
136: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
137: PetscInt *bi,*bj,*ajtmp;
138: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
139: PetscReal f;
140: PetscInt nlnk,*lnk,k,**bi_ptr;
141: PetscFreeSpaceList free_space=NULL,current_space=NULL;
142: PetscBT lnkbt;
143: PetscBool missing;
146: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
147: MatMissingDiagonal(A,&missing,&i);
148: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
150: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
151: ISGetIndices(isrow,&r);
152: ISGetIndices(isicol,&ic);
154: /* get new row pointers */
155: PetscMalloc1(n+1,&bi);
156: bi[0] = 0;
158: /* bdiag is location of diagonal in factor */
159: PetscMalloc1(n+1,&bdiag);
160: bdiag[0] = 0;
162: /* linked list for storing column indices of the active row */
163: nlnk = n + 1;
164: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
166: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
168: /* initial FreeSpace size is f*(ai[n]+1) */
169: f = info->fill;
170: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
171: current_space = free_space;
173: for (i=0; i<n; i++) {
174: /* copy previous fill into linked list */
175: nzi = 0;
176: nnz = ai[r[i]+1] - ai[r[i]];
177: ajtmp = aj + ai[r[i]];
178: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
179: nzi += nlnk;
181: /* add pivot rows into linked list */
182: row = lnk[n];
183: while (row < i) {
184: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
185: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
186: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
187: nzi += nlnk;
188: row = lnk[row];
189: }
190: bi[i+1] = bi[i] + nzi;
191: im[i] = nzi;
193: /* mark bdiag */
194: nzbd = 0;
195: nnz = nzi;
196: k = lnk[n];
197: while (nnz-- && k < i) {
198: nzbd++;
199: k = lnk[k];
200: }
201: bdiag[i] = bi[i] + nzbd;
203: /* if free space is not available, make more free space */
204: if (current_space->local_remaining<nzi) {
205: nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
206: PetscFreeSpaceGet(nnz,¤t_space);
207: reallocs++;
208: }
210: /* copy data into free space, then initialize lnk */
211: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
213: bi_ptr[i] = current_space->array;
214: current_space->array += nzi;
215: current_space->local_used += nzi;
216: current_space->local_remaining -= nzi;
217: }
218: #if defined(PETSC_USE_INFO)
219: if (ai[n] != 0) {
220: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
221: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
222: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
223: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
224: PetscInfo(A,"for best performance.\n");
225: } else {
226: PetscInfo(A,"Empty matrix\n");
227: }
228: #endif
230: ISRestoreIndices(isrow,&r);
231: ISRestoreIndices(isicol,&ic);
233: /* destroy list of free space and other temporary array(s) */
234: PetscMalloc1(bi[n]+1,&bj);
235: PetscFreeSpaceContiguous(&free_space,bj);
236: PetscLLDestroy(lnk,lnkbt);
237: PetscFree2(bi_ptr,im);
239: /* put together the new matrix */
240: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
241: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
242: b = (Mat_SeqAIJ*)(B)->data;
244: b->free_a = PETSC_TRUE;
245: b->free_ij = PETSC_TRUE;
246: b->singlemalloc = PETSC_FALSE;
248: PetscMalloc1(bi[n]+1,&b->a);
249: b->j = bj;
250: b->i = bi;
251: b->diag = bdiag;
252: b->ilen = 0;
253: b->imax = 0;
254: b->row = isrow;
255: b->col = iscol;
256: PetscObjectReference((PetscObject)isrow);
257: PetscObjectReference((PetscObject)iscol);
258: b->icol = isicol;
259: PetscMalloc1(n+1,&b->solve_work);
261: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
262: PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
263: b->maxnz = b->nz = bi[n];
265: (B)->factortype = MAT_FACTOR_LU;
266: (B)->info.factor_mallocs = reallocs;
267: (B)->info.fill_ratio_given = f;
269: if (ai[n]) {
270: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
271: } else {
272: (B)->info.fill_ratio_needed = 0.0;
273: }
274: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
275: if (a->inode.size) {
276: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
277: }
278: return(0);
279: }
283: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
284: {
285: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
286: IS isicol;
287: PetscErrorCode ierr;
288: const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
289: PetscInt i,n=A->rmap->n;
290: PetscInt *bi,*bj;
291: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
292: PetscReal f;
293: PetscInt nlnk,*lnk,k,**bi_ptr;
294: PetscFreeSpaceList free_space=NULL,current_space=NULL;
295: PetscBT lnkbt;
296: PetscBool missing;
299: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
300: MatMissingDiagonal(A,&missing,&i);
301: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
302:
303: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
304: ISGetIndices(isrow,&r);
305: ISGetIndices(isicol,&ic);
307: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
308: PetscMalloc1(n+1,&bi);
309: PetscMalloc1(n+1,&bdiag);
310: bi[0] = bdiag[0] = 0;
312: /* linked list for storing column indices of the active row */
313: nlnk = n + 1;
314: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
316: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
318: /* initial FreeSpace size is f*(ai[n]+1) */
319: f = info->fill;
320: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
321: current_space = free_space;
323: for (i=0; i<n; i++) {
324: /* copy previous fill into linked list */
325: nzi = 0;
326: nnz = ai[r[i]+1] - ai[r[i]];
327: ajtmp = aj + ai[r[i]];
328: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
329: nzi += nlnk;
331: /* add pivot rows into linked list */
332: row = lnk[n];
333: while (row < i) {
334: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
335: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
336: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
337: nzi += nlnk;
338: row = lnk[row];
339: }
340: bi[i+1] = bi[i] + nzi;
341: im[i] = nzi;
343: /* mark bdiag */
344: nzbd = 0;
345: nnz = nzi;
346: k = lnk[n];
347: while (nnz-- && k < i) {
348: nzbd++;
349: k = lnk[k];
350: }
351: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
353: /* if free space is not available, make more free space */
354: if (current_space->local_remaining<nzi) {
355: /* estimated additional space needed */
356: nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi));
357: PetscFreeSpaceGet(nnz,¤t_space);
358: reallocs++;
359: }
361: /* copy data into free space, then initialize lnk */
362: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
364: bi_ptr[i] = current_space->array;
365: current_space->array += nzi;
366: current_space->local_used += nzi;
367: current_space->local_remaining -= nzi;
368: }
370: ISRestoreIndices(isrow,&r);
371: ISRestoreIndices(isicol,&ic);
373: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
374: PetscMalloc1(bi[n]+1,&bj);
375: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
376: PetscLLDestroy(lnk,lnkbt);
377: PetscFree2(bi_ptr,im);
379: /* put together the new matrix */
380: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
381: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
382: b = (Mat_SeqAIJ*)(B)->data;
384: b->free_a = PETSC_TRUE;
385: b->free_ij = PETSC_TRUE;
386: b->singlemalloc = PETSC_FALSE;
388: PetscMalloc1(bdiag[0]+1,&b->a);
390: b->j = bj;
391: b->i = bi;
392: b->diag = bdiag;
393: b->ilen = 0;
394: b->imax = 0;
395: b->row = isrow;
396: b->col = iscol;
397: PetscObjectReference((PetscObject)isrow);
398: PetscObjectReference((PetscObject)iscol);
399: b->icol = isicol;
400: PetscMalloc1(n+1,&b->solve_work);
402: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
403: PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
404: b->maxnz = b->nz = bdiag[0]+1;
406: B->factortype = MAT_FACTOR_LU;
407: B->info.factor_mallocs = reallocs;
408: B->info.fill_ratio_given = f;
410: if (ai[n]) {
411: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
412: } else {
413: B->info.fill_ratio_needed = 0.0;
414: }
415: #if defined(PETSC_USE_INFO)
416: if (ai[n] != 0) {
417: PetscReal af = B->info.fill_ratio_needed;
418: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
419: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
420: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
421: PetscInfo(A,"for best performance.\n");
422: } else {
423: PetscInfo(A,"Empty matrix\n");
424: }
425: #endif
426: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
427: if (a->inode.size) {
428: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
429: }
430: MatSeqAIJCheckInode_FactorLU(B);
431: return(0);
432: }
434: /*
435: Trouble in factorization, should we dump the original matrix?
436: */
439: PetscErrorCode MatFactorDumpMatrix(Mat A)
440: {
442: PetscBool flg = PETSC_FALSE;
445: PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);
446: if (flg) {
447: PetscViewer viewer;
448: char filename[PETSC_MAX_PATH_LEN];
450: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
451: PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);
452: MatView(A,viewer);
453: PetscViewerDestroy(&viewer);
454: }
455: return(0);
456: }
460: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
461: {
462: Mat C =B;
463: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
464: IS isrow = b->row,isicol = b->icol;
465: PetscErrorCode ierr;
466: const PetscInt *r,*ic,*ics;
467: const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
468: PetscInt i,j,k,nz,nzL,row,*pj;
469: const PetscInt *ajtmp,*bjtmp;
470: MatScalar *rtmp,*pc,multiplier,*pv;
471: const MatScalar *aa=a->a,*v;
472: PetscBool row_identity,col_identity;
473: FactorShiftCtx sctx;
474: const PetscInt *ddiag;
475: PetscReal rs;
476: MatScalar d;
479: /* MatPivotSetUp(): initialize shift context sctx */
480: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
482: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
483: ddiag = a->diag;
484: sctx.shift_top = info->zeropivot;
485: for (i=0; i<n; i++) {
486: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
487: d = (aa)[ddiag[i]];
488: rs = -PetscAbsScalar(d) - PetscRealPart(d);
489: v = aa+ai[i];
490: nz = ai[i+1] - ai[i];
491: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
492: if (rs>sctx.shift_top) sctx.shift_top = rs;
493: }
494: sctx.shift_top *= 1.1;
495: sctx.nshift_max = 5;
496: sctx.shift_lo = 0.;
497: sctx.shift_hi = 1.;
498: }
500: ISGetIndices(isrow,&r);
501: ISGetIndices(isicol,&ic);
502: PetscMalloc1(n+1,&rtmp);
503: ics = ic;
505: do {
506: sctx.newshift = PETSC_FALSE;
507: for (i=0; i<n; i++) {
508: /* zero rtmp */
509: /* L part */
510: nz = bi[i+1] - bi[i];
511: bjtmp = bj + bi[i];
512: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
514: /* U part */
515: nz = bdiag[i]-bdiag[i+1];
516: bjtmp = bj + bdiag[i+1]+1;
517: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
519: /* load in initial (unfactored row) */
520: nz = ai[r[i]+1] - ai[r[i]];
521: ajtmp = aj + ai[r[i]];
522: v = aa + ai[r[i]];
523: for (j=0; j<nz; j++) {
524: rtmp[ics[ajtmp[j]]] = v[j];
525: }
526: /* ZeropivotApply() */
527: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
529: /* elimination */
530: bjtmp = bj + bi[i];
531: row = *bjtmp++;
532: nzL = bi[i+1] - bi[i];
533: for (k=0; k < nzL; k++) {
534: pc = rtmp + row;
535: if (*pc != 0.0) {
536: pv = b->a + bdiag[row];
537: multiplier = *pc * (*pv);
538: *pc = multiplier;
540: pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
541: pv = b->a + bdiag[row+1]+1;
542: nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
544: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
545: PetscLogFlops(1+2*nz);
546: }
547: row = *bjtmp++;
548: }
550: /* finished row so stick it into b->a */
551: rs = 0.0;
552: /* L part */
553: pv = b->a + bi[i];
554: pj = b->j + bi[i];
555: nz = bi[i+1] - bi[i];
556: for (j=0; j<nz; j++) {
557: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
558: }
560: /* U part */
561: pv = b->a + bdiag[i+1]+1;
562: pj = b->j + bdiag[i+1]+1;
563: nz = bdiag[i] - bdiag[i+1]-1;
564: for (j=0; j<nz; j++) {
565: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
566: }
568: sctx.rs = rs;
569: sctx.pv = rtmp[i];
570: MatPivotCheck(B,A,info,&sctx,i);
571: if (sctx.newshift) break; /* break for-loop */
572: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
574: /* Mark diagonal and invert diagonal for simplier triangular solves */
575: pv = b->a + bdiag[i];
576: *pv = 1.0/rtmp[i];
578: } /* endof for (i=0; i<n; i++) { */
580: /* MatPivotRefine() */
581: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
582: /*
583: * if no shift in this attempt & shifting & started shifting & can refine,
584: * then try lower shift
585: */
586: sctx.shift_hi = sctx.shift_fraction;
587: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
588: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
589: sctx.newshift = PETSC_TRUE;
590: sctx.nshift++;
591: }
592: } while (sctx.newshift);
594: PetscFree(rtmp);
595: ISRestoreIndices(isicol,&ic);
596: ISRestoreIndices(isrow,&r);
598: ISIdentity(isrow,&row_identity);
599: ISIdentity(isicol,&col_identity);
600: if (b->inode.size) {
601: C->ops->solve = MatSolve_SeqAIJ_Inode;
602: } else if (row_identity && col_identity) {
603: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
604: } else {
605: C->ops->solve = MatSolve_SeqAIJ;
606: }
607: C->ops->solveadd = MatSolveAdd_SeqAIJ;
608: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
609: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
610: C->ops->matsolve = MatMatSolve_SeqAIJ;
611: C->assembled = PETSC_TRUE;
612: C->preallocated = PETSC_TRUE;
614: PetscLogFlops(C->cmap->n);
616: /* MatShiftView(A,info,&sctx) */
617: if (sctx.nshift) {
618: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
619: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
620: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
621: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
622: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
623: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
624: }
625: }
626: return(0);
627: }
631: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
632: {
633: Mat C =B;
634: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
635: IS isrow = b->row,isicol = b->icol;
636: PetscErrorCode ierr;
637: const PetscInt *r,*ic,*ics;
638: PetscInt nz,row,i,j,n=A->rmap->n,diag;
639: const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
640: const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
641: MatScalar *pv,*rtmp,*pc,multiplier,d;
642: const MatScalar *v,*aa=a->a;
643: PetscReal rs=0.0;
644: FactorShiftCtx sctx;
645: const PetscInt *ddiag;
646: PetscBool row_identity, col_identity;
649: /* MatPivotSetUp(): initialize shift context sctx */
650: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
652: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
653: ddiag = a->diag;
654: sctx.shift_top = info->zeropivot;
655: for (i=0; i<n; i++) {
656: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
657: d = (aa)[ddiag[i]];
658: rs = -PetscAbsScalar(d) - PetscRealPart(d);
659: v = aa+ai[i];
660: nz = ai[i+1] - ai[i];
661: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
662: if (rs>sctx.shift_top) sctx.shift_top = rs;
663: }
664: sctx.shift_top *= 1.1;
665: sctx.nshift_max = 5;
666: sctx.shift_lo = 0.;
667: sctx.shift_hi = 1.;
668: }
670: ISGetIndices(isrow,&r);
671: ISGetIndices(isicol,&ic);
672: PetscMalloc1(n+1,&rtmp);
673: ics = ic;
675: do {
676: sctx.newshift = PETSC_FALSE;
677: for (i=0; i<n; i++) {
678: nz = bi[i+1] - bi[i];
679: bjtmp = bj + bi[i];
680: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
682: /* load in initial (unfactored row) */
683: nz = ai[r[i]+1] - ai[r[i]];
684: ajtmp = aj + ai[r[i]];
685: v = aa + ai[r[i]];
686: for (j=0; j<nz; j++) {
687: rtmp[ics[ajtmp[j]]] = v[j];
688: }
689: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
691: row = *bjtmp++;
692: while (row < i) {
693: pc = rtmp + row;
694: if (*pc != 0.0) {
695: pv = b->a + diag_offset[row];
696: pj = b->j + diag_offset[row] + 1;
697: multiplier = *pc / *pv++;
698: *pc = multiplier;
699: nz = bi[row+1] - diag_offset[row] - 1;
700: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
701: PetscLogFlops(1+2*nz);
702: }
703: row = *bjtmp++;
704: }
705: /* finished row so stick it into b->a */
706: pv = b->a + bi[i];
707: pj = b->j + bi[i];
708: nz = bi[i+1] - bi[i];
709: diag = diag_offset[i] - bi[i];
710: rs = 0.0;
711: for (j=0; j<nz; j++) {
712: pv[j] = rtmp[pj[j]];
713: rs += PetscAbsScalar(pv[j]);
714: }
715: rs -= PetscAbsScalar(pv[diag]);
717: sctx.rs = rs;
718: sctx.pv = pv[diag];
719: MatPivotCheck(B,A,info,&sctx,i);
720: if (sctx.newshift) break;
721: pv[diag] = sctx.pv;
722: }
724: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
725: /*
726: * if no shift in this attempt & shifting & started shifting & can refine,
727: * then try lower shift
728: */
729: sctx.shift_hi = sctx.shift_fraction;
730: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
731: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
732: sctx.newshift = PETSC_TRUE;
733: sctx.nshift++;
734: }
735: } while (sctx.newshift);
737: /* invert diagonal entries for simplier triangular solves */
738: for (i=0; i<n; i++) {
739: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
740: }
741: PetscFree(rtmp);
742: ISRestoreIndices(isicol,&ic);
743: ISRestoreIndices(isrow,&r);
745: ISIdentity(isrow,&row_identity);
746: ISIdentity(isicol,&col_identity);
747: if (row_identity && col_identity) {
748: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
749: } else {
750: C->ops->solve = MatSolve_SeqAIJ_inplace;
751: }
752: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
753: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
754: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
755: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
757: C->assembled = PETSC_TRUE;
758: C->preallocated = PETSC_TRUE;
760: PetscLogFlops(C->cmap->n);
761: if (sctx.nshift) {
762: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
763: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
764: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
765: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
766: }
767: }
768: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
769: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
771: MatSeqAIJCheckInode(C);
772: return(0);
773: }
775: /*
776: This routine implements inplace ILU(0) with row or/and column permutations.
777: Input:
778: A - original matrix
779: Output;
780: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
781: a->j (col index) is permuted by the inverse of colperm, then sorted
782: a->a reordered accordingly with a->j
783: a->diag (ptr to diagonal elements) is updated.
784: */
787: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
788: {
789: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data;
790: IS isrow = a->row,isicol = a->icol;
791: PetscErrorCode ierr;
792: const PetscInt *r,*ic,*ics;
793: PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
794: PetscInt *ajtmp,nz,row;
795: PetscInt *diag = a->diag,nbdiag,*pj;
796: PetscScalar *rtmp,*pc,multiplier,d;
797: MatScalar *pv,*v;
798: PetscReal rs;
799: FactorShiftCtx sctx;
800: const MatScalar *aa=a->a,*vtmp;
803: if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
805: /* MatPivotSetUp(): initialize shift context sctx */
806: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
808: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
809: const PetscInt *ddiag = a->diag;
810: sctx.shift_top = info->zeropivot;
811: for (i=0; i<n; i++) {
812: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
813: d = (aa)[ddiag[i]];
814: rs = -PetscAbsScalar(d) - PetscRealPart(d);
815: vtmp = aa+ai[i];
816: nz = ai[i+1] - ai[i];
817: for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
818: if (rs>sctx.shift_top) sctx.shift_top = rs;
819: }
820: sctx.shift_top *= 1.1;
821: sctx.nshift_max = 5;
822: sctx.shift_lo = 0.;
823: sctx.shift_hi = 1.;
824: }
826: ISGetIndices(isrow,&r);
827: ISGetIndices(isicol,&ic);
828: PetscMalloc1(n+1,&rtmp);
829: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
830: ics = ic;
832: #if defined(MV)
833: sctx.shift_top = 0.;
834: sctx.nshift_max = 0;
835: sctx.shift_lo = 0.;
836: sctx.shift_hi = 0.;
837: sctx.shift_fraction = 0.;
839: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
840: sctx.shift_top = 0.;
841: for (i=0; i<n; i++) {
842: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
843: d = (a->a)[diag[i]];
844: rs = -PetscAbsScalar(d) - PetscRealPart(d);
845: v = a->a+ai[i];
846: nz = ai[i+1] - ai[i];
847: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
848: if (rs>sctx.shift_top) sctx.shift_top = rs;
849: }
850: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
851: sctx.shift_top *= 1.1;
852: sctx.nshift_max = 5;
853: sctx.shift_lo = 0.;
854: sctx.shift_hi = 1.;
855: }
857: sctx.shift_amount = 0.;
858: sctx.nshift = 0;
859: #endif
861: do {
862: sctx.newshift = PETSC_FALSE;
863: for (i=0; i<n; i++) {
864: /* load in initial unfactored row */
865: nz = ai[r[i]+1] - ai[r[i]];
866: ajtmp = aj + ai[r[i]];
867: v = a->a + ai[r[i]];
868: /* sort permuted ajtmp and values v accordingly */
869: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
870: PetscSortIntWithScalarArray(nz,ajtmp,v);
872: diag[r[i]] = ai[r[i]];
873: for (j=0; j<nz; j++) {
874: rtmp[ajtmp[j]] = v[j];
875: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
876: }
877: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
879: row = *ajtmp++;
880: while (row < i) {
881: pc = rtmp + row;
882: if (*pc != 0.0) {
883: pv = a->a + diag[r[row]];
884: pj = aj + diag[r[row]] + 1;
886: multiplier = *pc / *pv++;
887: *pc = multiplier;
888: nz = ai[r[row]+1] - diag[r[row]] - 1;
889: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
890: PetscLogFlops(1+2*nz);
891: }
892: row = *ajtmp++;
893: }
894: /* finished row so overwrite it onto a->a */
895: pv = a->a + ai[r[i]];
896: pj = aj + ai[r[i]];
897: nz = ai[r[i]+1] - ai[r[i]];
898: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
900: rs = 0.0;
901: for (j=0; j<nz; j++) {
902: pv[j] = rtmp[pj[j]];
903: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
904: }
906: sctx.rs = rs;
907: sctx.pv = pv[nbdiag];
908: MatPivotCheck(B,A,info,&sctx,i);
909: if (sctx.newshift) break;
910: pv[nbdiag] = sctx.pv;
911: }
913: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
914: /*
915: * if no shift in this attempt & shifting & started shifting & can refine,
916: * then try lower shift
917: */
918: sctx.shift_hi = sctx.shift_fraction;
919: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
920: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
921: sctx.newshift = PETSC_TRUE;
922: sctx.nshift++;
923: }
924: } while (sctx.newshift);
926: /* invert diagonal entries for simplier triangular solves */
927: for (i=0; i<n; i++) {
928: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
929: }
931: PetscFree(rtmp);
932: ISRestoreIndices(isicol,&ic);
933: ISRestoreIndices(isrow,&r);
935: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
936: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
937: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
938: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
940: A->assembled = PETSC_TRUE;
941: A->preallocated = PETSC_TRUE;
943: PetscLogFlops(A->cmap->n);
944: if (sctx.nshift) {
945: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
946: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
947: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
948: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
949: }
950: }
951: return(0);
952: }
954: /* ----------------------------------------------------------- */
957: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
958: {
960: Mat C;
963: MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);
964: MatLUFactorSymbolic(C,A,row,col,info);
965: MatLUFactorNumeric(C,A,info);
967: A->ops->solve = C->ops->solve;
968: A->ops->solvetranspose = C->ops->solvetranspose;
970: MatHeaderMerge(A,&C);
971: PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);
972: return(0);
973: }
974: /* ----------------------------------------------------------- */
979: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
980: {
981: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
982: IS iscol = a->col,isrow = a->row;
983: PetscErrorCode ierr;
984: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
985: PetscInt nz;
986: const PetscInt *rout,*cout,*r,*c;
987: PetscScalar *x,*tmp,*tmps,sum;
988: const PetscScalar *b;
989: const MatScalar *aa = a->a,*v;
992: if (!n) return(0);
994: VecGetArrayRead(bb,&b);
995: VecGetArray(xx,&x);
996: tmp = a->solve_work;
998: ISGetIndices(isrow,&rout); r = rout;
999: ISGetIndices(iscol,&cout); c = cout + (n-1);
1001: /* forward solve the lower triangular */
1002: tmp[0] = b[*r++];
1003: tmps = tmp;
1004: for (i=1; i<n; i++) {
1005: v = aa + ai[i];
1006: vi = aj + ai[i];
1007: nz = a->diag[i] - ai[i];
1008: sum = b[*r++];
1009: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1010: tmp[i] = sum;
1011: }
1013: /* backward solve the upper triangular */
1014: for (i=n-1; i>=0; i--) {
1015: v = aa + a->diag[i] + 1;
1016: vi = aj + a->diag[i] + 1;
1017: nz = ai[i+1] - a->diag[i] - 1;
1018: sum = tmp[i];
1019: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1020: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1021: }
1023: ISRestoreIndices(isrow,&rout);
1024: ISRestoreIndices(iscol,&cout);
1025: VecRestoreArrayRead(bb,&b);
1026: VecRestoreArray(xx,&x);
1027: PetscLogFlops(2.0*a->nz - A->cmap->n);
1028: return(0);
1029: }
1033: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1034: {
1035: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1036: IS iscol = a->col,isrow = a->row;
1037: PetscErrorCode ierr;
1038: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1039: PetscInt nz,neq;
1040: const PetscInt *rout,*cout,*r,*c;
1041: PetscScalar *x,*b,*tmp,*tmps,sum;
1042: const MatScalar *aa = a->a,*v;
1043: PetscBool bisdense,xisdense;
1046: if (!n) return(0);
1048: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1049: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1050: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1051: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1053: MatDenseGetArray(B,&b);
1054: MatDenseGetArray(X,&x);
1056: tmp = a->solve_work;
1057: ISGetIndices(isrow,&rout); r = rout;
1058: ISGetIndices(iscol,&cout); c = cout;
1060: for (neq=0; neq<B->cmap->n; neq++) {
1061: /* forward solve the lower triangular */
1062: tmp[0] = b[r[0]];
1063: tmps = tmp;
1064: for (i=1; i<n; i++) {
1065: v = aa + ai[i];
1066: vi = aj + ai[i];
1067: nz = a->diag[i] - ai[i];
1068: sum = b[r[i]];
1069: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1070: tmp[i] = sum;
1071: }
1072: /* backward solve the upper triangular */
1073: for (i=n-1; i>=0; i--) {
1074: v = aa + a->diag[i] + 1;
1075: vi = aj + a->diag[i] + 1;
1076: nz = ai[i+1] - a->diag[i] - 1;
1077: sum = tmp[i];
1078: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1079: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1080: }
1082: b += n;
1083: x += n;
1084: }
1085: ISRestoreIndices(isrow,&rout);
1086: ISRestoreIndices(iscol,&cout);
1087: MatDenseRestoreArray(B,&b);
1088: MatDenseRestoreArray(X,&x);
1089: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1090: return(0);
1091: }
1095: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1096: {
1097: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1098: IS iscol = a->col,isrow = a->row;
1099: PetscErrorCode ierr;
1100: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1101: PetscInt nz,neq;
1102: const PetscInt *rout,*cout,*r,*c;
1103: PetscScalar *x,*b,*tmp,sum;
1104: const MatScalar *aa = a->a,*v;
1105: PetscBool bisdense,xisdense;
1108: if (!n) return(0);
1110: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1111: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1112: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1113: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1115: MatDenseGetArray(B,&b);
1116: MatDenseGetArray(X,&x);
1118: tmp = a->solve_work;
1119: ISGetIndices(isrow,&rout); r = rout;
1120: ISGetIndices(iscol,&cout); c = cout;
1122: for (neq=0; neq<B->cmap->n; neq++) {
1123: /* forward solve the lower triangular */
1124: tmp[0] = b[r[0]];
1125: v = aa;
1126: vi = aj;
1127: for (i=1; i<n; i++) {
1128: nz = ai[i+1] - ai[i];
1129: sum = b[r[i]];
1130: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1131: tmp[i] = sum;
1132: v += nz; vi += nz;
1133: }
1135: /* backward solve the upper triangular */
1136: for (i=n-1; i>=0; i--) {
1137: v = aa + adiag[i+1]+1;
1138: vi = aj + adiag[i+1]+1;
1139: nz = adiag[i]-adiag[i+1]-1;
1140: sum = tmp[i];
1141: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1142: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1143: }
1145: b += n;
1146: x += n;
1147: }
1148: ISRestoreIndices(isrow,&rout);
1149: ISRestoreIndices(iscol,&cout);
1150: MatDenseRestoreArray(B,&b);
1151: MatDenseRestoreArray(X,&x);
1152: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1153: return(0);
1154: }
1158: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1159: {
1160: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1161: IS iscol = a->col,isrow = a->row;
1162: PetscErrorCode ierr;
1163: const PetscInt *r,*c,*rout,*cout;
1164: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1165: PetscInt nz,row;
1166: PetscScalar *x,*b,*tmp,*tmps,sum;
1167: const MatScalar *aa = a->a,*v;
1170: if (!n) return(0);
1172: VecGetArray(bb,&b);
1173: VecGetArray(xx,&x);
1174: tmp = a->solve_work;
1176: ISGetIndices(isrow,&rout); r = rout;
1177: ISGetIndices(iscol,&cout); c = cout + (n-1);
1179: /* forward solve the lower triangular */
1180: tmp[0] = b[*r++];
1181: tmps = tmp;
1182: for (row=1; row<n; row++) {
1183: i = rout[row]; /* permuted row */
1184: v = aa + ai[i];
1185: vi = aj + ai[i];
1186: nz = a->diag[i] - ai[i];
1187: sum = b[*r++];
1188: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1189: tmp[row] = sum;
1190: }
1192: /* backward solve the upper triangular */
1193: for (row=n-1; row>=0; row--) {
1194: i = rout[row]; /* permuted row */
1195: v = aa + a->diag[i] + 1;
1196: vi = aj + a->diag[i] + 1;
1197: nz = ai[i+1] - a->diag[i] - 1;
1198: sum = tmp[row];
1199: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1200: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1201: }
1203: ISRestoreIndices(isrow,&rout);
1204: ISRestoreIndices(iscol,&cout);
1205: VecRestoreArray(bb,&b);
1206: VecRestoreArray(xx,&x);
1207: PetscLogFlops(2.0*a->nz - A->cmap->n);
1208: return(0);
1209: }
1211: /* ----------------------------------------------------------- */
1212: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1215: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1216: {
1217: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1218: PetscErrorCode ierr;
1219: PetscInt n = A->rmap->n;
1220: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag;
1221: PetscScalar *x;
1222: const PetscScalar *b;
1223: const MatScalar *aa = a->a;
1224: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1225: PetscInt adiag_i,i,nz,ai_i;
1226: const PetscInt *vi;
1227: const MatScalar *v;
1228: PetscScalar sum;
1229: #endif
1232: if (!n) return(0);
1234: VecGetArrayRead(bb,&b);
1235: VecGetArray(xx,&x);
1237: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1238: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1239: #else
1240: /* forward solve the lower triangular */
1241: x[0] = b[0];
1242: for (i=1; i<n; i++) {
1243: ai_i = ai[i];
1244: v = aa + ai_i;
1245: vi = aj + ai_i;
1246: nz = adiag[i] - ai_i;
1247: sum = b[i];
1248: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1249: x[i] = sum;
1250: }
1252: /* backward solve the upper triangular */
1253: for (i=n-1; i>=0; i--) {
1254: adiag_i = adiag[i];
1255: v = aa + adiag_i + 1;
1256: vi = aj + adiag_i + 1;
1257: nz = ai[i+1] - adiag_i - 1;
1258: sum = x[i];
1259: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1260: x[i] = sum*aa[adiag_i];
1261: }
1262: #endif
1263: PetscLogFlops(2.0*a->nz - A->cmap->n);
1264: VecRestoreArrayRead(bb,&b);
1265: VecRestoreArray(xx,&x);
1266: return(0);
1267: }
1271: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1272: {
1273: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1274: IS iscol = a->col,isrow = a->row;
1275: PetscErrorCode ierr;
1276: PetscInt i, n = A->rmap->n,j;
1277: PetscInt nz;
1278: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1279: PetscScalar *x,*tmp,sum;
1280: const PetscScalar *b;
1281: const MatScalar *aa = a->a,*v;
1284: if (yy != xx) {VecCopy(yy,xx);}
1286: VecGetArrayRead(bb,&b);
1287: VecGetArray(xx,&x);
1288: tmp = a->solve_work;
1290: ISGetIndices(isrow,&rout); r = rout;
1291: ISGetIndices(iscol,&cout); c = cout + (n-1);
1293: /* forward solve the lower triangular */
1294: tmp[0] = b[*r++];
1295: for (i=1; i<n; i++) {
1296: v = aa + ai[i];
1297: vi = aj + ai[i];
1298: nz = a->diag[i] - ai[i];
1299: sum = b[*r++];
1300: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1301: tmp[i] = sum;
1302: }
1304: /* backward solve the upper triangular */
1305: for (i=n-1; i>=0; i--) {
1306: v = aa + a->diag[i] + 1;
1307: vi = aj + a->diag[i] + 1;
1308: nz = ai[i+1] - a->diag[i] - 1;
1309: sum = tmp[i];
1310: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1311: tmp[i] = sum*aa[a->diag[i]];
1312: x[*c--] += tmp[i];
1313: }
1315: ISRestoreIndices(isrow,&rout);
1316: ISRestoreIndices(iscol,&cout);
1317: VecRestoreArrayRead(bb,&b);
1318: VecRestoreArray(xx,&x);
1319: PetscLogFlops(2.0*a->nz);
1320: return(0);
1321: }
1325: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1326: {
1327: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1328: IS iscol = a->col,isrow = a->row;
1329: PetscErrorCode ierr;
1330: PetscInt i, n = A->rmap->n,j;
1331: PetscInt nz;
1332: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1333: PetscScalar *x,*tmp,sum;
1334: const PetscScalar *b;
1335: const MatScalar *aa = a->a,*v;
1338: if (yy != xx) {VecCopy(yy,xx);}
1340: VecGetArrayRead(bb,&b);
1341: VecGetArray(xx,&x);
1342: tmp = a->solve_work;
1344: ISGetIndices(isrow,&rout); r = rout;
1345: ISGetIndices(iscol,&cout); c = cout;
1347: /* forward solve the lower triangular */
1348: tmp[0] = b[r[0]];
1349: v = aa;
1350: vi = aj;
1351: for (i=1; i<n; i++) {
1352: nz = ai[i+1] - ai[i];
1353: sum = b[r[i]];
1354: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1355: tmp[i] = sum;
1356: v += nz;
1357: vi += nz;
1358: }
1360: /* backward solve the upper triangular */
1361: v = aa + adiag[n-1];
1362: vi = aj + adiag[n-1];
1363: for (i=n-1; i>=0; i--) {
1364: nz = adiag[i] - adiag[i+1] - 1;
1365: sum = tmp[i];
1366: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1367: tmp[i] = sum*v[nz];
1368: x[c[i]] += tmp[i];
1369: v += nz+1; vi += nz+1;
1370: }
1372: ISRestoreIndices(isrow,&rout);
1373: ISRestoreIndices(iscol,&cout);
1374: VecRestoreArrayRead(bb,&b);
1375: VecRestoreArray(xx,&x);
1376: PetscLogFlops(2.0*a->nz);
1377: return(0);
1378: }
1382: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1383: {
1384: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1385: IS iscol = a->col,isrow = a->row;
1386: PetscErrorCode ierr;
1387: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1388: PetscInt i,n = A->rmap->n,j;
1389: PetscInt nz;
1390: PetscScalar *x,*tmp,s1;
1391: const MatScalar *aa = a->a,*v;
1392: const PetscScalar *b;
1395: VecGetArrayRead(bb,&b);
1396: VecGetArray(xx,&x);
1397: tmp = a->solve_work;
1399: ISGetIndices(isrow,&rout); r = rout;
1400: ISGetIndices(iscol,&cout); c = cout;
1402: /* copy the b into temp work space according to permutation */
1403: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1405: /* forward solve the U^T */
1406: for (i=0; i<n; i++) {
1407: v = aa + diag[i];
1408: vi = aj + diag[i] + 1;
1409: nz = ai[i+1] - diag[i] - 1;
1410: s1 = tmp[i];
1411: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1412: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1413: tmp[i] = s1;
1414: }
1416: /* backward solve the L^T */
1417: for (i=n-1; i>=0; i--) {
1418: v = aa + diag[i] - 1;
1419: vi = aj + diag[i] - 1;
1420: nz = diag[i] - ai[i];
1421: s1 = tmp[i];
1422: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1423: }
1425: /* copy tmp into x according to permutation */
1426: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1428: ISRestoreIndices(isrow,&rout);
1429: ISRestoreIndices(iscol,&cout);
1430: VecRestoreArrayRead(bb,&b);
1431: VecRestoreArray(xx,&x);
1433: PetscLogFlops(2.0*a->nz-A->cmap->n);
1434: return(0);
1435: }
1439: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1440: {
1441: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1442: IS iscol = a->col,isrow = a->row;
1443: PetscErrorCode ierr;
1444: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1445: PetscInt i,n = A->rmap->n,j;
1446: PetscInt nz;
1447: PetscScalar *x,*tmp,s1;
1448: const MatScalar *aa = a->a,*v;
1449: const PetscScalar *b;
1452: VecGetArrayRead(bb,&b);
1453: VecGetArray(xx,&x);
1454: tmp = a->solve_work;
1456: ISGetIndices(isrow,&rout); r = rout;
1457: ISGetIndices(iscol,&cout); c = cout;
1459: /* copy the b into temp work space according to permutation */
1460: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1462: /* forward solve the U^T */
1463: for (i=0; i<n; i++) {
1464: v = aa + adiag[i+1] + 1;
1465: vi = aj + adiag[i+1] + 1;
1466: nz = adiag[i] - adiag[i+1] - 1;
1467: s1 = tmp[i];
1468: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1469: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1470: tmp[i] = s1;
1471: }
1473: /* backward solve the L^T */
1474: for (i=n-1; i>=0; i--) {
1475: v = aa + ai[i];
1476: vi = aj + ai[i];
1477: nz = ai[i+1] - ai[i];
1478: s1 = tmp[i];
1479: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1480: }
1482: /* copy tmp into x according to permutation */
1483: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1485: ISRestoreIndices(isrow,&rout);
1486: ISRestoreIndices(iscol,&cout);
1487: VecRestoreArrayRead(bb,&b);
1488: VecRestoreArray(xx,&x);
1490: PetscLogFlops(2.0*a->nz-A->cmap->n);
1491: return(0);
1492: }
1496: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1497: {
1498: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1499: IS iscol = a->col,isrow = a->row;
1500: PetscErrorCode ierr;
1501: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1502: PetscInt i,n = A->rmap->n,j;
1503: PetscInt nz;
1504: PetscScalar *x,*tmp,s1;
1505: const MatScalar *aa = a->a,*v;
1506: const PetscScalar *b;
1509: if (zz != xx) {VecCopy(zz,xx);}
1510: VecGetArrayRead(bb,&b);
1511: VecGetArray(xx,&x);
1512: tmp = a->solve_work;
1514: ISGetIndices(isrow,&rout); r = rout;
1515: ISGetIndices(iscol,&cout); c = cout;
1517: /* copy the b into temp work space according to permutation */
1518: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1520: /* forward solve the U^T */
1521: for (i=0; i<n; i++) {
1522: v = aa + diag[i];
1523: vi = aj + diag[i] + 1;
1524: nz = ai[i+1] - diag[i] - 1;
1525: s1 = tmp[i];
1526: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1527: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1528: tmp[i] = s1;
1529: }
1531: /* backward solve the L^T */
1532: for (i=n-1; i>=0; i--) {
1533: v = aa + diag[i] - 1;
1534: vi = aj + diag[i] - 1;
1535: nz = diag[i] - ai[i];
1536: s1 = tmp[i];
1537: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1538: }
1540: /* copy tmp into x according to permutation */
1541: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1543: ISRestoreIndices(isrow,&rout);
1544: ISRestoreIndices(iscol,&cout);
1545: VecRestoreArrayRead(bb,&b);
1546: VecRestoreArray(xx,&x);
1548: PetscLogFlops(2.0*a->nz-A->cmap->n);
1549: return(0);
1550: }
1554: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1555: {
1556: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1557: IS iscol = a->col,isrow = a->row;
1558: PetscErrorCode ierr;
1559: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1560: PetscInt i,n = A->rmap->n,j;
1561: PetscInt nz;
1562: PetscScalar *x,*tmp,s1;
1563: const MatScalar *aa = a->a,*v;
1564: const PetscScalar *b;
1567: if (zz != xx) {VecCopy(zz,xx);}
1568: VecGetArrayRead(bb,&b);
1569: VecGetArray(xx,&x);
1570: tmp = a->solve_work;
1572: ISGetIndices(isrow,&rout); r = rout;
1573: ISGetIndices(iscol,&cout); c = cout;
1575: /* copy the b into temp work space according to permutation */
1576: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1578: /* forward solve the U^T */
1579: for (i=0; i<n; i++) {
1580: v = aa + adiag[i+1] + 1;
1581: vi = aj + adiag[i+1] + 1;
1582: nz = adiag[i] - adiag[i+1] - 1;
1583: s1 = tmp[i];
1584: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1585: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1586: tmp[i] = s1;
1587: }
1590: /* backward solve the L^T */
1591: for (i=n-1; i>=0; i--) {
1592: v = aa + ai[i];
1593: vi = aj + ai[i];
1594: nz = ai[i+1] - ai[i];
1595: s1 = tmp[i];
1596: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1597: }
1599: /* copy tmp into x according to permutation */
1600: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1602: ISRestoreIndices(isrow,&rout);
1603: ISRestoreIndices(iscol,&cout);
1604: VecRestoreArrayRead(bb,&b);
1605: VecRestoreArray(xx,&x);
1607: PetscLogFlops(2.0*a->nz-A->cmap->n);
1608: return(0);
1609: }
1611: /* ----------------------------------------------------------------*/
1613: extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool);
1615: /*
1616: ilu() under revised new data structure.
1617: Factored arrays bj and ba are stored as
1618: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1620: bi=fact->i is an array of size n+1, in which
1621: bi+
1622: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1623: bi[n]: points to L(n-1,n-1)+1
1625: bdiag=fact->diag is an array of size n+1,in which
1626: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1627: bdiag[n]: points to entry of U(n-1,0)-1
1629: U(i,:) contains bdiag[i] as its last entry, i.e.,
1630: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1631: */
1634: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1635: {
1636: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1638: const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1639: PetscInt i,j,k=0,nz,*bi,*bj,*bdiag;
1640: IS isicol;
1643: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1644: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);
1645: b = (Mat_SeqAIJ*)(fact)->data;
1647: /* allocate matrix arrays for new data structure */
1648: PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);
1649: PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));
1651: b->singlemalloc = PETSC_TRUE;
1652: if (!b->diag) {
1653: PetscMalloc1(n+1,&b->diag);
1654: PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));
1655: }
1656: bdiag = b->diag;
1658: if (n > 0) {
1659: PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));
1660: }
1662: /* set bi and bj with new data structure */
1663: bi = b->i;
1664: bj = b->j;
1666: /* L part */
1667: bi[0] = 0;
1668: for (i=0; i<n; i++) {
1669: nz = adiag[i] - ai[i];
1670: bi[i+1] = bi[i] + nz;
1671: aj = a->j + ai[i];
1672: for (j=0; j<nz; j++) {
1673: /* *bj = aj[j]; bj++; */
1674: bj[k++] = aj[j];
1675: }
1676: }
1678: /* U part */
1679: bdiag[n] = bi[n]-1;
1680: for (i=n-1; i>=0; i--) {
1681: nz = ai[i+1] - adiag[i] - 1;
1682: aj = a->j + adiag[i] + 1;
1683: for (j=0; j<nz; j++) {
1684: /* *bj = aj[j]; bj++; */
1685: bj[k++] = aj[j];
1686: }
1687: /* diag[i] */
1688: /* *bj = i; bj++; */
1689: bj[k++] = i;
1690: bdiag[i] = bdiag[i+1] + nz + 1;
1691: }
1693: fact->factortype = MAT_FACTOR_ILU;
1694: fact->info.factor_mallocs = 0;
1695: fact->info.fill_ratio_given = info->fill;
1696: fact->info.fill_ratio_needed = 1.0;
1697: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1698: MatSeqAIJCheckInode_FactorLU(fact);
1700: b = (Mat_SeqAIJ*)(fact)->data;
1701: b->row = isrow;
1702: b->col = iscol;
1703: b->icol = isicol;
1704: PetscMalloc1(fact->rmap->n+1,&b->solve_work);
1705: PetscObjectReference((PetscObject)isrow);
1706: PetscObjectReference((PetscObject)iscol);
1707: return(0);
1708: }
1712: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1713: {
1714: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1715: IS isicol;
1716: PetscErrorCode ierr;
1717: const PetscInt *r,*ic;
1718: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1719: PetscInt *bi,*cols,nnz,*cols_lvl;
1720: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1721: PetscInt i,levels,diagonal_fill;
1722: PetscBool col_identity,row_identity,missing;
1723: PetscReal f;
1724: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1725: PetscBT lnkbt;
1726: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1727: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1728: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1731: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1732: MatMissingDiagonal(A,&missing,&i);
1733: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1734:
1735: levels = (PetscInt)info->levels;
1736: ISIdentity(isrow,&row_identity);
1737: ISIdentity(iscol,&col_identity);
1738: if (!levels && row_identity && col_identity) {
1739: /* special case: ilu(0) with natural ordering */
1740: MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);
1741: if (a->inode.size) {
1742: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1743: }
1744: return(0);
1745: }
1747: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1748: ISGetIndices(isrow,&r);
1749: ISGetIndices(isicol,&ic);
1751: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1752: PetscMalloc1(n+1,&bi);
1753: PetscMalloc1(n+1,&bdiag);
1754: bi[0] = bdiag[0] = 0;
1755: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1757: /* create a linked list for storing column indices of the active row */
1758: nlnk = n + 1;
1759: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1761: /* initial FreeSpace size is f*(ai[n]+1) */
1762: f = info->fill;
1763: diagonal_fill = (PetscInt)info->diagonal_fill;
1764: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
1765: current_space = free_space;
1766: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);
1767: current_space_lvl = free_space_lvl;
1768: for (i=0; i<n; i++) {
1769: nzi = 0;
1770: /* copy current row into linked list */
1771: nnz = ai[r[i]+1] - ai[r[i]];
1772: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1773: cols = aj + ai[r[i]];
1774: lnk[i] = -1; /* marker to indicate if diagonal exists */
1775: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1776: nzi += nlnk;
1778: /* make sure diagonal entry is included */
1779: if (diagonal_fill && lnk[i] == -1) {
1780: fm = n;
1781: while (lnk[fm] < i) fm = lnk[fm];
1782: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1783: lnk[fm] = i;
1784: lnk_lvl[i] = 0;
1785: nzi++; dcount++;
1786: }
1788: /* add pivot rows into the active row */
1789: nzbd = 0;
1790: prow = lnk[n];
1791: while (prow < i) {
1792: nnz = bdiag[prow];
1793: cols = bj_ptr[prow] + nnz + 1;
1794: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1795: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1796: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1797: nzi += nlnk;
1798: prow = lnk[prow];
1799: nzbd++;
1800: }
1801: bdiag[i] = nzbd;
1802: bi[i+1] = bi[i] + nzi;
1803: /* if free space is not available, make more free space */
1804: if (current_space->local_remaining<nzi) {
1805: nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */
1806: PetscFreeSpaceGet(nnz,¤t_space);
1807: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1808: reallocs++;
1809: }
1811: /* copy data into free_space and free_space_lvl, then initialize lnk */
1812: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1813: bj_ptr[i] = current_space->array;
1814: bjlvl_ptr[i] = current_space_lvl->array;
1816: /* make sure the active row i has diagonal entry */
1817: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1819: current_space->array += nzi;
1820: current_space->local_used += nzi;
1821: current_space->local_remaining -= nzi;
1822: current_space_lvl->array += nzi;
1823: current_space_lvl->local_used += nzi;
1824: current_space_lvl->local_remaining -= nzi;
1825: }
1827: ISRestoreIndices(isrow,&r);
1828: ISRestoreIndices(isicol,&ic);
1829: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1830: PetscMalloc1(bi[n]+1,&bj);
1831: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
1833: PetscIncompleteLLDestroy(lnk,lnkbt);
1834: PetscFreeSpaceDestroy(free_space_lvl);
1835: PetscFree2(bj_ptr,bjlvl_ptr);
1837: #if defined(PETSC_USE_INFO)
1838: {
1839: PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1840: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
1841: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
1842: PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
1843: PetscInfo(A,"for best performance.\n");
1844: if (diagonal_fill) {
1845: PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);
1846: }
1847: }
1848: #endif
1849: /* put together the new matrix */
1850: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
1851: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
1852: b = (Mat_SeqAIJ*)(fact)->data;
1854: b->free_a = PETSC_TRUE;
1855: b->free_ij = PETSC_TRUE;
1856: b->singlemalloc = PETSC_FALSE;
1858: PetscMalloc1(bdiag[0]+1,&b->a);
1860: b->j = bj;
1861: b->i = bi;
1862: b->diag = bdiag;
1863: b->ilen = 0;
1864: b->imax = 0;
1865: b->row = isrow;
1866: b->col = iscol;
1867: PetscObjectReference((PetscObject)isrow);
1868: PetscObjectReference((PetscObject)iscol);
1869: b->icol = isicol;
1871: PetscMalloc1(n+1,&b->solve_work);
1872: /* In b structure: Free imax, ilen, old a, old j.
1873: Allocate bdiag, solve_work, new a, new j */
1874: PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
1875: b->maxnz = b->nz = bdiag[0]+1;
1877: (fact)->info.factor_mallocs = reallocs;
1878: (fact)->info.fill_ratio_given = f;
1879: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1880: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1881: if (a->inode.size) {
1882: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1883: }
1884: MatSeqAIJCheckInode_FactorLU(fact);
1885: return(0);
1886: }
1890: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1891: {
1892: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1893: IS isicol;
1894: PetscErrorCode ierr;
1895: const PetscInt *r,*ic;
1896: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1897: PetscInt *bi,*cols,nnz,*cols_lvl;
1898: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1899: PetscInt i,levels,diagonal_fill;
1900: PetscBool col_identity,row_identity;
1901: PetscReal f;
1902: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1903: PetscBT lnkbt;
1904: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1905: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1906: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1907: PetscBool missing;
1910: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1911: MatMissingDiagonal(A,&missing,&i);
1912: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1914: f = info->fill;
1915: levels = (PetscInt)info->levels;
1916: diagonal_fill = (PetscInt)info->diagonal_fill;
1918: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1920: ISIdentity(isrow,&row_identity);
1921: ISIdentity(iscol,&col_identity);
1922: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1923: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);
1925: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1926: if (a->inode.size) {
1927: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1928: }
1929: fact->factortype = MAT_FACTOR_ILU;
1930: (fact)->info.factor_mallocs = 0;
1931: (fact)->info.fill_ratio_given = info->fill;
1932: (fact)->info.fill_ratio_needed = 1.0;
1934: b = (Mat_SeqAIJ*)(fact)->data;
1935: b->row = isrow;
1936: b->col = iscol;
1937: b->icol = isicol;
1938: PetscMalloc1((fact)->rmap->n+1,&b->solve_work);
1939: PetscObjectReference((PetscObject)isrow);
1940: PetscObjectReference((PetscObject)iscol);
1941: return(0);
1942: }
1944: ISGetIndices(isrow,&r);
1945: ISGetIndices(isicol,&ic);
1947: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1948: PetscMalloc1(n+1,&bi);
1949: PetscMalloc1(n+1,&bdiag);
1950: bi[0] = bdiag[0] = 0;
1952: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1954: /* create a linked list for storing column indices of the active row */
1955: nlnk = n + 1;
1956: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1958: /* initial FreeSpace size is f*(ai[n]+1) */
1959: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
1960: current_space = free_space;
1961: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);
1962: current_space_lvl = free_space_lvl;
1964: for (i=0; i<n; i++) {
1965: nzi = 0;
1966: /* copy current row into linked list */
1967: nnz = ai[r[i]+1] - ai[r[i]];
1968: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1969: cols = aj + ai[r[i]];
1970: lnk[i] = -1; /* marker to indicate if diagonal exists */
1971: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1972: nzi += nlnk;
1974: /* make sure diagonal entry is included */
1975: if (diagonal_fill && lnk[i] == -1) {
1976: fm = n;
1977: while (lnk[fm] < i) fm = lnk[fm];
1978: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1979: lnk[fm] = i;
1980: lnk_lvl[i] = 0;
1981: nzi++; dcount++;
1982: }
1984: /* add pivot rows into the active row */
1985: nzbd = 0;
1986: prow = lnk[n];
1987: while (prow < i) {
1988: nnz = bdiag[prow];
1989: cols = bj_ptr[prow] + nnz + 1;
1990: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1991: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1992: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1993: nzi += nlnk;
1994: prow = lnk[prow];
1995: nzbd++;
1996: }
1997: bdiag[i] = nzbd;
1998: bi[i+1] = bi[i] + nzi;
2000: /* if free space is not available, make more free space */
2001: if (current_space->local_remaining<nzi) {
2002: nnz = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */
2003: PetscFreeSpaceGet(nnz,¤t_space);
2004: PetscFreeSpaceGet(nnz,¤t_space_lvl);
2005: reallocs++;
2006: }
2008: /* copy data into free_space and free_space_lvl, then initialize lnk */
2009: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2010: bj_ptr[i] = current_space->array;
2011: bjlvl_ptr[i] = current_space_lvl->array;
2013: /* make sure the active row i has diagonal entry */
2014: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2016: current_space->array += nzi;
2017: current_space->local_used += nzi;
2018: current_space->local_remaining -= nzi;
2019: current_space_lvl->array += nzi;
2020: current_space_lvl->local_used += nzi;
2021: current_space_lvl->local_remaining -= nzi;
2022: }
2024: ISRestoreIndices(isrow,&r);
2025: ISRestoreIndices(isicol,&ic);
2027: /* destroy list of free space and other temporary arrays */
2028: PetscMalloc1(bi[n]+1,&bj);
2029: PetscFreeSpaceContiguous(&free_space,bj); /* copy free_space -> bj */
2030: PetscIncompleteLLDestroy(lnk,lnkbt);
2031: PetscFreeSpaceDestroy(free_space_lvl);
2032: PetscFree2(bj_ptr,bjlvl_ptr);
2034: #if defined(PETSC_USE_INFO)
2035: {
2036: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2037: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
2038: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
2039: PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
2040: PetscInfo(A,"for best performance.\n");
2041: if (diagonal_fill) {
2042: PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);
2043: }
2044: }
2045: #endif
2047: /* put together the new matrix */
2048: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
2049: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
2050: b = (Mat_SeqAIJ*)(fact)->data;
2052: b->free_a = PETSC_TRUE;
2053: b->free_ij = PETSC_TRUE;
2054: b->singlemalloc = PETSC_FALSE;
2056: PetscMalloc1(bi[n],&b->a);
2057: b->j = bj;
2058: b->i = bi;
2059: for (i=0; i<n; i++) bdiag[i] += bi[i];
2060: b->diag = bdiag;
2061: b->ilen = 0;
2062: b->imax = 0;
2063: b->row = isrow;
2064: b->col = iscol;
2065: PetscObjectReference((PetscObject)isrow);
2066: PetscObjectReference((PetscObject)iscol);
2067: b->icol = isicol;
2068: PetscMalloc1(n+1,&b->solve_work);
2069: /* In b structure: Free imax, ilen, old a, old j.
2070: Allocate bdiag, solve_work, new a, new j */
2071: PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
2072: b->maxnz = b->nz = bi[n];
2074: (fact)->info.factor_mallocs = reallocs;
2075: (fact)->info.fill_ratio_given = f;
2076: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2077: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2078: if (a->inode.size) {
2079: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2080: }
2081: return(0);
2082: }
2086: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2087: {
2088: Mat C = B;
2089: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2090: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2091: IS ip=b->row,iip = b->icol;
2093: const PetscInt *rip,*riip;
2094: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2095: PetscInt *ai=a->i,*aj=a->j;
2096: PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2097: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2098: PetscBool perm_identity;
2099: FactorShiftCtx sctx;
2100: PetscReal rs;
2101: MatScalar d,*v;
2104: /* MatPivotSetUp(): initialize shift context sctx */
2105: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2107: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2108: sctx.shift_top = info->zeropivot;
2109: for (i=0; i<mbs; i++) {
2110: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2111: d = (aa)[a->diag[i]];
2112: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2113: v = aa+ai[i];
2114: nz = ai[i+1] - ai[i];
2115: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2116: if (rs>sctx.shift_top) sctx.shift_top = rs;
2117: }
2118: sctx.shift_top *= 1.1;
2119: sctx.nshift_max = 5;
2120: sctx.shift_lo = 0.;
2121: sctx.shift_hi = 1.;
2122: }
2124: ISGetIndices(ip,&rip);
2125: ISGetIndices(iip,&riip);
2127: /* allocate working arrays
2128: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2129: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2130: */
2131: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);
2133: do {
2134: sctx.newshift = PETSC_FALSE;
2136: for (i=0; i<mbs; i++) c2r[i] = mbs;
2137: if (mbs) il[0] = 0;
2139: for (k = 0; k<mbs; k++) {
2140: /* zero rtmp */
2141: nz = bi[k+1] - bi[k];
2142: bjtmp = bj + bi[k];
2143: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2145: /* load in initial unfactored row */
2146: bval = ba + bi[k];
2147: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2148: for (j = jmin; j < jmax; j++) {
2149: col = riip[aj[j]];
2150: if (col >= k) { /* only take upper triangular entry */
2151: rtmp[col] = aa[j];
2152: *bval++ = 0.0; /* for in-place factorization */
2153: }
2154: }
2155: /* shift the diagonal of the matrix: ZeropivotApply() */
2156: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2158: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2159: dk = rtmp[k];
2160: i = c2r[k]; /* first row to be added to k_th row */
2162: while (i < k) {
2163: nexti = c2r[i]; /* next row to be added to k_th row */
2165: /* compute multiplier, update diag(k) and U(i,k) */
2166: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2167: uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2168: dk += uikdi*ba[ili]; /* update diag[k] */
2169: ba[ili] = uikdi; /* -U(i,k) */
2171: /* add multiple of row i to k-th row */
2172: jmin = ili + 1; jmax = bi[i+1];
2173: if (jmin < jmax) {
2174: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2175: /* update il and c2r for row i */
2176: il[i] = jmin;
2177: j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2178: }
2179: i = nexti;
2180: }
2182: /* copy data into U(k,:) */
2183: rs = 0.0;
2184: jmin = bi[k]; jmax = bi[k+1]-1;
2185: if (jmin < jmax) {
2186: for (j=jmin; j<jmax; j++) {
2187: col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2188: }
2189: /* add the k-th row into il and c2r */
2190: il[k] = jmin;
2191: i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2192: }
2194: /* MatPivotCheck() */
2195: sctx.rs = rs;
2196: sctx.pv = dk;
2197: MatPivotCheck(B,A,info,&sctx,i);
2198: if (sctx.newshift) break;
2199: dk = sctx.pv;
2201: ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2202: }
2203: } while (sctx.newshift);
2205: PetscFree3(rtmp,il,c2r);
2206: ISRestoreIndices(ip,&rip);
2207: ISRestoreIndices(iip,&riip);
2209: ISIdentity(ip,&perm_identity);
2210: if (perm_identity) {
2211: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2212: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2213: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2214: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2215: } else {
2216: B->ops->solve = MatSolve_SeqSBAIJ_1;
2217: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2218: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2219: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2220: }
2222: C->assembled = PETSC_TRUE;
2223: C->preallocated = PETSC_TRUE;
2225: PetscLogFlops(C->rmap->n);
2227: /* MatPivotView() */
2228: if (sctx.nshift) {
2229: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2230: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
2231: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2232: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2233: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2234: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
2235: }
2236: }
2237: return(0);
2238: }
2242: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2243: {
2244: Mat C = B;
2245: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2246: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2247: IS ip=b->row,iip = b->icol;
2249: const PetscInt *rip,*riip;
2250: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2251: PetscInt *ai=a->i,*aj=a->j;
2252: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2253: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2254: PetscBool perm_identity;
2255: FactorShiftCtx sctx;
2256: PetscReal rs;
2257: MatScalar d,*v;
2260: /* MatPivotSetUp(): initialize shift context sctx */
2261: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2263: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2264: sctx.shift_top = info->zeropivot;
2265: for (i=0; i<mbs; i++) {
2266: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2267: d = (aa)[a->diag[i]];
2268: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2269: v = aa+ai[i];
2270: nz = ai[i+1] - ai[i];
2271: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2272: if (rs>sctx.shift_top) sctx.shift_top = rs;
2273: }
2274: sctx.shift_top *= 1.1;
2275: sctx.nshift_max = 5;
2276: sctx.shift_lo = 0.;
2277: sctx.shift_hi = 1.;
2278: }
2280: ISGetIndices(ip,&rip);
2281: ISGetIndices(iip,&riip);
2283: /* initialization */
2284: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);
2286: do {
2287: sctx.newshift = PETSC_FALSE;
2289: for (i=0; i<mbs; i++) jl[i] = mbs;
2290: il[0] = 0;
2292: for (k = 0; k<mbs; k++) {
2293: /* zero rtmp */
2294: nz = bi[k+1] - bi[k];
2295: bjtmp = bj + bi[k];
2296: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2298: bval = ba + bi[k];
2299: /* initialize k-th row by the perm[k]-th row of A */
2300: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2301: for (j = jmin; j < jmax; j++) {
2302: col = riip[aj[j]];
2303: if (col >= k) { /* only take upper triangular entry */
2304: rtmp[col] = aa[j];
2305: *bval++ = 0.0; /* for in-place factorization */
2306: }
2307: }
2308: /* shift the diagonal of the matrix */
2309: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2311: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2312: dk = rtmp[k];
2313: i = jl[k]; /* first row to be added to k_th row */
2315: while (i < k) {
2316: nexti = jl[i]; /* next row to be added to k_th row */
2318: /* compute multiplier, update diag(k) and U(i,k) */
2319: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2320: uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2321: dk += uikdi*ba[ili];
2322: ba[ili] = uikdi; /* -U(i,k) */
2324: /* add multiple of row i to k-th row */
2325: jmin = ili + 1; jmax = bi[i+1];
2326: if (jmin < jmax) {
2327: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2328: /* update il and jl for row i */
2329: il[i] = jmin;
2330: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2331: }
2332: i = nexti;
2333: }
2335: /* shift the diagonals when zero pivot is detected */
2336: /* compute rs=sum of abs(off-diagonal) */
2337: rs = 0.0;
2338: jmin = bi[k]+1;
2339: nz = bi[k+1] - jmin;
2340: bcol = bj + jmin;
2341: for (j=0; j<nz; j++) {
2342: rs += PetscAbsScalar(rtmp[bcol[j]]);
2343: }
2345: sctx.rs = rs;
2346: sctx.pv = dk;
2347: MatPivotCheck(B,A,info,&sctx,k);
2348: if (sctx.newshift) break;
2349: dk = sctx.pv;
2351: /* copy data into U(k,:) */
2352: ba[bi[k]] = 1.0/dk; /* U(k,k) */
2353: jmin = bi[k]+1; jmax = bi[k+1];
2354: if (jmin < jmax) {
2355: for (j=jmin; j<jmax; j++) {
2356: col = bj[j]; ba[j] = rtmp[col];
2357: }
2358: /* add the k-th row into il and jl */
2359: il[k] = jmin;
2360: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2361: }
2362: }
2363: } while (sctx.newshift);
2365: PetscFree3(rtmp,il,jl);
2366: ISRestoreIndices(ip,&rip);
2367: ISRestoreIndices(iip,&riip);
2369: ISIdentity(ip,&perm_identity);
2370: if (perm_identity) {
2371: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2372: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2373: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2374: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2375: } else {
2376: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2377: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2378: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2379: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2380: }
2382: C->assembled = PETSC_TRUE;
2383: C->preallocated = PETSC_TRUE;
2385: PetscLogFlops(C->rmap->n);
2386: if (sctx.nshift) {
2387: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2388: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2389: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2390: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2391: }
2392: }
2393: return(0);
2394: }
2396: /*
2397: icc() under revised new data structure.
2398: Factored arrays bj and ba are stored as
2399: U(0,:),...,U(i,:),U(n-1,:)
2401: ui=fact->i is an array of size n+1, in which
2402: ui+
2403: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2404: ui[n]: points to U(n-1,n-1)+1
2406: udiag=fact->diag is an array of size n,in which
2407: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2409: U(i,:) contains udiag[i] as its last entry, i.e.,
2410: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2411: */
2415: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2416: {
2417: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2418: Mat_SeqSBAIJ *b;
2419: PetscErrorCode ierr;
2420: PetscBool perm_identity,missing;
2421: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2422: const PetscInt *rip,*riip;
2423: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2424: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2425: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2426: PetscReal fill =info->fill,levels=info->levels;
2427: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2428: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2429: PetscBT lnkbt;
2430: IS iperm;
2433: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2434: MatMissingDiagonal(A,&missing,&d);
2435: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2436: ISIdentity(perm,&perm_identity);
2437: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2439: PetscMalloc1(am+1,&ui);
2440: PetscMalloc1(am+1,&udiag);
2441: ui[0] = 0;
2443: /* ICC(0) without matrix ordering: simply rearrange column indices */
2444: if (!levels && perm_identity) {
2445: for (i=0; i<am; i++) {
2446: ncols = ai[i+1] - a->diag[i];
2447: ui[i+1] = ui[i] + ncols;
2448: udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2449: }
2450: PetscMalloc1(ui[am]+1,&uj);
2451: cols = uj;
2452: for (i=0; i<am; i++) {
2453: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2454: ncols = ai[i+1] - a->diag[i] -1;
2455: for (j=0; j<ncols; j++) *cols++ = aj[j];
2456: *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2457: }
2458: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2459: ISGetIndices(iperm,&riip);
2460: ISGetIndices(perm,&rip);
2462: /* initialization */
2463: PetscMalloc1(am+1,&ajtmp);
2465: /* jl: linked list for storing indices of the pivot rows
2466: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2467: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2468: for (i=0; i<am; i++) {
2469: jl[i] = am; il[i] = 0;
2470: }
2472: /* create and initialize a linked list for storing column indices of the active row k */
2473: nlnk = am + 1;
2474: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2476: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2477: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);
2478: current_space = free_space;
2479: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);
2480: current_space_lvl = free_space_lvl;
2482: for (k=0; k<am; k++) { /* for each active row k */
2483: /* initialize lnk by the column indices of row rip[k] of A */
2484: nzk = 0;
2485: ncols = ai[rip[k]+1] - ai[rip[k]];
2486: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2487: ncols_upper = 0;
2488: for (j=0; j<ncols; j++) {
2489: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2490: if (riip[i] >= k) { /* only take upper triangular entry */
2491: ajtmp[ncols_upper] = i;
2492: ncols_upper++;
2493: }
2494: }
2495: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2496: nzk += nlnk;
2498: /* update lnk by computing fill-in for each pivot row to be merged in */
2499: prow = jl[k]; /* 1st pivot row */
2501: while (prow < k) {
2502: nextprow = jl[prow];
2504: /* merge prow into k-th row */
2505: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2506: jmax = ui[prow+1];
2507: ncols = jmax-jmin;
2508: i = jmin - ui[prow];
2509: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2510: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2511: j = *(uj - 1);
2512: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2513: nzk += nlnk;
2515: /* update il and jl for prow */
2516: if (jmin < jmax) {
2517: il[prow] = jmin;
2518: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2519: }
2520: prow = nextprow;
2521: }
2523: /* if free space is not available, make more free space */
2524: if (current_space->local_remaining<nzk) {
2525: i = am - k + 1; /* num of unfactored rows */
2526: i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2527: PetscFreeSpaceGet(i,¤t_space);
2528: PetscFreeSpaceGet(i,¤t_space_lvl);
2529: reallocs++;
2530: }
2532: /* copy data into free_space and free_space_lvl, then initialize lnk */
2533: if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2534: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2536: /* add the k-th row into il and jl */
2537: if (nzk > 1) {
2538: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2539: jl[k] = jl[i]; jl[i] = k;
2540: il[k] = ui[k] + 1;
2541: }
2542: uj_ptr[k] = current_space->array;
2543: uj_lvl_ptr[k] = current_space_lvl->array;
2545: current_space->array += nzk;
2546: current_space->local_used += nzk;
2547: current_space->local_remaining -= nzk;
2549: current_space_lvl->array += nzk;
2550: current_space_lvl->local_used += nzk;
2551: current_space_lvl->local_remaining -= nzk;
2553: ui[k+1] = ui[k] + nzk;
2554: }
2556: ISRestoreIndices(perm,&rip);
2557: ISRestoreIndices(iperm,&riip);
2558: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2559: PetscFree(ajtmp);
2561: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2562: PetscMalloc1(ui[am]+1,&uj);
2563: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2564: PetscIncompleteLLDestroy(lnk,lnkbt);
2565: PetscFreeSpaceDestroy(free_space_lvl);
2567: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2569: /* put together the new matrix in MATSEQSBAIJ format */
2570: b = (Mat_SeqSBAIJ*)(fact)->data;
2571: b->singlemalloc = PETSC_FALSE;
2573: PetscMalloc1(ui[am]+1,&b->a);
2575: b->j = uj;
2576: b->i = ui;
2577: b->diag = udiag;
2578: b->free_diag = PETSC_TRUE;
2579: b->ilen = 0;
2580: b->imax = 0;
2581: b->row = perm;
2582: b->col = perm;
2583: PetscObjectReference((PetscObject)perm);
2584: PetscObjectReference((PetscObject)perm);
2585: b->icol = iperm;
2586: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2588: PetscMalloc1(am+1,&b->solve_work);
2589: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2591: b->maxnz = b->nz = ui[am];
2592: b->free_a = PETSC_TRUE;
2593: b->free_ij = PETSC_TRUE;
2595: fact->info.factor_mallocs = reallocs;
2596: fact->info.fill_ratio_given = fill;
2597: if (ai[am] != 0) {
2598: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2599: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2600: } else {
2601: fact->info.fill_ratio_needed = 0.0;
2602: }
2603: #if defined(PETSC_USE_INFO)
2604: if (ai[am] != 0) {
2605: PetscReal af = fact->info.fill_ratio_needed;
2606: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2607: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2608: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2609: } else {
2610: PetscInfo(A,"Empty matrix.\n");
2611: }
2612: #endif
2613: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2614: return(0);
2615: }
2619: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2620: {
2621: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2622: Mat_SeqSBAIJ *b;
2623: PetscErrorCode ierr;
2624: PetscBool perm_identity,missing;
2625: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2626: const PetscInt *rip,*riip;
2627: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2628: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2629: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2630: PetscReal fill =info->fill,levels=info->levels;
2631: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2632: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2633: PetscBT lnkbt;
2634: IS iperm;
2637: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2638: MatMissingDiagonal(A,&missing,&d);
2639: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2640: ISIdentity(perm,&perm_identity);
2641: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2643: PetscMalloc1(am+1,&ui);
2644: PetscMalloc1(am+1,&udiag);
2645: ui[0] = 0;
2647: /* ICC(0) without matrix ordering: simply copies fill pattern */
2648: if (!levels && perm_identity) {
2650: for (i=0; i<am; i++) {
2651: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
2652: udiag[i] = ui[i];
2653: }
2654: PetscMalloc1(ui[am]+1,&uj);
2655: cols = uj;
2656: for (i=0; i<am; i++) {
2657: aj = a->j + a->diag[i];
2658: ncols = ui[i+1] - ui[i];
2659: for (j=0; j<ncols; j++) *cols++ = *aj++;
2660: }
2661: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2662: ISGetIndices(iperm,&riip);
2663: ISGetIndices(perm,&rip);
2665: /* initialization */
2666: PetscMalloc1(am+1,&ajtmp);
2668: /* jl: linked list for storing indices of the pivot rows
2669: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2670: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2671: for (i=0; i<am; i++) {
2672: jl[i] = am; il[i] = 0;
2673: }
2675: /* create and initialize a linked list for storing column indices of the active row k */
2676: nlnk = am + 1;
2677: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2679: /* initial FreeSpace size is fill*(ai[am]+1) */
2680: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);
2681: current_space = free_space;
2682: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);
2683: current_space_lvl = free_space_lvl;
2685: for (k=0; k<am; k++) { /* for each active row k */
2686: /* initialize lnk by the column indices of row rip[k] of A */
2687: nzk = 0;
2688: ncols = ai[rip[k]+1] - ai[rip[k]];
2689: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2690: ncols_upper = 0;
2691: for (j=0; j<ncols; j++) {
2692: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2693: if (riip[i] >= k) { /* only take upper triangular entry */
2694: ajtmp[ncols_upper] = i;
2695: ncols_upper++;
2696: }
2697: }
2698: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2699: nzk += nlnk;
2701: /* update lnk by computing fill-in for each pivot row to be merged in */
2702: prow = jl[k]; /* 1st pivot row */
2704: while (prow < k) {
2705: nextprow = jl[prow];
2707: /* merge prow into k-th row */
2708: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2709: jmax = ui[prow+1];
2710: ncols = jmax-jmin;
2711: i = jmin - ui[prow];
2712: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2713: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2714: j = *(uj - 1);
2715: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2716: nzk += nlnk;
2718: /* update il and jl for prow */
2719: if (jmin < jmax) {
2720: il[prow] = jmin;
2721: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2722: }
2723: prow = nextprow;
2724: }
2726: /* if free space is not available, make more free space */
2727: if (current_space->local_remaining<nzk) {
2728: i = am - k + 1; /* num of unfactored rows */
2729: i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2730: PetscFreeSpaceGet(i,¤t_space);
2731: PetscFreeSpaceGet(i,¤t_space_lvl);
2732: reallocs++;
2733: }
2735: /* copy data into free_space and free_space_lvl, then initialize lnk */
2736: if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2737: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2739: /* add the k-th row into il and jl */
2740: if (nzk > 1) {
2741: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2742: jl[k] = jl[i]; jl[i] = k;
2743: il[k] = ui[k] + 1;
2744: }
2745: uj_ptr[k] = current_space->array;
2746: uj_lvl_ptr[k] = current_space_lvl->array;
2748: current_space->array += nzk;
2749: current_space->local_used += nzk;
2750: current_space->local_remaining -= nzk;
2752: current_space_lvl->array += nzk;
2753: current_space_lvl->local_used += nzk;
2754: current_space_lvl->local_remaining -= nzk;
2756: ui[k+1] = ui[k] + nzk;
2757: }
2759: #if defined(PETSC_USE_INFO)
2760: if (ai[am] != 0) {
2761: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2762: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2763: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2764: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2765: } else {
2766: PetscInfo(A,"Empty matrix.\n");
2767: }
2768: #endif
2770: ISRestoreIndices(perm,&rip);
2771: ISRestoreIndices(iperm,&riip);
2772: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2773: PetscFree(ajtmp);
2775: /* destroy list of free space and other temporary array(s) */
2776: PetscMalloc1(ui[am]+1,&uj);
2777: PetscFreeSpaceContiguous(&free_space,uj);
2778: PetscIncompleteLLDestroy(lnk,lnkbt);
2779: PetscFreeSpaceDestroy(free_space_lvl);
2781: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2783: /* put together the new matrix in MATSEQSBAIJ format */
2785: b = (Mat_SeqSBAIJ*)fact->data;
2786: b->singlemalloc = PETSC_FALSE;
2788: PetscMalloc1(ui[am]+1,&b->a);
2790: b->j = uj;
2791: b->i = ui;
2792: b->diag = udiag;
2793: b->free_diag = PETSC_TRUE;
2794: b->ilen = 0;
2795: b->imax = 0;
2796: b->row = perm;
2797: b->col = perm;
2799: PetscObjectReference((PetscObject)perm);
2800: PetscObjectReference((PetscObject)perm);
2802: b->icol = iperm;
2803: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2804: PetscMalloc1(am+1,&b->solve_work);
2805: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
2806: b->maxnz = b->nz = ui[am];
2807: b->free_a = PETSC_TRUE;
2808: b->free_ij = PETSC_TRUE;
2810: fact->info.factor_mallocs = reallocs;
2811: fact->info.fill_ratio_given = fill;
2812: if (ai[am] != 0) {
2813: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2814: } else {
2815: fact->info.fill_ratio_needed = 0.0;
2816: }
2817: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2818: return(0);
2819: }
2823: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2824: {
2825: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2826: Mat_SeqSBAIJ *b;
2827: PetscErrorCode ierr;
2828: PetscBool perm_identity,missing;
2829: PetscReal fill = info->fill;
2830: const PetscInt *rip,*riip;
2831: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2832: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2833: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2834: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2835: PetscBT lnkbt;
2836: IS iperm;
2839: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2840: MatMissingDiagonal(A,&missing,&i);
2841: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2843: /* check whether perm is the identity mapping */
2844: ISIdentity(perm,&perm_identity);
2845: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2846: ISGetIndices(iperm,&riip);
2847: ISGetIndices(perm,&rip);
2849: /* initialization */
2850: PetscMalloc1(am+1,&ui);
2851: PetscMalloc1(am+1,&udiag);
2852: ui[0] = 0;
2854: /* jl: linked list for storing indices of the pivot rows
2855: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2856: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2857: for (i=0; i<am; i++) {
2858: jl[i] = am; il[i] = 0;
2859: }
2861: /* create and initialize a linked list for storing column indices of the active row k */
2862: nlnk = am + 1;
2863: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2865: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2866: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);
2867: current_space = free_space;
2869: for (k=0; k<am; k++) { /* for each active row k */
2870: /* initialize lnk by the column indices of row rip[k] of A */
2871: nzk = 0;
2872: ncols = ai[rip[k]+1] - ai[rip[k]];
2873: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2874: ncols_upper = 0;
2875: for (j=0; j<ncols; j++) {
2876: i = riip[*(aj + ai[rip[k]] + j)];
2877: if (i >= k) { /* only take upper triangular entry */
2878: cols[ncols_upper] = i;
2879: ncols_upper++;
2880: }
2881: }
2882: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2883: nzk += nlnk;
2885: /* update lnk by computing fill-in for each pivot row to be merged in */
2886: prow = jl[k]; /* 1st pivot row */
2888: while (prow < k) {
2889: nextprow = jl[prow];
2890: /* merge prow into k-th row */
2891: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2892: jmax = ui[prow+1];
2893: ncols = jmax-jmin;
2894: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2895: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
2896: nzk += nlnk;
2898: /* update il and jl for prow */
2899: if (jmin < jmax) {
2900: il[prow] = jmin;
2901: j = *uj_ptr;
2902: jl[prow] = jl[j];
2903: jl[j] = prow;
2904: }
2905: prow = nextprow;
2906: }
2908: /* if free space is not available, make more free space */
2909: if (current_space->local_remaining<nzk) {
2910: i = am - k + 1; /* num of unfactored rows */
2911: i = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2912: PetscFreeSpaceGet(i,¤t_space);
2913: reallocs++;
2914: }
2916: /* copy data into free space, then initialize lnk */
2917: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
2919: /* add the k-th row into il and jl */
2920: if (nzk > 1) {
2921: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2922: jl[k] = jl[i]; jl[i] = k;
2923: il[k] = ui[k] + 1;
2924: }
2925: ui_ptr[k] = current_space->array;
2927: current_space->array += nzk;
2928: current_space->local_used += nzk;
2929: current_space->local_remaining -= nzk;
2931: ui[k+1] = ui[k] + nzk;
2932: }
2934: ISRestoreIndices(perm,&rip);
2935: ISRestoreIndices(iperm,&riip);
2936: PetscFree4(ui_ptr,jl,il,cols);
2938: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2939: PetscMalloc1(ui[am]+1,&uj);
2940: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2941: PetscLLDestroy(lnk,lnkbt);
2943: /* put together the new matrix in MATSEQSBAIJ format */
2945: b = (Mat_SeqSBAIJ*)fact->data;
2946: b->singlemalloc = PETSC_FALSE;
2947: b->free_a = PETSC_TRUE;
2948: b->free_ij = PETSC_TRUE;
2950: PetscMalloc1(ui[am]+1,&b->a);
2952: b->j = uj;
2953: b->i = ui;
2954: b->diag = udiag;
2955: b->free_diag = PETSC_TRUE;
2956: b->ilen = 0;
2957: b->imax = 0;
2958: b->row = perm;
2959: b->col = perm;
2961: PetscObjectReference((PetscObject)perm);
2962: PetscObjectReference((PetscObject)perm);
2964: b->icol = iperm;
2965: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2967: PetscMalloc1(am+1,&b->solve_work);
2968: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2970: b->maxnz = b->nz = ui[am];
2972: fact->info.factor_mallocs = reallocs;
2973: fact->info.fill_ratio_given = fill;
2974: if (ai[am] != 0) {
2975: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2976: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2977: } else {
2978: fact->info.fill_ratio_needed = 0.0;
2979: }
2980: #if defined(PETSC_USE_INFO)
2981: if (ai[am] != 0) {
2982: PetscReal af = fact->info.fill_ratio_needed;
2983: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2984: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2985: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2986: } else {
2987: PetscInfo(A,"Empty matrix.\n");
2988: }
2989: #endif
2990: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2991: return(0);
2992: }
2996: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2997: {
2998: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2999: Mat_SeqSBAIJ *b;
3000: PetscErrorCode ierr;
3001: PetscBool perm_identity,missing;
3002: PetscReal fill = info->fill;
3003: const PetscInt *rip,*riip;
3004: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
3005: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
3006: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
3007: PetscFreeSpaceList free_space=NULL,current_space=NULL;
3008: PetscBT lnkbt;
3009: IS iperm;
3012: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
3013: MatMissingDiagonal(A,&missing,&i);
3014: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3016: /* check whether perm is the identity mapping */
3017: ISIdentity(perm,&perm_identity);
3018: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
3019: ISGetIndices(iperm,&riip);
3020: ISGetIndices(perm,&rip);
3022: /* initialization */
3023: PetscMalloc1(am+1,&ui);
3024: ui[0] = 0;
3026: /* jl: linked list for storing indices of the pivot rows
3027: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3028: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
3029: for (i=0; i<am; i++) {
3030: jl[i] = am; il[i] = 0;
3031: }
3033: /* create and initialize a linked list for storing column indices of the active row k */
3034: nlnk = am + 1;
3035: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
3037: /* initial FreeSpace size is fill*(ai[am]+1) */
3038: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);
3039: current_space = free_space;
3041: for (k=0; k<am; k++) { /* for each active row k */
3042: /* initialize lnk by the column indices of row rip[k] of A */
3043: nzk = 0;
3044: ncols = ai[rip[k]+1] - ai[rip[k]];
3045: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3046: ncols_upper = 0;
3047: for (j=0; j<ncols; j++) {
3048: i = riip[*(aj + ai[rip[k]] + j)];
3049: if (i >= k) { /* only take upper triangular entry */
3050: cols[ncols_upper] = i;
3051: ncols_upper++;
3052: }
3053: }
3054: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
3055: nzk += nlnk;
3057: /* update lnk by computing fill-in for each pivot row to be merged in */
3058: prow = jl[k]; /* 1st pivot row */
3060: while (prow < k) {
3061: nextprow = jl[prow];
3062: /* merge prow into k-th row */
3063: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3064: jmax = ui[prow+1];
3065: ncols = jmax-jmin;
3066: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3067: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
3068: nzk += nlnk;
3070: /* update il and jl for prow */
3071: if (jmin < jmax) {
3072: il[prow] = jmin;
3073: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3074: }
3075: prow = nextprow;
3076: }
3078: /* if free space is not available, make more free space */
3079: if (current_space->local_remaining<nzk) {
3080: i = am - k + 1; /* num of unfactored rows */
3081: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3082: PetscFreeSpaceGet(i,¤t_space);
3083: reallocs++;
3084: }
3086: /* copy data into free space, then initialize lnk */
3087: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
3089: /* add the k-th row into il and jl */
3090: if (nzk-1 > 0) {
3091: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3092: jl[k] = jl[i]; jl[i] = k;
3093: il[k] = ui[k] + 1;
3094: }
3095: ui_ptr[k] = current_space->array;
3097: current_space->array += nzk;
3098: current_space->local_used += nzk;
3099: current_space->local_remaining -= nzk;
3101: ui[k+1] = ui[k] + nzk;
3102: }
3104: #if defined(PETSC_USE_INFO)
3105: if (ai[am] != 0) {
3106: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3107: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
3108: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
3109: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
3110: } else {
3111: PetscInfo(A,"Empty matrix.\n");
3112: }
3113: #endif
3115: ISRestoreIndices(perm,&rip);
3116: ISRestoreIndices(iperm,&riip);
3117: PetscFree4(ui_ptr,jl,il,cols);
3119: /* destroy list of free space and other temporary array(s) */
3120: PetscMalloc1(ui[am]+1,&uj);
3121: PetscFreeSpaceContiguous(&free_space,uj);
3122: PetscLLDestroy(lnk,lnkbt);
3124: /* put together the new matrix in MATSEQSBAIJ format */
3126: b = (Mat_SeqSBAIJ*)fact->data;
3127: b->singlemalloc = PETSC_FALSE;
3128: b->free_a = PETSC_TRUE;
3129: b->free_ij = PETSC_TRUE;
3131: PetscMalloc1(ui[am]+1,&b->a);
3133: b->j = uj;
3134: b->i = ui;
3135: b->diag = 0;
3136: b->ilen = 0;
3137: b->imax = 0;
3138: b->row = perm;
3139: b->col = perm;
3141: PetscObjectReference((PetscObject)perm);
3142: PetscObjectReference((PetscObject)perm);
3144: b->icol = iperm;
3145: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3147: PetscMalloc1(am+1,&b->solve_work);
3148: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
3149: b->maxnz = b->nz = ui[am];
3151: fact->info.factor_mallocs = reallocs;
3152: fact->info.fill_ratio_given = fill;
3153: if (ai[am] != 0) {
3154: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3155: } else {
3156: fact->info.fill_ratio_needed = 0.0;
3157: }
3158: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3159: return(0);
3160: }
3164: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3165: {
3166: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3167: PetscErrorCode ierr;
3168: PetscInt n = A->rmap->n;
3169: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3170: PetscScalar *x,sum;
3171: const PetscScalar *b;
3172: const MatScalar *aa = a->a,*v;
3173: PetscInt i,nz;
3176: if (!n) return(0);
3178: VecGetArrayRead(bb,&b);
3179: VecGetArray(xx,&x);
3181: /* forward solve the lower triangular */
3182: x[0] = b[0];
3183: v = aa;
3184: vi = aj;
3185: for (i=1; i<n; i++) {
3186: nz = ai[i+1] - ai[i];
3187: sum = b[i];
3188: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3189: v += nz;
3190: vi += nz;
3191: x[i] = sum;
3192: }
3194: /* backward solve the upper triangular */
3195: for (i=n-1; i>=0; i--) {
3196: v = aa + adiag[i+1] + 1;
3197: vi = aj + adiag[i+1] + 1;
3198: nz = adiag[i] - adiag[i+1]-1;
3199: sum = x[i];
3200: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3201: x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3202: }
3204: PetscLogFlops(2.0*a->nz - A->cmap->n);
3205: VecRestoreArrayRead(bb,&b);
3206: VecRestoreArray(xx,&x);
3207: return(0);
3208: }
3212: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3213: {
3214: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3215: IS iscol = a->col,isrow = a->row;
3216: PetscErrorCode ierr;
3217: PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3218: const PetscInt *rout,*cout,*r,*c;
3219: PetscScalar *x,*tmp,sum;
3220: const PetscScalar *b;
3221: const MatScalar *aa = a->a,*v;
3224: if (!n) return(0);
3226: VecGetArrayRead(bb,&b);
3227: VecGetArray(xx,&x);
3228: tmp = a->solve_work;
3230: ISGetIndices(isrow,&rout); r = rout;
3231: ISGetIndices(iscol,&cout); c = cout;
3233: /* forward solve the lower triangular */
3234: tmp[0] = b[r[0]];
3235: v = aa;
3236: vi = aj;
3237: for (i=1; i<n; i++) {
3238: nz = ai[i+1] - ai[i];
3239: sum = b[r[i]];
3240: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3241: tmp[i] = sum;
3242: v += nz; vi += nz;
3243: }
3245: /* backward solve the upper triangular */
3246: for (i=n-1; i>=0; i--) {
3247: v = aa + adiag[i+1]+1;
3248: vi = aj + adiag[i+1]+1;
3249: nz = adiag[i]-adiag[i+1]-1;
3250: sum = tmp[i];
3251: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3252: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3253: }
3255: ISRestoreIndices(isrow,&rout);
3256: ISRestoreIndices(iscol,&cout);
3257: VecRestoreArrayRead(bb,&b);
3258: VecRestoreArray(xx,&x);
3259: PetscLogFlops(2*a->nz - A->cmap->n);
3260: return(0);
3261: }
3265: /*
3266: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3267: */
3268: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3269: {
3270: Mat B = *fact;
3271: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b;
3272: IS isicol;
3274: const PetscInt *r,*ic;
3275: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3276: PetscInt *bi,*bj,*bdiag,*bdiag_rev;
3277: PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3278: PetscInt nlnk,*lnk;
3279: PetscBT lnkbt;
3280: PetscBool row_identity,icol_identity;
3281: MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3282: const PetscInt *ics;
3283: PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3284: PetscReal dt =info->dt,shift=info->shiftamount;
3285: PetscInt dtcount=(PetscInt)info->dtcount,nnz_max;
3286: PetscBool missing;
3289: if (dt == PETSC_DEFAULT) dt = 0.005;
3290: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3292: /* ------- symbolic factorization, can be reused ---------*/
3293: MatMissingDiagonal(A,&missing,&i);
3294: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3295: adiag=a->diag;
3297: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
3299: /* bdiag is location of diagonal in factor */
3300: PetscMalloc1(n+1,&bdiag); /* becomes b->diag */
3301: PetscMalloc1(n+1,&bdiag_rev); /* temporary */
3303: /* allocate row pointers bi */
3304: PetscMalloc1(2*n+2,&bi);
3306: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3307: if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3308: nnz_max = ai[n]+2*n*dtcount+2;
3310: PetscMalloc1(nnz_max+1,&bj);
3311: PetscMalloc1(nnz_max+1,&ba);
3313: /* put together the new matrix */
3314: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
3315: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
3316: b = (Mat_SeqAIJ*)B->data;
3318: b->free_a = PETSC_TRUE;
3319: b->free_ij = PETSC_TRUE;
3320: b->singlemalloc = PETSC_FALSE;
3322: b->a = ba;
3323: b->j = bj;
3324: b->i = bi;
3325: b->diag = bdiag;
3326: b->ilen = 0;
3327: b->imax = 0;
3328: b->row = isrow;
3329: b->col = iscol;
3330: PetscObjectReference((PetscObject)isrow);
3331: PetscObjectReference((PetscObject)iscol);
3332: b->icol = isicol;
3334: PetscMalloc1(n+1,&b->solve_work);
3335: PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
3336: b->maxnz = nnz_max;
3338: B->factortype = MAT_FACTOR_ILUDT;
3339: B->info.factor_mallocs = 0;
3340: B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3341: /* ------- end of symbolic factorization ---------*/
3343: ISGetIndices(isrow,&r);
3344: ISGetIndices(isicol,&ic);
3345: ics = ic;
3347: /* linked list for storing column indices of the active row */
3348: nlnk = n + 1;
3349: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
3351: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3352: PetscMalloc2(n,&im,n,&jtmp);
3353: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3354: PetscMalloc2(n,&rtmp,n,&vtmp);
3355: PetscMemzero(rtmp,n*sizeof(MatScalar));
3357: bi[0] = 0;
3358: bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3359: bdiag_rev[n] = bdiag[0];
3360: bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3361: for (i=0; i<n; i++) {
3362: /* copy initial fill into linked list */
3363: nzi = ai[r[i]+1] - ai[r[i]];
3364: if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3365: nzi_al = adiag[r[i]] - ai[r[i]];
3366: nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3367: ajtmp = aj + ai[r[i]];
3368: PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);
3370: /* load in initial (unfactored row) */
3371: aatmp = a->a + ai[r[i]];
3372: for (j=0; j<nzi; j++) {
3373: rtmp[ics[*ajtmp++]] = *aatmp++;
3374: }
3376: /* add pivot rows into linked list */
3377: row = lnk[n];
3378: while (row < i) {
3379: nzi_bl = bi[row+1] - bi[row] + 1;
3380: bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3381: PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);
3382: nzi += nlnk;
3383: row = lnk[row];
3384: }
3386: /* copy data from lnk into jtmp, then initialize lnk */
3387: PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);
3389: /* numerical factorization */
3390: bjtmp = jtmp;
3391: row = *bjtmp++; /* 1st pivot row */
3392: while (row < i) {
3393: pc = rtmp + row;
3394: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3395: multiplier = (*pc) * (*pv);
3396: *pc = multiplier;
3397: if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3398: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3399: pv = ba + bdiag[row+1] + 1;
3400: /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3401: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3402: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3403: PetscLogFlops(1+2*nz);
3404: }
3405: row = *bjtmp++;
3406: }
3408: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3409: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3410: nzi_bl = 0; j = 0;
3411: while (jtmp[j] < i) { /* Note: jtmp is sorted */
3412: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3413: nzi_bl++; j++;
3414: }
3415: nzi_bu = nzi - nzi_bl -1;
3416: while (j < nzi) {
3417: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3418: j++;
3419: }
3421: bjtmp = bj + bi[i];
3422: batmp = ba + bi[i];
3423: /* apply level dropping rule to L part */
3424: ncut = nzi_al + dtcount;
3425: if (ncut < nzi_bl) {
3426: PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);
3427: PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
3428: } else {
3429: ncut = nzi_bl;
3430: }
3431: for (j=0; j<ncut; j++) {
3432: bjtmp[j] = jtmp[j];
3433: batmp[j] = vtmp[j];
3434: /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3435: }
3436: bi[i+1] = bi[i] + ncut;
3437: nzi = ncut + 1;
3439: /* apply level dropping rule to U part */
3440: ncut = nzi_au + dtcount;
3441: if (ncut < nzi_bu) {
3442: PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);
3443: PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
3444: } else {
3445: ncut = nzi_bu;
3446: }
3447: nzi += ncut;
3449: /* mark bdiagonal */
3450: bdiag[i+1] = bdiag[i] - (ncut + 1);
3451: bdiag_rev[n-i-1] = bdiag[i+1];
3452: bi[2*n - i] = bi[2*n - i +1] - (ncut + 1);
3453: bjtmp = bj + bdiag[i];
3454: batmp = ba + bdiag[i];
3455: *bjtmp = i;
3456: *batmp = diag_tmp; /* rtmp[i]; */
3457: if (*batmp == 0.0) {
3458: *batmp = dt+shift;
3459: /* printf(" row %d add shift %g\n",i,shift); */
3460: }
3461: *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3462: /* printf(" (%d,%g),",*bjtmp,*batmp); */
3464: bjtmp = bj + bdiag[i+1]+1;
3465: batmp = ba + bdiag[i+1]+1;
3466: for (k=0; k<ncut; k++) {
3467: bjtmp[k] = jtmp[nzi_bl+1+k];
3468: batmp[k] = vtmp[nzi_bl+1+k];
3469: /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3470: }
3471: /* printf("\n"); */
3473: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3474: /*
3475: printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3476: printf(" ----------------------------\n");
3477: */
3478: } /* for (i=0; i<n; i++) */
3479: /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3480: if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3482: ISRestoreIndices(isrow,&r);
3483: ISRestoreIndices(isicol,&ic);
3485: PetscLLDestroy(lnk,lnkbt);
3486: PetscFree2(im,jtmp);
3487: PetscFree2(rtmp,vtmp);
3488: PetscFree(bdiag_rev);
3490: PetscLogFlops(B->cmap->n);
3491: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3493: ISIdentity(isrow,&row_identity);
3494: ISIdentity(isicol,&icol_identity);
3495: if (row_identity && icol_identity) {
3496: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3497: } else {
3498: B->ops->solve = MatSolve_SeqAIJ;
3499: }
3501: B->ops->solveadd = 0;
3502: B->ops->solvetranspose = 0;
3503: B->ops->solvetransposeadd = 0;
3504: B->ops->matsolve = 0;
3505: B->assembled = PETSC_TRUE;
3506: B->preallocated = PETSC_TRUE;
3507: return(0);
3508: }
3510: /* a wraper of MatILUDTFactor_SeqAIJ() */
3513: /*
3514: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3515: */
3517: PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3518: {
3522: MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);
3523: return(0);
3524: }
3526: /*
3527: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3528: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3529: */
3532: /*
3533: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3534: */
3536: PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3537: {
3538: Mat C =fact;
3539: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3540: IS isrow = b->row,isicol = b->icol;
3542: const PetscInt *r,*ic,*ics;
3543: PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3544: PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3545: MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3546: PetscReal dt=info->dt,shift=info->shiftamount;
3547: PetscBool row_identity, col_identity;
3550: ISGetIndices(isrow,&r);
3551: ISGetIndices(isicol,&ic);
3552: PetscMalloc1(n+1,&rtmp);
3553: ics = ic;
3555: for (i=0; i<n; i++) {
3556: /* initialize rtmp array */
3557: nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */
3558: bjtmp = bj + bi[i];
3559: for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3560: rtmp[i] = 0.0;
3561: nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3562: bjtmp = bj + bdiag[i+1] + 1;
3563: for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3565: /* load in initial unfactored row of A */
3566: /* printf("row %d\n",i); */
3567: nz = ai[r[i]+1] - ai[r[i]];
3568: ajtmp = aj + ai[r[i]];
3569: v = aa + ai[r[i]];
3570: for (j=0; j<nz; j++) {
3571: rtmp[ics[*ajtmp++]] = v[j];
3572: /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3573: }
3574: /* printf("\n"); */
3576: /* numerical factorization */
3577: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3578: nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3579: k = 0;
3580: while (k < nzl) {
3581: row = *bjtmp++;
3582: /* printf(" prow %d\n",row); */
3583: pc = rtmp + row;
3584: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3585: multiplier = (*pc) * (*pv);
3586: *pc = multiplier;
3587: if (PetscAbsScalar(multiplier) > dt) {
3588: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3589: pv = b->a + bdiag[row+1] + 1;
3590: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3591: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3592: PetscLogFlops(1+2*nz);
3593: }
3594: k++;
3595: }
3597: /* finished row so stick it into b->a */
3598: /* L-part */
3599: pv = b->a + bi[i];
3600: pj = bj + bi[i];
3601: nzl = bi[i+1] - bi[i];
3602: for (j=0; j<nzl; j++) {
3603: pv[j] = rtmp[pj[j]];
3604: /* printf(" (%d,%g),",pj[j],pv[j]); */
3605: }
3607: /* diagonal: invert diagonal entries for simplier triangular solves */
3608: if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3609: b->a[bdiag[i]] = 1.0/rtmp[i];
3610: /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3612: /* U-part */
3613: pv = b->a + bdiag[i+1] + 1;
3614: pj = bj + bdiag[i+1] + 1;
3615: nzu = bdiag[i] - bdiag[i+1] - 1;
3616: for (j=0; j<nzu; j++) {
3617: pv[j] = rtmp[pj[j]];
3618: /* printf(" (%d,%g),",pj[j],pv[j]); */
3619: }
3620: /* printf("\n"); */
3621: }
3623: PetscFree(rtmp);
3624: ISRestoreIndices(isicol,&ic);
3625: ISRestoreIndices(isrow,&r);
3627: ISIdentity(isrow,&row_identity);
3628: ISIdentity(isicol,&col_identity);
3629: if (row_identity && col_identity) {
3630: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3631: } else {
3632: C->ops->solve = MatSolve_SeqAIJ;
3633: }
3634: C->ops->solveadd = 0;
3635: C->ops->solvetranspose = 0;
3636: C->ops->solvetransposeadd = 0;
3637: C->ops->matsolve = 0;
3638: C->assembled = PETSC_TRUE;
3639: C->preallocated = PETSC_TRUE;
3641: PetscLogFlops(C->cmap->n);
3642: return(0);
3643: }