Actual source code: aijfact.c
2: #include <../src/mat/impls/aij/seq/aij.h>
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <petscbt.h>
5: #include <../src/mat/utils/freespace.h>
7: /*
8: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
10: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
11: */
12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
13: {
14: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data;
15: PetscErrorCode ierr;
16: PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
17: const PetscInt *ai = a->i, *aj = a->j;
18: const PetscScalar *aa = a->a;
19: PetscBool *done;
20: PetscReal best,past = 0,future;
23: /* pick initial row */
24: best = -1;
25: for (i=0; i<n; i++) {
26: future = 0.0;
27: for (j=ai[i]; j<ai[i+1]; j++) {
28: if (aj[j] != i) future += PetscAbsScalar(aa[j]);
29: else past = PetscAbsScalar(aa[j]);
30: }
31: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
32: if (past/future > best) {
33: best = past/future;
34: current = i;
35: }
36: }
38: PetscMalloc1(n,&done);
39: PetscArrayzero(done,n);
40: PetscMalloc1(n,&order);
41: order[0] = current;
42: for (i=0; i<n-1; i++) {
43: done[current] = PETSC_TRUE;
44: best = -1;
45: /* loop over all neighbors of current pivot */
46: for (j=ai[current]; j<ai[current+1]; j++) {
47: jj = aj[j];
48: if (done[jj]) continue;
49: /* loop over columns of potential next row computing weights for below and above diagonal */
50: past = future = 0.0;
51: for (k=ai[jj]; k<ai[jj+1]; k++) {
52: kk = aj[k];
53: if (done[kk]) past += PetscAbsScalar(aa[k]);
54: else if (kk != jj) future += PetscAbsScalar(aa[k]);
55: }
56: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
57: if (past/future > best) {
58: best = past/future;
59: newcurrent = jj;
60: }
61: }
62: if (best == -1) { /* no neighbors to select from so select best of all that remain */
63: best = -1;
64: for (k=0; k<n; k++) {
65: if (done[k]) continue;
66: future = 0.0;
67: past = 0.0;
68: for (j=ai[k]; j<ai[k+1]; j++) {
69: kk = aj[j];
70: if (done[kk]) past += PetscAbsScalar(aa[j]);
71: else if (kk != k) future += PetscAbsScalar(aa[j]);
72: }
73: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
74: if (past/future > best) {
75: best = past/future;
76: newcurrent = k;
77: }
78: }
79: }
80: if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
81: current = newcurrent;
82: order[i+1] = current;
83: }
84: ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);
85: *icol = *irow;
86: PetscObjectReference((PetscObject)*irow);
87: PetscFree(done);
88: PetscFree(order);
89: return(0);
90: }
92: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
93: {
94: PetscInt n = A->rmap->n;
98: #if defined(PETSC_USE_COMPLEX)
99: if (A->hermitian && !A->symmetric && (ftype == MAT_FACTOR_CHOLESKY||ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY or ICC Factor is not supported");
100: #endif
101: MatCreate(PetscObjectComm((PetscObject)A),B);
102: MatSetSizes(*B,n,n,n,n);
103: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
104: MatSetType(*B,MATSEQAIJ);
106: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
107: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
109: MatSetBlockSizesFromMats(*B,A,A);
110: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
111: MatSetType(*B,MATSEQSBAIJ);
112: MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);
114: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
115: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
116: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
117: (*B)->factortype = ftype;
119: PetscFree((*B)->solvertype);
120: PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);
121: (*B)->useordering = PETSC_TRUE;
122: return(0);
123: }
125: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
126: {
127: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
128: IS isicol;
129: PetscErrorCode ierr;
130: const PetscInt *r,*ic;
131: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
132: PetscInt *bi,*bj,*ajtmp;
133: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
134: PetscReal f;
135: PetscInt nlnk,*lnk,k,**bi_ptr;
136: PetscFreeSpaceList free_space=NULL,current_space=NULL;
137: PetscBT lnkbt;
138: PetscBool missing;
141: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
142: MatMissingDiagonal(A,&missing,&i);
143: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
145: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
146: ISGetIndices(isrow,&r);
147: ISGetIndices(isicol,&ic);
149: /* get new row pointers */
150: PetscMalloc1(n+1,&bi);
151: bi[0] = 0;
153: /* bdiag is location of diagonal in factor */
154: PetscMalloc1(n+1,&bdiag);
155: bdiag[0] = 0;
157: /* linked list for storing column indices of the active row */
158: nlnk = n + 1;
159: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
161: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
163: /* initial FreeSpace size is f*(ai[n]+1) */
164: f = info->fill;
165: if (n==1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
166: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
167: current_space = free_space;
169: for (i=0; i<n; i++) {
170: /* copy previous fill into linked list */
171: nzi = 0;
172: nnz = ai[r[i]+1] - ai[r[i]];
173: ajtmp = aj + ai[r[i]];
174: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
175: nzi += nlnk;
177: /* add pivot rows into linked list */
178: row = lnk[n];
179: while (row < i) {
180: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
181: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
182: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
183: nzi += nlnk;
184: row = lnk[row];
185: }
186: bi[i+1] = bi[i] + nzi;
187: im[i] = nzi;
189: /* mark bdiag */
190: nzbd = 0;
191: nnz = nzi;
192: k = lnk[n];
193: while (nnz-- && k < i) {
194: nzbd++;
195: k = lnk[k];
196: }
197: bdiag[i] = bi[i] + nzbd;
199: /* if free space is not available, make more free space */
200: if (current_space->local_remaining<nzi) {
201: nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
202: PetscFreeSpaceGet(nnz,¤t_space);
203: reallocs++;
204: }
206: /* copy data into free space, then initialize lnk */
207: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
209: bi_ptr[i] = current_space->array;
210: current_space->array += nzi;
211: current_space->local_used += nzi;
212: current_space->local_remaining -= nzi;
213: }
214: #if defined(PETSC_USE_INFO)
215: if (ai[n] != 0) {
216: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
217: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
218: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
219: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
220: PetscInfo(A,"for best performance.\n");
221: } else {
222: PetscInfo(A,"Empty matrix\n");
223: }
224: #endif
226: ISRestoreIndices(isrow,&r);
227: ISRestoreIndices(isicol,&ic);
229: /* destroy list of free space and other temporary array(s) */
230: PetscMalloc1(bi[n]+1,&bj);
231: PetscFreeSpaceContiguous(&free_space,bj);
232: PetscLLDestroy(lnk,lnkbt);
233: PetscFree2(bi_ptr,im);
235: /* put together the new matrix */
236: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
237: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
238: b = (Mat_SeqAIJ*)(B)->data;
240: b->free_a = PETSC_TRUE;
241: b->free_ij = PETSC_TRUE;
242: b->singlemalloc = PETSC_FALSE;
244: PetscMalloc1(bi[n]+1,&b->a);
245: b->j = bj;
246: b->i = bi;
247: b->diag = bdiag;
248: b->ilen = NULL;
249: b->imax = NULL;
250: b->row = isrow;
251: b->col = iscol;
252: PetscObjectReference((PetscObject)isrow);
253: PetscObjectReference((PetscObject)iscol);
254: b->icol = isicol;
255: PetscMalloc1(n+1,&b->solve_work);
257: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
258: PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
259: b->maxnz = b->nz = bi[n];
261: (B)->factortype = MAT_FACTOR_LU;
262: (B)->info.factor_mallocs = reallocs;
263: (B)->info.fill_ratio_given = f;
265: if (ai[n]) {
266: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
267: } else {
268: (B)->info.fill_ratio_needed = 0.0;
269: }
270: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
271: if (a->inode.size) {
272: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
273: }
274: return(0);
275: }
277: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
278: {
279: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
280: IS isicol;
281: PetscErrorCode ierr;
282: const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
283: PetscInt i,n=A->rmap->n;
284: PetscInt *bi,*bj;
285: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
286: PetscReal f;
287: PetscInt nlnk,*lnk,k,**bi_ptr;
288: PetscFreeSpaceList free_space=NULL,current_space=NULL;
289: PetscBT lnkbt;
290: PetscBool missing;
293: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
294: MatMissingDiagonal(A,&missing,&i);
295: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
297: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
298: ISGetIndices(isrow,&r);
299: ISGetIndices(isicol,&ic);
301: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
302: PetscMalloc1(n+1,&bi);
303: PetscMalloc1(n+1,&bdiag);
304: bi[0] = bdiag[0] = 0;
306: /* linked list for storing column indices of the active row */
307: nlnk = n + 1;
308: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
310: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
312: /* initial FreeSpace size is f*(ai[n]+1) */
313: f = info->fill;
314: if (n==1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
315: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
316: current_space = free_space;
318: for (i=0; i<n; i++) {
319: /* copy previous fill into linked list */
320: nzi = 0;
321: nnz = ai[r[i]+1] - ai[r[i]];
322: ajtmp = aj + ai[r[i]];
323: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
324: nzi += nlnk;
326: /* add pivot rows into linked list */
327: row = lnk[n];
328: while (row < i) {
329: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
330: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
331: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
332: nzi += nlnk;
333: row = lnk[row];
334: }
335: bi[i+1] = bi[i] + nzi;
336: im[i] = nzi;
338: /* mark bdiag */
339: nzbd = 0;
340: nnz = nzi;
341: k = lnk[n];
342: while (nnz-- && k < i) {
343: nzbd++;
344: k = lnk[k];
345: }
346: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
348: /* if free space is not available, make more free space */
349: if (current_space->local_remaining<nzi) {
350: /* estimated additional space needed */
351: nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi));
352: PetscFreeSpaceGet(nnz,¤t_space);
353: reallocs++;
354: }
356: /* copy data into free space, then initialize lnk */
357: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
359: bi_ptr[i] = current_space->array;
360: current_space->array += nzi;
361: current_space->local_used += nzi;
362: current_space->local_remaining -= nzi;
363: }
365: ISRestoreIndices(isrow,&r);
366: ISRestoreIndices(isicol,&ic);
368: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
369: PetscMalloc1(bi[n]+1,&bj);
370: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
371: PetscLLDestroy(lnk,lnkbt);
372: PetscFree2(bi_ptr,im);
374: /* put together the new matrix */
375: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
376: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
377: b = (Mat_SeqAIJ*)(B)->data;
379: b->free_a = PETSC_TRUE;
380: b->free_ij = PETSC_TRUE;
381: b->singlemalloc = PETSC_FALSE;
383: PetscMalloc1(bdiag[0]+1,&b->a);
385: b->j = bj;
386: b->i = bi;
387: b->diag = bdiag;
388: b->ilen = NULL;
389: b->imax = NULL;
390: b->row = isrow;
391: b->col = iscol;
392: PetscObjectReference((PetscObject)isrow);
393: PetscObjectReference((PetscObject)iscol);
394: b->icol = isicol;
395: PetscMalloc1(n+1,&b->solve_work);
397: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
398: PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
399: b->maxnz = b->nz = bdiag[0]+1;
401: B->factortype = MAT_FACTOR_LU;
402: B->info.factor_mallocs = reallocs;
403: B->info.fill_ratio_given = f;
405: if (ai[n]) {
406: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
407: } else {
408: B->info.fill_ratio_needed = 0.0;
409: }
410: #if defined(PETSC_USE_INFO)
411: if (ai[n] != 0) {
412: PetscReal af = B->info.fill_ratio_needed;
413: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
414: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
415: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
416: PetscInfo(A,"for best performance.\n");
417: } else {
418: PetscInfo(A,"Empty matrix\n");
419: }
420: #endif
421: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
422: if (a->inode.size) {
423: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
424: }
425: MatSeqAIJCheckInode_FactorLU(B);
426: return(0);
427: }
429: /*
430: Trouble in factorization, should we dump the original matrix?
431: */
432: PetscErrorCode MatFactorDumpMatrix(Mat A)
433: {
435: PetscBool flg = PETSC_FALSE;
438: PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);
439: if (flg) {
440: PetscViewer viewer;
441: char filename[PETSC_MAX_PATH_LEN];
443: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
444: PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);
445: MatView(A,viewer);
446: PetscViewerDestroy(&viewer);
447: }
448: return(0);
449: }
451: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
452: {
453: Mat C =B;
454: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
455: IS isrow = b->row,isicol = b->icol;
456: PetscErrorCode ierr;
457: const PetscInt *r,*ic,*ics;
458: const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
459: PetscInt i,j,k,nz,nzL,row,*pj;
460: const PetscInt *ajtmp,*bjtmp;
461: MatScalar *rtmp,*pc,multiplier,*pv;
462: const MatScalar *aa=a->a,*v;
463: PetscBool row_identity,col_identity;
464: FactorShiftCtx sctx;
465: const PetscInt *ddiag;
466: PetscReal rs;
467: MatScalar d;
470: /* MatPivotSetUp(): initialize shift context sctx */
471: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
473: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
474: ddiag = a->diag;
475: sctx.shift_top = info->zeropivot;
476: for (i=0; i<n; i++) {
477: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
478: d = (aa)[ddiag[i]];
479: rs = -PetscAbsScalar(d) - PetscRealPart(d);
480: v = aa+ai[i];
481: nz = ai[i+1] - ai[i];
482: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
483: if (rs>sctx.shift_top) sctx.shift_top = rs;
484: }
485: sctx.shift_top *= 1.1;
486: sctx.nshift_max = 5;
487: sctx.shift_lo = 0.;
488: sctx.shift_hi = 1.;
489: }
491: ISGetIndices(isrow,&r);
492: ISGetIndices(isicol,&ic);
493: PetscMalloc1(n+1,&rtmp);
494: ics = ic;
496: do {
497: sctx.newshift = PETSC_FALSE;
498: for (i=0; i<n; i++) {
499: /* zero rtmp */
500: /* L part */
501: nz = bi[i+1] - bi[i];
502: bjtmp = bj + bi[i];
503: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
505: /* U part */
506: nz = bdiag[i]-bdiag[i+1];
507: bjtmp = bj + bdiag[i+1]+1;
508: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
510: /* load in initial (unfactored row) */
511: nz = ai[r[i]+1] - ai[r[i]];
512: ajtmp = aj + ai[r[i]];
513: v = aa + ai[r[i]];
514: for (j=0; j<nz; j++) {
515: rtmp[ics[ajtmp[j]]] = v[j];
516: }
517: /* ZeropivotApply() */
518: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
520: /* elimination */
521: bjtmp = bj + bi[i];
522: row = *bjtmp++;
523: nzL = bi[i+1] - bi[i];
524: for (k=0; k < nzL; k++) {
525: pc = rtmp + row;
526: if (*pc != 0.0) {
527: pv = b->a + bdiag[row];
528: multiplier = *pc * (*pv);
529: *pc = multiplier;
531: pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
532: pv = b->a + bdiag[row+1]+1;
533: nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
535: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
536: PetscLogFlops(1+2.0*nz);
537: }
538: row = *bjtmp++;
539: }
541: /* finished row so stick it into b->a */
542: rs = 0.0;
543: /* L part */
544: pv = b->a + bi[i];
545: pj = b->j + bi[i];
546: nz = bi[i+1] - bi[i];
547: for (j=0; j<nz; j++) {
548: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
549: }
551: /* U part */
552: pv = b->a + bdiag[i+1]+1;
553: pj = b->j + bdiag[i+1]+1;
554: nz = bdiag[i] - bdiag[i+1]-1;
555: for (j=0; j<nz; j++) {
556: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
557: }
559: sctx.rs = rs;
560: sctx.pv = rtmp[i];
561: MatPivotCheck(B,A,info,&sctx,i);
562: if (sctx.newshift) break; /* break for-loop */
563: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
565: /* Mark diagonal and invert diagonal for simplier triangular solves */
566: pv = b->a + bdiag[i];
567: *pv = 1.0/rtmp[i];
569: } /* endof for (i=0; i<n; i++) { */
571: /* MatPivotRefine() */
572: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
573: /*
574: * if no shift in this attempt & shifting & started shifting & can refine,
575: * then try lower shift
576: */
577: sctx.shift_hi = sctx.shift_fraction;
578: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
579: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
580: sctx.newshift = PETSC_TRUE;
581: sctx.nshift++;
582: }
583: } while (sctx.newshift);
585: PetscFree(rtmp);
586: ISRestoreIndices(isicol,&ic);
587: ISRestoreIndices(isrow,&r);
589: ISIdentity(isrow,&row_identity);
590: ISIdentity(isicol,&col_identity);
591: if (b->inode.size) {
592: C->ops->solve = MatSolve_SeqAIJ_Inode;
593: } else if (row_identity && col_identity) {
594: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
595: } else {
596: C->ops->solve = MatSolve_SeqAIJ;
597: }
598: C->ops->solveadd = MatSolveAdd_SeqAIJ;
599: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
600: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
601: C->ops->matsolve = MatMatSolve_SeqAIJ;
602: C->assembled = PETSC_TRUE;
603: C->preallocated = PETSC_TRUE;
605: PetscLogFlops(C->cmap->n);
607: /* MatShiftView(A,info,&sctx) */
608: if (sctx.nshift) {
609: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
610: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
611: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
612: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
613: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
614: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
615: }
616: }
617: return(0);
618: }
620: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
621: {
622: Mat C =B;
623: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
624: IS isrow = b->row,isicol = b->icol;
625: PetscErrorCode ierr;
626: const PetscInt *r,*ic,*ics;
627: PetscInt nz,row,i,j,n=A->rmap->n,diag;
628: const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
629: const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
630: MatScalar *pv,*rtmp,*pc,multiplier,d;
631: const MatScalar *v,*aa=a->a;
632: PetscReal rs=0.0;
633: FactorShiftCtx sctx;
634: const PetscInt *ddiag;
635: PetscBool row_identity, col_identity;
638: /* MatPivotSetUp(): initialize shift context sctx */
639: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
641: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
642: ddiag = a->diag;
643: sctx.shift_top = info->zeropivot;
644: for (i=0; i<n; i++) {
645: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
646: d = (aa)[ddiag[i]];
647: rs = -PetscAbsScalar(d) - PetscRealPart(d);
648: v = aa+ai[i];
649: nz = ai[i+1] - ai[i];
650: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
651: if (rs>sctx.shift_top) sctx.shift_top = rs;
652: }
653: sctx.shift_top *= 1.1;
654: sctx.nshift_max = 5;
655: sctx.shift_lo = 0.;
656: sctx.shift_hi = 1.;
657: }
659: ISGetIndices(isrow,&r);
660: ISGetIndices(isicol,&ic);
661: PetscMalloc1(n+1,&rtmp);
662: ics = ic;
664: do {
665: sctx.newshift = PETSC_FALSE;
666: for (i=0; i<n; i++) {
667: nz = bi[i+1] - bi[i];
668: bjtmp = bj + bi[i];
669: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
671: /* load in initial (unfactored row) */
672: nz = ai[r[i]+1] - ai[r[i]];
673: ajtmp = aj + ai[r[i]];
674: v = aa + ai[r[i]];
675: for (j=0; j<nz; j++) {
676: rtmp[ics[ajtmp[j]]] = v[j];
677: }
678: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
680: row = *bjtmp++;
681: while (row < i) {
682: pc = rtmp + row;
683: if (*pc != 0.0) {
684: pv = b->a + diag_offset[row];
685: pj = b->j + diag_offset[row] + 1;
686: multiplier = *pc / *pv++;
687: *pc = multiplier;
688: nz = bi[row+1] - diag_offset[row] - 1;
689: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
690: PetscLogFlops(1+2.0*nz);
691: }
692: row = *bjtmp++;
693: }
694: /* finished row so stick it into b->a */
695: pv = b->a + bi[i];
696: pj = b->j + bi[i];
697: nz = bi[i+1] - bi[i];
698: diag = diag_offset[i] - bi[i];
699: rs = 0.0;
700: for (j=0; j<nz; j++) {
701: pv[j] = rtmp[pj[j]];
702: rs += PetscAbsScalar(pv[j]);
703: }
704: rs -= PetscAbsScalar(pv[diag]);
706: sctx.rs = rs;
707: sctx.pv = pv[diag];
708: MatPivotCheck(B,A,info,&sctx,i);
709: if (sctx.newshift) break;
710: pv[diag] = sctx.pv;
711: }
713: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
714: /*
715: * if no shift in this attempt & shifting & started shifting & can refine,
716: * then try lower shift
717: */
718: sctx.shift_hi = sctx.shift_fraction;
719: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
720: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
721: sctx.newshift = PETSC_TRUE;
722: sctx.nshift++;
723: }
724: } while (sctx.newshift);
726: /* invert diagonal entries for simplier triangular solves */
727: for (i=0; i<n; i++) {
728: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
729: }
730: PetscFree(rtmp);
731: ISRestoreIndices(isicol,&ic);
732: ISRestoreIndices(isrow,&r);
734: ISIdentity(isrow,&row_identity);
735: ISIdentity(isicol,&col_identity);
736: if (row_identity && col_identity) {
737: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
738: } else {
739: C->ops->solve = MatSolve_SeqAIJ_inplace;
740: }
741: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
742: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
743: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
744: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
746: C->assembled = PETSC_TRUE;
747: C->preallocated = PETSC_TRUE;
749: PetscLogFlops(C->cmap->n);
750: if (sctx.nshift) {
751: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
752: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
753: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
754: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
755: }
756: }
757: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
758: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
760: MatSeqAIJCheckInode(C);
761: return(0);
762: }
764: /*
765: This routine implements inplace ILU(0) with row or/and column permutations.
766: Input:
767: A - original matrix
768: Output;
769: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
770: a->j (col index) is permuted by the inverse of colperm, then sorted
771: a->a reordered accordingly with a->j
772: a->diag (ptr to diagonal elements) is updated.
773: */
774: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
775: {
776: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data;
777: IS isrow = a->row,isicol = a->icol;
778: PetscErrorCode ierr;
779: const PetscInt *r,*ic,*ics;
780: PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
781: PetscInt *ajtmp,nz,row;
782: PetscInt *diag = a->diag,nbdiag,*pj;
783: PetscScalar *rtmp,*pc,multiplier,d;
784: MatScalar *pv,*v;
785: PetscReal rs;
786: FactorShiftCtx sctx;
787: const MatScalar *aa=a->a,*vtmp;
790: if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
792: /* MatPivotSetUp(): initialize shift context sctx */
793: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
795: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
796: const PetscInt *ddiag = a->diag;
797: sctx.shift_top = info->zeropivot;
798: for (i=0; i<n; i++) {
799: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
800: d = (aa)[ddiag[i]];
801: rs = -PetscAbsScalar(d) - PetscRealPart(d);
802: vtmp = aa+ai[i];
803: nz = ai[i+1] - ai[i];
804: for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
805: if (rs>sctx.shift_top) sctx.shift_top = rs;
806: }
807: sctx.shift_top *= 1.1;
808: sctx.nshift_max = 5;
809: sctx.shift_lo = 0.;
810: sctx.shift_hi = 1.;
811: }
813: ISGetIndices(isrow,&r);
814: ISGetIndices(isicol,&ic);
815: PetscMalloc1(n+1,&rtmp);
816: PetscArrayzero(rtmp,n+1);
817: ics = ic;
819: #if defined(MV)
820: sctx.shift_top = 0.;
821: sctx.nshift_max = 0;
822: sctx.shift_lo = 0.;
823: sctx.shift_hi = 0.;
824: sctx.shift_fraction = 0.;
826: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
827: sctx.shift_top = 0.;
828: for (i=0; i<n; i++) {
829: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
830: d = (a->a)[diag[i]];
831: rs = -PetscAbsScalar(d) - PetscRealPart(d);
832: v = a->a+ai[i];
833: nz = ai[i+1] - ai[i];
834: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
835: if (rs>sctx.shift_top) sctx.shift_top = rs;
836: }
837: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
838: sctx.shift_top *= 1.1;
839: sctx.nshift_max = 5;
840: sctx.shift_lo = 0.;
841: sctx.shift_hi = 1.;
842: }
844: sctx.shift_amount = 0.;
845: sctx.nshift = 0;
846: #endif
848: do {
849: sctx.newshift = PETSC_FALSE;
850: for (i=0; i<n; i++) {
851: /* load in initial unfactored row */
852: nz = ai[r[i]+1] - ai[r[i]];
853: ajtmp = aj + ai[r[i]];
854: v = a->a + ai[r[i]];
855: /* sort permuted ajtmp and values v accordingly */
856: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
857: PetscSortIntWithScalarArray(nz,ajtmp,v);
859: diag[r[i]] = ai[r[i]];
860: for (j=0; j<nz; j++) {
861: rtmp[ajtmp[j]] = v[j];
862: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
863: }
864: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
866: row = *ajtmp++;
867: while (row < i) {
868: pc = rtmp + row;
869: if (*pc != 0.0) {
870: pv = a->a + diag[r[row]];
871: pj = aj + diag[r[row]] + 1;
873: multiplier = *pc / *pv++;
874: *pc = multiplier;
875: nz = ai[r[row]+1] - diag[r[row]] - 1;
876: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
877: PetscLogFlops(1+2.0*nz);
878: }
879: row = *ajtmp++;
880: }
881: /* finished row so overwrite it onto a->a */
882: pv = a->a + ai[r[i]];
883: pj = aj + ai[r[i]];
884: nz = ai[r[i]+1] - ai[r[i]];
885: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
887: rs = 0.0;
888: for (j=0; j<nz; j++) {
889: pv[j] = rtmp[pj[j]];
890: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
891: }
893: sctx.rs = rs;
894: sctx.pv = pv[nbdiag];
895: MatPivotCheck(B,A,info,&sctx,i);
896: if (sctx.newshift) break;
897: pv[nbdiag] = sctx.pv;
898: }
900: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
901: /*
902: * if no shift in this attempt & shifting & started shifting & can refine,
903: * then try lower shift
904: */
905: sctx.shift_hi = sctx.shift_fraction;
906: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
907: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
908: sctx.newshift = PETSC_TRUE;
909: sctx.nshift++;
910: }
911: } while (sctx.newshift);
913: /* invert diagonal entries for simplier triangular solves */
914: for (i=0; i<n; i++) {
915: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
916: }
918: PetscFree(rtmp);
919: ISRestoreIndices(isicol,&ic);
920: ISRestoreIndices(isrow,&r);
922: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
923: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
924: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
925: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
927: A->assembled = PETSC_TRUE;
928: A->preallocated = PETSC_TRUE;
930: PetscLogFlops(A->cmap->n);
931: if (sctx.nshift) {
932: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
933: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
934: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
935: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
936: }
937: }
938: return(0);
939: }
941: /* ----------------------------------------------------------- */
942: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
943: {
945: Mat C;
948: MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);
949: MatLUFactorSymbolic(C,A,row,col,info);
950: MatLUFactorNumeric(C,A,info);
952: A->ops->solve = C->ops->solve;
953: A->ops->solvetranspose = C->ops->solvetranspose;
955: MatHeaderMerge(A,&C);
956: PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);
957: return(0);
958: }
959: /* ----------------------------------------------------------- */
962: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
963: {
964: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
965: IS iscol = a->col,isrow = a->row;
966: PetscErrorCode ierr;
967: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
968: PetscInt nz;
969: const PetscInt *rout,*cout,*r,*c;
970: PetscScalar *x,*tmp,*tmps,sum;
971: const PetscScalar *b;
972: const MatScalar *aa = a->a,*v;
975: if (!n) return(0);
977: VecGetArrayRead(bb,&b);
978: VecGetArrayWrite(xx,&x);
979: tmp = a->solve_work;
981: ISGetIndices(isrow,&rout); r = rout;
982: ISGetIndices(iscol,&cout); c = cout + (n-1);
984: /* forward solve the lower triangular */
985: tmp[0] = b[*r++];
986: tmps = tmp;
987: for (i=1; i<n; i++) {
988: v = aa + ai[i];
989: vi = aj + ai[i];
990: nz = a->diag[i] - ai[i];
991: sum = b[*r++];
992: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
993: tmp[i] = sum;
994: }
996: /* backward solve the upper triangular */
997: for (i=n-1; i>=0; i--) {
998: v = aa + a->diag[i] + 1;
999: vi = aj + a->diag[i] + 1;
1000: nz = ai[i+1] - a->diag[i] - 1;
1001: sum = tmp[i];
1002: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1003: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1004: }
1006: ISRestoreIndices(isrow,&rout);
1007: ISRestoreIndices(iscol,&cout);
1008: VecRestoreArrayRead(bb,&b);
1009: VecRestoreArrayWrite(xx,&x);
1010: PetscLogFlops(2.0*a->nz - A->cmap->n);
1011: return(0);
1012: }
1014: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1015: {
1016: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1017: IS iscol = a->col,isrow = a->row;
1018: PetscErrorCode ierr;
1019: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1020: PetscInt nz,neq,ldb,ldx;
1021: const PetscInt *rout,*cout,*r,*c;
1022: PetscScalar *x,*tmp = a->solve_work,*tmps,sum;
1023: const PetscScalar *b,*aa = a->a,*v;
1024: PetscBool isdense;
1027: if (!n) return(0);
1028: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense);
1029: if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1030: if (X != B) {
1031: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense);
1032: if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1033: }
1034: MatDenseGetArrayRead(B,&b);
1035: MatDenseGetLDA(B,&ldb);
1036: MatDenseGetArray(X,&x);
1037: MatDenseGetLDA(X,&ldx);
1038: ISGetIndices(isrow,&rout); r = rout;
1039: ISGetIndices(iscol,&cout); c = cout;
1040: for (neq=0; neq<B->cmap->n; neq++) {
1041: /* forward solve the lower triangular */
1042: tmp[0] = b[r[0]];
1043: tmps = tmp;
1044: for (i=1; i<n; i++) {
1045: v = aa + ai[i];
1046: vi = aj + ai[i];
1047: nz = a->diag[i] - ai[i];
1048: sum = b[r[i]];
1049: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1050: tmp[i] = sum;
1051: }
1052: /* backward solve the upper triangular */
1053: for (i=n-1; i>=0; i--) {
1054: v = aa + a->diag[i] + 1;
1055: vi = aj + a->diag[i] + 1;
1056: nz = ai[i+1] - a->diag[i] - 1;
1057: sum = tmp[i];
1058: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1059: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1060: }
1061: b += ldb;
1062: x += ldx;
1063: }
1064: ISRestoreIndices(isrow,&rout);
1065: ISRestoreIndices(iscol,&cout);
1066: MatDenseRestoreArrayRead(B,&b);
1067: MatDenseRestoreArray(X,&x);
1068: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1069: return(0);
1070: }
1072: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1073: {
1074: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1075: IS iscol = a->col,isrow = a->row;
1076: PetscErrorCode ierr;
1077: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1078: PetscInt nz,neq,ldb,ldx;
1079: const PetscInt *rout,*cout,*r,*c;
1080: PetscScalar *x,*tmp = a->solve_work,sum;
1081: const PetscScalar *b,*aa = a->a,*v;
1082: PetscBool isdense;
1085: if (!n) return(0);
1086: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense);
1087: if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1088: if (X != B) {
1089: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense);
1090: if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1091: }
1092: MatDenseGetArrayRead(B,&b);
1093: MatDenseGetLDA(B,&ldb);
1094: MatDenseGetArray(X,&x);
1095: MatDenseGetLDA(X,&ldx);
1096: ISGetIndices(isrow,&rout); r = rout;
1097: ISGetIndices(iscol,&cout); c = cout;
1098: for (neq=0; neq<B->cmap->n; neq++) {
1099: /* forward solve the lower triangular */
1100: tmp[0] = b[r[0]];
1101: v = aa;
1102: vi = aj;
1103: for (i=1; i<n; i++) {
1104: nz = ai[i+1] - ai[i];
1105: sum = b[r[i]];
1106: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1107: tmp[i] = sum;
1108: v += nz; vi += nz;
1109: }
1110: /* backward solve the upper triangular */
1111: for (i=n-1; i>=0; i--) {
1112: v = aa + adiag[i+1]+1;
1113: vi = aj + adiag[i+1]+1;
1114: nz = adiag[i]-adiag[i+1]-1;
1115: sum = tmp[i];
1116: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1117: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1118: }
1119: b += ldb;
1120: x += ldx;
1121: }
1122: ISRestoreIndices(isrow,&rout);
1123: ISRestoreIndices(iscol,&cout);
1124: MatDenseRestoreArrayRead(B,&b);
1125: MatDenseRestoreArray(X,&x);
1126: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1127: return(0);
1128: }
1130: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1131: {
1132: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1133: IS iscol = a->col,isrow = a->row;
1134: PetscErrorCode ierr;
1135: const PetscInt *r,*c,*rout,*cout;
1136: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1137: PetscInt nz,row;
1138: PetscScalar *x,*tmp,*tmps,sum;
1139: const PetscScalar *b;
1140: const MatScalar *aa = a->a,*v;
1143: if (!n) return(0);
1145: VecGetArrayRead(bb,&b);
1146: VecGetArrayWrite(xx,&x);
1147: tmp = a->solve_work;
1149: ISGetIndices(isrow,&rout); r = rout;
1150: ISGetIndices(iscol,&cout); c = cout + (n-1);
1152: /* forward solve the lower triangular */
1153: tmp[0] = b[*r++];
1154: tmps = tmp;
1155: for (row=1; row<n; row++) {
1156: i = rout[row]; /* permuted row */
1157: v = aa + ai[i];
1158: vi = aj + ai[i];
1159: nz = a->diag[i] - ai[i];
1160: sum = b[*r++];
1161: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1162: tmp[row] = sum;
1163: }
1165: /* backward solve the upper triangular */
1166: for (row=n-1; row>=0; row--) {
1167: i = rout[row]; /* permuted row */
1168: v = aa + a->diag[i] + 1;
1169: vi = aj + a->diag[i] + 1;
1170: nz = ai[i+1] - a->diag[i] - 1;
1171: sum = tmp[row];
1172: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1173: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1174: }
1176: ISRestoreIndices(isrow,&rout);
1177: ISRestoreIndices(iscol,&cout);
1178: VecRestoreArrayRead(bb,&b);
1179: VecRestoreArrayWrite(xx,&x);
1180: PetscLogFlops(2.0*a->nz - A->cmap->n);
1181: return(0);
1182: }
1184: /* ----------------------------------------------------------- */
1185: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1186: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1187: {
1188: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1189: PetscErrorCode ierr;
1190: PetscInt n = A->rmap->n;
1191: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag;
1192: PetscScalar *x;
1193: const PetscScalar *b;
1194: const MatScalar *aa = a->a;
1195: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1196: PetscInt adiag_i,i,nz,ai_i;
1197: const PetscInt *vi;
1198: const MatScalar *v;
1199: PetscScalar sum;
1200: #endif
1203: if (!n) return(0);
1205: VecGetArrayRead(bb,&b);
1206: VecGetArrayWrite(xx,&x);
1208: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1209: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1210: #else
1211: /* forward solve the lower triangular */
1212: x[0] = b[0];
1213: for (i=1; i<n; i++) {
1214: ai_i = ai[i];
1215: v = aa + ai_i;
1216: vi = aj + ai_i;
1217: nz = adiag[i] - ai_i;
1218: sum = b[i];
1219: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1220: x[i] = sum;
1221: }
1223: /* backward solve the upper triangular */
1224: for (i=n-1; i>=0; i--) {
1225: adiag_i = adiag[i];
1226: v = aa + adiag_i + 1;
1227: vi = aj + adiag_i + 1;
1228: nz = ai[i+1] - adiag_i - 1;
1229: sum = x[i];
1230: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1231: x[i] = sum*aa[adiag_i];
1232: }
1233: #endif
1234: PetscLogFlops(2.0*a->nz - A->cmap->n);
1235: VecRestoreArrayRead(bb,&b);
1236: VecRestoreArrayWrite(xx,&x);
1237: return(0);
1238: }
1240: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1241: {
1242: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1243: IS iscol = a->col,isrow = a->row;
1244: PetscErrorCode ierr;
1245: PetscInt i, n = A->rmap->n,j;
1246: PetscInt nz;
1247: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1248: PetscScalar *x,*tmp,sum;
1249: const PetscScalar *b;
1250: const MatScalar *aa = a->a,*v;
1253: if (yy != xx) {VecCopy(yy,xx);}
1255: VecGetArrayRead(bb,&b);
1256: VecGetArray(xx,&x);
1257: tmp = a->solve_work;
1259: ISGetIndices(isrow,&rout); r = rout;
1260: ISGetIndices(iscol,&cout); c = cout + (n-1);
1262: /* forward solve the lower triangular */
1263: tmp[0] = b[*r++];
1264: for (i=1; i<n; i++) {
1265: v = aa + ai[i];
1266: vi = aj + ai[i];
1267: nz = a->diag[i] - ai[i];
1268: sum = b[*r++];
1269: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1270: tmp[i] = sum;
1271: }
1273: /* backward solve the upper triangular */
1274: for (i=n-1; i>=0; i--) {
1275: v = aa + a->diag[i] + 1;
1276: vi = aj + a->diag[i] + 1;
1277: nz = ai[i+1] - a->diag[i] - 1;
1278: sum = tmp[i];
1279: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1280: tmp[i] = sum*aa[a->diag[i]];
1281: x[*c--] += tmp[i];
1282: }
1284: ISRestoreIndices(isrow,&rout);
1285: ISRestoreIndices(iscol,&cout);
1286: VecRestoreArrayRead(bb,&b);
1287: VecRestoreArray(xx,&x);
1288: PetscLogFlops(2.0*a->nz);
1289: return(0);
1290: }
1292: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1293: {
1294: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1295: IS iscol = a->col,isrow = a->row;
1296: PetscErrorCode ierr;
1297: PetscInt i, n = A->rmap->n,j;
1298: PetscInt nz;
1299: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1300: PetscScalar *x,*tmp,sum;
1301: const PetscScalar *b;
1302: const MatScalar *aa = a->a,*v;
1305: if (yy != xx) {VecCopy(yy,xx);}
1307: VecGetArrayRead(bb,&b);
1308: VecGetArray(xx,&x);
1309: tmp = a->solve_work;
1311: ISGetIndices(isrow,&rout); r = rout;
1312: ISGetIndices(iscol,&cout); c = cout;
1314: /* forward solve the lower triangular */
1315: tmp[0] = b[r[0]];
1316: v = aa;
1317: vi = aj;
1318: for (i=1; i<n; i++) {
1319: nz = ai[i+1] - ai[i];
1320: sum = b[r[i]];
1321: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1322: tmp[i] = sum;
1323: v += nz;
1324: vi += nz;
1325: }
1327: /* backward solve the upper triangular */
1328: v = aa + adiag[n-1];
1329: vi = aj + adiag[n-1];
1330: for (i=n-1; i>=0; i--) {
1331: nz = adiag[i] - adiag[i+1] - 1;
1332: sum = tmp[i];
1333: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1334: tmp[i] = sum*v[nz];
1335: x[c[i]] += tmp[i];
1336: v += nz+1; vi += nz+1;
1337: }
1339: ISRestoreIndices(isrow,&rout);
1340: ISRestoreIndices(iscol,&cout);
1341: VecRestoreArrayRead(bb,&b);
1342: VecRestoreArray(xx,&x);
1343: PetscLogFlops(2.0*a->nz);
1344: return(0);
1345: }
1347: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1348: {
1349: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1350: IS iscol = a->col,isrow = a->row;
1351: PetscErrorCode ierr;
1352: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1353: PetscInt i,n = A->rmap->n,j;
1354: PetscInt nz;
1355: PetscScalar *x,*tmp,s1;
1356: const MatScalar *aa = a->a,*v;
1357: const PetscScalar *b;
1360: VecGetArrayRead(bb,&b);
1361: VecGetArrayWrite(xx,&x);
1362: tmp = a->solve_work;
1364: ISGetIndices(isrow,&rout); r = rout;
1365: ISGetIndices(iscol,&cout); c = cout;
1367: /* copy the b into temp work space according to permutation */
1368: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1370: /* forward solve the U^T */
1371: for (i=0; i<n; i++) {
1372: v = aa + diag[i];
1373: vi = aj + diag[i] + 1;
1374: nz = ai[i+1] - diag[i] - 1;
1375: s1 = tmp[i];
1376: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1377: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1378: tmp[i] = s1;
1379: }
1381: /* backward solve the L^T */
1382: for (i=n-1; i>=0; i--) {
1383: v = aa + diag[i] - 1;
1384: vi = aj + diag[i] - 1;
1385: nz = diag[i] - ai[i];
1386: s1 = tmp[i];
1387: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1388: }
1390: /* copy tmp into x according to permutation */
1391: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1393: ISRestoreIndices(isrow,&rout);
1394: ISRestoreIndices(iscol,&cout);
1395: VecRestoreArrayRead(bb,&b);
1396: VecRestoreArrayWrite(xx,&x);
1398: PetscLogFlops(2.0*a->nz-A->cmap->n);
1399: return(0);
1400: }
1402: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1403: {
1404: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1405: IS iscol = a->col,isrow = a->row;
1406: PetscErrorCode ierr;
1407: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1408: PetscInt i,n = A->rmap->n,j;
1409: PetscInt nz;
1410: PetscScalar *x,*tmp,s1;
1411: const MatScalar *aa = a->a,*v;
1412: const PetscScalar *b;
1415: VecGetArrayRead(bb,&b);
1416: VecGetArrayWrite(xx,&x);
1417: tmp = a->solve_work;
1419: ISGetIndices(isrow,&rout); r = rout;
1420: ISGetIndices(iscol,&cout); c = cout;
1422: /* copy the b into temp work space according to permutation */
1423: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1425: /* forward solve the U^T */
1426: for (i=0; i<n; i++) {
1427: v = aa + adiag[i+1] + 1;
1428: vi = aj + adiag[i+1] + 1;
1429: nz = adiag[i] - adiag[i+1] - 1;
1430: s1 = tmp[i];
1431: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1432: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1433: tmp[i] = s1;
1434: }
1436: /* backward solve the L^T */
1437: for (i=n-1; i>=0; i--) {
1438: v = aa + ai[i];
1439: vi = aj + ai[i];
1440: nz = ai[i+1] - ai[i];
1441: s1 = tmp[i];
1442: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1443: }
1445: /* copy tmp into x according to permutation */
1446: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1448: ISRestoreIndices(isrow,&rout);
1449: ISRestoreIndices(iscol,&cout);
1450: VecRestoreArrayRead(bb,&b);
1451: VecRestoreArrayWrite(xx,&x);
1453: PetscLogFlops(2.0*a->nz-A->cmap->n);
1454: return(0);
1455: }
1457: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1458: {
1459: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1460: IS iscol = a->col,isrow = a->row;
1461: PetscErrorCode ierr;
1462: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1463: PetscInt i,n = A->rmap->n,j;
1464: PetscInt nz;
1465: PetscScalar *x,*tmp,s1;
1466: const MatScalar *aa = a->a,*v;
1467: const PetscScalar *b;
1470: if (zz != xx) {VecCopy(zz,xx);}
1471: VecGetArrayRead(bb,&b);
1472: VecGetArray(xx,&x);
1473: tmp = a->solve_work;
1475: ISGetIndices(isrow,&rout); r = rout;
1476: ISGetIndices(iscol,&cout); c = cout;
1478: /* copy the b into temp work space according to permutation */
1479: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1481: /* forward solve the U^T */
1482: for (i=0; i<n; i++) {
1483: v = aa + diag[i];
1484: vi = aj + diag[i] + 1;
1485: nz = ai[i+1] - diag[i] - 1;
1486: s1 = tmp[i];
1487: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1488: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1489: tmp[i] = s1;
1490: }
1492: /* backward solve the L^T */
1493: for (i=n-1; i>=0; i--) {
1494: v = aa + diag[i] - 1;
1495: vi = aj + diag[i] - 1;
1496: nz = diag[i] - ai[i];
1497: s1 = tmp[i];
1498: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1499: }
1501: /* copy tmp into x according to permutation */
1502: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1504: ISRestoreIndices(isrow,&rout);
1505: ISRestoreIndices(iscol,&cout);
1506: VecRestoreArrayRead(bb,&b);
1507: VecRestoreArray(xx,&x);
1509: PetscLogFlops(2.0*a->nz-A->cmap->n);
1510: return(0);
1511: }
1513: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1514: {
1515: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1516: IS iscol = a->col,isrow = a->row;
1517: PetscErrorCode ierr;
1518: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1519: PetscInt i,n = A->rmap->n,j;
1520: PetscInt nz;
1521: PetscScalar *x,*tmp,s1;
1522: const MatScalar *aa = a->a,*v;
1523: const PetscScalar *b;
1526: if (zz != xx) {VecCopy(zz,xx);}
1527: VecGetArrayRead(bb,&b);
1528: VecGetArray(xx,&x);
1529: tmp = a->solve_work;
1531: ISGetIndices(isrow,&rout); r = rout;
1532: ISGetIndices(iscol,&cout); c = cout;
1534: /* copy the b into temp work space according to permutation */
1535: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1537: /* forward solve the U^T */
1538: for (i=0; i<n; i++) {
1539: v = aa + adiag[i+1] + 1;
1540: vi = aj + adiag[i+1] + 1;
1541: nz = adiag[i] - adiag[i+1] - 1;
1542: s1 = tmp[i];
1543: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1544: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1545: tmp[i] = s1;
1546: }
1549: /* backward solve the L^T */
1550: for (i=n-1; i>=0; i--) {
1551: v = aa + ai[i];
1552: vi = aj + ai[i];
1553: nz = ai[i+1] - ai[i];
1554: s1 = tmp[i];
1555: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1556: }
1558: /* copy tmp into x according to permutation */
1559: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1561: ISRestoreIndices(isrow,&rout);
1562: ISRestoreIndices(iscol,&cout);
1563: VecRestoreArrayRead(bb,&b);
1564: VecRestoreArray(xx,&x);
1566: PetscLogFlops(2.0*a->nz-A->cmap->n);
1567: return(0);
1568: }
1570: /* ----------------------------------------------------------------*/
1572: /*
1573: ilu() under revised new data structure.
1574: Factored arrays bj and ba are stored as
1575: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1577: bi=fact->i is an array of size n+1, in which
1578: bi+
1579: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1580: bi[n]: points to L(n-1,n-1)+1
1582: bdiag=fact->diag is an array of size n+1,in which
1583: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1584: bdiag[n]: points to entry of U(n-1,0)-1
1586: U(i,:) contains bdiag[i] as its last entry, i.e.,
1587: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1588: */
1589: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1590: {
1591: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1593: const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1594: PetscInt i,j,k=0,nz,*bi,*bj,*bdiag;
1595: IS isicol;
1598: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1599: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);
1600: b = (Mat_SeqAIJ*)(fact)->data;
1602: /* allocate matrix arrays for new data structure */
1603: PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);
1604: PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));
1606: b->singlemalloc = PETSC_TRUE;
1607: if (!b->diag) {
1608: PetscMalloc1(n+1,&b->diag);
1609: PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));
1610: }
1611: bdiag = b->diag;
1613: if (n > 0) {
1614: PetscArrayzero(b->a,ai[n]);
1615: }
1617: /* set bi and bj with new data structure */
1618: bi = b->i;
1619: bj = b->j;
1621: /* L part */
1622: bi[0] = 0;
1623: for (i=0; i<n; i++) {
1624: nz = adiag[i] - ai[i];
1625: bi[i+1] = bi[i] + nz;
1626: aj = a->j + ai[i];
1627: for (j=0; j<nz; j++) {
1628: /* *bj = aj[j]; bj++; */
1629: bj[k++] = aj[j];
1630: }
1631: }
1633: /* U part */
1634: bdiag[n] = bi[n]-1;
1635: for (i=n-1; i>=0; i--) {
1636: nz = ai[i+1] - adiag[i] - 1;
1637: aj = a->j + adiag[i] + 1;
1638: for (j=0; j<nz; j++) {
1639: /* *bj = aj[j]; bj++; */
1640: bj[k++] = aj[j];
1641: }
1642: /* diag[i] */
1643: /* *bj = i; bj++; */
1644: bj[k++] = i;
1645: bdiag[i] = bdiag[i+1] + nz + 1;
1646: }
1648: fact->factortype = MAT_FACTOR_ILU;
1649: fact->info.factor_mallocs = 0;
1650: fact->info.fill_ratio_given = info->fill;
1651: fact->info.fill_ratio_needed = 1.0;
1652: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1653: MatSeqAIJCheckInode_FactorLU(fact);
1655: b = (Mat_SeqAIJ*)(fact)->data;
1656: b->row = isrow;
1657: b->col = iscol;
1658: b->icol = isicol;
1659: PetscMalloc1(fact->rmap->n+1,&b->solve_work);
1660: PetscObjectReference((PetscObject)isrow);
1661: PetscObjectReference((PetscObject)iscol);
1662: return(0);
1663: }
1665: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1666: {
1667: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1668: IS isicol;
1669: PetscErrorCode ierr;
1670: const PetscInt *r,*ic;
1671: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1672: PetscInt *bi,*cols,nnz,*cols_lvl;
1673: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1674: PetscInt i,levels,diagonal_fill;
1675: PetscBool col_identity,row_identity,missing;
1676: PetscReal f;
1677: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1678: PetscBT lnkbt;
1679: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1680: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1681: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1684: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1685: MatMissingDiagonal(A,&missing,&i);
1686: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1688: levels = (PetscInt)info->levels;
1689: ISIdentity(isrow,&row_identity);
1690: ISIdentity(iscol,&col_identity);
1691: if (!levels && row_identity && col_identity) {
1692: /* special case: ilu(0) with natural ordering */
1693: MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);
1694: if (a->inode.size) {
1695: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1696: }
1697: return(0);
1698: }
1700: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1701: ISGetIndices(isrow,&r);
1702: ISGetIndices(isicol,&ic);
1704: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1705: PetscMalloc1(n+1,&bi);
1706: PetscMalloc1(n+1,&bdiag);
1707: bi[0] = bdiag[0] = 0;
1708: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1710: /* create a linked list for storing column indices of the active row */
1711: nlnk = n + 1;
1712: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1714: /* initial FreeSpace size is f*(ai[n]+1) */
1715: f = info->fill;
1716: diagonal_fill = (PetscInt)info->diagonal_fill;
1717: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
1718: current_space = free_space;
1719: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);
1720: current_space_lvl = free_space_lvl;
1721: for (i=0; i<n; i++) {
1722: nzi = 0;
1723: /* copy current row into linked list */
1724: nnz = ai[r[i]+1] - ai[r[i]];
1725: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1726: cols = aj + ai[r[i]];
1727: lnk[i] = -1; /* marker to indicate if diagonal exists */
1728: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1729: nzi += nlnk;
1731: /* make sure diagonal entry is included */
1732: if (diagonal_fill && lnk[i] == -1) {
1733: fm = n;
1734: while (lnk[fm] < i) fm = lnk[fm];
1735: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1736: lnk[fm] = i;
1737: lnk_lvl[i] = 0;
1738: nzi++; dcount++;
1739: }
1741: /* add pivot rows into the active row */
1742: nzbd = 0;
1743: prow = lnk[n];
1744: while (prow < i) {
1745: nnz = bdiag[prow];
1746: cols = bj_ptr[prow] + nnz + 1;
1747: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1748: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1749: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1750: nzi += nlnk;
1751: prow = lnk[prow];
1752: nzbd++;
1753: }
1754: bdiag[i] = nzbd;
1755: bi[i+1] = bi[i] + nzi;
1756: /* if free space is not available, make more free space */
1757: if (current_space->local_remaining<nzi) {
1758: nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */
1759: PetscFreeSpaceGet(nnz,¤t_space);
1760: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1761: reallocs++;
1762: }
1764: /* copy data into free_space and free_space_lvl, then initialize lnk */
1765: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1766: bj_ptr[i] = current_space->array;
1767: bjlvl_ptr[i] = current_space_lvl->array;
1769: /* make sure the active row i has diagonal entry */
1770: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1772: current_space->array += nzi;
1773: current_space->local_used += nzi;
1774: current_space->local_remaining -= nzi;
1775: current_space_lvl->array += nzi;
1776: current_space_lvl->local_used += nzi;
1777: current_space_lvl->local_remaining -= nzi;
1778: }
1780: ISRestoreIndices(isrow,&r);
1781: ISRestoreIndices(isicol,&ic);
1782: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1783: PetscMalloc1(bi[n]+1,&bj);
1784: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
1786: PetscIncompleteLLDestroy(lnk,lnkbt);
1787: PetscFreeSpaceDestroy(free_space_lvl);
1788: PetscFree2(bj_ptr,bjlvl_ptr);
1790: #if defined(PETSC_USE_INFO)
1791: {
1792: PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1793: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
1794: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
1795: PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
1796: PetscInfo(A,"for best performance.\n");
1797: if (diagonal_fill) {
1798: PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);
1799: }
1800: }
1801: #endif
1802: /* put together the new matrix */
1803: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
1804: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
1805: b = (Mat_SeqAIJ*)(fact)->data;
1807: b->free_a = PETSC_TRUE;
1808: b->free_ij = PETSC_TRUE;
1809: b->singlemalloc = PETSC_FALSE;
1811: PetscMalloc1(bdiag[0]+1,&b->a);
1813: b->j = bj;
1814: b->i = bi;
1815: b->diag = bdiag;
1816: b->ilen = NULL;
1817: b->imax = NULL;
1818: b->row = isrow;
1819: b->col = iscol;
1820: PetscObjectReference((PetscObject)isrow);
1821: PetscObjectReference((PetscObject)iscol);
1822: b->icol = isicol;
1824: PetscMalloc1(n+1,&b->solve_work);
1825: /* In b structure: Free imax, ilen, old a, old j.
1826: Allocate bdiag, solve_work, new a, new j */
1827: PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
1828: b->maxnz = b->nz = bdiag[0]+1;
1830: (fact)->info.factor_mallocs = reallocs;
1831: (fact)->info.fill_ratio_given = f;
1832: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1833: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1834: if (a->inode.size) {
1835: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1836: }
1837: MatSeqAIJCheckInode_FactorLU(fact);
1838: return(0);
1839: }
1841: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1842: {
1843: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1844: IS isicol;
1845: PetscErrorCode ierr;
1846: const PetscInt *r,*ic;
1847: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1848: PetscInt *bi,*cols,nnz,*cols_lvl;
1849: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1850: PetscInt i,levels,diagonal_fill;
1851: PetscBool col_identity,row_identity;
1852: PetscReal f;
1853: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1854: PetscBT lnkbt;
1855: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1856: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1857: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1858: PetscBool missing;
1861: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1862: MatMissingDiagonal(A,&missing,&i);
1863: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1865: f = info->fill;
1866: levels = (PetscInt)info->levels;
1867: diagonal_fill = (PetscInt)info->diagonal_fill;
1869: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1871: ISIdentity(isrow,&row_identity);
1872: ISIdentity(iscol,&col_identity);
1873: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1874: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);
1876: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1877: if (a->inode.size) {
1878: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1879: }
1880: fact->factortype = MAT_FACTOR_ILU;
1881: (fact)->info.factor_mallocs = 0;
1882: (fact)->info.fill_ratio_given = info->fill;
1883: (fact)->info.fill_ratio_needed = 1.0;
1885: b = (Mat_SeqAIJ*)(fact)->data;
1886: b->row = isrow;
1887: b->col = iscol;
1888: b->icol = isicol;
1889: PetscMalloc1((fact)->rmap->n+1,&b->solve_work);
1890: PetscObjectReference((PetscObject)isrow);
1891: PetscObjectReference((PetscObject)iscol);
1892: return(0);
1893: }
1895: ISGetIndices(isrow,&r);
1896: ISGetIndices(isicol,&ic);
1898: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1899: PetscMalloc1(n+1,&bi);
1900: PetscMalloc1(n+1,&bdiag);
1901: bi[0] = bdiag[0] = 0;
1903: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1905: /* create a linked list for storing column indices of the active row */
1906: nlnk = n + 1;
1907: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1909: /* initial FreeSpace size is f*(ai[n]+1) */
1910: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
1911: current_space = free_space;
1912: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);
1913: current_space_lvl = free_space_lvl;
1915: for (i=0; i<n; i++) {
1916: nzi = 0;
1917: /* copy current row into linked list */
1918: nnz = ai[r[i]+1] - ai[r[i]];
1919: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1920: cols = aj + ai[r[i]];
1921: lnk[i] = -1; /* marker to indicate if diagonal exists */
1922: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1923: nzi += nlnk;
1925: /* make sure diagonal entry is included */
1926: if (diagonal_fill && lnk[i] == -1) {
1927: fm = n;
1928: while (lnk[fm] < i) fm = lnk[fm];
1929: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1930: lnk[fm] = i;
1931: lnk_lvl[i] = 0;
1932: nzi++; dcount++;
1933: }
1935: /* add pivot rows into the active row */
1936: nzbd = 0;
1937: prow = lnk[n];
1938: while (prow < i) {
1939: nnz = bdiag[prow];
1940: cols = bj_ptr[prow] + nnz + 1;
1941: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1942: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1943: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1944: nzi += nlnk;
1945: prow = lnk[prow];
1946: nzbd++;
1947: }
1948: bdiag[i] = nzbd;
1949: bi[i+1] = bi[i] + nzi;
1951: /* if free space is not available, make more free space */
1952: if (current_space->local_remaining<nzi) {
1953: nnz = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */
1954: PetscFreeSpaceGet(nnz,¤t_space);
1955: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1956: reallocs++;
1957: }
1959: /* copy data into free_space and free_space_lvl, then initialize lnk */
1960: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1961: bj_ptr[i] = current_space->array;
1962: bjlvl_ptr[i] = current_space_lvl->array;
1964: /* make sure the active row i has diagonal entry */
1965: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1967: current_space->array += nzi;
1968: current_space->local_used += nzi;
1969: current_space->local_remaining -= nzi;
1970: current_space_lvl->array += nzi;
1971: current_space_lvl->local_used += nzi;
1972: current_space_lvl->local_remaining -= nzi;
1973: }
1975: ISRestoreIndices(isrow,&r);
1976: ISRestoreIndices(isicol,&ic);
1978: /* destroy list of free space and other temporary arrays */
1979: PetscMalloc1(bi[n]+1,&bj);
1980: PetscFreeSpaceContiguous(&free_space,bj); /* copy free_space -> bj */
1981: PetscIncompleteLLDestroy(lnk,lnkbt);
1982: PetscFreeSpaceDestroy(free_space_lvl);
1983: PetscFree2(bj_ptr,bjlvl_ptr);
1985: #if defined(PETSC_USE_INFO)
1986: {
1987: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1988: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
1989: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
1990: PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
1991: PetscInfo(A,"for best performance.\n");
1992: if (diagonal_fill) {
1993: PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);
1994: }
1995: }
1996: #endif
1998: /* put together the new matrix */
1999: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
2000: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
2001: b = (Mat_SeqAIJ*)(fact)->data;
2003: b->free_a = PETSC_TRUE;
2004: b->free_ij = PETSC_TRUE;
2005: b->singlemalloc = PETSC_FALSE;
2007: PetscMalloc1(bi[n],&b->a);
2008: b->j = bj;
2009: b->i = bi;
2010: for (i=0; i<n; i++) bdiag[i] += bi[i];
2011: b->diag = bdiag;
2012: b->ilen = NULL;
2013: b->imax = NULL;
2014: b->row = isrow;
2015: b->col = iscol;
2016: PetscObjectReference((PetscObject)isrow);
2017: PetscObjectReference((PetscObject)iscol);
2018: b->icol = isicol;
2019: PetscMalloc1(n+1,&b->solve_work);
2020: /* In b structure: Free imax, ilen, old a, old j.
2021: Allocate bdiag, solve_work, new a, new j */
2022: PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
2023: b->maxnz = b->nz = bi[n];
2025: (fact)->info.factor_mallocs = reallocs;
2026: (fact)->info.fill_ratio_given = f;
2027: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2028: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2029: if (a->inode.size) {
2030: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2031: }
2032: return(0);
2033: }
2035: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2036: {
2037: Mat C = B;
2038: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2039: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2040: IS ip=b->row,iip = b->icol;
2042: const PetscInt *rip,*riip;
2043: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2044: PetscInt *ai=a->i,*aj=a->j;
2045: PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2046: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2047: PetscBool perm_identity;
2048: FactorShiftCtx sctx;
2049: PetscReal rs;
2050: MatScalar d,*v;
2053: /* MatPivotSetUp(): initialize shift context sctx */
2054: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2056: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2057: sctx.shift_top = info->zeropivot;
2058: for (i=0; i<mbs; i++) {
2059: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2060: d = (aa)[a->diag[i]];
2061: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2062: v = aa+ai[i];
2063: nz = ai[i+1] - ai[i];
2064: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2065: if (rs>sctx.shift_top) sctx.shift_top = rs;
2066: }
2067: sctx.shift_top *= 1.1;
2068: sctx.nshift_max = 5;
2069: sctx.shift_lo = 0.;
2070: sctx.shift_hi = 1.;
2071: }
2073: ISGetIndices(ip,&rip);
2074: ISGetIndices(iip,&riip);
2076: /* allocate working arrays
2077: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2078: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2079: */
2080: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);
2082: do {
2083: sctx.newshift = PETSC_FALSE;
2085: for (i=0; i<mbs; i++) c2r[i] = mbs;
2086: if (mbs) il[0] = 0;
2088: for (k = 0; k<mbs; k++) {
2089: /* zero rtmp */
2090: nz = bi[k+1] - bi[k];
2091: bjtmp = bj + bi[k];
2092: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2094: /* load in initial unfactored row */
2095: bval = ba + bi[k];
2096: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2097: for (j = jmin; j < jmax; j++) {
2098: col = riip[aj[j]];
2099: if (col >= k) { /* only take upper triangular entry */
2100: rtmp[col] = aa[j];
2101: *bval++ = 0.0; /* for in-place factorization */
2102: }
2103: }
2104: /* shift the diagonal of the matrix: ZeropivotApply() */
2105: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2107: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2108: dk = rtmp[k];
2109: i = c2r[k]; /* first row to be added to k_th row */
2111: while (i < k) {
2112: nexti = c2r[i]; /* next row to be added to k_th row */
2114: /* compute multiplier, update diag(k) and U(i,k) */
2115: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2116: uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2117: dk += uikdi*ba[ili]; /* update diag[k] */
2118: ba[ili] = uikdi; /* -U(i,k) */
2120: /* add multiple of row i to k-th row */
2121: jmin = ili + 1; jmax = bi[i+1];
2122: if (jmin < jmax) {
2123: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2124: /* update il and c2r for row i */
2125: il[i] = jmin;
2126: j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2127: }
2128: i = nexti;
2129: }
2131: /* copy data into U(k,:) */
2132: rs = 0.0;
2133: jmin = bi[k]; jmax = bi[k+1]-1;
2134: if (jmin < jmax) {
2135: for (j=jmin; j<jmax; j++) {
2136: col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2137: }
2138: /* add the k-th row into il and c2r */
2139: il[k] = jmin;
2140: i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2141: }
2143: /* MatPivotCheck() */
2144: sctx.rs = rs;
2145: sctx.pv = dk;
2146: MatPivotCheck(B,A,info,&sctx,i);
2147: if (sctx.newshift) break;
2148: dk = sctx.pv;
2150: ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2151: }
2152: } while (sctx.newshift);
2154: PetscFree3(rtmp,il,c2r);
2155: ISRestoreIndices(ip,&rip);
2156: ISRestoreIndices(iip,&riip);
2158: ISIdentity(ip,&perm_identity);
2159: if (perm_identity) {
2160: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2161: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2162: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2163: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2164: } else {
2165: B->ops->solve = MatSolve_SeqSBAIJ_1;
2166: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2167: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2168: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2169: }
2171: C->assembled = PETSC_TRUE;
2172: C->preallocated = PETSC_TRUE;
2174: PetscLogFlops(C->rmap->n);
2176: /* MatPivotView() */
2177: if (sctx.nshift) {
2178: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2179: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
2180: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2181: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2182: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2183: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
2184: }
2185: }
2186: return(0);
2187: }
2189: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2190: {
2191: Mat C = B;
2192: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2193: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2194: IS ip=b->row,iip = b->icol;
2196: const PetscInt *rip,*riip;
2197: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2198: PetscInt *ai=a->i,*aj=a->j;
2199: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2200: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2201: PetscBool perm_identity;
2202: FactorShiftCtx sctx;
2203: PetscReal rs;
2204: MatScalar d,*v;
2207: /* MatPivotSetUp(): initialize shift context sctx */
2208: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2210: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2211: sctx.shift_top = info->zeropivot;
2212: for (i=0; i<mbs; i++) {
2213: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2214: d = (aa)[a->diag[i]];
2215: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2216: v = aa+ai[i];
2217: nz = ai[i+1] - ai[i];
2218: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2219: if (rs>sctx.shift_top) sctx.shift_top = rs;
2220: }
2221: sctx.shift_top *= 1.1;
2222: sctx.nshift_max = 5;
2223: sctx.shift_lo = 0.;
2224: sctx.shift_hi = 1.;
2225: }
2227: ISGetIndices(ip,&rip);
2228: ISGetIndices(iip,&riip);
2230: /* initialization */
2231: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);
2233: do {
2234: sctx.newshift = PETSC_FALSE;
2236: for (i=0; i<mbs; i++) jl[i] = mbs;
2237: il[0] = 0;
2239: for (k = 0; k<mbs; k++) {
2240: /* zero rtmp */
2241: nz = bi[k+1] - bi[k];
2242: bjtmp = bj + bi[k];
2243: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2245: bval = ba + bi[k];
2246: /* initialize k-th row by the perm[k]-th row of A */
2247: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2248: for (j = jmin; j < jmax; j++) {
2249: col = riip[aj[j]];
2250: if (col >= k) { /* only take upper triangular entry */
2251: rtmp[col] = aa[j];
2252: *bval++ = 0.0; /* for in-place factorization */
2253: }
2254: }
2255: /* shift the diagonal of the matrix */
2256: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2258: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2259: dk = rtmp[k];
2260: i = jl[k]; /* first row to be added to k_th row */
2262: while (i < k) {
2263: nexti = jl[i]; /* next row to be added to k_th row */
2265: /* compute multiplier, update diag(k) and U(i,k) */
2266: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2267: uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2268: dk += uikdi*ba[ili];
2269: ba[ili] = uikdi; /* -U(i,k) */
2271: /* add multiple of row i to k-th row */
2272: jmin = ili + 1; jmax = bi[i+1];
2273: if (jmin < jmax) {
2274: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2275: /* update il and jl for row i */
2276: il[i] = jmin;
2277: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2278: }
2279: i = nexti;
2280: }
2282: /* shift the diagonals when zero pivot is detected */
2283: /* compute rs=sum of abs(off-diagonal) */
2284: rs = 0.0;
2285: jmin = bi[k]+1;
2286: nz = bi[k+1] - jmin;
2287: bcol = bj + jmin;
2288: for (j=0; j<nz; j++) {
2289: rs += PetscAbsScalar(rtmp[bcol[j]]);
2290: }
2292: sctx.rs = rs;
2293: sctx.pv = dk;
2294: MatPivotCheck(B,A,info,&sctx,k);
2295: if (sctx.newshift) break;
2296: dk = sctx.pv;
2298: /* copy data into U(k,:) */
2299: ba[bi[k]] = 1.0/dk; /* U(k,k) */
2300: jmin = bi[k]+1; jmax = bi[k+1];
2301: if (jmin < jmax) {
2302: for (j=jmin; j<jmax; j++) {
2303: col = bj[j]; ba[j] = rtmp[col];
2304: }
2305: /* add the k-th row into il and jl */
2306: il[k] = jmin;
2307: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2308: }
2309: }
2310: } while (sctx.newshift);
2312: PetscFree3(rtmp,il,jl);
2313: ISRestoreIndices(ip,&rip);
2314: ISRestoreIndices(iip,&riip);
2316: ISIdentity(ip,&perm_identity);
2317: if (perm_identity) {
2318: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2319: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2320: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2321: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2322: } else {
2323: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2324: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2325: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2326: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2327: }
2329: C->assembled = PETSC_TRUE;
2330: C->preallocated = PETSC_TRUE;
2332: PetscLogFlops(C->rmap->n);
2333: if (sctx.nshift) {
2334: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2335: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2336: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2337: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2338: }
2339: }
2340: return(0);
2341: }
2343: /*
2344: icc() under revised new data structure.
2345: Factored arrays bj and ba are stored as
2346: U(0,:),...,U(i,:),U(n-1,:)
2348: ui=fact->i is an array of size n+1, in which
2349: ui+
2350: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2351: ui[n]: points to U(n-1,n-1)+1
2353: udiag=fact->diag is an array of size n,in which
2354: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2356: U(i,:) contains udiag[i] as its last entry, i.e.,
2357: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2358: */
2360: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2361: {
2362: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2363: Mat_SeqSBAIJ *b;
2364: PetscErrorCode ierr;
2365: PetscBool perm_identity,missing;
2366: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2367: const PetscInt *rip,*riip;
2368: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2369: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2370: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2371: PetscReal fill =info->fill,levels=info->levels;
2372: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2373: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2374: PetscBT lnkbt;
2375: IS iperm;
2378: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2379: MatMissingDiagonal(A,&missing,&d);
2380: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2381: ISIdentity(perm,&perm_identity);
2382: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2384: PetscMalloc1(am+1,&ui);
2385: PetscMalloc1(am+1,&udiag);
2386: ui[0] = 0;
2388: /* ICC(0) without matrix ordering: simply rearrange column indices */
2389: if (!levels && perm_identity) {
2390: for (i=0; i<am; i++) {
2391: ncols = ai[i+1] - a->diag[i];
2392: ui[i+1] = ui[i] + ncols;
2393: udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2394: }
2395: PetscMalloc1(ui[am]+1,&uj);
2396: cols = uj;
2397: for (i=0; i<am; i++) {
2398: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2399: ncols = ai[i+1] - a->diag[i] -1;
2400: for (j=0; j<ncols; j++) *cols++ = aj[j];
2401: *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2402: }
2403: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2404: ISGetIndices(iperm,&riip);
2405: ISGetIndices(perm,&rip);
2407: /* initialization */
2408: PetscMalloc1(am+1,&ajtmp);
2410: /* jl: linked list for storing indices of the pivot rows
2411: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2412: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2413: for (i=0; i<am; i++) {
2414: jl[i] = am; il[i] = 0;
2415: }
2417: /* create and initialize a linked list for storing column indices of the active row k */
2418: nlnk = am + 1;
2419: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2421: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2422: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);
2423: current_space = free_space;
2424: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);
2425: current_space_lvl = free_space_lvl;
2427: for (k=0; k<am; k++) { /* for each active row k */
2428: /* initialize lnk by the column indices of row rip[k] of A */
2429: nzk = 0;
2430: ncols = ai[rip[k]+1] - ai[rip[k]];
2431: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2432: ncols_upper = 0;
2433: for (j=0; j<ncols; j++) {
2434: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2435: if (riip[i] >= k) { /* only take upper triangular entry */
2436: ajtmp[ncols_upper] = i;
2437: ncols_upper++;
2438: }
2439: }
2440: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2441: nzk += nlnk;
2443: /* update lnk by computing fill-in for each pivot row to be merged in */
2444: prow = jl[k]; /* 1st pivot row */
2446: while (prow < k) {
2447: nextprow = jl[prow];
2449: /* merge prow into k-th row */
2450: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2451: jmax = ui[prow+1];
2452: ncols = jmax-jmin;
2453: i = jmin - ui[prow];
2454: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2455: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2456: j = *(uj - 1);
2457: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2458: nzk += nlnk;
2460: /* update il and jl for prow */
2461: if (jmin < jmax) {
2462: il[prow] = jmin;
2463: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2464: }
2465: prow = nextprow;
2466: }
2468: /* if free space is not available, make more free space */
2469: if (current_space->local_remaining<nzk) {
2470: i = am - k + 1; /* num of unfactored rows */
2471: i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2472: PetscFreeSpaceGet(i,¤t_space);
2473: PetscFreeSpaceGet(i,¤t_space_lvl);
2474: reallocs++;
2475: }
2477: /* copy data into free_space and free_space_lvl, then initialize lnk */
2478: if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2479: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2481: /* add the k-th row into il and jl */
2482: if (nzk > 1) {
2483: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2484: jl[k] = jl[i]; jl[i] = k;
2485: il[k] = ui[k] + 1;
2486: }
2487: uj_ptr[k] = current_space->array;
2488: uj_lvl_ptr[k] = current_space_lvl->array;
2490: current_space->array += nzk;
2491: current_space->local_used += nzk;
2492: current_space->local_remaining -= nzk;
2494: current_space_lvl->array += nzk;
2495: current_space_lvl->local_used += nzk;
2496: current_space_lvl->local_remaining -= nzk;
2498: ui[k+1] = ui[k] + nzk;
2499: }
2501: ISRestoreIndices(perm,&rip);
2502: ISRestoreIndices(iperm,&riip);
2503: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2504: PetscFree(ajtmp);
2506: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2507: PetscMalloc1(ui[am]+1,&uj);
2508: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2509: PetscIncompleteLLDestroy(lnk,lnkbt);
2510: PetscFreeSpaceDestroy(free_space_lvl);
2512: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2514: /* put together the new matrix in MATSEQSBAIJ format */
2515: b = (Mat_SeqSBAIJ*)(fact)->data;
2516: b->singlemalloc = PETSC_FALSE;
2518: PetscMalloc1(ui[am]+1,&b->a);
2520: b->j = uj;
2521: b->i = ui;
2522: b->diag = udiag;
2523: b->free_diag = PETSC_TRUE;
2524: b->ilen = NULL;
2525: b->imax = NULL;
2526: b->row = perm;
2527: b->col = perm;
2528: PetscObjectReference((PetscObject)perm);
2529: PetscObjectReference((PetscObject)perm);
2530: b->icol = iperm;
2531: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2533: PetscMalloc1(am+1,&b->solve_work);
2534: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2536: b->maxnz = b->nz = ui[am];
2537: b->free_a = PETSC_TRUE;
2538: b->free_ij = PETSC_TRUE;
2540: fact->info.factor_mallocs = reallocs;
2541: fact->info.fill_ratio_given = fill;
2542: if (ai[am] != 0) {
2543: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2544: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2545: } else {
2546: fact->info.fill_ratio_needed = 0.0;
2547: }
2548: #if defined(PETSC_USE_INFO)
2549: if (ai[am] != 0) {
2550: PetscReal af = fact->info.fill_ratio_needed;
2551: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2552: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2553: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2554: } else {
2555: PetscInfo(A,"Empty matrix\n");
2556: }
2557: #endif
2558: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2559: return(0);
2560: }
2562: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2563: {
2564: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2565: Mat_SeqSBAIJ *b;
2566: PetscErrorCode ierr;
2567: PetscBool perm_identity,missing;
2568: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2569: const PetscInt *rip,*riip;
2570: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2571: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2572: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2573: PetscReal fill =info->fill,levels=info->levels;
2574: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2575: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2576: PetscBT lnkbt;
2577: IS iperm;
2580: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2581: MatMissingDiagonal(A,&missing,&d);
2582: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2583: ISIdentity(perm,&perm_identity);
2584: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2586: PetscMalloc1(am+1,&ui);
2587: PetscMalloc1(am+1,&udiag);
2588: ui[0] = 0;
2590: /* ICC(0) without matrix ordering: simply copies fill pattern */
2591: if (!levels && perm_identity) {
2593: for (i=0; i<am; i++) {
2594: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
2595: udiag[i] = ui[i];
2596: }
2597: PetscMalloc1(ui[am]+1,&uj);
2598: cols = uj;
2599: for (i=0; i<am; i++) {
2600: aj = a->j + a->diag[i];
2601: ncols = ui[i+1] - ui[i];
2602: for (j=0; j<ncols; j++) *cols++ = *aj++;
2603: }
2604: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2605: ISGetIndices(iperm,&riip);
2606: ISGetIndices(perm,&rip);
2608: /* initialization */
2609: PetscMalloc1(am+1,&ajtmp);
2611: /* jl: linked list for storing indices of the pivot rows
2612: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2613: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2614: for (i=0; i<am; i++) {
2615: jl[i] = am; il[i] = 0;
2616: }
2618: /* create and initialize a linked list for storing column indices of the active row k */
2619: nlnk = am + 1;
2620: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2622: /* initial FreeSpace size is fill*(ai[am]+1) */
2623: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);
2624: current_space = free_space;
2625: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);
2626: current_space_lvl = free_space_lvl;
2628: for (k=0; k<am; k++) { /* for each active row k */
2629: /* initialize lnk by the column indices of row rip[k] of A */
2630: nzk = 0;
2631: ncols = ai[rip[k]+1] - ai[rip[k]];
2632: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2633: ncols_upper = 0;
2634: for (j=0; j<ncols; j++) {
2635: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2636: if (riip[i] >= k) { /* only take upper triangular entry */
2637: ajtmp[ncols_upper] = i;
2638: ncols_upper++;
2639: }
2640: }
2641: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2642: nzk += nlnk;
2644: /* update lnk by computing fill-in for each pivot row to be merged in */
2645: prow = jl[k]; /* 1st pivot row */
2647: while (prow < k) {
2648: nextprow = jl[prow];
2650: /* merge prow into k-th row */
2651: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2652: jmax = ui[prow+1];
2653: ncols = jmax-jmin;
2654: i = jmin - ui[prow];
2655: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2656: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2657: j = *(uj - 1);
2658: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2659: nzk += nlnk;
2661: /* update il and jl for prow */
2662: if (jmin < jmax) {
2663: il[prow] = jmin;
2664: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2665: }
2666: prow = nextprow;
2667: }
2669: /* if free space is not available, make more free space */
2670: if (current_space->local_remaining<nzk) {
2671: i = am - k + 1; /* num of unfactored rows */
2672: i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2673: PetscFreeSpaceGet(i,¤t_space);
2674: PetscFreeSpaceGet(i,¤t_space_lvl);
2675: reallocs++;
2676: }
2678: /* copy data into free_space and free_space_lvl, then initialize lnk */
2679: if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2680: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2682: /* add the k-th row into il and jl */
2683: if (nzk > 1) {
2684: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2685: jl[k] = jl[i]; jl[i] = k;
2686: il[k] = ui[k] + 1;
2687: }
2688: uj_ptr[k] = current_space->array;
2689: uj_lvl_ptr[k] = current_space_lvl->array;
2691: current_space->array += nzk;
2692: current_space->local_used += nzk;
2693: current_space->local_remaining -= nzk;
2695: current_space_lvl->array += nzk;
2696: current_space_lvl->local_used += nzk;
2697: current_space_lvl->local_remaining -= nzk;
2699: ui[k+1] = ui[k] + nzk;
2700: }
2702: #if defined(PETSC_USE_INFO)
2703: if (ai[am] != 0) {
2704: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2705: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2706: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2707: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2708: } else {
2709: PetscInfo(A,"Empty matrix\n");
2710: }
2711: #endif
2713: ISRestoreIndices(perm,&rip);
2714: ISRestoreIndices(iperm,&riip);
2715: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2716: PetscFree(ajtmp);
2718: /* destroy list of free space and other temporary array(s) */
2719: PetscMalloc1(ui[am]+1,&uj);
2720: PetscFreeSpaceContiguous(&free_space,uj);
2721: PetscIncompleteLLDestroy(lnk,lnkbt);
2722: PetscFreeSpaceDestroy(free_space_lvl);
2724: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2726: /* put together the new matrix in MATSEQSBAIJ format */
2728: b = (Mat_SeqSBAIJ*)fact->data;
2729: b->singlemalloc = PETSC_FALSE;
2731: PetscMalloc1(ui[am]+1,&b->a);
2733: b->j = uj;
2734: b->i = ui;
2735: b->diag = udiag;
2736: b->free_diag = PETSC_TRUE;
2737: b->ilen = NULL;
2738: b->imax = NULL;
2739: b->row = perm;
2740: b->col = perm;
2742: PetscObjectReference((PetscObject)perm);
2743: PetscObjectReference((PetscObject)perm);
2745: b->icol = iperm;
2746: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2747: PetscMalloc1(am+1,&b->solve_work);
2748: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
2749: b->maxnz = b->nz = ui[am];
2750: b->free_a = PETSC_TRUE;
2751: b->free_ij = PETSC_TRUE;
2753: fact->info.factor_mallocs = reallocs;
2754: fact->info.fill_ratio_given = fill;
2755: if (ai[am] != 0) {
2756: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2757: } else {
2758: fact->info.fill_ratio_needed = 0.0;
2759: }
2760: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2761: return(0);
2762: }
2764: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2765: {
2766: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2767: Mat_SeqSBAIJ *b;
2768: PetscErrorCode ierr;
2769: PetscBool perm_identity,missing;
2770: PetscReal fill = info->fill;
2771: const PetscInt *rip,*riip;
2772: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2773: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2774: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2775: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2776: PetscBT lnkbt;
2777: IS iperm;
2780: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2781: MatMissingDiagonal(A,&missing,&i);
2782: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2784: /* check whether perm is the identity mapping */
2785: ISIdentity(perm,&perm_identity);
2786: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2787: ISGetIndices(iperm,&riip);
2788: ISGetIndices(perm,&rip);
2790: /* initialization */
2791: PetscMalloc1(am+1,&ui);
2792: PetscMalloc1(am+1,&udiag);
2793: ui[0] = 0;
2795: /* jl: linked list for storing indices of the pivot rows
2796: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2797: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2798: for (i=0; i<am; i++) {
2799: jl[i] = am; il[i] = 0;
2800: }
2802: /* create and initialize a linked list for storing column indices of the active row k */
2803: nlnk = am + 1;
2804: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2806: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2807: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);
2808: current_space = free_space;
2810: for (k=0; k<am; k++) { /* for each active row k */
2811: /* initialize lnk by the column indices of row rip[k] of A */
2812: nzk = 0;
2813: ncols = ai[rip[k]+1] - ai[rip[k]];
2814: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2815: ncols_upper = 0;
2816: for (j=0; j<ncols; j++) {
2817: i = riip[*(aj + ai[rip[k]] + j)];
2818: if (i >= k) { /* only take upper triangular entry */
2819: cols[ncols_upper] = i;
2820: ncols_upper++;
2821: }
2822: }
2823: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2824: nzk += nlnk;
2826: /* update lnk by computing fill-in for each pivot row to be merged in */
2827: prow = jl[k]; /* 1st pivot row */
2829: while (prow < k) {
2830: nextprow = jl[prow];
2831: /* merge prow into k-th row */
2832: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2833: jmax = ui[prow+1];
2834: ncols = jmax-jmin;
2835: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2836: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
2837: nzk += nlnk;
2839: /* update il and jl for prow */
2840: if (jmin < jmax) {
2841: il[prow] = jmin;
2842: j = *uj_ptr;
2843: jl[prow] = jl[j];
2844: jl[j] = prow;
2845: }
2846: prow = nextprow;
2847: }
2849: /* if free space is not available, make more free space */
2850: if (current_space->local_remaining<nzk) {
2851: i = am - k + 1; /* num of unfactored rows */
2852: i = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2853: PetscFreeSpaceGet(i,¤t_space);
2854: reallocs++;
2855: }
2857: /* copy data into free space, then initialize lnk */
2858: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
2860: /* add the k-th row into il and jl */
2861: if (nzk > 1) {
2862: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2863: jl[k] = jl[i]; jl[i] = k;
2864: il[k] = ui[k] + 1;
2865: }
2866: ui_ptr[k] = current_space->array;
2868: current_space->array += nzk;
2869: current_space->local_used += nzk;
2870: current_space->local_remaining -= nzk;
2872: ui[k+1] = ui[k] + nzk;
2873: }
2875: ISRestoreIndices(perm,&rip);
2876: ISRestoreIndices(iperm,&riip);
2877: PetscFree4(ui_ptr,jl,il,cols);
2879: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2880: PetscMalloc1(ui[am]+1,&uj);
2881: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2882: PetscLLDestroy(lnk,lnkbt);
2884: /* put together the new matrix in MATSEQSBAIJ format */
2886: b = (Mat_SeqSBAIJ*)fact->data;
2887: b->singlemalloc = PETSC_FALSE;
2888: b->free_a = PETSC_TRUE;
2889: b->free_ij = PETSC_TRUE;
2891: PetscMalloc1(ui[am]+1,&b->a);
2893: b->j = uj;
2894: b->i = ui;
2895: b->diag = udiag;
2896: b->free_diag = PETSC_TRUE;
2897: b->ilen = NULL;
2898: b->imax = NULL;
2899: b->row = perm;
2900: b->col = perm;
2902: PetscObjectReference((PetscObject)perm);
2903: PetscObjectReference((PetscObject)perm);
2905: b->icol = iperm;
2906: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2908: PetscMalloc1(am+1,&b->solve_work);
2909: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2911: b->maxnz = b->nz = ui[am];
2913: fact->info.factor_mallocs = reallocs;
2914: fact->info.fill_ratio_given = fill;
2915: if (ai[am] != 0) {
2916: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2917: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2918: } else {
2919: fact->info.fill_ratio_needed = 0.0;
2920: }
2921: #if defined(PETSC_USE_INFO)
2922: if (ai[am] != 0) {
2923: PetscReal af = fact->info.fill_ratio_needed;
2924: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2925: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2926: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2927: } else {
2928: PetscInfo(A,"Empty matrix\n");
2929: }
2930: #endif
2931: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2932: return(0);
2933: }
2935: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2936: {
2937: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2938: Mat_SeqSBAIJ *b;
2939: PetscErrorCode ierr;
2940: PetscBool perm_identity,missing;
2941: PetscReal fill = info->fill;
2942: const PetscInt *rip,*riip;
2943: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2944: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2945: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2946: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2947: PetscBT lnkbt;
2948: IS iperm;
2951: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2952: MatMissingDiagonal(A,&missing,&i);
2953: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2955: /* check whether perm is the identity mapping */
2956: ISIdentity(perm,&perm_identity);
2957: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2958: ISGetIndices(iperm,&riip);
2959: ISGetIndices(perm,&rip);
2961: /* initialization */
2962: PetscMalloc1(am+1,&ui);
2963: ui[0] = 0;
2965: /* jl: linked list for storing indices of the pivot rows
2966: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2967: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2968: for (i=0; i<am; i++) {
2969: jl[i] = am; il[i] = 0;
2970: }
2972: /* create and initialize a linked list for storing column indices of the active row k */
2973: nlnk = am + 1;
2974: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2976: /* initial FreeSpace size is fill*(ai[am]+1) */
2977: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);
2978: current_space = free_space;
2980: for (k=0; k<am; k++) { /* for each active row k */
2981: /* initialize lnk by the column indices of row rip[k] of A */
2982: nzk = 0;
2983: ncols = ai[rip[k]+1] - ai[rip[k]];
2984: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2985: ncols_upper = 0;
2986: for (j=0; j<ncols; j++) {
2987: i = riip[*(aj + ai[rip[k]] + j)];
2988: if (i >= k) { /* only take upper triangular entry */
2989: cols[ncols_upper] = i;
2990: ncols_upper++;
2991: }
2992: }
2993: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2994: nzk += nlnk;
2996: /* update lnk by computing fill-in for each pivot row to be merged in */
2997: prow = jl[k]; /* 1st pivot row */
2999: while (prow < k) {
3000: nextprow = jl[prow];
3001: /* merge prow into k-th row */
3002: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3003: jmax = ui[prow+1];
3004: ncols = jmax-jmin;
3005: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3006: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
3007: nzk += nlnk;
3009: /* update il and jl for prow */
3010: if (jmin < jmax) {
3011: il[prow] = jmin;
3012: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3013: }
3014: prow = nextprow;
3015: }
3017: /* if free space is not available, make more free space */
3018: if (current_space->local_remaining<nzk) {
3019: i = am - k + 1; /* num of unfactored rows */
3020: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3021: PetscFreeSpaceGet(i,¤t_space);
3022: reallocs++;
3023: }
3025: /* copy data into free space, then initialize lnk */
3026: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
3028: /* add the k-th row into il and jl */
3029: if (nzk-1 > 0) {
3030: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3031: jl[k] = jl[i]; jl[i] = k;
3032: il[k] = ui[k] + 1;
3033: }
3034: ui_ptr[k] = current_space->array;
3036: current_space->array += nzk;
3037: current_space->local_used += nzk;
3038: current_space->local_remaining -= nzk;
3040: ui[k+1] = ui[k] + nzk;
3041: }
3043: #if defined(PETSC_USE_INFO)
3044: if (ai[am] != 0) {
3045: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3046: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
3047: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
3048: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
3049: } else {
3050: PetscInfo(A,"Empty matrix\n");
3051: }
3052: #endif
3054: ISRestoreIndices(perm,&rip);
3055: ISRestoreIndices(iperm,&riip);
3056: PetscFree4(ui_ptr,jl,il,cols);
3058: /* destroy list of free space and other temporary array(s) */
3059: PetscMalloc1(ui[am]+1,&uj);
3060: PetscFreeSpaceContiguous(&free_space,uj);
3061: PetscLLDestroy(lnk,lnkbt);
3063: /* put together the new matrix in MATSEQSBAIJ format */
3065: b = (Mat_SeqSBAIJ*)fact->data;
3066: b->singlemalloc = PETSC_FALSE;
3067: b->free_a = PETSC_TRUE;
3068: b->free_ij = PETSC_TRUE;
3070: PetscMalloc1(ui[am]+1,&b->a);
3072: b->j = uj;
3073: b->i = ui;
3074: b->diag = NULL;
3075: b->ilen = NULL;
3076: b->imax = NULL;
3077: b->row = perm;
3078: b->col = perm;
3080: PetscObjectReference((PetscObject)perm);
3081: PetscObjectReference((PetscObject)perm);
3083: b->icol = iperm;
3084: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3086: PetscMalloc1(am+1,&b->solve_work);
3087: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
3088: b->maxnz = b->nz = ui[am];
3090: fact->info.factor_mallocs = reallocs;
3091: fact->info.fill_ratio_given = fill;
3092: if (ai[am] != 0) {
3093: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3094: } else {
3095: fact->info.fill_ratio_needed = 0.0;
3096: }
3097: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3098: return(0);
3099: }
3101: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3102: {
3103: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3104: PetscErrorCode ierr;
3105: PetscInt n = A->rmap->n;
3106: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3107: PetscScalar *x,sum;
3108: const PetscScalar *b;
3109: const MatScalar *aa = a->a,*v;
3110: PetscInt i,nz;
3113: if (!n) return(0);
3115: VecGetArrayRead(bb,&b);
3116: VecGetArrayWrite(xx,&x);
3118: /* forward solve the lower triangular */
3119: x[0] = b[0];
3120: v = aa;
3121: vi = aj;
3122: for (i=1; i<n; i++) {
3123: nz = ai[i+1] - ai[i];
3124: sum = b[i];
3125: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3126: v += nz;
3127: vi += nz;
3128: x[i] = sum;
3129: }
3131: /* backward solve the upper triangular */
3132: for (i=n-1; i>=0; i--) {
3133: v = aa + adiag[i+1] + 1;
3134: vi = aj + adiag[i+1] + 1;
3135: nz = adiag[i] - adiag[i+1]-1;
3136: sum = x[i];
3137: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3138: x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3139: }
3141: PetscLogFlops(2.0*a->nz - A->cmap->n);
3142: VecRestoreArrayRead(bb,&b);
3143: VecRestoreArrayWrite(xx,&x);
3144: return(0);
3145: }
3147: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3148: {
3149: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3150: IS iscol = a->col,isrow = a->row;
3151: PetscErrorCode ierr;
3152: PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3153: const PetscInt *rout,*cout,*r,*c;
3154: PetscScalar *x,*tmp,sum;
3155: const PetscScalar *b;
3156: const MatScalar *aa = a->a,*v;
3159: if (!n) return(0);
3161: VecGetArrayRead(bb,&b);
3162: VecGetArrayWrite(xx,&x);
3163: tmp = a->solve_work;
3165: ISGetIndices(isrow,&rout); r = rout;
3166: ISGetIndices(iscol,&cout); c = cout;
3168: /* forward solve the lower triangular */
3169: tmp[0] = b[r[0]];
3170: v = aa;
3171: vi = aj;
3172: for (i=1; i<n; i++) {
3173: nz = ai[i+1] - ai[i];
3174: sum = b[r[i]];
3175: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3176: tmp[i] = sum;
3177: v += nz; vi += nz;
3178: }
3180: /* backward solve the upper triangular */
3181: for (i=n-1; i>=0; i--) {
3182: v = aa + adiag[i+1]+1;
3183: vi = aj + adiag[i+1]+1;
3184: nz = adiag[i]-adiag[i+1]-1;
3185: sum = tmp[i];
3186: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3187: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3188: }
3190: ISRestoreIndices(isrow,&rout);
3191: ISRestoreIndices(iscol,&cout);
3192: VecRestoreArrayRead(bb,&b);
3193: VecRestoreArrayWrite(xx,&x);
3194: PetscLogFlops(2.0*a->nz - A->cmap->n);
3195: return(0);
3196: }
3198: /*
3199: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3200: */
3201: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3202: {
3203: Mat B = *fact;
3204: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b;
3205: IS isicol;
3207: const PetscInt *r,*ic;
3208: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3209: PetscInt *bi,*bj,*bdiag,*bdiag_rev;
3210: PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3211: PetscInt nlnk,*lnk;
3212: PetscBT lnkbt;
3213: PetscBool row_identity,icol_identity;
3214: MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3215: const PetscInt *ics;
3216: PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3217: PetscReal dt =info->dt,shift=info->shiftamount;
3218: PetscInt dtcount=(PetscInt)info->dtcount,nnz_max;
3219: PetscBool missing;
3222: if (dt == PETSC_DEFAULT) dt = 0.005;
3223: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3225: /* ------- symbolic factorization, can be reused ---------*/
3226: MatMissingDiagonal(A,&missing,&i);
3227: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3228: adiag=a->diag;
3230: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
3232: /* bdiag is location of diagonal in factor */
3233: PetscMalloc1(n+1,&bdiag); /* becomes b->diag */
3234: PetscMalloc1(n+1,&bdiag_rev); /* temporary */
3236: /* allocate row pointers bi */
3237: PetscMalloc1(2*n+2,&bi);
3239: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3240: if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3241: nnz_max = ai[n]+2*n*dtcount+2;
3243: PetscMalloc1(nnz_max+1,&bj);
3244: PetscMalloc1(nnz_max+1,&ba);
3246: /* put together the new matrix */
3247: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
3248: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
3249: b = (Mat_SeqAIJ*)B->data;
3251: b->free_a = PETSC_TRUE;
3252: b->free_ij = PETSC_TRUE;
3253: b->singlemalloc = PETSC_FALSE;
3255: b->a = ba;
3256: b->j = bj;
3257: b->i = bi;
3258: b->diag = bdiag;
3259: b->ilen = NULL;
3260: b->imax = NULL;
3261: b->row = isrow;
3262: b->col = iscol;
3263: PetscObjectReference((PetscObject)isrow);
3264: PetscObjectReference((PetscObject)iscol);
3265: b->icol = isicol;
3267: PetscMalloc1(n+1,&b->solve_work);
3268: PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
3269: b->maxnz = nnz_max;
3271: B->factortype = MAT_FACTOR_ILUDT;
3272: B->info.factor_mallocs = 0;
3273: B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3274: /* ------- end of symbolic factorization ---------*/
3276: ISGetIndices(isrow,&r);
3277: ISGetIndices(isicol,&ic);
3278: ics = ic;
3280: /* linked list for storing column indices of the active row */
3281: nlnk = n + 1;
3282: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
3284: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3285: PetscMalloc2(n,&im,n,&jtmp);
3286: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3287: PetscMalloc2(n,&rtmp,n,&vtmp);
3288: PetscArrayzero(rtmp,n);
3290: bi[0] = 0;
3291: bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3292: bdiag_rev[n] = bdiag[0];
3293: bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3294: for (i=0; i<n; i++) {
3295: /* copy initial fill into linked list */
3296: nzi = ai[r[i]+1] - ai[r[i]];
3297: if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3298: nzi_al = adiag[r[i]] - ai[r[i]];
3299: nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3300: ajtmp = aj + ai[r[i]];
3301: PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);
3303: /* load in initial (unfactored row) */
3304: aatmp = a->a + ai[r[i]];
3305: for (j=0; j<nzi; j++) {
3306: rtmp[ics[*ajtmp++]] = *aatmp++;
3307: }
3309: /* add pivot rows into linked list */
3310: row = lnk[n];
3311: while (row < i) {
3312: nzi_bl = bi[row+1] - bi[row] + 1;
3313: bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3314: PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);
3315: nzi += nlnk;
3316: row = lnk[row];
3317: }
3319: /* copy data from lnk into jtmp, then initialize lnk */
3320: PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);
3322: /* numerical factorization */
3323: bjtmp = jtmp;
3324: row = *bjtmp++; /* 1st pivot row */
3325: while (row < i) {
3326: pc = rtmp + row;
3327: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3328: multiplier = (*pc) * (*pv);
3329: *pc = multiplier;
3330: if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3331: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3332: pv = ba + bdiag[row+1] + 1;
3333: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3334: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3335: PetscLogFlops(1+2.0*nz);
3336: }
3337: row = *bjtmp++;
3338: }
3340: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3341: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3342: nzi_bl = 0; j = 0;
3343: while (jtmp[j] < i) { /* Note: jtmp is sorted */
3344: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3345: nzi_bl++; j++;
3346: }
3347: nzi_bu = nzi - nzi_bl -1;
3348: while (j < nzi) {
3349: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3350: j++;
3351: }
3353: bjtmp = bj + bi[i];
3354: batmp = ba + bi[i];
3355: /* apply level dropping rule to L part */
3356: ncut = nzi_al + dtcount;
3357: if (ncut < nzi_bl) {
3358: PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);
3359: PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
3360: } else {
3361: ncut = nzi_bl;
3362: }
3363: for (j=0; j<ncut; j++) {
3364: bjtmp[j] = jtmp[j];
3365: batmp[j] = vtmp[j];
3366: }
3367: bi[i+1] = bi[i] + ncut;
3368: nzi = ncut + 1;
3370: /* apply level dropping rule to U part */
3371: ncut = nzi_au + dtcount;
3372: if (ncut < nzi_bu) {
3373: PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);
3374: PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
3375: } else {
3376: ncut = nzi_bu;
3377: }
3378: nzi += ncut;
3380: /* mark bdiagonal */
3381: bdiag[i+1] = bdiag[i] - (ncut + 1);
3382: bdiag_rev[n-i-1] = bdiag[i+1];
3383: bi[2*n - i] = bi[2*n - i +1] - (ncut + 1);
3384: bjtmp = bj + bdiag[i];
3385: batmp = ba + bdiag[i];
3386: *bjtmp = i;
3387: *batmp = diag_tmp; /* rtmp[i]; */
3388: if (*batmp == 0.0) {
3389: *batmp = dt+shift;
3390: }
3391: *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3393: bjtmp = bj + bdiag[i+1]+1;
3394: batmp = ba + bdiag[i+1]+1;
3395: for (k=0; k<ncut; k++) {
3396: bjtmp[k] = jtmp[nzi_bl+1+k];
3397: batmp[k] = vtmp[nzi_bl+1+k];
3398: }
3400: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3401: } /* for (i=0; i<n; i++) */
3402: if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3404: ISRestoreIndices(isrow,&r);
3405: ISRestoreIndices(isicol,&ic);
3407: PetscLLDestroy(lnk,lnkbt);
3408: PetscFree2(im,jtmp);
3409: PetscFree2(rtmp,vtmp);
3410: PetscFree(bdiag_rev);
3412: PetscLogFlops(B->cmap->n);
3413: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3415: ISIdentity(isrow,&row_identity);
3416: ISIdentity(isicol,&icol_identity);
3417: if (row_identity && icol_identity) {
3418: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3419: } else {
3420: B->ops->solve = MatSolve_SeqAIJ;
3421: }
3423: B->ops->solveadd = NULL;
3424: B->ops->solvetranspose = NULL;
3425: B->ops->solvetransposeadd = NULL;
3426: B->ops->matsolve = NULL;
3427: B->assembled = PETSC_TRUE;
3428: B->preallocated = PETSC_TRUE;
3429: return(0);
3430: }
3432: /* a wraper of MatILUDTFactor_SeqAIJ() */
3433: /*
3434: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3435: */
3437: PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3438: {
3442: MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);
3443: return(0);
3444: }
3446: /*
3447: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3448: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3449: */
3450: /*
3451: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3452: */
3454: PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3455: {
3456: Mat C =fact;
3457: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3458: IS isrow = b->row,isicol = b->icol;
3460: const PetscInt *r,*ic,*ics;
3461: PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3462: PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3463: MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3464: PetscReal dt=info->dt,shift=info->shiftamount;
3465: PetscBool row_identity, col_identity;
3468: ISGetIndices(isrow,&r);
3469: ISGetIndices(isicol,&ic);
3470: PetscMalloc1(n+1,&rtmp);
3471: ics = ic;
3473: for (i=0; i<n; i++) {
3474: /* initialize rtmp array */
3475: nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */
3476: bjtmp = bj + bi[i];
3477: for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3478: rtmp[i] = 0.0;
3479: nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3480: bjtmp = bj + bdiag[i+1] + 1;
3481: for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3483: /* load in initial unfactored row of A */
3484: nz = ai[r[i]+1] - ai[r[i]];
3485: ajtmp = aj + ai[r[i]];
3486: v = aa + ai[r[i]];
3487: for (j=0; j<nz; j++) {
3488: rtmp[ics[*ajtmp++]] = v[j];
3489: }
3491: /* numerical factorization */
3492: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3493: nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3494: k = 0;
3495: while (k < nzl) {
3496: row = *bjtmp++;
3497: pc = rtmp + row;
3498: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3499: multiplier = (*pc) * (*pv);
3500: *pc = multiplier;
3501: if (PetscAbsScalar(multiplier) > dt) {
3502: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3503: pv = b->a + bdiag[row+1] + 1;
3504: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3505: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3506: PetscLogFlops(1+2.0*nz);
3507: }
3508: k++;
3509: }
3511: /* finished row so stick it into b->a */
3512: /* L-part */
3513: pv = b->a + bi[i];
3514: pj = bj + bi[i];
3515: nzl = bi[i+1] - bi[i];
3516: for (j=0; j<nzl; j++) {
3517: pv[j] = rtmp[pj[j]];
3518: }
3520: /* diagonal: invert diagonal entries for simplier triangular solves */
3521: if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3522: b->a[bdiag[i]] = 1.0/rtmp[i];
3524: /* U-part */
3525: pv = b->a + bdiag[i+1] + 1;
3526: pj = bj + bdiag[i+1] + 1;
3527: nzu = bdiag[i] - bdiag[i+1] - 1;
3528: for (j=0; j<nzu; j++) {
3529: pv[j] = rtmp[pj[j]];
3530: }
3531: }
3533: PetscFree(rtmp);
3534: ISRestoreIndices(isicol,&ic);
3535: ISRestoreIndices(isrow,&r);
3537: ISIdentity(isrow,&row_identity);
3538: ISIdentity(isicol,&col_identity);
3539: if (row_identity && col_identity) {
3540: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3541: } else {
3542: C->ops->solve = MatSolve_SeqAIJ;
3543: }
3544: C->ops->solveadd = NULL;
3545: C->ops->solvetranspose = NULL;
3546: C->ops->solvetransposeadd = NULL;
3547: C->ops->matsolve = NULL;
3548: C->assembled = PETSC_TRUE;
3549: C->preallocated = PETSC_TRUE;
3551: PetscLogFlops(C->cmap->n);
3552: return(0);
3553: }