Actual source code: aijfact.c
petsc-3.12.5 2020-03-29
2: #include <../src/mat/impls/aij/seq/aij.h>
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <petscbt.h>
5: #include <../src/mat/utils/freespace.h>
7: /*
8: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
10: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
11: */
12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
13: {
14: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data;
15: PetscErrorCode ierr;
16: PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
17: const PetscInt *ai = a->i, *aj = a->j;
18: const PetscScalar *aa = a->a;
19: PetscBool *done;
20: PetscReal best,past = 0,future;
23: /* pick initial row */
24: best = -1;
25: for (i=0; i<n; i++) {
26: future = 0.0;
27: for (j=ai[i]; j<ai[i+1]; j++) {
28: if (aj[j] != i) future += PetscAbsScalar(aa[j]);
29: else past = PetscAbsScalar(aa[j]);
30: }
31: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
32: if (past/future > best) {
33: best = past/future;
34: current = i;
35: }
36: }
38: PetscMalloc1(n,&done);
39: PetscArrayzero(done,n);
40: PetscMalloc1(n,&order);
41: order[0] = current;
42: for (i=0; i<n-1; i++) {
43: done[current] = PETSC_TRUE;
44: best = -1;
45: /* loop over all neighbors of current pivot */
46: for (j=ai[current]; j<ai[current+1]; j++) {
47: jj = aj[j];
48: if (done[jj]) continue;
49: /* loop over columns of potential next row computing weights for below and above diagonal */
50: past = future = 0.0;
51: for (k=ai[jj]; k<ai[jj+1]; k++) {
52: kk = aj[k];
53: if (done[kk]) past += PetscAbsScalar(aa[k]);
54: else if (kk != jj) future += PetscAbsScalar(aa[k]);
55: }
56: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
57: if (past/future > best) {
58: best = past/future;
59: newcurrent = jj;
60: }
61: }
62: if (best == -1) { /* no neighbors to select from so select best of all that remain */
63: best = -1;
64: for (k=0; k<n; k++) {
65: if (done[k]) continue;
66: future = 0.0;
67: past = 0.0;
68: for (j=ai[k]; j<ai[k+1]; j++) {
69: kk = aj[j];
70: if (done[kk]) past += PetscAbsScalar(aa[j]);
71: else if (kk != k) future += PetscAbsScalar(aa[j]);
72: }
73: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
74: if (past/future > best) {
75: best = past/future;
76: newcurrent = k;
77: }
78: }
79: }
80: if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
81: current = newcurrent;
82: order[i+1] = current;
83: }
84: ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);
85: *icol = *irow;
86: PetscObjectReference((PetscObject)*irow);
87: PetscFree(done);
88: PetscFree(order);
89: return(0);
90: }
92: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
93: {
94: PetscInt n = A->rmap->n;
98: #if defined(PETSC_USE_COMPLEX)
99: if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
100: #endif
101: MatCreate(PetscObjectComm((PetscObject)A),B);
102: MatSetSizes(*B,n,n,n,n);
103: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
104: MatSetType(*B,MATSEQAIJ);
106: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
107: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
109: MatSetBlockSizesFromMats(*B,A,A);
110: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
111: MatSetType(*B,MATSEQSBAIJ);
112: MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);
114: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
115: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
116: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
117: (*B)->factortype = ftype;
119: PetscFree((*B)->solvertype);
120: PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);
121: return(0);
122: }
124: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
125: {
126: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
127: IS isicol;
128: PetscErrorCode ierr;
129: const PetscInt *r,*ic;
130: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
131: PetscInt *bi,*bj,*ajtmp;
132: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
133: PetscReal f;
134: PetscInt nlnk,*lnk,k,**bi_ptr;
135: PetscFreeSpaceList free_space=NULL,current_space=NULL;
136: PetscBT lnkbt;
137: PetscBool missing;
140: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
141: MatMissingDiagonal(A,&missing,&i);
142: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
144: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
145: ISGetIndices(isrow,&r);
146: ISGetIndices(isicol,&ic);
148: /* get new row pointers */
149: PetscMalloc1(n+1,&bi);
150: bi[0] = 0;
152: /* bdiag is location of diagonal in factor */
153: PetscMalloc1(n+1,&bdiag);
154: bdiag[0] = 0;
156: /* linked list for storing column indices of the active row */
157: nlnk = n + 1;
158: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
160: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
162: /* initial FreeSpace size is f*(ai[n]+1) */
163: f = info->fill;
164: if (n==1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
165: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
166: current_space = free_space;
168: for (i=0; i<n; i++) {
169: /* copy previous fill into linked list */
170: nzi = 0;
171: nnz = ai[r[i]+1] - ai[r[i]];
172: ajtmp = aj + ai[r[i]];
173: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
174: nzi += nlnk;
176: /* add pivot rows into linked list */
177: row = lnk[n];
178: while (row < i) {
179: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
180: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
181: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
182: nzi += nlnk;
183: row = lnk[row];
184: }
185: bi[i+1] = bi[i] + nzi;
186: im[i] = nzi;
188: /* mark bdiag */
189: nzbd = 0;
190: nnz = nzi;
191: k = lnk[n];
192: while (nnz-- && k < i) {
193: nzbd++;
194: k = lnk[k];
195: }
196: bdiag[i] = bi[i] + nzbd;
198: /* if free space is not available, make more free space */
199: if (current_space->local_remaining<nzi) {
200: nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
201: PetscFreeSpaceGet(nnz,¤t_space);
202: reallocs++;
203: }
205: /* copy data into free space, then initialize lnk */
206: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
208: bi_ptr[i] = current_space->array;
209: current_space->array += nzi;
210: current_space->local_used += nzi;
211: current_space->local_remaining -= nzi;
212: }
213: #if defined(PETSC_USE_INFO)
214: if (ai[n] != 0) {
215: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
216: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
217: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
218: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
219: PetscInfo(A,"for best performance.\n");
220: } else {
221: PetscInfo(A,"Empty matrix\n");
222: }
223: #endif
225: ISRestoreIndices(isrow,&r);
226: ISRestoreIndices(isicol,&ic);
228: /* destroy list of free space and other temporary array(s) */
229: PetscMalloc1(bi[n]+1,&bj);
230: PetscFreeSpaceContiguous(&free_space,bj);
231: PetscLLDestroy(lnk,lnkbt);
232: PetscFree2(bi_ptr,im);
234: /* put together the new matrix */
235: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
236: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
237: b = (Mat_SeqAIJ*)(B)->data;
239: b->free_a = PETSC_TRUE;
240: b->free_ij = PETSC_TRUE;
241: b->singlemalloc = PETSC_FALSE;
243: PetscMalloc1(bi[n]+1,&b->a);
244: b->j = bj;
245: b->i = bi;
246: b->diag = bdiag;
247: b->ilen = 0;
248: b->imax = 0;
249: b->row = isrow;
250: b->col = iscol;
251: PetscObjectReference((PetscObject)isrow);
252: PetscObjectReference((PetscObject)iscol);
253: b->icol = isicol;
254: PetscMalloc1(n+1,&b->solve_work);
256: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
257: PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
258: b->maxnz = b->nz = bi[n];
260: (B)->factortype = MAT_FACTOR_LU;
261: (B)->info.factor_mallocs = reallocs;
262: (B)->info.fill_ratio_given = f;
264: if (ai[n]) {
265: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
266: } else {
267: (B)->info.fill_ratio_needed = 0.0;
268: }
269: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
270: if (a->inode.size) {
271: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
272: }
273: return(0);
274: }
276: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
277: {
278: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
279: IS isicol;
280: PetscErrorCode ierr;
281: const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
282: PetscInt i,n=A->rmap->n;
283: PetscInt *bi,*bj;
284: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
285: PetscReal f;
286: PetscInt nlnk,*lnk,k,**bi_ptr;
287: PetscFreeSpaceList free_space=NULL,current_space=NULL;
288: PetscBT lnkbt;
289: PetscBool missing;
292: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
293: MatMissingDiagonal(A,&missing,&i);
294: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
295:
296: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
297: ISGetIndices(isrow,&r);
298: ISGetIndices(isicol,&ic);
300: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
301: PetscMalloc1(n+1,&bi);
302: PetscMalloc1(n+1,&bdiag);
303: bi[0] = bdiag[0] = 0;
305: /* linked list for storing column indices of the active row */
306: nlnk = n + 1;
307: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
309: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
311: /* initial FreeSpace size is f*(ai[n]+1) */
312: f = info->fill;
313: if (n==1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
314: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
315: current_space = free_space;
317: for (i=0; i<n; i++) {
318: /* copy previous fill into linked list */
319: nzi = 0;
320: nnz = ai[r[i]+1] - ai[r[i]];
321: ajtmp = aj + ai[r[i]];
322: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
323: nzi += nlnk;
325: /* add pivot rows into linked list */
326: row = lnk[n];
327: while (row < i) {
328: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
329: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
330: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
331: nzi += nlnk;
332: row = lnk[row];
333: }
334: bi[i+1] = bi[i] + nzi;
335: im[i] = nzi;
337: /* mark bdiag */
338: nzbd = 0;
339: nnz = nzi;
340: k = lnk[n];
341: while (nnz-- && k < i) {
342: nzbd++;
343: k = lnk[k];
344: }
345: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
347: /* if free space is not available, make more free space */
348: if (current_space->local_remaining<nzi) {
349: /* estimated additional space needed */
350: nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi));
351: PetscFreeSpaceGet(nnz,¤t_space);
352: reallocs++;
353: }
355: /* copy data into free space, then initialize lnk */
356: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
358: bi_ptr[i] = current_space->array;
359: current_space->array += nzi;
360: current_space->local_used += nzi;
361: current_space->local_remaining -= nzi;
362: }
364: ISRestoreIndices(isrow,&r);
365: ISRestoreIndices(isicol,&ic);
367: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
368: PetscMalloc1(bi[n]+1,&bj);
369: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
370: PetscLLDestroy(lnk,lnkbt);
371: PetscFree2(bi_ptr,im);
373: /* put together the new matrix */
374: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
375: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
376: b = (Mat_SeqAIJ*)(B)->data;
378: b->free_a = PETSC_TRUE;
379: b->free_ij = PETSC_TRUE;
380: b->singlemalloc = PETSC_FALSE;
382: PetscMalloc1(bdiag[0]+1,&b->a);
384: b->j = bj;
385: b->i = bi;
386: b->diag = bdiag;
387: b->ilen = 0;
388: b->imax = 0;
389: b->row = isrow;
390: b->col = iscol;
391: PetscObjectReference((PetscObject)isrow);
392: PetscObjectReference((PetscObject)iscol);
393: b->icol = isicol;
394: PetscMalloc1(n+1,&b->solve_work);
396: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
397: PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
398: b->maxnz = b->nz = bdiag[0]+1;
400: B->factortype = MAT_FACTOR_LU;
401: B->info.factor_mallocs = reallocs;
402: B->info.fill_ratio_given = f;
404: if (ai[n]) {
405: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
406: } else {
407: B->info.fill_ratio_needed = 0.0;
408: }
409: #if defined(PETSC_USE_INFO)
410: if (ai[n] != 0) {
411: PetscReal af = B->info.fill_ratio_needed;
412: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
413: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
414: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
415: PetscInfo(A,"for best performance.\n");
416: } else {
417: PetscInfo(A,"Empty matrix\n");
418: }
419: #endif
420: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
421: if (a->inode.size) {
422: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
423: }
424: MatSeqAIJCheckInode_FactorLU(B);
425: return(0);
426: }
428: /*
429: Trouble in factorization, should we dump the original matrix?
430: */
431: PetscErrorCode MatFactorDumpMatrix(Mat A)
432: {
434: PetscBool flg = PETSC_FALSE;
437: PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);
438: if (flg) {
439: PetscViewer viewer;
440: char filename[PETSC_MAX_PATH_LEN];
442: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
443: PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);
444: MatView(A,viewer);
445: PetscViewerDestroy(&viewer);
446: }
447: return(0);
448: }
450: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
451: {
452: Mat C =B;
453: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
454: IS isrow = b->row,isicol = b->icol;
455: PetscErrorCode ierr;
456: const PetscInt *r,*ic,*ics;
457: const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
458: PetscInt i,j,k,nz,nzL,row,*pj;
459: const PetscInt *ajtmp,*bjtmp;
460: MatScalar *rtmp,*pc,multiplier,*pv;
461: const MatScalar *aa=a->a,*v;
462: PetscBool row_identity,col_identity;
463: FactorShiftCtx sctx;
464: const PetscInt *ddiag;
465: PetscReal rs;
466: MatScalar d;
469: /* MatPivotSetUp(): initialize shift context sctx */
470: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
472: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
473: ddiag = a->diag;
474: sctx.shift_top = info->zeropivot;
475: for (i=0; i<n; i++) {
476: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
477: d = (aa)[ddiag[i]];
478: rs = -PetscAbsScalar(d) - PetscRealPart(d);
479: v = aa+ai[i];
480: nz = ai[i+1] - ai[i];
481: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
482: if (rs>sctx.shift_top) sctx.shift_top = rs;
483: }
484: sctx.shift_top *= 1.1;
485: sctx.nshift_max = 5;
486: sctx.shift_lo = 0.;
487: sctx.shift_hi = 1.;
488: }
490: ISGetIndices(isrow,&r);
491: ISGetIndices(isicol,&ic);
492: PetscMalloc1(n+1,&rtmp);
493: ics = ic;
495: do {
496: sctx.newshift = PETSC_FALSE;
497: for (i=0; i<n; i++) {
498: /* zero rtmp */
499: /* L part */
500: nz = bi[i+1] - bi[i];
501: bjtmp = bj + bi[i];
502: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
504: /* U part */
505: nz = bdiag[i]-bdiag[i+1];
506: bjtmp = bj + bdiag[i+1]+1;
507: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
509: /* load in initial (unfactored row) */
510: nz = ai[r[i]+1] - ai[r[i]];
511: ajtmp = aj + ai[r[i]];
512: v = aa + ai[r[i]];
513: for (j=0; j<nz; j++) {
514: rtmp[ics[ajtmp[j]]] = v[j];
515: }
516: /* ZeropivotApply() */
517: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
519: /* elimination */
520: bjtmp = bj + bi[i];
521: row = *bjtmp++;
522: nzL = bi[i+1] - bi[i];
523: for (k=0; k < nzL; k++) {
524: pc = rtmp + row;
525: if (*pc != 0.0) {
526: pv = b->a + bdiag[row];
527: multiplier = *pc * (*pv);
528: *pc = multiplier;
530: pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
531: pv = b->a + bdiag[row+1]+1;
532: nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
534: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
535: PetscLogFlops(1+2*nz);
536: }
537: row = *bjtmp++;
538: }
540: /* finished row so stick it into b->a */
541: rs = 0.0;
542: /* L part */
543: pv = b->a + bi[i];
544: pj = b->j + bi[i];
545: nz = bi[i+1] - bi[i];
546: for (j=0; j<nz; j++) {
547: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
548: }
550: /* U part */
551: pv = b->a + bdiag[i+1]+1;
552: pj = b->j + bdiag[i+1]+1;
553: nz = bdiag[i] - bdiag[i+1]-1;
554: for (j=0; j<nz; j++) {
555: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
556: }
558: sctx.rs = rs;
559: sctx.pv = rtmp[i];
560: MatPivotCheck(B,A,info,&sctx,i);
561: if (sctx.newshift) break; /* break for-loop */
562: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
564: /* Mark diagonal and invert diagonal for simplier triangular solves */
565: pv = b->a + bdiag[i];
566: *pv = 1.0/rtmp[i];
568: } /* endof for (i=0; i<n; i++) { */
570: /* MatPivotRefine() */
571: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
572: /*
573: * if no shift in this attempt & shifting & started shifting & can refine,
574: * then try lower shift
575: */
576: sctx.shift_hi = sctx.shift_fraction;
577: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
578: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
579: sctx.newshift = PETSC_TRUE;
580: sctx.nshift++;
581: }
582: } while (sctx.newshift);
584: PetscFree(rtmp);
585: ISRestoreIndices(isicol,&ic);
586: ISRestoreIndices(isrow,&r);
588: ISIdentity(isrow,&row_identity);
589: ISIdentity(isicol,&col_identity);
590: if (b->inode.size) {
591: C->ops->solve = MatSolve_SeqAIJ_Inode;
592: } else if (row_identity && col_identity) {
593: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
594: } else {
595: C->ops->solve = MatSolve_SeqAIJ;
596: }
597: C->ops->solveadd = MatSolveAdd_SeqAIJ;
598: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
599: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
600: C->ops->matsolve = MatMatSolve_SeqAIJ;
601: C->assembled = PETSC_TRUE;
602: C->preallocated = PETSC_TRUE;
604: PetscLogFlops(C->cmap->n);
606: /* MatShiftView(A,info,&sctx) */
607: if (sctx.nshift) {
608: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
609: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
610: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
611: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
612: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
613: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
614: }
615: }
616: return(0);
617: }
619: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
620: {
621: Mat C =B;
622: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
623: IS isrow = b->row,isicol = b->icol;
624: PetscErrorCode ierr;
625: const PetscInt *r,*ic,*ics;
626: PetscInt nz,row,i,j,n=A->rmap->n,diag;
627: const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
628: const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
629: MatScalar *pv,*rtmp,*pc,multiplier,d;
630: const MatScalar *v,*aa=a->a;
631: PetscReal rs=0.0;
632: FactorShiftCtx sctx;
633: const PetscInt *ddiag;
634: PetscBool row_identity, col_identity;
637: /* MatPivotSetUp(): initialize shift context sctx */
638: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
640: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
641: ddiag = a->diag;
642: sctx.shift_top = info->zeropivot;
643: for (i=0; i<n; i++) {
644: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
645: d = (aa)[ddiag[i]];
646: rs = -PetscAbsScalar(d) - PetscRealPart(d);
647: v = aa+ai[i];
648: nz = ai[i+1] - ai[i];
649: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
650: if (rs>sctx.shift_top) sctx.shift_top = rs;
651: }
652: sctx.shift_top *= 1.1;
653: sctx.nshift_max = 5;
654: sctx.shift_lo = 0.;
655: sctx.shift_hi = 1.;
656: }
658: ISGetIndices(isrow,&r);
659: ISGetIndices(isicol,&ic);
660: PetscMalloc1(n+1,&rtmp);
661: ics = ic;
663: do {
664: sctx.newshift = PETSC_FALSE;
665: for (i=0; i<n; i++) {
666: nz = bi[i+1] - bi[i];
667: bjtmp = bj + bi[i];
668: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
670: /* load in initial (unfactored row) */
671: nz = ai[r[i]+1] - ai[r[i]];
672: ajtmp = aj + ai[r[i]];
673: v = aa + ai[r[i]];
674: for (j=0; j<nz; j++) {
675: rtmp[ics[ajtmp[j]]] = v[j];
676: }
677: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
679: row = *bjtmp++;
680: while (row < i) {
681: pc = rtmp + row;
682: if (*pc != 0.0) {
683: pv = b->a + diag_offset[row];
684: pj = b->j + diag_offset[row] + 1;
685: multiplier = *pc / *pv++;
686: *pc = multiplier;
687: nz = bi[row+1] - diag_offset[row] - 1;
688: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
689: PetscLogFlops(1+2*nz);
690: }
691: row = *bjtmp++;
692: }
693: /* finished row so stick it into b->a */
694: pv = b->a + bi[i];
695: pj = b->j + bi[i];
696: nz = bi[i+1] - bi[i];
697: diag = diag_offset[i] - bi[i];
698: rs = 0.0;
699: for (j=0; j<nz; j++) {
700: pv[j] = rtmp[pj[j]];
701: rs += PetscAbsScalar(pv[j]);
702: }
703: rs -= PetscAbsScalar(pv[diag]);
705: sctx.rs = rs;
706: sctx.pv = pv[diag];
707: MatPivotCheck(B,A,info,&sctx,i);
708: if (sctx.newshift) break;
709: pv[diag] = sctx.pv;
710: }
712: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
713: /*
714: * if no shift in this attempt & shifting & started shifting & can refine,
715: * then try lower shift
716: */
717: sctx.shift_hi = sctx.shift_fraction;
718: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
719: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
720: sctx.newshift = PETSC_TRUE;
721: sctx.nshift++;
722: }
723: } while (sctx.newshift);
725: /* invert diagonal entries for simplier triangular solves */
726: for (i=0; i<n; i++) {
727: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
728: }
729: PetscFree(rtmp);
730: ISRestoreIndices(isicol,&ic);
731: ISRestoreIndices(isrow,&r);
733: ISIdentity(isrow,&row_identity);
734: ISIdentity(isicol,&col_identity);
735: if (row_identity && col_identity) {
736: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
737: } else {
738: C->ops->solve = MatSolve_SeqAIJ_inplace;
739: }
740: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
741: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
742: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
743: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
745: C->assembled = PETSC_TRUE;
746: C->preallocated = PETSC_TRUE;
748: PetscLogFlops(C->cmap->n);
749: if (sctx.nshift) {
750: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
751: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
752: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
753: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
754: }
755: }
756: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
757: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
759: MatSeqAIJCheckInode(C);
760: return(0);
761: }
763: /*
764: This routine implements inplace ILU(0) with row or/and column permutations.
765: Input:
766: A - original matrix
767: Output;
768: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
769: a->j (col index) is permuted by the inverse of colperm, then sorted
770: a->a reordered accordingly with a->j
771: a->diag (ptr to diagonal elements) is updated.
772: */
773: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
774: {
775: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data;
776: IS isrow = a->row,isicol = a->icol;
777: PetscErrorCode ierr;
778: const PetscInt *r,*ic,*ics;
779: PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
780: PetscInt *ajtmp,nz,row;
781: PetscInt *diag = a->diag,nbdiag,*pj;
782: PetscScalar *rtmp,*pc,multiplier,d;
783: MatScalar *pv,*v;
784: PetscReal rs;
785: FactorShiftCtx sctx;
786: const MatScalar *aa=a->a,*vtmp;
789: if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
791: /* MatPivotSetUp(): initialize shift context sctx */
792: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
794: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
795: const PetscInt *ddiag = a->diag;
796: sctx.shift_top = info->zeropivot;
797: for (i=0; i<n; i++) {
798: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
799: d = (aa)[ddiag[i]];
800: rs = -PetscAbsScalar(d) - PetscRealPart(d);
801: vtmp = aa+ai[i];
802: nz = ai[i+1] - ai[i];
803: for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
804: if (rs>sctx.shift_top) sctx.shift_top = rs;
805: }
806: sctx.shift_top *= 1.1;
807: sctx.nshift_max = 5;
808: sctx.shift_lo = 0.;
809: sctx.shift_hi = 1.;
810: }
812: ISGetIndices(isrow,&r);
813: ISGetIndices(isicol,&ic);
814: PetscMalloc1(n+1,&rtmp);
815: PetscArrayzero(rtmp,n+1);
816: ics = ic;
818: #if defined(MV)
819: sctx.shift_top = 0.;
820: sctx.nshift_max = 0;
821: sctx.shift_lo = 0.;
822: sctx.shift_hi = 0.;
823: sctx.shift_fraction = 0.;
825: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
826: sctx.shift_top = 0.;
827: for (i=0; i<n; i++) {
828: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
829: d = (a->a)[diag[i]];
830: rs = -PetscAbsScalar(d) - PetscRealPart(d);
831: v = a->a+ai[i];
832: nz = ai[i+1] - ai[i];
833: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
834: if (rs>sctx.shift_top) sctx.shift_top = rs;
835: }
836: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
837: sctx.shift_top *= 1.1;
838: sctx.nshift_max = 5;
839: sctx.shift_lo = 0.;
840: sctx.shift_hi = 1.;
841: }
843: sctx.shift_amount = 0.;
844: sctx.nshift = 0;
845: #endif
847: do {
848: sctx.newshift = PETSC_FALSE;
849: for (i=0; i<n; i++) {
850: /* load in initial unfactored row */
851: nz = ai[r[i]+1] - ai[r[i]];
852: ajtmp = aj + ai[r[i]];
853: v = a->a + ai[r[i]];
854: /* sort permuted ajtmp and values v accordingly */
855: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
856: PetscSortIntWithScalarArray(nz,ajtmp,v);
858: diag[r[i]] = ai[r[i]];
859: for (j=0; j<nz; j++) {
860: rtmp[ajtmp[j]] = v[j];
861: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
862: }
863: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
865: row = *ajtmp++;
866: while (row < i) {
867: pc = rtmp + row;
868: if (*pc != 0.0) {
869: pv = a->a + diag[r[row]];
870: pj = aj + diag[r[row]] + 1;
872: multiplier = *pc / *pv++;
873: *pc = multiplier;
874: nz = ai[r[row]+1] - diag[r[row]] - 1;
875: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
876: PetscLogFlops(1+2*nz);
877: }
878: row = *ajtmp++;
879: }
880: /* finished row so overwrite it onto a->a */
881: pv = a->a + ai[r[i]];
882: pj = aj + ai[r[i]];
883: nz = ai[r[i]+1] - ai[r[i]];
884: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
886: rs = 0.0;
887: for (j=0; j<nz; j++) {
888: pv[j] = rtmp[pj[j]];
889: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
890: }
892: sctx.rs = rs;
893: sctx.pv = pv[nbdiag];
894: MatPivotCheck(B,A,info,&sctx,i);
895: if (sctx.newshift) break;
896: pv[nbdiag] = sctx.pv;
897: }
899: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
900: /*
901: * if no shift in this attempt & shifting & started shifting & can refine,
902: * then try lower shift
903: */
904: sctx.shift_hi = sctx.shift_fraction;
905: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
906: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
907: sctx.newshift = PETSC_TRUE;
908: sctx.nshift++;
909: }
910: } while (sctx.newshift);
912: /* invert diagonal entries for simplier triangular solves */
913: for (i=0; i<n; i++) {
914: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
915: }
917: PetscFree(rtmp);
918: ISRestoreIndices(isicol,&ic);
919: ISRestoreIndices(isrow,&r);
921: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
922: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
923: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
924: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
926: A->assembled = PETSC_TRUE;
927: A->preallocated = PETSC_TRUE;
929: PetscLogFlops(A->cmap->n);
930: if (sctx.nshift) {
931: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
932: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
933: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
934: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
935: }
936: }
937: return(0);
938: }
940: /* ----------------------------------------------------------- */
941: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
942: {
944: Mat C;
947: MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);
948: MatLUFactorSymbolic(C,A,row,col,info);
949: MatLUFactorNumeric(C,A,info);
951: A->ops->solve = C->ops->solve;
952: A->ops->solvetranspose = C->ops->solvetranspose;
954: MatHeaderMerge(A,&C);
955: PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);
956: return(0);
957: }
958: /* ----------------------------------------------------------- */
961: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
962: {
963: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
964: IS iscol = a->col,isrow = a->row;
965: PetscErrorCode ierr;
966: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
967: PetscInt nz;
968: const PetscInt *rout,*cout,*r,*c;
969: PetscScalar *x,*tmp,*tmps,sum;
970: const PetscScalar *b;
971: const MatScalar *aa = a->a,*v;
974: if (!n) return(0);
976: VecGetArrayRead(bb,&b);
977: VecGetArrayWrite(xx,&x);
978: tmp = a->solve_work;
980: ISGetIndices(isrow,&rout); r = rout;
981: ISGetIndices(iscol,&cout); c = cout + (n-1);
983: /* forward solve the lower triangular */
984: tmp[0] = b[*r++];
985: tmps = tmp;
986: for (i=1; i<n; i++) {
987: v = aa + ai[i];
988: vi = aj + ai[i];
989: nz = a->diag[i] - ai[i];
990: sum = b[*r++];
991: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
992: tmp[i] = sum;
993: }
995: /* backward solve the upper triangular */
996: for (i=n-1; i>=0; i--) {
997: v = aa + a->diag[i] + 1;
998: vi = aj + a->diag[i] + 1;
999: nz = ai[i+1] - a->diag[i] - 1;
1000: sum = tmp[i];
1001: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1002: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1003: }
1005: ISRestoreIndices(isrow,&rout);
1006: ISRestoreIndices(iscol,&cout);
1007: VecRestoreArrayRead(bb,&b);
1008: VecRestoreArrayWrite(xx,&x);
1009: PetscLogFlops(2.0*a->nz - A->cmap->n);
1010: return(0);
1011: }
1013: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1014: {
1015: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1016: IS iscol = a->col,isrow = a->row;
1017: PetscErrorCode ierr;
1018: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1019: PetscInt nz,neq;
1020: const PetscInt *rout,*cout,*r,*c;
1021: PetscScalar *x,*tmp,*tmps,sum;
1022: const PetscScalar *aa = a->a,*v;
1023: const PetscScalar *b;
1024: PetscBool bisdense,xisdense;
1027: if (!n) return(0);
1029: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1030: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1031: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1032: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1034: MatDenseGetArrayRead(B,&b);
1035: MatDenseGetArray(X,&x);
1037: tmp = a->solve_work;
1038: ISGetIndices(isrow,&rout); r = rout;
1039: ISGetIndices(iscol,&cout); c = cout;
1041: for (neq=0; neq<B->cmap->n; neq++) {
1042: /* forward solve the lower triangular */
1043: tmp[0] = b[r[0]];
1044: tmps = tmp;
1045: for (i=1; i<n; i++) {
1046: v = aa + ai[i];
1047: vi = aj + ai[i];
1048: nz = a->diag[i] - ai[i];
1049: sum = b[r[i]];
1050: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1051: tmp[i] = sum;
1052: }
1053: /* backward solve the upper triangular */
1054: for (i=n-1; i>=0; i--) {
1055: v = aa + a->diag[i] + 1;
1056: vi = aj + a->diag[i] + 1;
1057: nz = ai[i+1] - a->diag[i] - 1;
1058: sum = tmp[i];
1059: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1060: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1061: }
1063: b += n;
1064: x += n;
1065: }
1066: ISRestoreIndices(isrow,&rout);
1067: ISRestoreIndices(iscol,&cout);
1068: MatDenseRestoreArrayRead(B,&b);
1069: MatDenseRestoreArray(X,&x);
1070: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1071: return(0);
1072: }
1074: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1075: {
1076: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1077: IS iscol = a->col,isrow = a->row;
1078: PetscErrorCode ierr;
1079: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1080: PetscInt nz,neq;
1081: const PetscInt *rout,*cout,*r,*c;
1082: PetscScalar *x,*tmp,sum;
1083: const PetscScalar *b;
1084: const PetscScalar *aa = a->a,*v;
1085: PetscBool bisdense,xisdense;
1088: if (!n) return(0);
1090: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1091: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1092: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1093: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1095: MatDenseGetArrayRead(B,&b);
1096: MatDenseGetArray(X,&x);
1098: tmp = a->solve_work;
1099: ISGetIndices(isrow,&rout); r = rout;
1100: ISGetIndices(iscol,&cout); c = cout;
1102: for (neq=0; neq<B->cmap->n; neq++) {
1103: /* forward solve the lower triangular */
1104: tmp[0] = b[r[0]];
1105: v = aa;
1106: vi = aj;
1107: for (i=1; i<n; i++) {
1108: nz = ai[i+1] - ai[i];
1109: sum = b[r[i]];
1110: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1111: tmp[i] = sum;
1112: v += nz; vi += nz;
1113: }
1115: /* backward solve the upper triangular */
1116: for (i=n-1; i>=0; i--) {
1117: v = aa + adiag[i+1]+1;
1118: vi = aj + adiag[i+1]+1;
1119: nz = adiag[i]-adiag[i+1]-1;
1120: sum = tmp[i];
1121: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1122: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1123: }
1125: b += n;
1126: x += n;
1127: }
1128: ISRestoreIndices(isrow,&rout);
1129: ISRestoreIndices(iscol,&cout);
1130: MatDenseRestoreArrayRead(B,&b);
1131: MatDenseRestoreArray(X,&x);
1132: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1133: return(0);
1134: }
1136: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1137: {
1138: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1139: IS iscol = a->col,isrow = a->row;
1140: PetscErrorCode ierr;
1141: const PetscInt *r,*c,*rout,*cout;
1142: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1143: PetscInt nz,row;
1144: PetscScalar *x,*tmp,*tmps,sum;
1145: const PetscScalar *b;
1146: const MatScalar *aa = a->a,*v;
1149: if (!n) return(0);
1151: VecGetArrayRead(bb,&b);
1152: VecGetArrayWrite(xx,&x);
1153: tmp = a->solve_work;
1155: ISGetIndices(isrow,&rout); r = rout;
1156: ISGetIndices(iscol,&cout); c = cout + (n-1);
1158: /* forward solve the lower triangular */
1159: tmp[0] = b[*r++];
1160: tmps = tmp;
1161: for (row=1; row<n; row++) {
1162: i = rout[row]; /* permuted row */
1163: v = aa + ai[i];
1164: vi = aj + ai[i];
1165: nz = a->diag[i] - ai[i];
1166: sum = b[*r++];
1167: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1168: tmp[row] = sum;
1169: }
1171: /* backward solve the upper triangular */
1172: for (row=n-1; row>=0; row--) {
1173: i = rout[row]; /* permuted row */
1174: v = aa + a->diag[i] + 1;
1175: vi = aj + a->diag[i] + 1;
1176: nz = ai[i+1] - a->diag[i] - 1;
1177: sum = tmp[row];
1178: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1179: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1180: }
1182: ISRestoreIndices(isrow,&rout);
1183: ISRestoreIndices(iscol,&cout);
1184: VecRestoreArrayRead(bb,&b);
1185: VecRestoreArrayWrite(xx,&x);
1186: PetscLogFlops(2.0*a->nz - A->cmap->n);
1187: return(0);
1188: }
1190: /* ----------------------------------------------------------- */
1191: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1192: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1193: {
1194: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1195: PetscErrorCode ierr;
1196: PetscInt n = A->rmap->n;
1197: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag;
1198: PetscScalar *x;
1199: const PetscScalar *b;
1200: const MatScalar *aa = a->a;
1201: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1202: PetscInt adiag_i,i,nz,ai_i;
1203: const PetscInt *vi;
1204: const MatScalar *v;
1205: PetscScalar sum;
1206: #endif
1209: if (!n) return(0);
1211: VecGetArrayRead(bb,&b);
1212: VecGetArrayWrite(xx,&x);
1214: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1215: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1216: #else
1217: /* forward solve the lower triangular */
1218: x[0] = b[0];
1219: for (i=1; i<n; i++) {
1220: ai_i = ai[i];
1221: v = aa + ai_i;
1222: vi = aj + ai_i;
1223: nz = adiag[i] - ai_i;
1224: sum = b[i];
1225: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1226: x[i] = sum;
1227: }
1229: /* backward solve the upper triangular */
1230: for (i=n-1; i>=0; i--) {
1231: adiag_i = adiag[i];
1232: v = aa + adiag_i + 1;
1233: vi = aj + adiag_i + 1;
1234: nz = ai[i+1] - adiag_i - 1;
1235: sum = x[i];
1236: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1237: x[i] = sum*aa[adiag_i];
1238: }
1239: #endif
1240: PetscLogFlops(2.0*a->nz - A->cmap->n);
1241: VecRestoreArrayRead(bb,&b);
1242: VecRestoreArrayWrite(xx,&x);
1243: return(0);
1244: }
1246: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1247: {
1248: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1249: IS iscol = a->col,isrow = a->row;
1250: PetscErrorCode ierr;
1251: PetscInt i, n = A->rmap->n,j;
1252: PetscInt nz;
1253: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1254: PetscScalar *x,*tmp,sum;
1255: const PetscScalar *b;
1256: const MatScalar *aa = a->a,*v;
1259: if (yy != xx) {VecCopy(yy,xx);}
1261: VecGetArrayRead(bb,&b);
1262: VecGetArray(xx,&x);
1263: tmp = a->solve_work;
1265: ISGetIndices(isrow,&rout); r = rout;
1266: ISGetIndices(iscol,&cout); c = cout + (n-1);
1268: /* forward solve the lower triangular */
1269: tmp[0] = b[*r++];
1270: for (i=1; i<n; i++) {
1271: v = aa + ai[i];
1272: vi = aj + ai[i];
1273: nz = a->diag[i] - ai[i];
1274: sum = b[*r++];
1275: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1276: tmp[i] = sum;
1277: }
1279: /* backward solve the upper triangular */
1280: for (i=n-1; i>=0; i--) {
1281: v = aa + a->diag[i] + 1;
1282: vi = aj + a->diag[i] + 1;
1283: nz = ai[i+1] - a->diag[i] - 1;
1284: sum = tmp[i];
1285: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1286: tmp[i] = sum*aa[a->diag[i]];
1287: x[*c--] += tmp[i];
1288: }
1290: ISRestoreIndices(isrow,&rout);
1291: ISRestoreIndices(iscol,&cout);
1292: VecRestoreArrayRead(bb,&b);
1293: VecRestoreArray(xx,&x);
1294: PetscLogFlops(2.0*a->nz);
1295: return(0);
1296: }
1298: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1299: {
1300: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1301: IS iscol = a->col,isrow = a->row;
1302: PetscErrorCode ierr;
1303: PetscInt i, n = A->rmap->n,j;
1304: PetscInt nz;
1305: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1306: PetscScalar *x,*tmp,sum;
1307: const PetscScalar *b;
1308: const MatScalar *aa = a->a,*v;
1311: if (yy != xx) {VecCopy(yy,xx);}
1313: VecGetArrayRead(bb,&b);
1314: VecGetArray(xx,&x);
1315: tmp = a->solve_work;
1317: ISGetIndices(isrow,&rout); r = rout;
1318: ISGetIndices(iscol,&cout); c = cout;
1320: /* forward solve the lower triangular */
1321: tmp[0] = b[r[0]];
1322: v = aa;
1323: vi = aj;
1324: for (i=1; i<n; i++) {
1325: nz = ai[i+1] - ai[i];
1326: sum = b[r[i]];
1327: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1328: tmp[i] = sum;
1329: v += nz;
1330: vi += nz;
1331: }
1333: /* backward solve the upper triangular */
1334: v = aa + adiag[n-1];
1335: vi = aj + adiag[n-1];
1336: for (i=n-1; i>=0; i--) {
1337: nz = adiag[i] - adiag[i+1] - 1;
1338: sum = tmp[i];
1339: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1340: tmp[i] = sum*v[nz];
1341: x[c[i]] += tmp[i];
1342: v += nz+1; vi += nz+1;
1343: }
1345: ISRestoreIndices(isrow,&rout);
1346: ISRestoreIndices(iscol,&cout);
1347: VecRestoreArrayRead(bb,&b);
1348: VecRestoreArray(xx,&x);
1349: PetscLogFlops(2.0*a->nz);
1350: return(0);
1351: }
1353: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1354: {
1355: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1356: IS iscol = a->col,isrow = a->row;
1357: PetscErrorCode ierr;
1358: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1359: PetscInt i,n = A->rmap->n,j;
1360: PetscInt nz;
1361: PetscScalar *x,*tmp,s1;
1362: const MatScalar *aa = a->a,*v;
1363: const PetscScalar *b;
1366: VecGetArrayRead(bb,&b);
1367: VecGetArrayWrite(xx,&x);
1368: tmp = a->solve_work;
1370: ISGetIndices(isrow,&rout); r = rout;
1371: ISGetIndices(iscol,&cout); c = cout;
1373: /* copy the b into temp work space according to permutation */
1374: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1376: /* forward solve the U^T */
1377: for (i=0; i<n; i++) {
1378: v = aa + diag[i];
1379: vi = aj + diag[i] + 1;
1380: nz = ai[i+1] - diag[i] - 1;
1381: s1 = tmp[i];
1382: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1383: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1384: tmp[i] = s1;
1385: }
1387: /* backward solve the L^T */
1388: for (i=n-1; i>=0; i--) {
1389: v = aa + diag[i] - 1;
1390: vi = aj + diag[i] - 1;
1391: nz = diag[i] - ai[i];
1392: s1 = tmp[i];
1393: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1394: }
1396: /* copy tmp into x according to permutation */
1397: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1399: ISRestoreIndices(isrow,&rout);
1400: ISRestoreIndices(iscol,&cout);
1401: VecRestoreArrayRead(bb,&b);
1402: VecRestoreArrayWrite(xx,&x);
1404: PetscLogFlops(2.0*a->nz-A->cmap->n);
1405: return(0);
1406: }
1408: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1409: {
1410: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1411: IS iscol = a->col,isrow = a->row;
1412: PetscErrorCode ierr;
1413: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1414: PetscInt i,n = A->rmap->n,j;
1415: PetscInt nz;
1416: PetscScalar *x,*tmp,s1;
1417: const MatScalar *aa = a->a,*v;
1418: const PetscScalar *b;
1421: VecGetArrayRead(bb,&b);
1422: VecGetArrayWrite(xx,&x);
1423: tmp = a->solve_work;
1425: ISGetIndices(isrow,&rout); r = rout;
1426: ISGetIndices(iscol,&cout); c = cout;
1428: /* copy the b into temp work space according to permutation */
1429: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1431: /* forward solve the U^T */
1432: for (i=0; i<n; i++) {
1433: v = aa + adiag[i+1] + 1;
1434: vi = aj + adiag[i+1] + 1;
1435: nz = adiag[i] - adiag[i+1] - 1;
1436: s1 = tmp[i];
1437: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1438: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1439: tmp[i] = s1;
1440: }
1442: /* backward solve the L^T */
1443: for (i=n-1; i>=0; i--) {
1444: v = aa + ai[i];
1445: vi = aj + ai[i];
1446: nz = ai[i+1] - ai[i];
1447: s1 = tmp[i];
1448: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1449: }
1451: /* copy tmp into x according to permutation */
1452: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1454: ISRestoreIndices(isrow,&rout);
1455: ISRestoreIndices(iscol,&cout);
1456: VecRestoreArrayRead(bb,&b);
1457: VecRestoreArrayWrite(xx,&x);
1459: PetscLogFlops(2.0*a->nz-A->cmap->n);
1460: return(0);
1461: }
1463: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1464: {
1465: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1466: IS iscol = a->col,isrow = a->row;
1467: PetscErrorCode ierr;
1468: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1469: PetscInt i,n = A->rmap->n,j;
1470: PetscInt nz;
1471: PetscScalar *x,*tmp,s1;
1472: const MatScalar *aa = a->a,*v;
1473: const PetscScalar *b;
1476: if (zz != xx) {VecCopy(zz,xx);}
1477: VecGetArrayRead(bb,&b);
1478: VecGetArray(xx,&x);
1479: tmp = a->solve_work;
1481: ISGetIndices(isrow,&rout); r = rout;
1482: ISGetIndices(iscol,&cout); c = cout;
1484: /* copy the b into temp work space according to permutation */
1485: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1487: /* forward solve the U^T */
1488: for (i=0; i<n; i++) {
1489: v = aa + diag[i];
1490: vi = aj + diag[i] + 1;
1491: nz = ai[i+1] - diag[i] - 1;
1492: s1 = tmp[i];
1493: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1494: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1495: tmp[i] = s1;
1496: }
1498: /* backward solve the L^T */
1499: for (i=n-1; i>=0; i--) {
1500: v = aa + diag[i] - 1;
1501: vi = aj + diag[i] - 1;
1502: nz = diag[i] - ai[i];
1503: s1 = tmp[i];
1504: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1505: }
1507: /* copy tmp into x according to permutation */
1508: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1510: ISRestoreIndices(isrow,&rout);
1511: ISRestoreIndices(iscol,&cout);
1512: VecRestoreArrayRead(bb,&b);
1513: VecRestoreArray(xx,&x);
1515: PetscLogFlops(2.0*a->nz-A->cmap->n);
1516: return(0);
1517: }
1519: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1520: {
1521: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1522: IS iscol = a->col,isrow = a->row;
1523: PetscErrorCode ierr;
1524: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1525: PetscInt i,n = A->rmap->n,j;
1526: PetscInt nz;
1527: PetscScalar *x,*tmp,s1;
1528: const MatScalar *aa = a->a,*v;
1529: const PetscScalar *b;
1532: if (zz != xx) {VecCopy(zz,xx);}
1533: VecGetArrayRead(bb,&b);
1534: VecGetArray(xx,&x);
1535: tmp = a->solve_work;
1537: ISGetIndices(isrow,&rout); r = rout;
1538: ISGetIndices(iscol,&cout); c = cout;
1540: /* copy the b into temp work space according to permutation */
1541: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1543: /* forward solve the U^T */
1544: for (i=0; i<n; i++) {
1545: v = aa + adiag[i+1] + 1;
1546: vi = aj + adiag[i+1] + 1;
1547: nz = adiag[i] - adiag[i+1] - 1;
1548: s1 = tmp[i];
1549: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1550: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1551: tmp[i] = s1;
1552: }
1555: /* backward solve the L^T */
1556: for (i=n-1; i>=0; i--) {
1557: v = aa + ai[i];
1558: vi = aj + ai[i];
1559: nz = ai[i+1] - ai[i];
1560: s1 = tmp[i];
1561: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1562: }
1564: /* copy tmp into x according to permutation */
1565: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1567: ISRestoreIndices(isrow,&rout);
1568: ISRestoreIndices(iscol,&cout);
1569: VecRestoreArrayRead(bb,&b);
1570: VecRestoreArray(xx,&x);
1572: PetscLogFlops(2.0*a->nz-A->cmap->n);
1573: return(0);
1574: }
1576: /* ----------------------------------------------------------------*/
1578: /*
1579: ilu() under revised new data structure.
1580: Factored arrays bj and ba are stored as
1581: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1583: bi=fact->i is an array of size n+1, in which
1584: bi+
1585: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1586: bi[n]: points to L(n-1,n-1)+1
1588: bdiag=fact->diag is an array of size n+1,in which
1589: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1590: bdiag[n]: points to entry of U(n-1,0)-1
1592: U(i,:) contains bdiag[i] as its last entry, i.e.,
1593: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1594: */
1595: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1596: {
1597: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1599: const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1600: PetscInt i,j,k=0,nz,*bi,*bj,*bdiag;
1601: IS isicol;
1604: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1605: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);
1606: b = (Mat_SeqAIJ*)(fact)->data;
1608: /* allocate matrix arrays for new data structure */
1609: PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);
1610: PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));
1612: b->singlemalloc = PETSC_TRUE;
1613: if (!b->diag) {
1614: PetscMalloc1(n+1,&b->diag);
1615: PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));
1616: }
1617: bdiag = b->diag;
1619: if (n > 0) {
1620: PetscArrayzero(b->a,ai[n]);
1621: }
1623: /* set bi and bj with new data structure */
1624: bi = b->i;
1625: bj = b->j;
1627: /* L part */
1628: bi[0] = 0;
1629: for (i=0; i<n; i++) {
1630: nz = adiag[i] - ai[i];
1631: bi[i+1] = bi[i] + nz;
1632: aj = a->j + ai[i];
1633: for (j=0; j<nz; j++) {
1634: /* *bj = aj[j]; bj++; */
1635: bj[k++] = aj[j];
1636: }
1637: }
1639: /* U part */
1640: bdiag[n] = bi[n]-1;
1641: for (i=n-1; i>=0; i--) {
1642: nz = ai[i+1] - adiag[i] - 1;
1643: aj = a->j + adiag[i] + 1;
1644: for (j=0; j<nz; j++) {
1645: /* *bj = aj[j]; bj++; */
1646: bj[k++] = aj[j];
1647: }
1648: /* diag[i] */
1649: /* *bj = i; bj++; */
1650: bj[k++] = i;
1651: bdiag[i] = bdiag[i+1] + nz + 1;
1652: }
1654: fact->factortype = MAT_FACTOR_ILU;
1655: fact->info.factor_mallocs = 0;
1656: fact->info.fill_ratio_given = info->fill;
1657: fact->info.fill_ratio_needed = 1.0;
1658: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1659: MatSeqAIJCheckInode_FactorLU(fact);
1661: b = (Mat_SeqAIJ*)(fact)->data;
1662: b->row = isrow;
1663: b->col = iscol;
1664: b->icol = isicol;
1665: PetscMalloc1(fact->rmap->n+1,&b->solve_work);
1666: PetscObjectReference((PetscObject)isrow);
1667: PetscObjectReference((PetscObject)iscol);
1668: return(0);
1669: }
1671: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1672: {
1673: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1674: IS isicol;
1675: PetscErrorCode ierr;
1676: const PetscInt *r,*ic;
1677: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1678: PetscInt *bi,*cols,nnz,*cols_lvl;
1679: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1680: PetscInt i,levels,diagonal_fill;
1681: PetscBool col_identity,row_identity,missing;
1682: PetscReal f;
1683: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1684: PetscBT lnkbt;
1685: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1686: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1687: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1690: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1691: MatMissingDiagonal(A,&missing,&i);
1692: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1693:
1694: levels = (PetscInt)info->levels;
1695: ISIdentity(isrow,&row_identity);
1696: ISIdentity(iscol,&col_identity);
1697: if (!levels && row_identity && col_identity) {
1698: /* special case: ilu(0) with natural ordering */
1699: MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);
1700: if (a->inode.size) {
1701: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1702: }
1703: return(0);
1704: }
1706: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1707: ISGetIndices(isrow,&r);
1708: ISGetIndices(isicol,&ic);
1710: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1711: PetscMalloc1(n+1,&bi);
1712: PetscMalloc1(n+1,&bdiag);
1713: bi[0] = bdiag[0] = 0;
1714: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1716: /* create a linked list for storing column indices of the active row */
1717: nlnk = n + 1;
1718: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1720: /* initial FreeSpace size is f*(ai[n]+1) */
1721: f = info->fill;
1722: diagonal_fill = (PetscInt)info->diagonal_fill;
1723: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
1724: current_space = free_space;
1725: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);
1726: current_space_lvl = free_space_lvl;
1727: for (i=0; i<n; i++) {
1728: nzi = 0;
1729: /* copy current row into linked list */
1730: nnz = ai[r[i]+1] - ai[r[i]];
1731: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1732: cols = aj + ai[r[i]];
1733: lnk[i] = -1; /* marker to indicate if diagonal exists */
1734: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1735: nzi += nlnk;
1737: /* make sure diagonal entry is included */
1738: if (diagonal_fill && lnk[i] == -1) {
1739: fm = n;
1740: while (lnk[fm] < i) fm = lnk[fm];
1741: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1742: lnk[fm] = i;
1743: lnk_lvl[i] = 0;
1744: nzi++; dcount++;
1745: }
1747: /* add pivot rows into the active row */
1748: nzbd = 0;
1749: prow = lnk[n];
1750: while (prow < i) {
1751: nnz = bdiag[prow];
1752: cols = bj_ptr[prow] + nnz + 1;
1753: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1754: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1755: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1756: nzi += nlnk;
1757: prow = lnk[prow];
1758: nzbd++;
1759: }
1760: bdiag[i] = nzbd;
1761: bi[i+1] = bi[i] + nzi;
1762: /* if free space is not available, make more free space */
1763: if (current_space->local_remaining<nzi) {
1764: nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */
1765: PetscFreeSpaceGet(nnz,¤t_space);
1766: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1767: reallocs++;
1768: }
1770: /* copy data into free_space and free_space_lvl, then initialize lnk */
1771: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1772: bj_ptr[i] = current_space->array;
1773: bjlvl_ptr[i] = current_space_lvl->array;
1775: /* make sure the active row i has diagonal entry */
1776: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1778: current_space->array += nzi;
1779: current_space->local_used += nzi;
1780: current_space->local_remaining -= nzi;
1781: current_space_lvl->array += nzi;
1782: current_space_lvl->local_used += nzi;
1783: current_space_lvl->local_remaining -= nzi;
1784: }
1786: ISRestoreIndices(isrow,&r);
1787: ISRestoreIndices(isicol,&ic);
1788: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1789: PetscMalloc1(bi[n]+1,&bj);
1790: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
1792: PetscIncompleteLLDestroy(lnk,lnkbt);
1793: PetscFreeSpaceDestroy(free_space_lvl);
1794: PetscFree2(bj_ptr,bjlvl_ptr);
1796: #if defined(PETSC_USE_INFO)
1797: {
1798: PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1799: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
1800: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
1801: PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
1802: PetscInfo(A,"for best performance.\n");
1803: if (diagonal_fill) {
1804: PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);
1805: }
1806: }
1807: #endif
1808: /* put together the new matrix */
1809: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
1810: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
1811: b = (Mat_SeqAIJ*)(fact)->data;
1813: b->free_a = PETSC_TRUE;
1814: b->free_ij = PETSC_TRUE;
1815: b->singlemalloc = PETSC_FALSE;
1817: PetscMalloc1(bdiag[0]+1,&b->a);
1819: b->j = bj;
1820: b->i = bi;
1821: b->diag = bdiag;
1822: b->ilen = 0;
1823: b->imax = 0;
1824: b->row = isrow;
1825: b->col = iscol;
1826: PetscObjectReference((PetscObject)isrow);
1827: PetscObjectReference((PetscObject)iscol);
1828: b->icol = isicol;
1830: PetscMalloc1(n+1,&b->solve_work);
1831: /* In b structure: Free imax, ilen, old a, old j.
1832: Allocate bdiag, solve_work, new a, new j */
1833: PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
1834: b->maxnz = b->nz = bdiag[0]+1;
1836: (fact)->info.factor_mallocs = reallocs;
1837: (fact)->info.fill_ratio_given = f;
1838: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1839: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1840: if (a->inode.size) {
1841: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1842: }
1843: MatSeqAIJCheckInode_FactorLU(fact);
1844: return(0);
1845: }
1847: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1848: {
1849: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1850: IS isicol;
1851: PetscErrorCode ierr;
1852: const PetscInt *r,*ic;
1853: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1854: PetscInt *bi,*cols,nnz,*cols_lvl;
1855: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1856: PetscInt i,levels,diagonal_fill;
1857: PetscBool col_identity,row_identity;
1858: PetscReal f;
1859: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1860: PetscBT lnkbt;
1861: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1862: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1863: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1864: PetscBool missing;
1867: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1868: MatMissingDiagonal(A,&missing,&i);
1869: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1871: f = info->fill;
1872: levels = (PetscInt)info->levels;
1873: diagonal_fill = (PetscInt)info->diagonal_fill;
1875: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1877: ISIdentity(isrow,&row_identity);
1878: ISIdentity(iscol,&col_identity);
1879: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1880: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);
1882: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1883: if (a->inode.size) {
1884: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1885: }
1886: fact->factortype = MAT_FACTOR_ILU;
1887: (fact)->info.factor_mallocs = 0;
1888: (fact)->info.fill_ratio_given = info->fill;
1889: (fact)->info.fill_ratio_needed = 1.0;
1891: b = (Mat_SeqAIJ*)(fact)->data;
1892: b->row = isrow;
1893: b->col = iscol;
1894: b->icol = isicol;
1895: PetscMalloc1((fact)->rmap->n+1,&b->solve_work);
1896: PetscObjectReference((PetscObject)isrow);
1897: PetscObjectReference((PetscObject)iscol);
1898: return(0);
1899: }
1901: ISGetIndices(isrow,&r);
1902: ISGetIndices(isicol,&ic);
1904: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1905: PetscMalloc1(n+1,&bi);
1906: PetscMalloc1(n+1,&bdiag);
1907: bi[0] = bdiag[0] = 0;
1909: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1911: /* create a linked list for storing column indices of the active row */
1912: nlnk = n + 1;
1913: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1915: /* initial FreeSpace size is f*(ai[n]+1) */
1916: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
1917: current_space = free_space;
1918: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);
1919: current_space_lvl = free_space_lvl;
1921: for (i=0; i<n; i++) {
1922: nzi = 0;
1923: /* copy current row into linked list */
1924: nnz = ai[r[i]+1] - ai[r[i]];
1925: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1926: cols = aj + ai[r[i]];
1927: lnk[i] = -1; /* marker to indicate if diagonal exists */
1928: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1929: nzi += nlnk;
1931: /* make sure diagonal entry is included */
1932: if (diagonal_fill && lnk[i] == -1) {
1933: fm = n;
1934: while (lnk[fm] < i) fm = lnk[fm];
1935: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1936: lnk[fm] = i;
1937: lnk_lvl[i] = 0;
1938: nzi++; dcount++;
1939: }
1941: /* add pivot rows into the active row */
1942: nzbd = 0;
1943: prow = lnk[n];
1944: while (prow < i) {
1945: nnz = bdiag[prow];
1946: cols = bj_ptr[prow] + nnz + 1;
1947: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1948: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1949: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1950: nzi += nlnk;
1951: prow = lnk[prow];
1952: nzbd++;
1953: }
1954: bdiag[i] = nzbd;
1955: bi[i+1] = bi[i] + nzi;
1957: /* if free space is not available, make more free space */
1958: if (current_space->local_remaining<nzi) {
1959: nnz = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */
1960: PetscFreeSpaceGet(nnz,¤t_space);
1961: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1962: reallocs++;
1963: }
1965: /* copy data into free_space and free_space_lvl, then initialize lnk */
1966: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1967: bj_ptr[i] = current_space->array;
1968: bjlvl_ptr[i] = current_space_lvl->array;
1970: /* make sure the active row i has diagonal entry */
1971: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1973: current_space->array += nzi;
1974: current_space->local_used += nzi;
1975: current_space->local_remaining -= nzi;
1976: current_space_lvl->array += nzi;
1977: current_space_lvl->local_used += nzi;
1978: current_space_lvl->local_remaining -= nzi;
1979: }
1981: ISRestoreIndices(isrow,&r);
1982: ISRestoreIndices(isicol,&ic);
1984: /* destroy list of free space and other temporary arrays */
1985: PetscMalloc1(bi[n]+1,&bj);
1986: PetscFreeSpaceContiguous(&free_space,bj); /* copy free_space -> bj */
1987: PetscIncompleteLLDestroy(lnk,lnkbt);
1988: PetscFreeSpaceDestroy(free_space_lvl);
1989: PetscFree2(bj_ptr,bjlvl_ptr);
1991: #if defined(PETSC_USE_INFO)
1992: {
1993: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1994: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
1995: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
1996: PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
1997: PetscInfo(A,"for best performance.\n");
1998: if (diagonal_fill) {
1999: PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);
2000: }
2001: }
2002: #endif
2004: /* put together the new matrix */
2005: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
2006: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
2007: b = (Mat_SeqAIJ*)(fact)->data;
2009: b->free_a = PETSC_TRUE;
2010: b->free_ij = PETSC_TRUE;
2011: b->singlemalloc = PETSC_FALSE;
2013: PetscMalloc1(bi[n],&b->a);
2014: b->j = bj;
2015: b->i = bi;
2016: for (i=0; i<n; i++) bdiag[i] += bi[i];
2017: b->diag = bdiag;
2018: b->ilen = 0;
2019: b->imax = 0;
2020: b->row = isrow;
2021: b->col = iscol;
2022: PetscObjectReference((PetscObject)isrow);
2023: PetscObjectReference((PetscObject)iscol);
2024: b->icol = isicol;
2025: PetscMalloc1(n+1,&b->solve_work);
2026: /* In b structure: Free imax, ilen, old a, old j.
2027: Allocate bdiag, solve_work, new a, new j */
2028: PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
2029: b->maxnz = b->nz = bi[n];
2031: (fact)->info.factor_mallocs = reallocs;
2032: (fact)->info.fill_ratio_given = f;
2033: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2034: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2035: if (a->inode.size) {
2036: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2037: }
2038: return(0);
2039: }
2041: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2042: {
2043: Mat C = B;
2044: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2045: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2046: IS ip=b->row,iip = b->icol;
2048: const PetscInt *rip,*riip;
2049: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2050: PetscInt *ai=a->i,*aj=a->j;
2051: PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2052: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2053: PetscBool perm_identity;
2054: FactorShiftCtx sctx;
2055: PetscReal rs;
2056: MatScalar d,*v;
2059: /* MatPivotSetUp(): initialize shift context sctx */
2060: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2062: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2063: sctx.shift_top = info->zeropivot;
2064: for (i=0; i<mbs; i++) {
2065: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2066: d = (aa)[a->diag[i]];
2067: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2068: v = aa+ai[i];
2069: nz = ai[i+1] - ai[i];
2070: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2071: if (rs>sctx.shift_top) sctx.shift_top = rs;
2072: }
2073: sctx.shift_top *= 1.1;
2074: sctx.nshift_max = 5;
2075: sctx.shift_lo = 0.;
2076: sctx.shift_hi = 1.;
2077: }
2079: ISGetIndices(ip,&rip);
2080: ISGetIndices(iip,&riip);
2082: /* allocate working arrays
2083: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2084: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2085: */
2086: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);
2088: do {
2089: sctx.newshift = PETSC_FALSE;
2091: for (i=0; i<mbs; i++) c2r[i] = mbs;
2092: if (mbs) il[0] = 0;
2094: for (k = 0; k<mbs; k++) {
2095: /* zero rtmp */
2096: nz = bi[k+1] - bi[k];
2097: bjtmp = bj + bi[k];
2098: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2100: /* load in initial unfactored row */
2101: bval = ba + bi[k];
2102: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2103: for (j = jmin; j < jmax; j++) {
2104: col = riip[aj[j]];
2105: if (col >= k) { /* only take upper triangular entry */
2106: rtmp[col] = aa[j];
2107: *bval++ = 0.0; /* for in-place factorization */
2108: }
2109: }
2110: /* shift the diagonal of the matrix: ZeropivotApply() */
2111: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2113: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2114: dk = rtmp[k];
2115: i = c2r[k]; /* first row to be added to k_th row */
2117: while (i < k) {
2118: nexti = c2r[i]; /* next row to be added to k_th row */
2120: /* compute multiplier, update diag(k) and U(i,k) */
2121: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2122: uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2123: dk += uikdi*ba[ili]; /* update diag[k] */
2124: ba[ili] = uikdi; /* -U(i,k) */
2126: /* add multiple of row i to k-th row */
2127: jmin = ili + 1; jmax = bi[i+1];
2128: if (jmin < jmax) {
2129: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2130: /* update il and c2r for row i */
2131: il[i] = jmin;
2132: j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2133: }
2134: i = nexti;
2135: }
2137: /* copy data into U(k,:) */
2138: rs = 0.0;
2139: jmin = bi[k]; jmax = bi[k+1]-1;
2140: if (jmin < jmax) {
2141: for (j=jmin; j<jmax; j++) {
2142: col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2143: }
2144: /* add the k-th row into il and c2r */
2145: il[k] = jmin;
2146: i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2147: }
2149: /* MatPivotCheck() */
2150: sctx.rs = rs;
2151: sctx.pv = dk;
2152: MatPivotCheck(B,A,info,&sctx,i);
2153: if (sctx.newshift) break;
2154: dk = sctx.pv;
2156: ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2157: }
2158: } while (sctx.newshift);
2160: PetscFree3(rtmp,il,c2r);
2161: ISRestoreIndices(ip,&rip);
2162: ISRestoreIndices(iip,&riip);
2164: ISIdentity(ip,&perm_identity);
2165: if (perm_identity) {
2166: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2167: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2168: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2169: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2170: } else {
2171: B->ops->solve = MatSolve_SeqSBAIJ_1;
2172: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2173: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2174: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2175: }
2177: C->assembled = PETSC_TRUE;
2178: C->preallocated = PETSC_TRUE;
2180: PetscLogFlops(C->rmap->n);
2182: /* MatPivotView() */
2183: if (sctx.nshift) {
2184: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2185: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
2186: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2187: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2188: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2189: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
2190: }
2191: }
2192: return(0);
2193: }
2195: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2196: {
2197: Mat C = B;
2198: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2199: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2200: IS ip=b->row,iip = b->icol;
2202: const PetscInt *rip,*riip;
2203: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2204: PetscInt *ai=a->i,*aj=a->j;
2205: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2206: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2207: PetscBool perm_identity;
2208: FactorShiftCtx sctx;
2209: PetscReal rs;
2210: MatScalar d,*v;
2213: /* MatPivotSetUp(): initialize shift context sctx */
2214: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2216: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2217: sctx.shift_top = info->zeropivot;
2218: for (i=0; i<mbs; i++) {
2219: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2220: d = (aa)[a->diag[i]];
2221: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2222: v = aa+ai[i];
2223: nz = ai[i+1] - ai[i];
2224: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2225: if (rs>sctx.shift_top) sctx.shift_top = rs;
2226: }
2227: sctx.shift_top *= 1.1;
2228: sctx.nshift_max = 5;
2229: sctx.shift_lo = 0.;
2230: sctx.shift_hi = 1.;
2231: }
2233: ISGetIndices(ip,&rip);
2234: ISGetIndices(iip,&riip);
2236: /* initialization */
2237: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);
2239: do {
2240: sctx.newshift = PETSC_FALSE;
2242: for (i=0; i<mbs; i++) jl[i] = mbs;
2243: il[0] = 0;
2245: for (k = 0; k<mbs; k++) {
2246: /* zero rtmp */
2247: nz = bi[k+1] - bi[k];
2248: bjtmp = bj + bi[k];
2249: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2251: bval = ba + bi[k];
2252: /* initialize k-th row by the perm[k]-th row of A */
2253: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2254: for (j = jmin; j < jmax; j++) {
2255: col = riip[aj[j]];
2256: if (col >= k) { /* only take upper triangular entry */
2257: rtmp[col] = aa[j];
2258: *bval++ = 0.0; /* for in-place factorization */
2259: }
2260: }
2261: /* shift the diagonal of the matrix */
2262: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2264: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2265: dk = rtmp[k];
2266: i = jl[k]; /* first row to be added to k_th row */
2268: while (i < k) {
2269: nexti = jl[i]; /* next row to be added to k_th row */
2271: /* compute multiplier, update diag(k) and U(i,k) */
2272: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2273: uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2274: dk += uikdi*ba[ili];
2275: ba[ili] = uikdi; /* -U(i,k) */
2277: /* add multiple of row i to k-th row */
2278: jmin = ili + 1; jmax = bi[i+1];
2279: if (jmin < jmax) {
2280: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2281: /* update il and jl for row i */
2282: il[i] = jmin;
2283: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2284: }
2285: i = nexti;
2286: }
2288: /* shift the diagonals when zero pivot is detected */
2289: /* compute rs=sum of abs(off-diagonal) */
2290: rs = 0.0;
2291: jmin = bi[k]+1;
2292: nz = bi[k+1] - jmin;
2293: bcol = bj + jmin;
2294: for (j=0; j<nz; j++) {
2295: rs += PetscAbsScalar(rtmp[bcol[j]]);
2296: }
2298: sctx.rs = rs;
2299: sctx.pv = dk;
2300: MatPivotCheck(B,A,info,&sctx,k);
2301: if (sctx.newshift) break;
2302: dk = sctx.pv;
2304: /* copy data into U(k,:) */
2305: ba[bi[k]] = 1.0/dk; /* U(k,k) */
2306: jmin = bi[k]+1; jmax = bi[k+1];
2307: if (jmin < jmax) {
2308: for (j=jmin; j<jmax; j++) {
2309: col = bj[j]; ba[j] = rtmp[col];
2310: }
2311: /* add the k-th row into il and jl */
2312: il[k] = jmin;
2313: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2314: }
2315: }
2316: } while (sctx.newshift);
2318: PetscFree3(rtmp,il,jl);
2319: ISRestoreIndices(ip,&rip);
2320: ISRestoreIndices(iip,&riip);
2322: ISIdentity(ip,&perm_identity);
2323: if (perm_identity) {
2324: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2325: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2326: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2327: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2328: } else {
2329: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2330: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2331: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2332: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2333: }
2335: C->assembled = PETSC_TRUE;
2336: C->preallocated = PETSC_TRUE;
2338: PetscLogFlops(C->rmap->n);
2339: if (sctx.nshift) {
2340: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2341: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2342: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2343: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2344: }
2345: }
2346: return(0);
2347: }
2349: /*
2350: icc() under revised new data structure.
2351: Factored arrays bj and ba are stored as
2352: U(0,:),...,U(i,:),U(n-1,:)
2354: ui=fact->i is an array of size n+1, in which
2355: ui+
2356: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2357: ui[n]: points to U(n-1,n-1)+1
2359: udiag=fact->diag is an array of size n,in which
2360: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2362: U(i,:) contains udiag[i] as its last entry, i.e.,
2363: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2364: */
2366: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2367: {
2368: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2369: Mat_SeqSBAIJ *b;
2370: PetscErrorCode ierr;
2371: PetscBool perm_identity,missing;
2372: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2373: const PetscInt *rip,*riip;
2374: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2375: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2376: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2377: PetscReal fill =info->fill,levels=info->levels;
2378: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2379: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2380: PetscBT lnkbt;
2381: IS iperm;
2384: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2385: MatMissingDiagonal(A,&missing,&d);
2386: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2387: ISIdentity(perm,&perm_identity);
2388: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2390: PetscMalloc1(am+1,&ui);
2391: PetscMalloc1(am+1,&udiag);
2392: ui[0] = 0;
2394: /* ICC(0) without matrix ordering: simply rearrange column indices */
2395: if (!levels && perm_identity) {
2396: for (i=0; i<am; i++) {
2397: ncols = ai[i+1] - a->diag[i];
2398: ui[i+1] = ui[i] + ncols;
2399: udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2400: }
2401: PetscMalloc1(ui[am]+1,&uj);
2402: cols = uj;
2403: for (i=0; i<am; i++) {
2404: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2405: ncols = ai[i+1] - a->diag[i] -1;
2406: for (j=0; j<ncols; j++) *cols++ = aj[j];
2407: *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2408: }
2409: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2410: ISGetIndices(iperm,&riip);
2411: ISGetIndices(perm,&rip);
2413: /* initialization */
2414: PetscMalloc1(am+1,&ajtmp);
2416: /* jl: linked list for storing indices of the pivot rows
2417: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2418: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2419: for (i=0; i<am; i++) {
2420: jl[i] = am; il[i] = 0;
2421: }
2423: /* create and initialize a linked list for storing column indices of the active row k */
2424: nlnk = am + 1;
2425: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2427: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2428: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);
2429: current_space = free_space;
2430: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);
2431: current_space_lvl = free_space_lvl;
2433: for (k=0; k<am; k++) { /* for each active row k */
2434: /* initialize lnk by the column indices of row rip[k] of A */
2435: nzk = 0;
2436: ncols = ai[rip[k]+1] - ai[rip[k]];
2437: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2438: ncols_upper = 0;
2439: for (j=0; j<ncols; j++) {
2440: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2441: if (riip[i] >= k) { /* only take upper triangular entry */
2442: ajtmp[ncols_upper] = i;
2443: ncols_upper++;
2444: }
2445: }
2446: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2447: nzk += nlnk;
2449: /* update lnk by computing fill-in for each pivot row to be merged in */
2450: prow = jl[k]; /* 1st pivot row */
2452: while (prow < k) {
2453: nextprow = jl[prow];
2455: /* merge prow into k-th row */
2456: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2457: jmax = ui[prow+1];
2458: ncols = jmax-jmin;
2459: i = jmin - ui[prow];
2460: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2461: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2462: j = *(uj - 1);
2463: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2464: nzk += nlnk;
2466: /* update il and jl for prow */
2467: if (jmin < jmax) {
2468: il[prow] = jmin;
2469: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2470: }
2471: prow = nextprow;
2472: }
2474: /* if free space is not available, make more free space */
2475: if (current_space->local_remaining<nzk) {
2476: i = am - k + 1; /* num of unfactored rows */
2477: i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2478: PetscFreeSpaceGet(i,¤t_space);
2479: PetscFreeSpaceGet(i,¤t_space_lvl);
2480: reallocs++;
2481: }
2483: /* copy data into free_space and free_space_lvl, then initialize lnk */
2484: if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2485: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2487: /* add the k-th row into il and jl */
2488: if (nzk > 1) {
2489: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2490: jl[k] = jl[i]; jl[i] = k;
2491: il[k] = ui[k] + 1;
2492: }
2493: uj_ptr[k] = current_space->array;
2494: uj_lvl_ptr[k] = current_space_lvl->array;
2496: current_space->array += nzk;
2497: current_space->local_used += nzk;
2498: current_space->local_remaining -= nzk;
2500: current_space_lvl->array += nzk;
2501: current_space_lvl->local_used += nzk;
2502: current_space_lvl->local_remaining -= nzk;
2504: ui[k+1] = ui[k] + nzk;
2505: }
2507: ISRestoreIndices(perm,&rip);
2508: ISRestoreIndices(iperm,&riip);
2509: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2510: PetscFree(ajtmp);
2512: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2513: PetscMalloc1(ui[am]+1,&uj);
2514: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2515: PetscIncompleteLLDestroy(lnk,lnkbt);
2516: PetscFreeSpaceDestroy(free_space_lvl);
2518: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2520: /* put together the new matrix in MATSEQSBAIJ format */
2521: b = (Mat_SeqSBAIJ*)(fact)->data;
2522: b->singlemalloc = PETSC_FALSE;
2524: PetscMalloc1(ui[am]+1,&b->a);
2526: b->j = uj;
2527: b->i = ui;
2528: b->diag = udiag;
2529: b->free_diag = PETSC_TRUE;
2530: b->ilen = 0;
2531: b->imax = 0;
2532: b->row = perm;
2533: b->col = perm;
2534: PetscObjectReference((PetscObject)perm);
2535: PetscObjectReference((PetscObject)perm);
2536: b->icol = iperm;
2537: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2539: PetscMalloc1(am+1,&b->solve_work);
2540: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2542: b->maxnz = b->nz = ui[am];
2543: b->free_a = PETSC_TRUE;
2544: b->free_ij = PETSC_TRUE;
2546: fact->info.factor_mallocs = reallocs;
2547: fact->info.fill_ratio_given = fill;
2548: if (ai[am] != 0) {
2549: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2550: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2551: } else {
2552: fact->info.fill_ratio_needed = 0.0;
2553: }
2554: #if defined(PETSC_USE_INFO)
2555: if (ai[am] != 0) {
2556: PetscReal af = fact->info.fill_ratio_needed;
2557: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2558: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2559: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2560: } else {
2561: PetscInfo(A,"Empty matrix\n");
2562: }
2563: #endif
2564: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2565: return(0);
2566: }
2568: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2569: {
2570: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2571: Mat_SeqSBAIJ *b;
2572: PetscErrorCode ierr;
2573: PetscBool perm_identity,missing;
2574: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2575: const PetscInt *rip,*riip;
2576: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2577: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2578: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2579: PetscReal fill =info->fill,levels=info->levels;
2580: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2581: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2582: PetscBT lnkbt;
2583: IS iperm;
2586: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2587: MatMissingDiagonal(A,&missing,&d);
2588: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2589: ISIdentity(perm,&perm_identity);
2590: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2592: PetscMalloc1(am+1,&ui);
2593: PetscMalloc1(am+1,&udiag);
2594: ui[0] = 0;
2596: /* ICC(0) without matrix ordering: simply copies fill pattern */
2597: if (!levels && perm_identity) {
2599: for (i=0; i<am; i++) {
2600: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
2601: udiag[i] = ui[i];
2602: }
2603: PetscMalloc1(ui[am]+1,&uj);
2604: cols = uj;
2605: for (i=0; i<am; i++) {
2606: aj = a->j + a->diag[i];
2607: ncols = ui[i+1] - ui[i];
2608: for (j=0; j<ncols; j++) *cols++ = *aj++;
2609: }
2610: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2611: ISGetIndices(iperm,&riip);
2612: ISGetIndices(perm,&rip);
2614: /* initialization */
2615: PetscMalloc1(am+1,&ajtmp);
2617: /* jl: linked list for storing indices of the pivot rows
2618: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2619: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2620: for (i=0; i<am; i++) {
2621: jl[i] = am; il[i] = 0;
2622: }
2624: /* create and initialize a linked list for storing column indices of the active row k */
2625: nlnk = am + 1;
2626: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2628: /* initial FreeSpace size is fill*(ai[am]+1) */
2629: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);
2630: current_space = free_space;
2631: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);
2632: current_space_lvl = free_space_lvl;
2634: for (k=0; k<am; k++) { /* for each active row k */
2635: /* initialize lnk by the column indices of row rip[k] of A */
2636: nzk = 0;
2637: ncols = ai[rip[k]+1] - ai[rip[k]];
2638: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2639: ncols_upper = 0;
2640: for (j=0; j<ncols; j++) {
2641: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2642: if (riip[i] >= k) { /* only take upper triangular entry */
2643: ajtmp[ncols_upper] = i;
2644: ncols_upper++;
2645: }
2646: }
2647: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2648: nzk += nlnk;
2650: /* update lnk by computing fill-in for each pivot row to be merged in */
2651: prow = jl[k]; /* 1st pivot row */
2653: while (prow < k) {
2654: nextprow = jl[prow];
2656: /* merge prow into k-th row */
2657: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2658: jmax = ui[prow+1];
2659: ncols = jmax-jmin;
2660: i = jmin - ui[prow];
2661: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2662: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2663: j = *(uj - 1);
2664: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2665: nzk += nlnk;
2667: /* update il and jl for prow */
2668: if (jmin < jmax) {
2669: il[prow] = jmin;
2670: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2671: }
2672: prow = nextprow;
2673: }
2675: /* if free space is not available, make more free space */
2676: if (current_space->local_remaining<nzk) {
2677: i = am - k + 1; /* num of unfactored rows */
2678: i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2679: PetscFreeSpaceGet(i,¤t_space);
2680: PetscFreeSpaceGet(i,¤t_space_lvl);
2681: reallocs++;
2682: }
2684: /* copy data into free_space and free_space_lvl, then initialize lnk */
2685: if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2686: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2688: /* add the k-th row into il and jl */
2689: if (nzk > 1) {
2690: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2691: jl[k] = jl[i]; jl[i] = k;
2692: il[k] = ui[k] + 1;
2693: }
2694: uj_ptr[k] = current_space->array;
2695: uj_lvl_ptr[k] = current_space_lvl->array;
2697: current_space->array += nzk;
2698: current_space->local_used += nzk;
2699: current_space->local_remaining -= nzk;
2701: current_space_lvl->array += nzk;
2702: current_space_lvl->local_used += nzk;
2703: current_space_lvl->local_remaining -= nzk;
2705: ui[k+1] = ui[k] + nzk;
2706: }
2708: #if defined(PETSC_USE_INFO)
2709: if (ai[am] != 0) {
2710: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2711: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2712: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2713: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2714: } else {
2715: PetscInfo(A,"Empty matrix\n");
2716: }
2717: #endif
2719: ISRestoreIndices(perm,&rip);
2720: ISRestoreIndices(iperm,&riip);
2721: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2722: PetscFree(ajtmp);
2724: /* destroy list of free space and other temporary array(s) */
2725: PetscMalloc1(ui[am]+1,&uj);
2726: PetscFreeSpaceContiguous(&free_space,uj);
2727: PetscIncompleteLLDestroy(lnk,lnkbt);
2728: PetscFreeSpaceDestroy(free_space_lvl);
2730: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2732: /* put together the new matrix in MATSEQSBAIJ format */
2734: b = (Mat_SeqSBAIJ*)fact->data;
2735: b->singlemalloc = PETSC_FALSE;
2737: PetscMalloc1(ui[am]+1,&b->a);
2739: b->j = uj;
2740: b->i = ui;
2741: b->diag = udiag;
2742: b->free_diag = PETSC_TRUE;
2743: b->ilen = 0;
2744: b->imax = 0;
2745: b->row = perm;
2746: b->col = perm;
2748: PetscObjectReference((PetscObject)perm);
2749: PetscObjectReference((PetscObject)perm);
2751: b->icol = iperm;
2752: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2753: PetscMalloc1(am+1,&b->solve_work);
2754: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
2755: b->maxnz = b->nz = ui[am];
2756: b->free_a = PETSC_TRUE;
2757: b->free_ij = PETSC_TRUE;
2759: fact->info.factor_mallocs = reallocs;
2760: fact->info.fill_ratio_given = fill;
2761: if (ai[am] != 0) {
2762: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2763: } else {
2764: fact->info.fill_ratio_needed = 0.0;
2765: }
2766: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2767: return(0);
2768: }
2770: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2771: {
2772: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2773: Mat_SeqSBAIJ *b;
2774: PetscErrorCode ierr;
2775: PetscBool perm_identity,missing;
2776: PetscReal fill = info->fill;
2777: const PetscInt *rip,*riip;
2778: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2779: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2780: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2781: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2782: PetscBT lnkbt;
2783: IS iperm;
2786: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2787: MatMissingDiagonal(A,&missing,&i);
2788: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2790: /* check whether perm is the identity mapping */
2791: ISIdentity(perm,&perm_identity);
2792: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2793: ISGetIndices(iperm,&riip);
2794: ISGetIndices(perm,&rip);
2796: /* initialization */
2797: PetscMalloc1(am+1,&ui);
2798: PetscMalloc1(am+1,&udiag);
2799: ui[0] = 0;
2801: /* jl: linked list for storing indices of the pivot rows
2802: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2803: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2804: for (i=0; i<am; i++) {
2805: jl[i] = am; il[i] = 0;
2806: }
2808: /* create and initialize a linked list for storing column indices of the active row k */
2809: nlnk = am + 1;
2810: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2812: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2813: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);
2814: current_space = free_space;
2816: for (k=0; k<am; k++) { /* for each active row k */
2817: /* initialize lnk by the column indices of row rip[k] of A */
2818: nzk = 0;
2819: ncols = ai[rip[k]+1] - ai[rip[k]];
2820: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2821: ncols_upper = 0;
2822: for (j=0; j<ncols; j++) {
2823: i = riip[*(aj + ai[rip[k]] + j)];
2824: if (i >= k) { /* only take upper triangular entry */
2825: cols[ncols_upper] = i;
2826: ncols_upper++;
2827: }
2828: }
2829: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2830: nzk += nlnk;
2832: /* update lnk by computing fill-in for each pivot row to be merged in */
2833: prow = jl[k]; /* 1st pivot row */
2835: while (prow < k) {
2836: nextprow = jl[prow];
2837: /* merge prow into k-th row */
2838: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2839: jmax = ui[prow+1];
2840: ncols = jmax-jmin;
2841: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2842: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
2843: nzk += nlnk;
2845: /* update il and jl for prow */
2846: if (jmin < jmax) {
2847: il[prow] = jmin;
2848: j = *uj_ptr;
2849: jl[prow] = jl[j];
2850: jl[j] = prow;
2851: }
2852: prow = nextprow;
2853: }
2855: /* if free space is not available, make more free space */
2856: if (current_space->local_remaining<nzk) {
2857: i = am - k + 1; /* num of unfactored rows */
2858: i = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2859: PetscFreeSpaceGet(i,¤t_space);
2860: reallocs++;
2861: }
2863: /* copy data into free space, then initialize lnk */
2864: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
2866: /* add the k-th row into il and jl */
2867: if (nzk > 1) {
2868: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2869: jl[k] = jl[i]; jl[i] = k;
2870: il[k] = ui[k] + 1;
2871: }
2872: ui_ptr[k] = current_space->array;
2874: current_space->array += nzk;
2875: current_space->local_used += nzk;
2876: current_space->local_remaining -= nzk;
2878: ui[k+1] = ui[k] + nzk;
2879: }
2881: ISRestoreIndices(perm,&rip);
2882: ISRestoreIndices(iperm,&riip);
2883: PetscFree4(ui_ptr,jl,il,cols);
2885: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2886: PetscMalloc1(ui[am]+1,&uj);
2887: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2888: PetscLLDestroy(lnk,lnkbt);
2890: /* put together the new matrix in MATSEQSBAIJ format */
2892: b = (Mat_SeqSBAIJ*)fact->data;
2893: b->singlemalloc = PETSC_FALSE;
2894: b->free_a = PETSC_TRUE;
2895: b->free_ij = PETSC_TRUE;
2897: PetscMalloc1(ui[am]+1,&b->a);
2899: b->j = uj;
2900: b->i = ui;
2901: b->diag = udiag;
2902: b->free_diag = PETSC_TRUE;
2903: b->ilen = 0;
2904: b->imax = 0;
2905: b->row = perm;
2906: b->col = perm;
2908: PetscObjectReference((PetscObject)perm);
2909: PetscObjectReference((PetscObject)perm);
2911: b->icol = iperm;
2912: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2914: PetscMalloc1(am+1,&b->solve_work);
2915: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2917: b->maxnz = b->nz = ui[am];
2919: fact->info.factor_mallocs = reallocs;
2920: fact->info.fill_ratio_given = fill;
2921: if (ai[am] != 0) {
2922: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2923: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2924: } else {
2925: fact->info.fill_ratio_needed = 0.0;
2926: }
2927: #if defined(PETSC_USE_INFO)
2928: if (ai[am] != 0) {
2929: PetscReal af = fact->info.fill_ratio_needed;
2930: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2931: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2932: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2933: } else {
2934: PetscInfo(A,"Empty matrix\n");
2935: }
2936: #endif
2937: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2938: return(0);
2939: }
2941: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2942: {
2943: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2944: Mat_SeqSBAIJ *b;
2945: PetscErrorCode ierr;
2946: PetscBool perm_identity,missing;
2947: PetscReal fill = info->fill;
2948: const PetscInt *rip,*riip;
2949: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2950: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2951: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2952: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2953: PetscBT lnkbt;
2954: IS iperm;
2957: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2958: MatMissingDiagonal(A,&missing,&i);
2959: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2961: /* check whether perm is the identity mapping */
2962: ISIdentity(perm,&perm_identity);
2963: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2964: ISGetIndices(iperm,&riip);
2965: ISGetIndices(perm,&rip);
2967: /* initialization */
2968: PetscMalloc1(am+1,&ui);
2969: ui[0] = 0;
2971: /* jl: linked list for storing indices of the pivot rows
2972: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2973: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2974: for (i=0; i<am; i++) {
2975: jl[i] = am; il[i] = 0;
2976: }
2978: /* create and initialize a linked list for storing column indices of the active row k */
2979: nlnk = am + 1;
2980: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2982: /* initial FreeSpace size is fill*(ai[am]+1) */
2983: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);
2984: current_space = free_space;
2986: for (k=0; k<am; k++) { /* for each active row k */
2987: /* initialize lnk by the column indices of row rip[k] of A */
2988: nzk = 0;
2989: ncols = ai[rip[k]+1] - ai[rip[k]];
2990: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2991: ncols_upper = 0;
2992: for (j=0; j<ncols; j++) {
2993: i = riip[*(aj + ai[rip[k]] + j)];
2994: if (i >= k) { /* only take upper triangular entry */
2995: cols[ncols_upper] = i;
2996: ncols_upper++;
2997: }
2998: }
2999: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
3000: nzk += nlnk;
3002: /* update lnk by computing fill-in for each pivot row to be merged in */
3003: prow = jl[k]; /* 1st pivot row */
3005: while (prow < k) {
3006: nextprow = jl[prow];
3007: /* merge prow into k-th row */
3008: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3009: jmax = ui[prow+1];
3010: ncols = jmax-jmin;
3011: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3012: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
3013: nzk += nlnk;
3015: /* update il and jl for prow */
3016: if (jmin < jmax) {
3017: il[prow] = jmin;
3018: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3019: }
3020: prow = nextprow;
3021: }
3023: /* if free space is not available, make more free space */
3024: if (current_space->local_remaining<nzk) {
3025: i = am - k + 1; /* num of unfactored rows */
3026: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3027: PetscFreeSpaceGet(i,¤t_space);
3028: reallocs++;
3029: }
3031: /* copy data into free space, then initialize lnk */
3032: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
3034: /* add the k-th row into il and jl */
3035: if (nzk-1 > 0) {
3036: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3037: jl[k] = jl[i]; jl[i] = k;
3038: il[k] = ui[k] + 1;
3039: }
3040: ui_ptr[k] = current_space->array;
3042: current_space->array += nzk;
3043: current_space->local_used += nzk;
3044: current_space->local_remaining -= nzk;
3046: ui[k+1] = ui[k] + nzk;
3047: }
3049: #if defined(PETSC_USE_INFO)
3050: if (ai[am] != 0) {
3051: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3052: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
3053: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
3054: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
3055: } else {
3056: PetscInfo(A,"Empty matrix\n");
3057: }
3058: #endif
3060: ISRestoreIndices(perm,&rip);
3061: ISRestoreIndices(iperm,&riip);
3062: PetscFree4(ui_ptr,jl,il,cols);
3064: /* destroy list of free space and other temporary array(s) */
3065: PetscMalloc1(ui[am]+1,&uj);
3066: PetscFreeSpaceContiguous(&free_space,uj);
3067: PetscLLDestroy(lnk,lnkbt);
3069: /* put together the new matrix in MATSEQSBAIJ format */
3071: b = (Mat_SeqSBAIJ*)fact->data;
3072: b->singlemalloc = PETSC_FALSE;
3073: b->free_a = PETSC_TRUE;
3074: b->free_ij = PETSC_TRUE;
3076: PetscMalloc1(ui[am]+1,&b->a);
3078: b->j = uj;
3079: b->i = ui;
3080: b->diag = 0;
3081: b->ilen = 0;
3082: b->imax = 0;
3083: b->row = perm;
3084: b->col = perm;
3086: PetscObjectReference((PetscObject)perm);
3087: PetscObjectReference((PetscObject)perm);
3089: b->icol = iperm;
3090: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3092: PetscMalloc1(am+1,&b->solve_work);
3093: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
3094: b->maxnz = b->nz = ui[am];
3096: fact->info.factor_mallocs = reallocs;
3097: fact->info.fill_ratio_given = fill;
3098: if (ai[am] != 0) {
3099: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3100: } else {
3101: fact->info.fill_ratio_needed = 0.0;
3102: }
3103: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3104: return(0);
3105: }
3107: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3108: {
3109: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3110: PetscErrorCode ierr;
3111: PetscInt n = A->rmap->n;
3112: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3113: PetscScalar *x,sum;
3114: const PetscScalar *b;
3115: const MatScalar *aa = a->a,*v;
3116: PetscInt i,nz;
3119: if (!n) return(0);
3121: VecGetArrayRead(bb,&b);
3122: VecGetArrayWrite(xx,&x);
3124: /* forward solve the lower triangular */
3125: x[0] = b[0];
3126: v = aa;
3127: vi = aj;
3128: for (i=1; i<n; i++) {
3129: nz = ai[i+1] - ai[i];
3130: sum = b[i];
3131: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3132: v += nz;
3133: vi += nz;
3134: x[i] = sum;
3135: }
3137: /* backward solve the upper triangular */
3138: for (i=n-1; i>=0; i--) {
3139: v = aa + adiag[i+1] + 1;
3140: vi = aj + adiag[i+1] + 1;
3141: nz = adiag[i] - adiag[i+1]-1;
3142: sum = x[i];
3143: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3144: x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3145: }
3147: PetscLogFlops(2.0*a->nz - A->cmap->n);
3148: VecRestoreArrayRead(bb,&b);
3149: VecRestoreArrayWrite(xx,&x);
3150: return(0);
3151: }
3153: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3154: {
3155: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3156: IS iscol = a->col,isrow = a->row;
3157: PetscErrorCode ierr;
3158: PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3159: const PetscInt *rout,*cout,*r,*c;
3160: PetscScalar *x,*tmp,sum;
3161: const PetscScalar *b;
3162: const MatScalar *aa = a->a,*v;
3165: if (!n) return(0);
3167: VecGetArrayRead(bb,&b);
3168: VecGetArrayWrite(xx,&x);
3169: tmp = a->solve_work;
3171: ISGetIndices(isrow,&rout); r = rout;
3172: ISGetIndices(iscol,&cout); c = cout;
3174: /* forward solve the lower triangular */
3175: tmp[0] = b[r[0]];
3176: v = aa;
3177: vi = aj;
3178: for (i=1; i<n; i++) {
3179: nz = ai[i+1] - ai[i];
3180: sum = b[r[i]];
3181: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3182: tmp[i] = sum;
3183: v += nz; vi += nz;
3184: }
3186: /* backward solve the upper triangular */
3187: for (i=n-1; i>=0; i--) {
3188: v = aa + adiag[i+1]+1;
3189: vi = aj + adiag[i+1]+1;
3190: nz = adiag[i]-adiag[i+1]-1;
3191: sum = tmp[i];
3192: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3193: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3194: }
3196: ISRestoreIndices(isrow,&rout);
3197: ISRestoreIndices(iscol,&cout);
3198: VecRestoreArrayRead(bb,&b);
3199: VecRestoreArrayWrite(xx,&x);
3200: PetscLogFlops(2*a->nz - A->cmap->n);
3201: return(0);
3202: }
3204: /*
3205: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3206: */
3207: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3208: {
3209: Mat B = *fact;
3210: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b;
3211: IS isicol;
3213: const PetscInt *r,*ic;
3214: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3215: PetscInt *bi,*bj,*bdiag,*bdiag_rev;
3216: PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3217: PetscInt nlnk,*lnk;
3218: PetscBT lnkbt;
3219: PetscBool row_identity,icol_identity;
3220: MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3221: const PetscInt *ics;
3222: PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3223: PetscReal dt =info->dt,shift=info->shiftamount;
3224: PetscInt dtcount=(PetscInt)info->dtcount,nnz_max;
3225: PetscBool missing;
3228: if (dt == PETSC_DEFAULT) dt = 0.005;
3229: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3231: /* ------- symbolic factorization, can be reused ---------*/
3232: MatMissingDiagonal(A,&missing,&i);
3233: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3234: adiag=a->diag;
3236: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
3238: /* bdiag is location of diagonal in factor */
3239: PetscMalloc1(n+1,&bdiag); /* becomes b->diag */
3240: PetscMalloc1(n+1,&bdiag_rev); /* temporary */
3242: /* allocate row pointers bi */
3243: PetscMalloc1(2*n+2,&bi);
3245: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3246: if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3247: nnz_max = ai[n]+2*n*dtcount+2;
3249: PetscMalloc1(nnz_max+1,&bj);
3250: PetscMalloc1(nnz_max+1,&ba);
3252: /* put together the new matrix */
3253: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
3254: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
3255: b = (Mat_SeqAIJ*)B->data;
3257: b->free_a = PETSC_TRUE;
3258: b->free_ij = PETSC_TRUE;
3259: b->singlemalloc = PETSC_FALSE;
3261: b->a = ba;
3262: b->j = bj;
3263: b->i = bi;
3264: b->diag = bdiag;
3265: b->ilen = 0;
3266: b->imax = 0;
3267: b->row = isrow;
3268: b->col = iscol;
3269: PetscObjectReference((PetscObject)isrow);
3270: PetscObjectReference((PetscObject)iscol);
3271: b->icol = isicol;
3273: PetscMalloc1(n+1,&b->solve_work);
3274: PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
3275: b->maxnz = nnz_max;
3277: B->factortype = MAT_FACTOR_ILUDT;
3278: B->info.factor_mallocs = 0;
3279: B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3280: /* ------- end of symbolic factorization ---------*/
3282: ISGetIndices(isrow,&r);
3283: ISGetIndices(isicol,&ic);
3284: ics = ic;
3286: /* linked list for storing column indices of the active row */
3287: nlnk = n + 1;
3288: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
3290: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3291: PetscMalloc2(n,&im,n,&jtmp);
3292: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3293: PetscMalloc2(n,&rtmp,n,&vtmp);
3294: PetscArrayzero(rtmp,n);
3296: bi[0] = 0;
3297: bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3298: bdiag_rev[n] = bdiag[0];
3299: bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3300: for (i=0; i<n; i++) {
3301: /* copy initial fill into linked list */
3302: nzi = ai[r[i]+1] - ai[r[i]];
3303: if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3304: nzi_al = adiag[r[i]] - ai[r[i]];
3305: nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3306: ajtmp = aj + ai[r[i]];
3307: PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);
3309: /* load in initial (unfactored row) */
3310: aatmp = a->a + ai[r[i]];
3311: for (j=0; j<nzi; j++) {
3312: rtmp[ics[*ajtmp++]] = *aatmp++;
3313: }
3315: /* add pivot rows into linked list */
3316: row = lnk[n];
3317: while (row < i) {
3318: nzi_bl = bi[row+1] - bi[row] + 1;
3319: bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3320: PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);
3321: nzi += nlnk;
3322: row = lnk[row];
3323: }
3325: /* copy data from lnk into jtmp, then initialize lnk */
3326: PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);
3328: /* numerical factorization */
3329: bjtmp = jtmp;
3330: row = *bjtmp++; /* 1st pivot row */
3331: while (row < i) {
3332: pc = rtmp + row;
3333: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3334: multiplier = (*pc) * (*pv);
3335: *pc = multiplier;
3336: if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3337: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3338: pv = ba + bdiag[row+1] + 1;
3339: /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3340: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3341: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3342: PetscLogFlops(1+2*nz);
3343: }
3344: row = *bjtmp++;
3345: }
3347: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3348: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3349: nzi_bl = 0; j = 0;
3350: while (jtmp[j] < i) { /* Note: jtmp is sorted */
3351: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3352: nzi_bl++; j++;
3353: }
3354: nzi_bu = nzi - nzi_bl -1;
3355: while (j < nzi) {
3356: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3357: j++;
3358: }
3360: bjtmp = bj + bi[i];
3361: batmp = ba + bi[i];
3362: /* apply level dropping rule to L part */
3363: ncut = nzi_al + dtcount;
3364: if (ncut < nzi_bl) {
3365: PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);
3366: PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
3367: } else {
3368: ncut = nzi_bl;
3369: }
3370: for (j=0; j<ncut; j++) {
3371: bjtmp[j] = jtmp[j];
3372: batmp[j] = vtmp[j];
3373: /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3374: }
3375: bi[i+1] = bi[i] + ncut;
3376: nzi = ncut + 1;
3378: /* apply level dropping rule to U part */
3379: ncut = nzi_au + dtcount;
3380: if (ncut < nzi_bu) {
3381: PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);
3382: PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
3383: } else {
3384: ncut = nzi_bu;
3385: }
3386: nzi += ncut;
3388: /* mark bdiagonal */
3389: bdiag[i+1] = bdiag[i] - (ncut + 1);
3390: bdiag_rev[n-i-1] = bdiag[i+1];
3391: bi[2*n - i] = bi[2*n - i +1] - (ncut + 1);
3392: bjtmp = bj + bdiag[i];
3393: batmp = ba + bdiag[i];
3394: *bjtmp = i;
3395: *batmp = diag_tmp; /* rtmp[i]; */
3396: if (*batmp == 0.0) {
3397: *batmp = dt+shift;
3398: /* printf(" row %d add shift %g\n",i,shift); */
3399: }
3400: *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3401: /* printf(" (%d,%g),",*bjtmp,*batmp); */
3403: bjtmp = bj + bdiag[i+1]+1;
3404: batmp = ba + bdiag[i+1]+1;
3405: for (k=0; k<ncut; k++) {
3406: bjtmp[k] = jtmp[nzi_bl+1+k];
3407: batmp[k] = vtmp[nzi_bl+1+k];
3408: /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3409: }
3410: /* printf("\n"); */
3412: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3413: /*
3414: printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3415: printf(" ----------------------------\n");
3416: */
3417: } /* for (i=0; i<n; i++) */
3418: /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3419: if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3421: ISRestoreIndices(isrow,&r);
3422: ISRestoreIndices(isicol,&ic);
3424: PetscLLDestroy(lnk,lnkbt);
3425: PetscFree2(im,jtmp);
3426: PetscFree2(rtmp,vtmp);
3427: PetscFree(bdiag_rev);
3429: PetscLogFlops(B->cmap->n);
3430: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3432: ISIdentity(isrow,&row_identity);
3433: ISIdentity(isicol,&icol_identity);
3434: if (row_identity && icol_identity) {
3435: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3436: } else {
3437: B->ops->solve = MatSolve_SeqAIJ;
3438: }
3440: B->ops->solveadd = 0;
3441: B->ops->solvetranspose = 0;
3442: B->ops->solvetransposeadd = 0;
3443: B->ops->matsolve = 0;
3444: B->assembled = PETSC_TRUE;
3445: B->preallocated = PETSC_TRUE;
3446: return(0);
3447: }
3449: /* a wraper of MatILUDTFactor_SeqAIJ() */
3450: /*
3451: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3452: */
3454: PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3455: {
3459: MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);
3460: return(0);
3461: }
3463: /*
3464: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3465: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3466: */
3467: /*
3468: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3469: */
3471: PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3472: {
3473: Mat C =fact;
3474: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3475: IS isrow = b->row,isicol = b->icol;
3477: const PetscInt *r,*ic,*ics;
3478: PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3479: PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3480: MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3481: PetscReal dt=info->dt,shift=info->shiftamount;
3482: PetscBool row_identity, col_identity;
3485: ISGetIndices(isrow,&r);
3486: ISGetIndices(isicol,&ic);
3487: PetscMalloc1(n+1,&rtmp);
3488: ics = ic;
3490: for (i=0; i<n; i++) {
3491: /* initialize rtmp array */
3492: nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */
3493: bjtmp = bj + bi[i];
3494: for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3495: rtmp[i] = 0.0;
3496: nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3497: bjtmp = bj + bdiag[i+1] + 1;
3498: for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3500: /* load in initial unfactored row of A */
3501: /* printf("row %d\n",i); */
3502: nz = ai[r[i]+1] - ai[r[i]];
3503: ajtmp = aj + ai[r[i]];
3504: v = aa + ai[r[i]];
3505: for (j=0; j<nz; j++) {
3506: rtmp[ics[*ajtmp++]] = v[j];
3507: /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3508: }
3509: /* printf("\n"); */
3511: /* numerical factorization */
3512: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3513: nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3514: k = 0;
3515: while (k < nzl) {
3516: row = *bjtmp++;
3517: /* printf(" prow %d\n",row); */
3518: pc = rtmp + row;
3519: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3520: multiplier = (*pc) * (*pv);
3521: *pc = multiplier;
3522: if (PetscAbsScalar(multiplier) > dt) {
3523: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3524: pv = b->a + bdiag[row+1] + 1;
3525: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3526: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3527: PetscLogFlops(1+2*nz);
3528: }
3529: k++;
3530: }
3532: /* finished row so stick it into b->a */
3533: /* L-part */
3534: pv = b->a + bi[i];
3535: pj = bj + bi[i];
3536: nzl = bi[i+1] - bi[i];
3537: for (j=0; j<nzl; j++) {
3538: pv[j] = rtmp[pj[j]];
3539: /* printf(" (%d,%g),",pj[j],pv[j]); */
3540: }
3542: /* diagonal: invert diagonal entries for simplier triangular solves */
3543: if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3544: b->a[bdiag[i]] = 1.0/rtmp[i];
3545: /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3547: /* U-part */
3548: pv = b->a + bdiag[i+1] + 1;
3549: pj = bj + bdiag[i+1] + 1;
3550: nzu = bdiag[i] - bdiag[i+1] - 1;
3551: for (j=0; j<nzu; j++) {
3552: pv[j] = rtmp[pj[j]];
3553: /* printf(" (%d,%g),",pj[j],pv[j]); */
3554: }
3555: /* printf("\n"); */
3556: }
3558: PetscFree(rtmp);
3559: ISRestoreIndices(isicol,&ic);
3560: ISRestoreIndices(isrow,&r);
3562: ISIdentity(isrow,&row_identity);
3563: ISIdentity(isicol,&col_identity);
3564: if (row_identity && col_identity) {
3565: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3566: } else {
3567: C->ops->solve = MatSolve_SeqAIJ;
3568: }
3569: C->ops->solveadd = 0;
3570: C->ops->solvetranspose = 0;
3571: C->ops->solvetransposeadd = 0;
3572: C->ops->matsolve = 0;
3573: C->assembled = PETSC_TRUE;
3574: C->preallocated = PETSC_TRUE;
3576: PetscLogFlops(C->cmap->n);
3577: return(0);
3578: }