Actual source code: aijfact.c
petsc-3.13.6 2020-09-29
2: #include <../src/mat/impls/aij/seq/aij.h>
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <petscbt.h>
5: #include <../src/mat/utils/freespace.h>
7: /*
8: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
10: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
11: */
12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
13: {
14: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data;
15: PetscErrorCode ierr;
16: PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
17: const PetscInt *ai = a->i, *aj = a->j;
18: const PetscScalar *aa = a->a;
19: PetscBool *done;
20: PetscReal best,past = 0,future;
23: /* pick initial row */
24: best = -1;
25: for (i=0; i<n; i++) {
26: future = 0.0;
27: for (j=ai[i]; j<ai[i+1]; j++) {
28: if (aj[j] != i) future += PetscAbsScalar(aa[j]);
29: else past = PetscAbsScalar(aa[j]);
30: }
31: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
32: if (past/future > best) {
33: best = past/future;
34: current = i;
35: }
36: }
38: PetscMalloc1(n,&done);
39: PetscArrayzero(done,n);
40: PetscMalloc1(n,&order);
41: order[0] = current;
42: for (i=0; i<n-1; i++) {
43: done[current] = PETSC_TRUE;
44: best = -1;
45: /* loop over all neighbors of current pivot */
46: for (j=ai[current]; j<ai[current+1]; j++) {
47: jj = aj[j];
48: if (done[jj]) continue;
49: /* loop over columns of potential next row computing weights for below and above diagonal */
50: past = future = 0.0;
51: for (k=ai[jj]; k<ai[jj+1]; k++) {
52: kk = aj[k];
53: if (done[kk]) past += PetscAbsScalar(aa[k]);
54: else if (kk != jj) future += PetscAbsScalar(aa[k]);
55: }
56: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
57: if (past/future > best) {
58: best = past/future;
59: newcurrent = jj;
60: }
61: }
62: if (best == -1) { /* no neighbors to select from so select best of all that remain */
63: best = -1;
64: for (k=0; k<n; k++) {
65: if (done[k]) continue;
66: future = 0.0;
67: past = 0.0;
68: for (j=ai[k]; j<ai[k+1]; j++) {
69: kk = aj[j];
70: if (done[kk]) past += PetscAbsScalar(aa[j]);
71: else if (kk != k) future += PetscAbsScalar(aa[j]);
72: }
73: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
74: if (past/future > best) {
75: best = past/future;
76: newcurrent = k;
77: }
78: }
79: }
80: if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
81: current = newcurrent;
82: order[i+1] = current;
83: }
84: ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);
85: *icol = *irow;
86: PetscObjectReference((PetscObject)*irow);
87: PetscFree(done);
88: PetscFree(order);
89: return(0);
90: }
92: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
93: {
94: PetscInt n = A->rmap->n;
98: #if defined(PETSC_USE_COMPLEX)
99: if (A->hermitian && !A->symmetric && (ftype == MAT_FACTOR_CHOLESKY||ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY or ICC Factor is not supported");
100: #endif
101: MatCreate(PetscObjectComm((PetscObject)A),B);
102: MatSetSizes(*B,n,n,n,n);
103: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
104: MatSetType(*B,MATSEQAIJ);
106: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
107: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
109: MatSetBlockSizesFromMats(*B,A,A);
110: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
111: MatSetType(*B,MATSEQSBAIJ);
112: MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);
114: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
115: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
116: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
117: (*B)->factortype = ftype;
119: PetscFree((*B)->solvertype);
120: PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);
121: return(0);
122: }
124: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
125: {
126: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
127: IS isicol;
128: PetscErrorCode ierr;
129: const PetscInt *r,*ic;
130: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
131: PetscInt *bi,*bj,*ajtmp;
132: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
133: PetscReal f;
134: PetscInt nlnk,*lnk,k,**bi_ptr;
135: PetscFreeSpaceList free_space=NULL,current_space=NULL;
136: PetscBT lnkbt;
137: PetscBool missing;
140: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
141: MatMissingDiagonal(A,&missing,&i);
142: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
144: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
145: ISGetIndices(isrow,&r);
146: ISGetIndices(isicol,&ic);
148: /* get new row pointers */
149: PetscMalloc1(n+1,&bi);
150: bi[0] = 0;
152: /* bdiag is location of diagonal in factor */
153: PetscMalloc1(n+1,&bdiag);
154: bdiag[0] = 0;
156: /* linked list for storing column indices of the active row */
157: nlnk = n + 1;
158: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
160: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
162: /* initial FreeSpace size is f*(ai[n]+1) */
163: f = info->fill;
164: if (n==1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
165: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
166: current_space = free_space;
168: for (i=0; i<n; i++) {
169: /* copy previous fill into linked list */
170: nzi = 0;
171: nnz = ai[r[i]+1] - ai[r[i]];
172: ajtmp = aj + ai[r[i]];
173: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
174: nzi += nlnk;
176: /* add pivot rows into linked list */
177: row = lnk[n];
178: while (row < i) {
179: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
180: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
181: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
182: nzi += nlnk;
183: row = lnk[row];
184: }
185: bi[i+1] = bi[i] + nzi;
186: im[i] = nzi;
188: /* mark bdiag */
189: nzbd = 0;
190: nnz = nzi;
191: k = lnk[n];
192: while (nnz-- && k < i) {
193: nzbd++;
194: k = lnk[k];
195: }
196: bdiag[i] = bi[i] + nzbd;
198: /* if free space is not available, make more free space */
199: if (current_space->local_remaining<nzi) {
200: nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
201: PetscFreeSpaceGet(nnz,¤t_space);
202: reallocs++;
203: }
205: /* copy data into free space, then initialize lnk */
206: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
208: bi_ptr[i] = current_space->array;
209: current_space->array += nzi;
210: current_space->local_used += nzi;
211: current_space->local_remaining -= nzi;
212: }
213: #if defined(PETSC_USE_INFO)
214: if (ai[n] != 0) {
215: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
216: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
217: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
218: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
219: PetscInfo(A,"for best performance.\n");
220: } else {
221: PetscInfo(A,"Empty matrix\n");
222: }
223: #endif
225: ISRestoreIndices(isrow,&r);
226: ISRestoreIndices(isicol,&ic);
228: /* destroy list of free space and other temporary array(s) */
229: PetscMalloc1(bi[n]+1,&bj);
230: PetscFreeSpaceContiguous(&free_space,bj);
231: PetscLLDestroy(lnk,lnkbt);
232: PetscFree2(bi_ptr,im);
234: /* put together the new matrix */
235: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
236: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
237: b = (Mat_SeqAIJ*)(B)->data;
239: b->free_a = PETSC_TRUE;
240: b->free_ij = PETSC_TRUE;
241: b->singlemalloc = PETSC_FALSE;
243: PetscMalloc1(bi[n]+1,&b->a);
244: b->j = bj;
245: b->i = bi;
246: b->diag = bdiag;
247: b->ilen = 0;
248: b->imax = 0;
249: b->row = isrow;
250: b->col = iscol;
251: PetscObjectReference((PetscObject)isrow);
252: PetscObjectReference((PetscObject)iscol);
253: b->icol = isicol;
254: PetscMalloc1(n+1,&b->solve_work);
256: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
257: PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
258: b->maxnz = b->nz = bi[n];
260: (B)->factortype = MAT_FACTOR_LU;
261: (B)->info.factor_mallocs = reallocs;
262: (B)->info.fill_ratio_given = f;
264: if (ai[n]) {
265: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
266: } else {
267: (B)->info.fill_ratio_needed = 0.0;
268: }
269: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
270: if (a->inode.size) {
271: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
272: }
273: return(0);
274: }
276: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
277: {
278: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
279: IS isicol;
280: PetscErrorCode ierr;
281: const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
282: PetscInt i,n=A->rmap->n;
283: PetscInt *bi,*bj;
284: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
285: PetscReal f;
286: PetscInt nlnk,*lnk,k,**bi_ptr;
287: PetscFreeSpaceList free_space=NULL,current_space=NULL;
288: PetscBT lnkbt;
289: PetscBool missing;
292: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
293: MatMissingDiagonal(A,&missing,&i);
294: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
296: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
297: ISGetIndices(isrow,&r);
298: ISGetIndices(isicol,&ic);
300: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
301: PetscMalloc1(n+1,&bi);
302: PetscMalloc1(n+1,&bdiag);
303: bi[0] = bdiag[0] = 0;
305: /* linked list for storing column indices of the active row */
306: nlnk = n + 1;
307: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
309: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
311: /* initial FreeSpace size is f*(ai[n]+1) */
312: f = info->fill;
313: if (n==1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
314: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
315: current_space = free_space;
317: for (i=0; i<n; i++) {
318: /* copy previous fill into linked list */
319: nzi = 0;
320: nnz = ai[r[i]+1] - ai[r[i]];
321: ajtmp = aj + ai[r[i]];
322: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
323: nzi += nlnk;
325: /* add pivot rows into linked list */
326: row = lnk[n];
327: while (row < i) {
328: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
329: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
330: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
331: nzi += nlnk;
332: row = lnk[row];
333: }
334: bi[i+1] = bi[i] + nzi;
335: im[i] = nzi;
337: /* mark bdiag */
338: nzbd = 0;
339: nnz = nzi;
340: k = lnk[n];
341: while (nnz-- && k < i) {
342: nzbd++;
343: k = lnk[k];
344: }
345: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
347: /* if free space is not available, make more free space */
348: if (current_space->local_remaining<nzi) {
349: /* estimated additional space needed */
350: nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi));
351: PetscFreeSpaceGet(nnz,¤t_space);
352: reallocs++;
353: }
355: /* copy data into free space, then initialize lnk */
356: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
358: bi_ptr[i] = current_space->array;
359: current_space->array += nzi;
360: current_space->local_used += nzi;
361: current_space->local_remaining -= nzi;
362: }
364: ISRestoreIndices(isrow,&r);
365: ISRestoreIndices(isicol,&ic);
367: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
368: PetscMalloc1(bi[n]+1,&bj);
369: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
370: PetscLLDestroy(lnk,lnkbt);
371: PetscFree2(bi_ptr,im);
373: /* put together the new matrix */
374: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
375: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
376: b = (Mat_SeqAIJ*)(B)->data;
378: b->free_a = PETSC_TRUE;
379: b->free_ij = PETSC_TRUE;
380: b->singlemalloc = PETSC_FALSE;
382: PetscMalloc1(bdiag[0]+1,&b->a);
384: b->j = bj;
385: b->i = bi;
386: b->diag = bdiag;
387: b->ilen = 0;
388: b->imax = 0;
389: b->row = isrow;
390: b->col = iscol;
391: PetscObjectReference((PetscObject)isrow);
392: PetscObjectReference((PetscObject)iscol);
393: b->icol = isicol;
394: PetscMalloc1(n+1,&b->solve_work);
396: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
397: PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
398: b->maxnz = b->nz = bdiag[0]+1;
400: B->factortype = MAT_FACTOR_LU;
401: B->info.factor_mallocs = reallocs;
402: B->info.fill_ratio_given = f;
404: if (ai[n]) {
405: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
406: } else {
407: B->info.fill_ratio_needed = 0.0;
408: }
409: #if defined(PETSC_USE_INFO)
410: if (ai[n] != 0) {
411: PetscReal af = B->info.fill_ratio_needed;
412: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
413: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
414: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
415: PetscInfo(A,"for best performance.\n");
416: } else {
417: PetscInfo(A,"Empty matrix\n");
418: }
419: #endif
420: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
421: if (a->inode.size) {
422: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
423: }
424: MatSeqAIJCheckInode_FactorLU(B);
425: return(0);
426: }
428: /*
429: Trouble in factorization, should we dump the original matrix?
430: */
431: PetscErrorCode MatFactorDumpMatrix(Mat A)
432: {
434: PetscBool flg = PETSC_FALSE;
437: PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);
438: if (flg) {
439: PetscViewer viewer;
440: char filename[PETSC_MAX_PATH_LEN];
442: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
443: PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);
444: MatView(A,viewer);
445: PetscViewerDestroy(&viewer);
446: }
447: return(0);
448: }
450: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
451: {
452: Mat C =B;
453: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
454: IS isrow = b->row,isicol = b->icol;
455: PetscErrorCode ierr;
456: const PetscInt *r,*ic,*ics;
457: const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
458: PetscInt i,j,k,nz,nzL,row,*pj;
459: const PetscInt *ajtmp,*bjtmp;
460: MatScalar *rtmp,*pc,multiplier,*pv;
461: const MatScalar *aa=a->a,*v;
462: PetscBool row_identity,col_identity;
463: FactorShiftCtx sctx;
464: const PetscInt *ddiag;
465: PetscReal rs;
466: MatScalar d;
469: /* MatPivotSetUp(): initialize shift context sctx */
470: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
472: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
473: ddiag = a->diag;
474: sctx.shift_top = info->zeropivot;
475: for (i=0; i<n; i++) {
476: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
477: d = (aa)[ddiag[i]];
478: rs = -PetscAbsScalar(d) - PetscRealPart(d);
479: v = aa+ai[i];
480: nz = ai[i+1] - ai[i];
481: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
482: if (rs>sctx.shift_top) sctx.shift_top = rs;
483: }
484: sctx.shift_top *= 1.1;
485: sctx.nshift_max = 5;
486: sctx.shift_lo = 0.;
487: sctx.shift_hi = 1.;
488: }
490: ISGetIndices(isrow,&r);
491: ISGetIndices(isicol,&ic);
492: PetscMalloc1(n+1,&rtmp);
493: ics = ic;
495: do {
496: sctx.newshift = PETSC_FALSE;
497: for (i=0; i<n; i++) {
498: /* zero rtmp */
499: /* L part */
500: nz = bi[i+1] - bi[i];
501: bjtmp = bj + bi[i];
502: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
504: /* U part */
505: nz = bdiag[i]-bdiag[i+1];
506: bjtmp = bj + bdiag[i+1]+1;
507: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
509: /* load in initial (unfactored row) */
510: nz = ai[r[i]+1] - ai[r[i]];
511: ajtmp = aj + ai[r[i]];
512: v = aa + ai[r[i]];
513: for (j=0; j<nz; j++) {
514: rtmp[ics[ajtmp[j]]] = v[j];
515: }
516: /* ZeropivotApply() */
517: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
519: /* elimination */
520: bjtmp = bj + bi[i];
521: row = *bjtmp++;
522: nzL = bi[i+1] - bi[i];
523: for (k=0; k < nzL; k++) {
524: pc = rtmp + row;
525: if (*pc != 0.0) {
526: pv = b->a + bdiag[row];
527: multiplier = *pc * (*pv);
528: *pc = multiplier;
530: pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
531: pv = b->a + bdiag[row+1]+1;
532: nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
534: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
535: PetscLogFlops(1+2.0*nz);
536: }
537: row = *bjtmp++;
538: }
540: /* finished row so stick it into b->a */
541: rs = 0.0;
542: /* L part */
543: pv = b->a + bi[i];
544: pj = b->j + bi[i];
545: nz = bi[i+1] - bi[i];
546: for (j=0; j<nz; j++) {
547: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
548: }
550: /* U part */
551: pv = b->a + bdiag[i+1]+1;
552: pj = b->j + bdiag[i+1]+1;
553: nz = bdiag[i] - bdiag[i+1]-1;
554: for (j=0; j<nz; j++) {
555: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
556: }
558: sctx.rs = rs;
559: sctx.pv = rtmp[i];
560: MatPivotCheck(B,A,info,&sctx,i);
561: if (sctx.newshift) break; /* break for-loop */
562: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
564: /* Mark diagonal and invert diagonal for simplier triangular solves */
565: pv = b->a + bdiag[i];
566: *pv = 1.0/rtmp[i];
568: } /* endof for (i=0; i<n; i++) { */
570: /* MatPivotRefine() */
571: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
572: /*
573: * if no shift in this attempt & shifting & started shifting & can refine,
574: * then try lower shift
575: */
576: sctx.shift_hi = sctx.shift_fraction;
577: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
578: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
579: sctx.newshift = PETSC_TRUE;
580: sctx.nshift++;
581: }
582: } while (sctx.newshift);
584: PetscFree(rtmp);
585: ISRestoreIndices(isicol,&ic);
586: ISRestoreIndices(isrow,&r);
588: ISIdentity(isrow,&row_identity);
589: ISIdentity(isicol,&col_identity);
590: if (b->inode.size) {
591: C->ops->solve = MatSolve_SeqAIJ_Inode;
592: } else if (row_identity && col_identity) {
593: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
594: } else {
595: C->ops->solve = MatSolve_SeqAIJ;
596: }
597: C->ops->solveadd = MatSolveAdd_SeqAIJ;
598: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
599: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
600: C->ops->matsolve = MatMatSolve_SeqAIJ;
601: C->assembled = PETSC_TRUE;
602: C->preallocated = PETSC_TRUE;
604: PetscLogFlops(C->cmap->n);
606: /* MatShiftView(A,info,&sctx) */
607: if (sctx.nshift) {
608: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
609: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
610: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
611: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
612: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
613: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
614: }
615: }
616: return(0);
617: }
619: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
620: {
621: Mat C =B;
622: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
623: IS isrow = b->row,isicol = b->icol;
624: PetscErrorCode ierr;
625: const PetscInt *r,*ic,*ics;
626: PetscInt nz,row,i,j,n=A->rmap->n,diag;
627: const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
628: const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
629: MatScalar *pv,*rtmp,*pc,multiplier,d;
630: const MatScalar *v,*aa=a->a;
631: PetscReal rs=0.0;
632: FactorShiftCtx sctx;
633: const PetscInt *ddiag;
634: PetscBool row_identity, col_identity;
637: /* MatPivotSetUp(): initialize shift context sctx */
638: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
640: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
641: ddiag = a->diag;
642: sctx.shift_top = info->zeropivot;
643: for (i=0; i<n; i++) {
644: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
645: d = (aa)[ddiag[i]];
646: rs = -PetscAbsScalar(d) - PetscRealPart(d);
647: v = aa+ai[i];
648: nz = ai[i+1] - ai[i];
649: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
650: if (rs>sctx.shift_top) sctx.shift_top = rs;
651: }
652: sctx.shift_top *= 1.1;
653: sctx.nshift_max = 5;
654: sctx.shift_lo = 0.;
655: sctx.shift_hi = 1.;
656: }
658: ISGetIndices(isrow,&r);
659: ISGetIndices(isicol,&ic);
660: PetscMalloc1(n+1,&rtmp);
661: ics = ic;
663: do {
664: sctx.newshift = PETSC_FALSE;
665: for (i=0; i<n; i++) {
666: nz = bi[i+1] - bi[i];
667: bjtmp = bj + bi[i];
668: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
670: /* load in initial (unfactored row) */
671: nz = ai[r[i]+1] - ai[r[i]];
672: ajtmp = aj + ai[r[i]];
673: v = aa + ai[r[i]];
674: for (j=0; j<nz; j++) {
675: rtmp[ics[ajtmp[j]]] = v[j];
676: }
677: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
679: row = *bjtmp++;
680: while (row < i) {
681: pc = rtmp + row;
682: if (*pc != 0.0) {
683: pv = b->a + diag_offset[row];
684: pj = b->j + diag_offset[row] + 1;
685: multiplier = *pc / *pv++;
686: *pc = multiplier;
687: nz = bi[row+1] - diag_offset[row] - 1;
688: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
689: PetscLogFlops(1+2.0*nz);
690: }
691: row = *bjtmp++;
692: }
693: /* finished row so stick it into b->a */
694: pv = b->a + bi[i];
695: pj = b->j + bi[i];
696: nz = bi[i+1] - bi[i];
697: diag = diag_offset[i] - bi[i];
698: rs = 0.0;
699: for (j=0; j<nz; j++) {
700: pv[j] = rtmp[pj[j]];
701: rs += PetscAbsScalar(pv[j]);
702: }
703: rs -= PetscAbsScalar(pv[diag]);
705: sctx.rs = rs;
706: sctx.pv = pv[diag];
707: MatPivotCheck(B,A,info,&sctx,i);
708: if (sctx.newshift) break;
709: pv[diag] = sctx.pv;
710: }
712: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
713: /*
714: * if no shift in this attempt & shifting & started shifting & can refine,
715: * then try lower shift
716: */
717: sctx.shift_hi = sctx.shift_fraction;
718: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
719: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
720: sctx.newshift = PETSC_TRUE;
721: sctx.nshift++;
722: }
723: } while (sctx.newshift);
725: /* invert diagonal entries for simplier triangular solves */
726: for (i=0; i<n; i++) {
727: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
728: }
729: PetscFree(rtmp);
730: ISRestoreIndices(isicol,&ic);
731: ISRestoreIndices(isrow,&r);
733: ISIdentity(isrow,&row_identity);
734: ISIdentity(isicol,&col_identity);
735: if (row_identity && col_identity) {
736: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
737: } else {
738: C->ops->solve = MatSolve_SeqAIJ_inplace;
739: }
740: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
741: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
742: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
743: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
745: C->assembled = PETSC_TRUE;
746: C->preallocated = PETSC_TRUE;
748: PetscLogFlops(C->cmap->n);
749: if (sctx.nshift) {
750: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
751: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
752: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
753: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
754: }
755: }
756: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
757: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
759: MatSeqAIJCheckInode(C);
760: return(0);
761: }
763: /*
764: This routine implements inplace ILU(0) with row or/and column permutations.
765: Input:
766: A - original matrix
767: Output;
768: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
769: a->j (col index) is permuted by the inverse of colperm, then sorted
770: a->a reordered accordingly with a->j
771: a->diag (ptr to diagonal elements) is updated.
772: */
773: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
774: {
775: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data;
776: IS isrow = a->row,isicol = a->icol;
777: PetscErrorCode ierr;
778: const PetscInt *r,*ic,*ics;
779: PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
780: PetscInt *ajtmp,nz,row;
781: PetscInt *diag = a->diag,nbdiag,*pj;
782: PetscScalar *rtmp,*pc,multiplier,d;
783: MatScalar *pv,*v;
784: PetscReal rs;
785: FactorShiftCtx sctx;
786: const MatScalar *aa=a->a,*vtmp;
789: if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
791: /* MatPivotSetUp(): initialize shift context sctx */
792: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
794: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
795: const PetscInt *ddiag = a->diag;
796: sctx.shift_top = info->zeropivot;
797: for (i=0; i<n; i++) {
798: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
799: d = (aa)[ddiag[i]];
800: rs = -PetscAbsScalar(d) - PetscRealPart(d);
801: vtmp = aa+ai[i];
802: nz = ai[i+1] - ai[i];
803: for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
804: if (rs>sctx.shift_top) sctx.shift_top = rs;
805: }
806: sctx.shift_top *= 1.1;
807: sctx.nshift_max = 5;
808: sctx.shift_lo = 0.;
809: sctx.shift_hi = 1.;
810: }
812: ISGetIndices(isrow,&r);
813: ISGetIndices(isicol,&ic);
814: PetscMalloc1(n+1,&rtmp);
815: PetscArrayzero(rtmp,n+1);
816: ics = ic;
818: #if defined(MV)
819: sctx.shift_top = 0.;
820: sctx.nshift_max = 0;
821: sctx.shift_lo = 0.;
822: sctx.shift_hi = 0.;
823: sctx.shift_fraction = 0.;
825: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
826: sctx.shift_top = 0.;
827: for (i=0; i<n; i++) {
828: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
829: d = (a->a)[diag[i]];
830: rs = -PetscAbsScalar(d) - PetscRealPart(d);
831: v = a->a+ai[i];
832: nz = ai[i+1] - ai[i];
833: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
834: if (rs>sctx.shift_top) sctx.shift_top = rs;
835: }
836: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
837: sctx.shift_top *= 1.1;
838: sctx.nshift_max = 5;
839: sctx.shift_lo = 0.;
840: sctx.shift_hi = 1.;
841: }
843: sctx.shift_amount = 0.;
844: sctx.nshift = 0;
845: #endif
847: do {
848: sctx.newshift = PETSC_FALSE;
849: for (i=0; i<n; i++) {
850: /* load in initial unfactored row */
851: nz = ai[r[i]+1] - ai[r[i]];
852: ajtmp = aj + ai[r[i]];
853: v = a->a + ai[r[i]];
854: /* sort permuted ajtmp and values v accordingly */
855: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
856: PetscSortIntWithScalarArray(nz,ajtmp,v);
858: diag[r[i]] = ai[r[i]];
859: for (j=0; j<nz; j++) {
860: rtmp[ajtmp[j]] = v[j];
861: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
862: }
863: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
865: row = *ajtmp++;
866: while (row < i) {
867: pc = rtmp + row;
868: if (*pc != 0.0) {
869: pv = a->a + diag[r[row]];
870: pj = aj + diag[r[row]] + 1;
872: multiplier = *pc / *pv++;
873: *pc = multiplier;
874: nz = ai[r[row]+1] - diag[r[row]] - 1;
875: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
876: PetscLogFlops(1+2.0*nz);
877: }
878: row = *ajtmp++;
879: }
880: /* finished row so overwrite it onto a->a */
881: pv = a->a + ai[r[i]];
882: pj = aj + ai[r[i]];
883: nz = ai[r[i]+1] - ai[r[i]];
884: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
886: rs = 0.0;
887: for (j=0; j<nz; j++) {
888: pv[j] = rtmp[pj[j]];
889: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
890: }
892: sctx.rs = rs;
893: sctx.pv = pv[nbdiag];
894: MatPivotCheck(B,A,info,&sctx,i);
895: if (sctx.newshift) break;
896: pv[nbdiag] = sctx.pv;
897: }
899: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
900: /*
901: * if no shift in this attempt & shifting & started shifting & can refine,
902: * then try lower shift
903: */
904: sctx.shift_hi = sctx.shift_fraction;
905: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
906: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
907: sctx.newshift = PETSC_TRUE;
908: sctx.nshift++;
909: }
910: } while (sctx.newshift);
912: /* invert diagonal entries for simplier triangular solves */
913: for (i=0; i<n; i++) {
914: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
915: }
917: PetscFree(rtmp);
918: ISRestoreIndices(isicol,&ic);
919: ISRestoreIndices(isrow,&r);
921: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
922: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
923: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
924: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
926: A->assembled = PETSC_TRUE;
927: A->preallocated = PETSC_TRUE;
929: PetscLogFlops(A->cmap->n);
930: if (sctx.nshift) {
931: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
932: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
933: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
934: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
935: }
936: }
937: return(0);
938: }
940: /* ----------------------------------------------------------- */
941: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
942: {
944: Mat C;
947: MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);
948: MatLUFactorSymbolic(C,A,row,col,info);
949: MatLUFactorNumeric(C,A,info);
951: A->ops->solve = C->ops->solve;
952: A->ops->solvetranspose = C->ops->solvetranspose;
954: MatHeaderMerge(A,&C);
955: PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);
956: return(0);
957: }
958: /* ----------------------------------------------------------- */
961: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
962: {
963: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
964: IS iscol = a->col,isrow = a->row;
965: PetscErrorCode ierr;
966: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
967: PetscInt nz;
968: const PetscInt *rout,*cout,*r,*c;
969: PetscScalar *x,*tmp,*tmps,sum;
970: const PetscScalar *b;
971: const MatScalar *aa = a->a,*v;
974: if (!n) return(0);
976: VecGetArrayRead(bb,&b);
977: VecGetArrayWrite(xx,&x);
978: tmp = a->solve_work;
980: ISGetIndices(isrow,&rout); r = rout;
981: ISGetIndices(iscol,&cout); c = cout + (n-1);
983: /* forward solve the lower triangular */
984: tmp[0] = b[*r++];
985: tmps = tmp;
986: for (i=1; i<n; i++) {
987: v = aa + ai[i];
988: vi = aj + ai[i];
989: nz = a->diag[i] - ai[i];
990: sum = b[*r++];
991: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
992: tmp[i] = sum;
993: }
995: /* backward solve the upper triangular */
996: for (i=n-1; i>=0; i--) {
997: v = aa + a->diag[i] + 1;
998: vi = aj + a->diag[i] + 1;
999: nz = ai[i+1] - a->diag[i] - 1;
1000: sum = tmp[i];
1001: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1002: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1003: }
1005: ISRestoreIndices(isrow,&rout);
1006: ISRestoreIndices(iscol,&cout);
1007: VecRestoreArrayRead(bb,&b);
1008: VecRestoreArrayWrite(xx,&x);
1009: PetscLogFlops(2.0*a->nz - A->cmap->n);
1010: return(0);
1011: }
1013: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1014: {
1015: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1016: IS iscol = a->col,isrow = a->row;
1017: PetscErrorCode ierr;
1018: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1019: PetscInt nz,neq,ldb,ldx;
1020: const PetscInt *rout,*cout,*r,*c;
1021: PetscScalar *x,*tmp = a->solve_work,*tmps,sum;
1022: const PetscScalar *b,*aa = a->a,*v;
1023: PetscBool isdense;
1026: if (!n) return(0);
1027: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense);
1028: if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1029: if (X != B) {
1030: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense);
1031: if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1032: }
1033: MatDenseGetArrayRead(B,&b);
1034: MatDenseGetLDA(B,&ldb);
1035: MatDenseGetArray(X,&x);
1036: MatDenseGetLDA(X,&ldx);
1037: ISGetIndices(isrow,&rout); r = rout;
1038: ISGetIndices(iscol,&cout); c = cout;
1039: for (neq=0; neq<B->cmap->n; neq++) {
1040: /* forward solve the lower triangular */
1041: tmp[0] = b[r[0]];
1042: tmps = tmp;
1043: for (i=1; i<n; i++) {
1044: v = aa + ai[i];
1045: vi = aj + ai[i];
1046: nz = a->diag[i] - ai[i];
1047: sum = b[r[i]];
1048: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1049: tmp[i] = sum;
1050: }
1051: /* backward solve the upper triangular */
1052: for (i=n-1; i>=0; i--) {
1053: v = aa + a->diag[i] + 1;
1054: vi = aj + a->diag[i] + 1;
1055: nz = ai[i+1] - a->diag[i] - 1;
1056: sum = tmp[i];
1057: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1058: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1059: }
1060: b += ldb;
1061: x += ldx;
1062: }
1063: ISRestoreIndices(isrow,&rout);
1064: ISRestoreIndices(iscol,&cout);
1065: MatDenseRestoreArrayRead(B,&b);
1066: MatDenseRestoreArray(X,&x);
1067: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1068: return(0);
1069: }
1071: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1072: {
1073: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1074: IS iscol = a->col,isrow = a->row;
1075: PetscErrorCode ierr;
1076: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1077: PetscInt nz,neq,ldb,ldx;
1078: const PetscInt *rout,*cout,*r,*c;
1079: PetscScalar *x,*tmp = a->solve_work,sum;
1080: const PetscScalar *b,*aa = a->a,*v;
1081: PetscBool isdense;
1084: if (!n) return(0);
1085: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense);
1086: if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1087: if (X != B) {
1088: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense);
1089: if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1090: }
1091: MatDenseGetArrayRead(B,&b);
1092: MatDenseGetLDA(B,&ldb);
1093: MatDenseGetArray(X,&x);
1094: MatDenseGetLDA(X,&ldx);
1095: ISGetIndices(isrow,&rout); r = rout;
1096: ISGetIndices(iscol,&cout); c = cout;
1097: for (neq=0; neq<B->cmap->n; neq++) {
1098: /* forward solve the lower triangular */
1099: tmp[0] = b[r[0]];
1100: v = aa;
1101: vi = aj;
1102: for (i=1; i<n; i++) {
1103: nz = ai[i+1] - ai[i];
1104: sum = b[r[i]];
1105: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1106: tmp[i] = sum;
1107: v += nz; vi += nz;
1108: }
1109: /* backward solve the upper triangular */
1110: for (i=n-1; i>=0; i--) {
1111: v = aa + adiag[i+1]+1;
1112: vi = aj + adiag[i+1]+1;
1113: nz = adiag[i]-adiag[i+1]-1;
1114: sum = tmp[i];
1115: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1116: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1117: }
1118: b += ldb;
1119: x += ldx;
1120: }
1121: ISRestoreIndices(isrow,&rout);
1122: ISRestoreIndices(iscol,&cout);
1123: MatDenseRestoreArrayRead(B,&b);
1124: MatDenseRestoreArray(X,&x);
1125: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1126: return(0);
1127: }
1129: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1130: {
1131: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1132: IS iscol = a->col,isrow = a->row;
1133: PetscErrorCode ierr;
1134: const PetscInt *r,*c,*rout,*cout;
1135: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1136: PetscInt nz,row;
1137: PetscScalar *x,*tmp,*tmps,sum;
1138: const PetscScalar *b;
1139: const MatScalar *aa = a->a,*v;
1142: if (!n) return(0);
1144: VecGetArrayRead(bb,&b);
1145: VecGetArrayWrite(xx,&x);
1146: tmp = a->solve_work;
1148: ISGetIndices(isrow,&rout); r = rout;
1149: ISGetIndices(iscol,&cout); c = cout + (n-1);
1151: /* forward solve the lower triangular */
1152: tmp[0] = b[*r++];
1153: tmps = tmp;
1154: for (row=1; row<n; row++) {
1155: i = rout[row]; /* permuted row */
1156: v = aa + ai[i];
1157: vi = aj + ai[i];
1158: nz = a->diag[i] - ai[i];
1159: sum = b[*r++];
1160: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1161: tmp[row] = sum;
1162: }
1164: /* backward solve the upper triangular */
1165: for (row=n-1; row>=0; row--) {
1166: i = rout[row]; /* permuted row */
1167: v = aa + a->diag[i] + 1;
1168: vi = aj + a->diag[i] + 1;
1169: nz = ai[i+1] - a->diag[i] - 1;
1170: sum = tmp[row];
1171: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1172: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1173: }
1175: ISRestoreIndices(isrow,&rout);
1176: ISRestoreIndices(iscol,&cout);
1177: VecRestoreArrayRead(bb,&b);
1178: VecRestoreArrayWrite(xx,&x);
1179: PetscLogFlops(2.0*a->nz - A->cmap->n);
1180: return(0);
1181: }
1183: /* ----------------------------------------------------------- */
1184: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1185: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1186: {
1187: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1188: PetscErrorCode ierr;
1189: PetscInt n = A->rmap->n;
1190: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag;
1191: PetscScalar *x;
1192: const PetscScalar *b;
1193: const MatScalar *aa = a->a;
1194: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1195: PetscInt adiag_i,i,nz,ai_i;
1196: const PetscInt *vi;
1197: const MatScalar *v;
1198: PetscScalar sum;
1199: #endif
1202: if (!n) return(0);
1204: VecGetArrayRead(bb,&b);
1205: VecGetArrayWrite(xx,&x);
1207: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1208: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1209: #else
1210: /* forward solve the lower triangular */
1211: x[0] = b[0];
1212: for (i=1; i<n; i++) {
1213: ai_i = ai[i];
1214: v = aa + ai_i;
1215: vi = aj + ai_i;
1216: nz = adiag[i] - ai_i;
1217: sum = b[i];
1218: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1219: x[i] = sum;
1220: }
1222: /* backward solve the upper triangular */
1223: for (i=n-1; i>=0; i--) {
1224: adiag_i = adiag[i];
1225: v = aa + adiag_i + 1;
1226: vi = aj + adiag_i + 1;
1227: nz = ai[i+1] - adiag_i - 1;
1228: sum = x[i];
1229: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1230: x[i] = sum*aa[adiag_i];
1231: }
1232: #endif
1233: PetscLogFlops(2.0*a->nz - A->cmap->n);
1234: VecRestoreArrayRead(bb,&b);
1235: VecRestoreArrayWrite(xx,&x);
1236: return(0);
1237: }
1239: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1240: {
1241: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1242: IS iscol = a->col,isrow = a->row;
1243: PetscErrorCode ierr;
1244: PetscInt i, n = A->rmap->n,j;
1245: PetscInt nz;
1246: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1247: PetscScalar *x,*tmp,sum;
1248: const PetscScalar *b;
1249: const MatScalar *aa = a->a,*v;
1252: if (yy != xx) {VecCopy(yy,xx);}
1254: VecGetArrayRead(bb,&b);
1255: VecGetArray(xx,&x);
1256: tmp = a->solve_work;
1258: ISGetIndices(isrow,&rout); r = rout;
1259: ISGetIndices(iscol,&cout); c = cout + (n-1);
1261: /* forward solve the lower triangular */
1262: tmp[0] = b[*r++];
1263: for (i=1; i<n; i++) {
1264: v = aa + ai[i];
1265: vi = aj + ai[i];
1266: nz = a->diag[i] - ai[i];
1267: sum = b[*r++];
1268: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1269: tmp[i] = sum;
1270: }
1272: /* backward solve the upper triangular */
1273: for (i=n-1; i>=0; i--) {
1274: v = aa + a->diag[i] + 1;
1275: vi = aj + a->diag[i] + 1;
1276: nz = ai[i+1] - a->diag[i] - 1;
1277: sum = tmp[i];
1278: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1279: tmp[i] = sum*aa[a->diag[i]];
1280: x[*c--] += tmp[i];
1281: }
1283: ISRestoreIndices(isrow,&rout);
1284: ISRestoreIndices(iscol,&cout);
1285: VecRestoreArrayRead(bb,&b);
1286: VecRestoreArray(xx,&x);
1287: PetscLogFlops(2.0*a->nz);
1288: return(0);
1289: }
1291: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1292: {
1293: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1294: IS iscol = a->col,isrow = a->row;
1295: PetscErrorCode ierr;
1296: PetscInt i, n = A->rmap->n,j;
1297: PetscInt nz;
1298: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1299: PetscScalar *x,*tmp,sum;
1300: const PetscScalar *b;
1301: const MatScalar *aa = a->a,*v;
1304: if (yy != xx) {VecCopy(yy,xx);}
1306: VecGetArrayRead(bb,&b);
1307: VecGetArray(xx,&x);
1308: tmp = a->solve_work;
1310: ISGetIndices(isrow,&rout); r = rout;
1311: ISGetIndices(iscol,&cout); c = cout;
1313: /* forward solve the lower triangular */
1314: tmp[0] = b[r[0]];
1315: v = aa;
1316: vi = aj;
1317: for (i=1; i<n; i++) {
1318: nz = ai[i+1] - ai[i];
1319: sum = b[r[i]];
1320: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1321: tmp[i] = sum;
1322: v += nz;
1323: vi += nz;
1324: }
1326: /* backward solve the upper triangular */
1327: v = aa + adiag[n-1];
1328: vi = aj + adiag[n-1];
1329: for (i=n-1; i>=0; i--) {
1330: nz = adiag[i] - adiag[i+1] - 1;
1331: sum = tmp[i];
1332: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1333: tmp[i] = sum*v[nz];
1334: x[c[i]] += tmp[i];
1335: v += nz+1; vi += nz+1;
1336: }
1338: ISRestoreIndices(isrow,&rout);
1339: ISRestoreIndices(iscol,&cout);
1340: VecRestoreArrayRead(bb,&b);
1341: VecRestoreArray(xx,&x);
1342: PetscLogFlops(2.0*a->nz);
1343: return(0);
1344: }
1346: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1347: {
1348: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1349: IS iscol = a->col,isrow = a->row;
1350: PetscErrorCode ierr;
1351: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1352: PetscInt i,n = A->rmap->n,j;
1353: PetscInt nz;
1354: PetscScalar *x,*tmp,s1;
1355: const MatScalar *aa = a->a,*v;
1356: const PetscScalar *b;
1359: VecGetArrayRead(bb,&b);
1360: VecGetArrayWrite(xx,&x);
1361: tmp = a->solve_work;
1363: ISGetIndices(isrow,&rout); r = rout;
1364: ISGetIndices(iscol,&cout); c = cout;
1366: /* copy the b into temp work space according to permutation */
1367: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1369: /* forward solve the U^T */
1370: for (i=0; i<n; i++) {
1371: v = aa + diag[i];
1372: vi = aj + diag[i] + 1;
1373: nz = ai[i+1] - diag[i] - 1;
1374: s1 = tmp[i];
1375: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1376: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1377: tmp[i] = s1;
1378: }
1380: /* backward solve the L^T */
1381: for (i=n-1; i>=0; i--) {
1382: v = aa + diag[i] - 1;
1383: vi = aj + diag[i] - 1;
1384: nz = diag[i] - ai[i];
1385: s1 = tmp[i];
1386: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1387: }
1389: /* copy tmp into x according to permutation */
1390: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1392: ISRestoreIndices(isrow,&rout);
1393: ISRestoreIndices(iscol,&cout);
1394: VecRestoreArrayRead(bb,&b);
1395: VecRestoreArrayWrite(xx,&x);
1397: PetscLogFlops(2.0*a->nz-A->cmap->n);
1398: return(0);
1399: }
1401: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1402: {
1403: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1404: IS iscol = a->col,isrow = a->row;
1405: PetscErrorCode ierr;
1406: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1407: PetscInt i,n = A->rmap->n,j;
1408: PetscInt nz;
1409: PetscScalar *x,*tmp,s1;
1410: const MatScalar *aa = a->a,*v;
1411: const PetscScalar *b;
1414: VecGetArrayRead(bb,&b);
1415: VecGetArrayWrite(xx,&x);
1416: tmp = a->solve_work;
1418: ISGetIndices(isrow,&rout); r = rout;
1419: ISGetIndices(iscol,&cout); c = cout;
1421: /* copy the b into temp work space according to permutation */
1422: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1424: /* forward solve the U^T */
1425: for (i=0; i<n; i++) {
1426: v = aa + adiag[i+1] + 1;
1427: vi = aj + adiag[i+1] + 1;
1428: nz = adiag[i] - adiag[i+1] - 1;
1429: s1 = tmp[i];
1430: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1431: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1432: tmp[i] = s1;
1433: }
1435: /* backward solve the L^T */
1436: for (i=n-1; i>=0; i--) {
1437: v = aa + ai[i];
1438: vi = aj + ai[i];
1439: nz = ai[i+1] - ai[i];
1440: s1 = tmp[i];
1441: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1442: }
1444: /* copy tmp into x according to permutation */
1445: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1447: ISRestoreIndices(isrow,&rout);
1448: ISRestoreIndices(iscol,&cout);
1449: VecRestoreArrayRead(bb,&b);
1450: VecRestoreArrayWrite(xx,&x);
1452: PetscLogFlops(2.0*a->nz-A->cmap->n);
1453: return(0);
1454: }
1456: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1457: {
1458: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1459: IS iscol = a->col,isrow = a->row;
1460: PetscErrorCode ierr;
1461: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1462: PetscInt i,n = A->rmap->n,j;
1463: PetscInt nz;
1464: PetscScalar *x,*tmp,s1;
1465: const MatScalar *aa = a->a,*v;
1466: const PetscScalar *b;
1469: if (zz != xx) {VecCopy(zz,xx);}
1470: VecGetArrayRead(bb,&b);
1471: VecGetArray(xx,&x);
1472: tmp = a->solve_work;
1474: ISGetIndices(isrow,&rout); r = rout;
1475: ISGetIndices(iscol,&cout); c = cout;
1477: /* copy the b into temp work space according to permutation */
1478: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1480: /* forward solve the U^T */
1481: for (i=0; i<n; i++) {
1482: v = aa + diag[i];
1483: vi = aj + diag[i] + 1;
1484: nz = ai[i+1] - diag[i] - 1;
1485: s1 = tmp[i];
1486: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1487: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1488: tmp[i] = s1;
1489: }
1491: /* backward solve the L^T */
1492: for (i=n-1; i>=0; i--) {
1493: v = aa + diag[i] - 1;
1494: vi = aj + diag[i] - 1;
1495: nz = diag[i] - ai[i];
1496: s1 = tmp[i];
1497: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1498: }
1500: /* copy tmp into x according to permutation */
1501: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1503: ISRestoreIndices(isrow,&rout);
1504: ISRestoreIndices(iscol,&cout);
1505: VecRestoreArrayRead(bb,&b);
1506: VecRestoreArray(xx,&x);
1508: PetscLogFlops(2.0*a->nz-A->cmap->n);
1509: return(0);
1510: }
1512: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1513: {
1514: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1515: IS iscol = a->col,isrow = a->row;
1516: PetscErrorCode ierr;
1517: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1518: PetscInt i,n = A->rmap->n,j;
1519: PetscInt nz;
1520: PetscScalar *x,*tmp,s1;
1521: const MatScalar *aa = a->a,*v;
1522: const PetscScalar *b;
1525: if (zz != xx) {VecCopy(zz,xx);}
1526: VecGetArrayRead(bb,&b);
1527: VecGetArray(xx,&x);
1528: tmp = a->solve_work;
1530: ISGetIndices(isrow,&rout); r = rout;
1531: ISGetIndices(iscol,&cout); c = cout;
1533: /* copy the b into temp work space according to permutation */
1534: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1536: /* forward solve the U^T */
1537: for (i=0; i<n; i++) {
1538: v = aa + adiag[i+1] + 1;
1539: vi = aj + adiag[i+1] + 1;
1540: nz = adiag[i] - adiag[i+1] - 1;
1541: s1 = tmp[i];
1542: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1543: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1544: tmp[i] = s1;
1545: }
1548: /* backward solve the L^T */
1549: for (i=n-1; i>=0; i--) {
1550: v = aa + ai[i];
1551: vi = aj + ai[i];
1552: nz = ai[i+1] - ai[i];
1553: s1 = tmp[i];
1554: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1555: }
1557: /* copy tmp into x according to permutation */
1558: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1560: ISRestoreIndices(isrow,&rout);
1561: ISRestoreIndices(iscol,&cout);
1562: VecRestoreArrayRead(bb,&b);
1563: VecRestoreArray(xx,&x);
1565: PetscLogFlops(2.0*a->nz-A->cmap->n);
1566: return(0);
1567: }
1569: /* ----------------------------------------------------------------*/
1571: /*
1572: ilu() under revised new data structure.
1573: Factored arrays bj and ba are stored as
1574: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1576: bi=fact->i is an array of size n+1, in which
1577: bi+
1578: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1579: bi[n]: points to L(n-1,n-1)+1
1581: bdiag=fact->diag is an array of size n+1,in which
1582: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1583: bdiag[n]: points to entry of U(n-1,0)-1
1585: U(i,:) contains bdiag[i] as its last entry, i.e.,
1586: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1587: */
1588: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1589: {
1590: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1592: const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1593: PetscInt i,j,k=0,nz,*bi,*bj,*bdiag;
1594: IS isicol;
1597: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1598: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);
1599: b = (Mat_SeqAIJ*)(fact)->data;
1601: /* allocate matrix arrays for new data structure */
1602: PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);
1603: PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));
1605: b->singlemalloc = PETSC_TRUE;
1606: if (!b->diag) {
1607: PetscMalloc1(n+1,&b->diag);
1608: PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));
1609: }
1610: bdiag = b->diag;
1612: if (n > 0) {
1613: PetscArrayzero(b->a,ai[n]);
1614: }
1616: /* set bi and bj with new data structure */
1617: bi = b->i;
1618: bj = b->j;
1620: /* L part */
1621: bi[0] = 0;
1622: for (i=0; i<n; i++) {
1623: nz = adiag[i] - ai[i];
1624: bi[i+1] = bi[i] + nz;
1625: aj = a->j + ai[i];
1626: for (j=0; j<nz; j++) {
1627: /* *bj = aj[j]; bj++; */
1628: bj[k++] = aj[j];
1629: }
1630: }
1632: /* U part */
1633: bdiag[n] = bi[n]-1;
1634: for (i=n-1; i>=0; i--) {
1635: nz = ai[i+1] - adiag[i] - 1;
1636: aj = a->j + adiag[i] + 1;
1637: for (j=0; j<nz; j++) {
1638: /* *bj = aj[j]; bj++; */
1639: bj[k++] = aj[j];
1640: }
1641: /* diag[i] */
1642: /* *bj = i; bj++; */
1643: bj[k++] = i;
1644: bdiag[i] = bdiag[i+1] + nz + 1;
1645: }
1647: fact->factortype = MAT_FACTOR_ILU;
1648: fact->info.factor_mallocs = 0;
1649: fact->info.fill_ratio_given = info->fill;
1650: fact->info.fill_ratio_needed = 1.0;
1651: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1652: MatSeqAIJCheckInode_FactorLU(fact);
1654: b = (Mat_SeqAIJ*)(fact)->data;
1655: b->row = isrow;
1656: b->col = iscol;
1657: b->icol = isicol;
1658: PetscMalloc1(fact->rmap->n+1,&b->solve_work);
1659: PetscObjectReference((PetscObject)isrow);
1660: PetscObjectReference((PetscObject)iscol);
1661: return(0);
1662: }
1664: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1665: {
1666: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1667: IS isicol;
1668: PetscErrorCode ierr;
1669: const PetscInt *r,*ic;
1670: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1671: PetscInt *bi,*cols,nnz,*cols_lvl;
1672: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1673: PetscInt i,levels,diagonal_fill;
1674: PetscBool col_identity,row_identity,missing;
1675: PetscReal f;
1676: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1677: PetscBT lnkbt;
1678: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1679: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1680: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1683: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1684: MatMissingDiagonal(A,&missing,&i);
1685: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1687: levels = (PetscInt)info->levels;
1688: ISIdentity(isrow,&row_identity);
1689: ISIdentity(iscol,&col_identity);
1690: if (!levels && row_identity && col_identity) {
1691: /* special case: ilu(0) with natural ordering */
1692: MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);
1693: if (a->inode.size) {
1694: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1695: }
1696: return(0);
1697: }
1699: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1700: ISGetIndices(isrow,&r);
1701: ISGetIndices(isicol,&ic);
1703: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1704: PetscMalloc1(n+1,&bi);
1705: PetscMalloc1(n+1,&bdiag);
1706: bi[0] = bdiag[0] = 0;
1707: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1709: /* create a linked list for storing column indices of the active row */
1710: nlnk = n + 1;
1711: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1713: /* initial FreeSpace size is f*(ai[n]+1) */
1714: f = info->fill;
1715: diagonal_fill = (PetscInt)info->diagonal_fill;
1716: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
1717: current_space = free_space;
1718: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);
1719: current_space_lvl = free_space_lvl;
1720: for (i=0; i<n; i++) {
1721: nzi = 0;
1722: /* copy current row into linked list */
1723: nnz = ai[r[i]+1] - ai[r[i]];
1724: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1725: cols = aj + ai[r[i]];
1726: lnk[i] = -1; /* marker to indicate if diagonal exists */
1727: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1728: nzi += nlnk;
1730: /* make sure diagonal entry is included */
1731: if (diagonal_fill && lnk[i] == -1) {
1732: fm = n;
1733: while (lnk[fm] < i) fm = lnk[fm];
1734: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1735: lnk[fm] = i;
1736: lnk_lvl[i] = 0;
1737: nzi++; dcount++;
1738: }
1740: /* add pivot rows into the active row */
1741: nzbd = 0;
1742: prow = lnk[n];
1743: while (prow < i) {
1744: nnz = bdiag[prow];
1745: cols = bj_ptr[prow] + nnz + 1;
1746: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1747: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1748: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1749: nzi += nlnk;
1750: prow = lnk[prow];
1751: nzbd++;
1752: }
1753: bdiag[i] = nzbd;
1754: bi[i+1] = bi[i] + nzi;
1755: /* if free space is not available, make more free space */
1756: if (current_space->local_remaining<nzi) {
1757: nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */
1758: PetscFreeSpaceGet(nnz,¤t_space);
1759: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1760: reallocs++;
1761: }
1763: /* copy data into free_space and free_space_lvl, then initialize lnk */
1764: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1765: bj_ptr[i] = current_space->array;
1766: bjlvl_ptr[i] = current_space_lvl->array;
1768: /* make sure the active row i has diagonal entry */
1769: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1771: current_space->array += nzi;
1772: current_space->local_used += nzi;
1773: current_space->local_remaining -= nzi;
1774: current_space_lvl->array += nzi;
1775: current_space_lvl->local_used += nzi;
1776: current_space_lvl->local_remaining -= nzi;
1777: }
1779: ISRestoreIndices(isrow,&r);
1780: ISRestoreIndices(isicol,&ic);
1781: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1782: PetscMalloc1(bi[n]+1,&bj);
1783: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
1785: PetscIncompleteLLDestroy(lnk,lnkbt);
1786: PetscFreeSpaceDestroy(free_space_lvl);
1787: PetscFree2(bj_ptr,bjlvl_ptr);
1789: #if defined(PETSC_USE_INFO)
1790: {
1791: PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1792: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
1793: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
1794: PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
1795: PetscInfo(A,"for best performance.\n");
1796: if (diagonal_fill) {
1797: PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);
1798: }
1799: }
1800: #endif
1801: /* put together the new matrix */
1802: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
1803: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
1804: b = (Mat_SeqAIJ*)(fact)->data;
1806: b->free_a = PETSC_TRUE;
1807: b->free_ij = PETSC_TRUE;
1808: b->singlemalloc = PETSC_FALSE;
1810: PetscMalloc1(bdiag[0]+1,&b->a);
1812: b->j = bj;
1813: b->i = bi;
1814: b->diag = bdiag;
1815: b->ilen = 0;
1816: b->imax = 0;
1817: b->row = isrow;
1818: b->col = iscol;
1819: PetscObjectReference((PetscObject)isrow);
1820: PetscObjectReference((PetscObject)iscol);
1821: b->icol = isicol;
1823: PetscMalloc1(n+1,&b->solve_work);
1824: /* In b structure: Free imax, ilen, old a, old j.
1825: Allocate bdiag, solve_work, new a, new j */
1826: PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
1827: b->maxnz = b->nz = bdiag[0]+1;
1829: (fact)->info.factor_mallocs = reallocs;
1830: (fact)->info.fill_ratio_given = f;
1831: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1832: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1833: if (a->inode.size) {
1834: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1835: }
1836: MatSeqAIJCheckInode_FactorLU(fact);
1837: return(0);
1838: }
1840: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1841: {
1842: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1843: IS isicol;
1844: PetscErrorCode ierr;
1845: const PetscInt *r,*ic;
1846: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1847: PetscInt *bi,*cols,nnz,*cols_lvl;
1848: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1849: PetscInt i,levels,diagonal_fill;
1850: PetscBool col_identity,row_identity;
1851: PetscReal f;
1852: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1853: PetscBT lnkbt;
1854: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1855: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1856: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1857: PetscBool missing;
1860: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1861: MatMissingDiagonal(A,&missing,&i);
1862: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1864: f = info->fill;
1865: levels = (PetscInt)info->levels;
1866: diagonal_fill = (PetscInt)info->diagonal_fill;
1868: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1870: ISIdentity(isrow,&row_identity);
1871: ISIdentity(iscol,&col_identity);
1872: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1873: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);
1875: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1876: if (a->inode.size) {
1877: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1878: }
1879: fact->factortype = MAT_FACTOR_ILU;
1880: (fact)->info.factor_mallocs = 0;
1881: (fact)->info.fill_ratio_given = info->fill;
1882: (fact)->info.fill_ratio_needed = 1.0;
1884: b = (Mat_SeqAIJ*)(fact)->data;
1885: b->row = isrow;
1886: b->col = iscol;
1887: b->icol = isicol;
1888: PetscMalloc1((fact)->rmap->n+1,&b->solve_work);
1889: PetscObjectReference((PetscObject)isrow);
1890: PetscObjectReference((PetscObject)iscol);
1891: return(0);
1892: }
1894: ISGetIndices(isrow,&r);
1895: ISGetIndices(isicol,&ic);
1897: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1898: PetscMalloc1(n+1,&bi);
1899: PetscMalloc1(n+1,&bdiag);
1900: bi[0] = bdiag[0] = 0;
1902: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1904: /* create a linked list for storing column indices of the active row */
1905: nlnk = n + 1;
1906: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1908: /* initial FreeSpace size is f*(ai[n]+1) */
1909: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
1910: current_space = free_space;
1911: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);
1912: current_space_lvl = free_space_lvl;
1914: for (i=0; i<n; i++) {
1915: nzi = 0;
1916: /* copy current row into linked list */
1917: nnz = ai[r[i]+1] - ai[r[i]];
1918: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1919: cols = aj + ai[r[i]];
1920: lnk[i] = -1; /* marker to indicate if diagonal exists */
1921: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1922: nzi += nlnk;
1924: /* make sure diagonal entry is included */
1925: if (diagonal_fill && lnk[i] == -1) {
1926: fm = n;
1927: while (lnk[fm] < i) fm = lnk[fm];
1928: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1929: lnk[fm] = i;
1930: lnk_lvl[i] = 0;
1931: nzi++; dcount++;
1932: }
1934: /* add pivot rows into the active row */
1935: nzbd = 0;
1936: prow = lnk[n];
1937: while (prow < i) {
1938: nnz = bdiag[prow];
1939: cols = bj_ptr[prow] + nnz + 1;
1940: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1941: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1942: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1943: nzi += nlnk;
1944: prow = lnk[prow];
1945: nzbd++;
1946: }
1947: bdiag[i] = nzbd;
1948: bi[i+1] = bi[i] + nzi;
1950: /* if free space is not available, make more free space */
1951: if (current_space->local_remaining<nzi) {
1952: nnz = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */
1953: PetscFreeSpaceGet(nnz,¤t_space);
1954: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1955: reallocs++;
1956: }
1958: /* copy data into free_space and free_space_lvl, then initialize lnk */
1959: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1960: bj_ptr[i] = current_space->array;
1961: bjlvl_ptr[i] = current_space_lvl->array;
1963: /* make sure the active row i has diagonal entry */
1964: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1966: current_space->array += nzi;
1967: current_space->local_used += nzi;
1968: current_space->local_remaining -= nzi;
1969: current_space_lvl->array += nzi;
1970: current_space_lvl->local_used += nzi;
1971: current_space_lvl->local_remaining -= nzi;
1972: }
1974: ISRestoreIndices(isrow,&r);
1975: ISRestoreIndices(isicol,&ic);
1977: /* destroy list of free space and other temporary arrays */
1978: PetscMalloc1(bi[n]+1,&bj);
1979: PetscFreeSpaceContiguous(&free_space,bj); /* copy free_space -> bj */
1980: PetscIncompleteLLDestroy(lnk,lnkbt);
1981: PetscFreeSpaceDestroy(free_space_lvl);
1982: PetscFree2(bj_ptr,bjlvl_ptr);
1984: #if defined(PETSC_USE_INFO)
1985: {
1986: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1987: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
1988: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
1989: PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
1990: PetscInfo(A,"for best performance.\n");
1991: if (diagonal_fill) {
1992: PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);
1993: }
1994: }
1995: #endif
1997: /* put together the new matrix */
1998: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
1999: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
2000: b = (Mat_SeqAIJ*)(fact)->data;
2002: b->free_a = PETSC_TRUE;
2003: b->free_ij = PETSC_TRUE;
2004: b->singlemalloc = PETSC_FALSE;
2006: PetscMalloc1(bi[n],&b->a);
2007: b->j = bj;
2008: b->i = bi;
2009: for (i=0; i<n; i++) bdiag[i] += bi[i];
2010: b->diag = bdiag;
2011: b->ilen = 0;
2012: b->imax = 0;
2013: b->row = isrow;
2014: b->col = iscol;
2015: PetscObjectReference((PetscObject)isrow);
2016: PetscObjectReference((PetscObject)iscol);
2017: b->icol = isicol;
2018: PetscMalloc1(n+1,&b->solve_work);
2019: /* In b structure: Free imax, ilen, old a, old j.
2020: Allocate bdiag, solve_work, new a, new j */
2021: PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
2022: b->maxnz = b->nz = bi[n];
2024: (fact)->info.factor_mallocs = reallocs;
2025: (fact)->info.fill_ratio_given = f;
2026: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2027: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2028: if (a->inode.size) {
2029: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2030: }
2031: return(0);
2032: }
2034: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2035: {
2036: Mat C = B;
2037: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2038: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2039: IS ip=b->row,iip = b->icol;
2041: const PetscInt *rip,*riip;
2042: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2043: PetscInt *ai=a->i,*aj=a->j;
2044: PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2045: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2046: PetscBool perm_identity;
2047: FactorShiftCtx sctx;
2048: PetscReal rs;
2049: MatScalar d,*v;
2052: /* MatPivotSetUp(): initialize shift context sctx */
2053: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2055: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2056: sctx.shift_top = info->zeropivot;
2057: for (i=0; i<mbs; i++) {
2058: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2059: d = (aa)[a->diag[i]];
2060: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2061: v = aa+ai[i];
2062: nz = ai[i+1] - ai[i];
2063: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2064: if (rs>sctx.shift_top) sctx.shift_top = rs;
2065: }
2066: sctx.shift_top *= 1.1;
2067: sctx.nshift_max = 5;
2068: sctx.shift_lo = 0.;
2069: sctx.shift_hi = 1.;
2070: }
2072: ISGetIndices(ip,&rip);
2073: ISGetIndices(iip,&riip);
2075: /* allocate working arrays
2076: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2077: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2078: */
2079: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);
2081: do {
2082: sctx.newshift = PETSC_FALSE;
2084: for (i=0; i<mbs; i++) c2r[i] = mbs;
2085: if (mbs) il[0] = 0;
2087: for (k = 0; k<mbs; k++) {
2088: /* zero rtmp */
2089: nz = bi[k+1] - bi[k];
2090: bjtmp = bj + bi[k];
2091: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2093: /* load in initial unfactored row */
2094: bval = ba + bi[k];
2095: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2096: for (j = jmin; j < jmax; j++) {
2097: col = riip[aj[j]];
2098: if (col >= k) { /* only take upper triangular entry */
2099: rtmp[col] = aa[j];
2100: *bval++ = 0.0; /* for in-place factorization */
2101: }
2102: }
2103: /* shift the diagonal of the matrix: ZeropivotApply() */
2104: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2106: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2107: dk = rtmp[k];
2108: i = c2r[k]; /* first row to be added to k_th row */
2110: while (i < k) {
2111: nexti = c2r[i]; /* next row to be added to k_th row */
2113: /* compute multiplier, update diag(k) and U(i,k) */
2114: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2115: uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2116: dk += uikdi*ba[ili]; /* update diag[k] */
2117: ba[ili] = uikdi; /* -U(i,k) */
2119: /* add multiple of row i to k-th row */
2120: jmin = ili + 1; jmax = bi[i+1];
2121: if (jmin < jmax) {
2122: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2123: /* update il and c2r for row i */
2124: il[i] = jmin;
2125: j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2126: }
2127: i = nexti;
2128: }
2130: /* copy data into U(k,:) */
2131: rs = 0.0;
2132: jmin = bi[k]; jmax = bi[k+1]-1;
2133: if (jmin < jmax) {
2134: for (j=jmin; j<jmax; j++) {
2135: col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2136: }
2137: /* add the k-th row into il and c2r */
2138: il[k] = jmin;
2139: i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2140: }
2142: /* MatPivotCheck() */
2143: sctx.rs = rs;
2144: sctx.pv = dk;
2145: MatPivotCheck(B,A,info,&sctx,i);
2146: if (sctx.newshift) break;
2147: dk = sctx.pv;
2149: ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2150: }
2151: } while (sctx.newshift);
2153: PetscFree3(rtmp,il,c2r);
2154: ISRestoreIndices(ip,&rip);
2155: ISRestoreIndices(iip,&riip);
2157: ISIdentity(ip,&perm_identity);
2158: if (perm_identity) {
2159: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2160: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2161: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2162: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2163: } else {
2164: B->ops->solve = MatSolve_SeqSBAIJ_1;
2165: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2166: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2167: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2168: }
2170: C->assembled = PETSC_TRUE;
2171: C->preallocated = PETSC_TRUE;
2173: PetscLogFlops(C->rmap->n);
2175: /* MatPivotView() */
2176: if (sctx.nshift) {
2177: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2178: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
2179: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2180: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2181: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2182: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
2183: }
2184: }
2185: return(0);
2186: }
2188: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2189: {
2190: Mat C = B;
2191: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2192: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2193: IS ip=b->row,iip = b->icol;
2195: const PetscInt *rip,*riip;
2196: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2197: PetscInt *ai=a->i,*aj=a->j;
2198: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2199: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2200: PetscBool perm_identity;
2201: FactorShiftCtx sctx;
2202: PetscReal rs;
2203: MatScalar d,*v;
2206: /* MatPivotSetUp(): initialize shift context sctx */
2207: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2209: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2210: sctx.shift_top = info->zeropivot;
2211: for (i=0; i<mbs; i++) {
2212: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2213: d = (aa)[a->diag[i]];
2214: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2215: v = aa+ai[i];
2216: nz = ai[i+1] - ai[i];
2217: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2218: if (rs>sctx.shift_top) sctx.shift_top = rs;
2219: }
2220: sctx.shift_top *= 1.1;
2221: sctx.nshift_max = 5;
2222: sctx.shift_lo = 0.;
2223: sctx.shift_hi = 1.;
2224: }
2226: ISGetIndices(ip,&rip);
2227: ISGetIndices(iip,&riip);
2229: /* initialization */
2230: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);
2232: do {
2233: sctx.newshift = PETSC_FALSE;
2235: for (i=0; i<mbs; i++) jl[i] = mbs;
2236: il[0] = 0;
2238: for (k = 0; k<mbs; k++) {
2239: /* zero rtmp */
2240: nz = bi[k+1] - bi[k];
2241: bjtmp = bj + bi[k];
2242: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2244: bval = ba + bi[k];
2245: /* initialize k-th row by the perm[k]-th row of A */
2246: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2247: for (j = jmin; j < jmax; j++) {
2248: col = riip[aj[j]];
2249: if (col >= k) { /* only take upper triangular entry */
2250: rtmp[col] = aa[j];
2251: *bval++ = 0.0; /* for in-place factorization */
2252: }
2253: }
2254: /* shift the diagonal of the matrix */
2255: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2257: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2258: dk = rtmp[k];
2259: i = jl[k]; /* first row to be added to k_th row */
2261: while (i < k) {
2262: nexti = jl[i]; /* next row to be added to k_th row */
2264: /* compute multiplier, update diag(k) and U(i,k) */
2265: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2266: uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2267: dk += uikdi*ba[ili];
2268: ba[ili] = uikdi; /* -U(i,k) */
2270: /* add multiple of row i to k-th row */
2271: jmin = ili + 1; jmax = bi[i+1];
2272: if (jmin < jmax) {
2273: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2274: /* update il and jl for row i */
2275: il[i] = jmin;
2276: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2277: }
2278: i = nexti;
2279: }
2281: /* shift the diagonals when zero pivot is detected */
2282: /* compute rs=sum of abs(off-diagonal) */
2283: rs = 0.0;
2284: jmin = bi[k]+1;
2285: nz = bi[k+1] - jmin;
2286: bcol = bj + jmin;
2287: for (j=0; j<nz; j++) {
2288: rs += PetscAbsScalar(rtmp[bcol[j]]);
2289: }
2291: sctx.rs = rs;
2292: sctx.pv = dk;
2293: MatPivotCheck(B,A,info,&sctx,k);
2294: if (sctx.newshift) break;
2295: dk = sctx.pv;
2297: /* copy data into U(k,:) */
2298: ba[bi[k]] = 1.0/dk; /* U(k,k) */
2299: jmin = bi[k]+1; jmax = bi[k+1];
2300: if (jmin < jmax) {
2301: for (j=jmin; j<jmax; j++) {
2302: col = bj[j]; ba[j] = rtmp[col];
2303: }
2304: /* add the k-th row into il and jl */
2305: il[k] = jmin;
2306: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2307: }
2308: }
2309: } while (sctx.newshift);
2311: PetscFree3(rtmp,il,jl);
2312: ISRestoreIndices(ip,&rip);
2313: ISRestoreIndices(iip,&riip);
2315: ISIdentity(ip,&perm_identity);
2316: if (perm_identity) {
2317: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2318: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2319: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2320: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2321: } else {
2322: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2323: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2324: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2325: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2326: }
2328: C->assembled = PETSC_TRUE;
2329: C->preallocated = PETSC_TRUE;
2331: PetscLogFlops(C->rmap->n);
2332: if (sctx.nshift) {
2333: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2334: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2335: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2336: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2337: }
2338: }
2339: return(0);
2340: }
2342: /*
2343: icc() under revised new data structure.
2344: Factored arrays bj and ba are stored as
2345: U(0,:),...,U(i,:),U(n-1,:)
2347: ui=fact->i is an array of size n+1, in which
2348: ui+
2349: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2350: ui[n]: points to U(n-1,n-1)+1
2352: udiag=fact->diag is an array of size n,in which
2353: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2355: U(i,:) contains udiag[i] as its last entry, i.e.,
2356: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2357: */
2359: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2360: {
2361: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2362: Mat_SeqSBAIJ *b;
2363: PetscErrorCode ierr;
2364: PetscBool perm_identity,missing;
2365: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2366: const PetscInt *rip,*riip;
2367: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2368: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2369: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2370: PetscReal fill =info->fill,levels=info->levels;
2371: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2372: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2373: PetscBT lnkbt;
2374: IS iperm;
2377: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2378: MatMissingDiagonal(A,&missing,&d);
2379: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2380: ISIdentity(perm,&perm_identity);
2381: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2383: PetscMalloc1(am+1,&ui);
2384: PetscMalloc1(am+1,&udiag);
2385: ui[0] = 0;
2387: /* ICC(0) without matrix ordering: simply rearrange column indices */
2388: if (!levels && perm_identity) {
2389: for (i=0; i<am; i++) {
2390: ncols = ai[i+1] - a->diag[i];
2391: ui[i+1] = ui[i] + ncols;
2392: udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2393: }
2394: PetscMalloc1(ui[am]+1,&uj);
2395: cols = uj;
2396: for (i=0; i<am; i++) {
2397: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2398: ncols = ai[i+1] - a->diag[i] -1;
2399: for (j=0; j<ncols; j++) *cols++ = aj[j];
2400: *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2401: }
2402: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2403: ISGetIndices(iperm,&riip);
2404: ISGetIndices(perm,&rip);
2406: /* initialization */
2407: PetscMalloc1(am+1,&ajtmp);
2409: /* jl: linked list for storing indices of the pivot rows
2410: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2411: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2412: for (i=0; i<am; i++) {
2413: jl[i] = am; il[i] = 0;
2414: }
2416: /* create and initialize a linked list for storing column indices of the active row k */
2417: nlnk = am + 1;
2418: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2420: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2421: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);
2422: current_space = free_space;
2423: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);
2424: current_space_lvl = free_space_lvl;
2426: for (k=0; k<am; k++) { /* for each active row k */
2427: /* initialize lnk by the column indices of row rip[k] of A */
2428: nzk = 0;
2429: ncols = ai[rip[k]+1] - ai[rip[k]];
2430: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2431: ncols_upper = 0;
2432: for (j=0; j<ncols; j++) {
2433: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2434: if (riip[i] >= k) { /* only take upper triangular entry */
2435: ajtmp[ncols_upper] = i;
2436: ncols_upper++;
2437: }
2438: }
2439: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2440: nzk += nlnk;
2442: /* update lnk by computing fill-in for each pivot row to be merged in */
2443: prow = jl[k]; /* 1st pivot row */
2445: while (prow < k) {
2446: nextprow = jl[prow];
2448: /* merge prow into k-th row */
2449: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2450: jmax = ui[prow+1];
2451: ncols = jmax-jmin;
2452: i = jmin - ui[prow];
2453: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2454: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2455: j = *(uj - 1);
2456: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2457: nzk += nlnk;
2459: /* update il and jl for prow */
2460: if (jmin < jmax) {
2461: il[prow] = jmin;
2462: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2463: }
2464: prow = nextprow;
2465: }
2467: /* if free space is not available, make more free space */
2468: if (current_space->local_remaining<nzk) {
2469: i = am - k + 1; /* num of unfactored rows */
2470: i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2471: PetscFreeSpaceGet(i,¤t_space);
2472: PetscFreeSpaceGet(i,¤t_space_lvl);
2473: reallocs++;
2474: }
2476: /* copy data into free_space and free_space_lvl, then initialize lnk */
2477: if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2478: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2480: /* add the k-th row into il and jl */
2481: if (nzk > 1) {
2482: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2483: jl[k] = jl[i]; jl[i] = k;
2484: il[k] = ui[k] + 1;
2485: }
2486: uj_ptr[k] = current_space->array;
2487: uj_lvl_ptr[k] = current_space_lvl->array;
2489: current_space->array += nzk;
2490: current_space->local_used += nzk;
2491: current_space->local_remaining -= nzk;
2493: current_space_lvl->array += nzk;
2494: current_space_lvl->local_used += nzk;
2495: current_space_lvl->local_remaining -= nzk;
2497: ui[k+1] = ui[k] + nzk;
2498: }
2500: ISRestoreIndices(perm,&rip);
2501: ISRestoreIndices(iperm,&riip);
2502: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2503: PetscFree(ajtmp);
2505: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2506: PetscMalloc1(ui[am]+1,&uj);
2507: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2508: PetscIncompleteLLDestroy(lnk,lnkbt);
2509: PetscFreeSpaceDestroy(free_space_lvl);
2511: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2513: /* put together the new matrix in MATSEQSBAIJ format */
2514: b = (Mat_SeqSBAIJ*)(fact)->data;
2515: b->singlemalloc = PETSC_FALSE;
2517: PetscMalloc1(ui[am]+1,&b->a);
2519: b->j = uj;
2520: b->i = ui;
2521: b->diag = udiag;
2522: b->free_diag = PETSC_TRUE;
2523: b->ilen = 0;
2524: b->imax = 0;
2525: b->row = perm;
2526: b->col = perm;
2527: PetscObjectReference((PetscObject)perm);
2528: PetscObjectReference((PetscObject)perm);
2529: b->icol = iperm;
2530: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2532: PetscMalloc1(am+1,&b->solve_work);
2533: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2535: b->maxnz = b->nz = ui[am];
2536: b->free_a = PETSC_TRUE;
2537: b->free_ij = PETSC_TRUE;
2539: fact->info.factor_mallocs = reallocs;
2540: fact->info.fill_ratio_given = fill;
2541: if (ai[am] != 0) {
2542: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2543: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2544: } else {
2545: fact->info.fill_ratio_needed = 0.0;
2546: }
2547: #if defined(PETSC_USE_INFO)
2548: if (ai[am] != 0) {
2549: PetscReal af = fact->info.fill_ratio_needed;
2550: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2551: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2552: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2553: } else {
2554: PetscInfo(A,"Empty matrix\n");
2555: }
2556: #endif
2557: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2558: return(0);
2559: }
2561: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2562: {
2563: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2564: Mat_SeqSBAIJ *b;
2565: PetscErrorCode ierr;
2566: PetscBool perm_identity,missing;
2567: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2568: const PetscInt *rip,*riip;
2569: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2570: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2571: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2572: PetscReal fill =info->fill,levels=info->levels;
2573: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2574: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2575: PetscBT lnkbt;
2576: IS iperm;
2579: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2580: MatMissingDiagonal(A,&missing,&d);
2581: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2582: ISIdentity(perm,&perm_identity);
2583: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2585: PetscMalloc1(am+1,&ui);
2586: PetscMalloc1(am+1,&udiag);
2587: ui[0] = 0;
2589: /* ICC(0) without matrix ordering: simply copies fill pattern */
2590: if (!levels && perm_identity) {
2592: for (i=0; i<am; i++) {
2593: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
2594: udiag[i] = ui[i];
2595: }
2596: PetscMalloc1(ui[am]+1,&uj);
2597: cols = uj;
2598: for (i=0; i<am; i++) {
2599: aj = a->j + a->diag[i];
2600: ncols = ui[i+1] - ui[i];
2601: for (j=0; j<ncols; j++) *cols++ = *aj++;
2602: }
2603: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2604: ISGetIndices(iperm,&riip);
2605: ISGetIndices(perm,&rip);
2607: /* initialization */
2608: PetscMalloc1(am+1,&ajtmp);
2610: /* jl: linked list for storing indices of the pivot rows
2611: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2612: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2613: for (i=0; i<am; i++) {
2614: jl[i] = am; il[i] = 0;
2615: }
2617: /* create and initialize a linked list for storing column indices of the active row k */
2618: nlnk = am + 1;
2619: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2621: /* initial FreeSpace size is fill*(ai[am]+1) */
2622: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);
2623: current_space = free_space;
2624: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);
2625: current_space_lvl = free_space_lvl;
2627: for (k=0; k<am; k++) { /* for each active row k */
2628: /* initialize lnk by the column indices of row rip[k] of A */
2629: nzk = 0;
2630: ncols = ai[rip[k]+1] - ai[rip[k]];
2631: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2632: ncols_upper = 0;
2633: for (j=0; j<ncols; j++) {
2634: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2635: if (riip[i] >= k) { /* only take upper triangular entry */
2636: ajtmp[ncols_upper] = i;
2637: ncols_upper++;
2638: }
2639: }
2640: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2641: nzk += nlnk;
2643: /* update lnk by computing fill-in for each pivot row to be merged in */
2644: prow = jl[k]; /* 1st pivot row */
2646: while (prow < k) {
2647: nextprow = jl[prow];
2649: /* merge prow into k-th row */
2650: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2651: jmax = ui[prow+1];
2652: ncols = jmax-jmin;
2653: i = jmin - ui[prow];
2654: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2655: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2656: j = *(uj - 1);
2657: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2658: nzk += nlnk;
2660: /* update il and jl for prow */
2661: if (jmin < jmax) {
2662: il[prow] = jmin;
2663: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2664: }
2665: prow = nextprow;
2666: }
2668: /* if free space is not available, make more free space */
2669: if (current_space->local_remaining<nzk) {
2670: i = am - k + 1; /* num of unfactored rows */
2671: i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2672: PetscFreeSpaceGet(i,¤t_space);
2673: PetscFreeSpaceGet(i,¤t_space_lvl);
2674: reallocs++;
2675: }
2677: /* copy data into free_space and free_space_lvl, then initialize lnk */
2678: if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2679: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2681: /* add the k-th row into il and jl */
2682: if (nzk > 1) {
2683: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2684: jl[k] = jl[i]; jl[i] = k;
2685: il[k] = ui[k] + 1;
2686: }
2687: uj_ptr[k] = current_space->array;
2688: uj_lvl_ptr[k] = current_space_lvl->array;
2690: current_space->array += nzk;
2691: current_space->local_used += nzk;
2692: current_space->local_remaining -= nzk;
2694: current_space_lvl->array += nzk;
2695: current_space_lvl->local_used += nzk;
2696: current_space_lvl->local_remaining -= nzk;
2698: ui[k+1] = ui[k] + nzk;
2699: }
2701: #if defined(PETSC_USE_INFO)
2702: if (ai[am] != 0) {
2703: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2704: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2705: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2706: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2707: } else {
2708: PetscInfo(A,"Empty matrix\n");
2709: }
2710: #endif
2712: ISRestoreIndices(perm,&rip);
2713: ISRestoreIndices(iperm,&riip);
2714: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2715: PetscFree(ajtmp);
2717: /* destroy list of free space and other temporary array(s) */
2718: PetscMalloc1(ui[am]+1,&uj);
2719: PetscFreeSpaceContiguous(&free_space,uj);
2720: PetscIncompleteLLDestroy(lnk,lnkbt);
2721: PetscFreeSpaceDestroy(free_space_lvl);
2723: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2725: /* put together the new matrix in MATSEQSBAIJ format */
2727: b = (Mat_SeqSBAIJ*)fact->data;
2728: b->singlemalloc = PETSC_FALSE;
2730: PetscMalloc1(ui[am]+1,&b->a);
2732: b->j = uj;
2733: b->i = ui;
2734: b->diag = udiag;
2735: b->free_diag = PETSC_TRUE;
2736: b->ilen = 0;
2737: b->imax = 0;
2738: b->row = perm;
2739: b->col = perm;
2741: PetscObjectReference((PetscObject)perm);
2742: PetscObjectReference((PetscObject)perm);
2744: b->icol = iperm;
2745: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2746: PetscMalloc1(am+1,&b->solve_work);
2747: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
2748: b->maxnz = b->nz = ui[am];
2749: b->free_a = PETSC_TRUE;
2750: b->free_ij = PETSC_TRUE;
2752: fact->info.factor_mallocs = reallocs;
2753: fact->info.fill_ratio_given = fill;
2754: if (ai[am] != 0) {
2755: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2756: } else {
2757: fact->info.fill_ratio_needed = 0.0;
2758: }
2759: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2760: return(0);
2761: }
2763: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2764: {
2765: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2766: Mat_SeqSBAIJ *b;
2767: PetscErrorCode ierr;
2768: PetscBool perm_identity,missing;
2769: PetscReal fill = info->fill;
2770: const PetscInt *rip,*riip;
2771: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2772: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2773: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2774: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2775: PetscBT lnkbt;
2776: IS iperm;
2779: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2780: MatMissingDiagonal(A,&missing,&i);
2781: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2783: /* check whether perm is the identity mapping */
2784: ISIdentity(perm,&perm_identity);
2785: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2786: ISGetIndices(iperm,&riip);
2787: ISGetIndices(perm,&rip);
2789: /* initialization */
2790: PetscMalloc1(am+1,&ui);
2791: PetscMalloc1(am+1,&udiag);
2792: ui[0] = 0;
2794: /* jl: linked list for storing indices of the pivot rows
2795: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2796: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2797: for (i=0; i<am; i++) {
2798: jl[i] = am; il[i] = 0;
2799: }
2801: /* create and initialize a linked list for storing column indices of the active row k */
2802: nlnk = am + 1;
2803: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2805: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2806: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);
2807: current_space = free_space;
2809: for (k=0; k<am; k++) { /* for each active row k */
2810: /* initialize lnk by the column indices of row rip[k] of A */
2811: nzk = 0;
2812: ncols = ai[rip[k]+1] - ai[rip[k]];
2813: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2814: ncols_upper = 0;
2815: for (j=0; j<ncols; j++) {
2816: i = riip[*(aj + ai[rip[k]] + j)];
2817: if (i >= k) { /* only take upper triangular entry */
2818: cols[ncols_upper] = i;
2819: ncols_upper++;
2820: }
2821: }
2822: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2823: nzk += nlnk;
2825: /* update lnk by computing fill-in for each pivot row to be merged in */
2826: prow = jl[k]; /* 1st pivot row */
2828: while (prow < k) {
2829: nextprow = jl[prow];
2830: /* merge prow into k-th row */
2831: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2832: jmax = ui[prow+1];
2833: ncols = jmax-jmin;
2834: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2835: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
2836: nzk += nlnk;
2838: /* update il and jl for prow */
2839: if (jmin < jmax) {
2840: il[prow] = jmin;
2841: j = *uj_ptr;
2842: jl[prow] = jl[j];
2843: jl[j] = prow;
2844: }
2845: prow = nextprow;
2846: }
2848: /* if free space is not available, make more free space */
2849: if (current_space->local_remaining<nzk) {
2850: i = am - k + 1; /* num of unfactored rows */
2851: i = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2852: PetscFreeSpaceGet(i,¤t_space);
2853: reallocs++;
2854: }
2856: /* copy data into free space, then initialize lnk */
2857: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
2859: /* add the k-th row into il and jl */
2860: if (nzk > 1) {
2861: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2862: jl[k] = jl[i]; jl[i] = k;
2863: il[k] = ui[k] + 1;
2864: }
2865: ui_ptr[k] = current_space->array;
2867: current_space->array += nzk;
2868: current_space->local_used += nzk;
2869: current_space->local_remaining -= nzk;
2871: ui[k+1] = ui[k] + nzk;
2872: }
2874: ISRestoreIndices(perm,&rip);
2875: ISRestoreIndices(iperm,&riip);
2876: PetscFree4(ui_ptr,jl,il,cols);
2878: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2879: PetscMalloc1(ui[am]+1,&uj);
2880: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2881: PetscLLDestroy(lnk,lnkbt);
2883: /* put together the new matrix in MATSEQSBAIJ format */
2885: b = (Mat_SeqSBAIJ*)fact->data;
2886: b->singlemalloc = PETSC_FALSE;
2887: b->free_a = PETSC_TRUE;
2888: b->free_ij = PETSC_TRUE;
2890: PetscMalloc1(ui[am]+1,&b->a);
2892: b->j = uj;
2893: b->i = ui;
2894: b->diag = udiag;
2895: b->free_diag = PETSC_TRUE;
2896: b->ilen = 0;
2897: b->imax = 0;
2898: b->row = perm;
2899: b->col = perm;
2901: PetscObjectReference((PetscObject)perm);
2902: PetscObjectReference((PetscObject)perm);
2904: b->icol = iperm;
2905: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2907: PetscMalloc1(am+1,&b->solve_work);
2908: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2910: b->maxnz = b->nz = ui[am];
2912: fact->info.factor_mallocs = reallocs;
2913: fact->info.fill_ratio_given = fill;
2914: if (ai[am] != 0) {
2915: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2916: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2917: } else {
2918: fact->info.fill_ratio_needed = 0.0;
2919: }
2920: #if defined(PETSC_USE_INFO)
2921: if (ai[am] != 0) {
2922: PetscReal af = fact->info.fill_ratio_needed;
2923: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2924: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2925: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2926: } else {
2927: PetscInfo(A,"Empty matrix\n");
2928: }
2929: #endif
2930: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2931: return(0);
2932: }
2934: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2935: {
2936: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2937: Mat_SeqSBAIJ *b;
2938: PetscErrorCode ierr;
2939: PetscBool perm_identity,missing;
2940: PetscReal fill = info->fill;
2941: const PetscInt *rip,*riip;
2942: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2943: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2944: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2945: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2946: PetscBT lnkbt;
2947: IS iperm;
2950: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2951: MatMissingDiagonal(A,&missing,&i);
2952: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2954: /* check whether perm is the identity mapping */
2955: ISIdentity(perm,&perm_identity);
2956: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2957: ISGetIndices(iperm,&riip);
2958: ISGetIndices(perm,&rip);
2960: /* initialization */
2961: PetscMalloc1(am+1,&ui);
2962: ui[0] = 0;
2964: /* jl: linked list for storing indices of the pivot rows
2965: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2966: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2967: for (i=0; i<am; i++) {
2968: jl[i] = am; il[i] = 0;
2969: }
2971: /* create and initialize a linked list for storing column indices of the active row k */
2972: nlnk = am + 1;
2973: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2975: /* initial FreeSpace size is fill*(ai[am]+1) */
2976: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);
2977: current_space = free_space;
2979: for (k=0; k<am; k++) { /* for each active row k */
2980: /* initialize lnk by the column indices of row rip[k] of A */
2981: nzk = 0;
2982: ncols = ai[rip[k]+1] - ai[rip[k]];
2983: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2984: ncols_upper = 0;
2985: for (j=0; j<ncols; j++) {
2986: i = riip[*(aj + ai[rip[k]] + j)];
2987: if (i >= k) { /* only take upper triangular entry */
2988: cols[ncols_upper] = i;
2989: ncols_upper++;
2990: }
2991: }
2992: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2993: nzk += nlnk;
2995: /* update lnk by computing fill-in for each pivot row to be merged in */
2996: prow = jl[k]; /* 1st pivot row */
2998: while (prow < k) {
2999: nextprow = jl[prow];
3000: /* merge prow into k-th row */
3001: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3002: jmax = ui[prow+1];
3003: ncols = jmax-jmin;
3004: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3005: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
3006: nzk += nlnk;
3008: /* update il and jl for prow */
3009: if (jmin < jmax) {
3010: il[prow] = jmin;
3011: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3012: }
3013: prow = nextprow;
3014: }
3016: /* if free space is not available, make more free space */
3017: if (current_space->local_remaining<nzk) {
3018: i = am - k + 1; /* num of unfactored rows */
3019: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3020: PetscFreeSpaceGet(i,¤t_space);
3021: reallocs++;
3022: }
3024: /* copy data into free space, then initialize lnk */
3025: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
3027: /* add the k-th row into il and jl */
3028: if (nzk-1 > 0) {
3029: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3030: jl[k] = jl[i]; jl[i] = k;
3031: il[k] = ui[k] + 1;
3032: }
3033: ui_ptr[k] = current_space->array;
3035: current_space->array += nzk;
3036: current_space->local_used += nzk;
3037: current_space->local_remaining -= nzk;
3039: ui[k+1] = ui[k] + nzk;
3040: }
3042: #if defined(PETSC_USE_INFO)
3043: if (ai[am] != 0) {
3044: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3045: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
3046: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
3047: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
3048: } else {
3049: PetscInfo(A,"Empty matrix\n");
3050: }
3051: #endif
3053: ISRestoreIndices(perm,&rip);
3054: ISRestoreIndices(iperm,&riip);
3055: PetscFree4(ui_ptr,jl,il,cols);
3057: /* destroy list of free space and other temporary array(s) */
3058: PetscMalloc1(ui[am]+1,&uj);
3059: PetscFreeSpaceContiguous(&free_space,uj);
3060: PetscLLDestroy(lnk,lnkbt);
3062: /* put together the new matrix in MATSEQSBAIJ format */
3064: b = (Mat_SeqSBAIJ*)fact->data;
3065: b->singlemalloc = PETSC_FALSE;
3066: b->free_a = PETSC_TRUE;
3067: b->free_ij = PETSC_TRUE;
3069: PetscMalloc1(ui[am]+1,&b->a);
3071: b->j = uj;
3072: b->i = ui;
3073: b->diag = 0;
3074: b->ilen = 0;
3075: b->imax = 0;
3076: b->row = perm;
3077: b->col = perm;
3079: PetscObjectReference((PetscObject)perm);
3080: PetscObjectReference((PetscObject)perm);
3082: b->icol = iperm;
3083: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3085: PetscMalloc1(am+1,&b->solve_work);
3086: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
3087: b->maxnz = b->nz = ui[am];
3089: fact->info.factor_mallocs = reallocs;
3090: fact->info.fill_ratio_given = fill;
3091: if (ai[am] != 0) {
3092: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3093: } else {
3094: fact->info.fill_ratio_needed = 0.0;
3095: }
3096: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3097: return(0);
3098: }
3100: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3101: {
3102: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3103: PetscErrorCode ierr;
3104: PetscInt n = A->rmap->n;
3105: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3106: PetscScalar *x,sum;
3107: const PetscScalar *b;
3108: const MatScalar *aa = a->a,*v;
3109: PetscInt i,nz;
3112: if (!n) return(0);
3114: VecGetArrayRead(bb,&b);
3115: VecGetArrayWrite(xx,&x);
3117: /* forward solve the lower triangular */
3118: x[0] = b[0];
3119: v = aa;
3120: vi = aj;
3121: for (i=1; i<n; i++) {
3122: nz = ai[i+1] - ai[i];
3123: sum = b[i];
3124: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3125: v += nz;
3126: vi += nz;
3127: x[i] = sum;
3128: }
3130: /* backward solve the upper triangular */
3131: for (i=n-1; i>=0; i--) {
3132: v = aa + adiag[i+1] + 1;
3133: vi = aj + adiag[i+1] + 1;
3134: nz = adiag[i] - adiag[i+1]-1;
3135: sum = x[i];
3136: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3137: x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3138: }
3140: PetscLogFlops(2.0*a->nz - A->cmap->n);
3141: VecRestoreArrayRead(bb,&b);
3142: VecRestoreArrayWrite(xx,&x);
3143: return(0);
3144: }
3146: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3147: {
3148: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3149: IS iscol = a->col,isrow = a->row;
3150: PetscErrorCode ierr;
3151: PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3152: const PetscInt *rout,*cout,*r,*c;
3153: PetscScalar *x,*tmp,sum;
3154: const PetscScalar *b;
3155: const MatScalar *aa = a->a,*v;
3158: if (!n) return(0);
3160: VecGetArrayRead(bb,&b);
3161: VecGetArrayWrite(xx,&x);
3162: tmp = a->solve_work;
3164: ISGetIndices(isrow,&rout); r = rout;
3165: ISGetIndices(iscol,&cout); c = cout;
3167: /* forward solve the lower triangular */
3168: tmp[0] = b[r[0]];
3169: v = aa;
3170: vi = aj;
3171: for (i=1; i<n; i++) {
3172: nz = ai[i+1] - ai[i];
3173: sum = b[r[i]];
3174: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3175: tmp[i] = sum;
3176: v += nz; vi += nz;
3177: }
3179: /* backward solve the upper triangular */
3180: for (i=n-1; i>=0; i--) {
3181: v = aa + adiag[i+1]+1;
3182: vi = aj + adiag[i+1]+1;
3183: nz = adiag[i]-adiag[i+1]-1;
3184: sum = tmp[i];
3185: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3186: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3187: }
3189: ISRestoreIndices(isrow,&rout);
3190: ISRestoreIndices(iscol,&cout);
3191: VecRestoreArrayRead(bb,&b);
3192: VecRestoreArrayWrite(xx,&x);
3193: PetscLogFlops(2.0*a->nz - A->cmap->n);
3194: return(0);
3195: }
3197: /*
3198: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3199: */
3200: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3201: {
3202: Mat B = *fact;
3203: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b;
3204: IS isicol;
3206: const PetscInt *r,*ic;
3207: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3208: PetscInt *bi,*bj,*bdiag,*bdiag_rev;
3209: PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3210: PetscInt nlnk,*lnk;
3211: PetscBT lnkbt;
3212: PetscBool row_identity,icol_identity;
3213: MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3214: const PetscInt *ics;
3215: PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3216: PetscReal dt =info->dt,shift=info->shiftamount;
3217: PetscInt dtcount=(PetscInt)info->dtcount,nnz_max;
3218: PetscBool missing;
3221: if (dt == PETSC_DEFAULT) dt = 0.005;
3222: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3224: /* ------- symbolic factorization, can be reused ---------*/
3225: MatMissingDiagonal(A,&missing,&i);
3226: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3227: adiag=a->diag;
3229: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
3231: /* bdiag is location of diagonal in factor */
3232: PetscMalloc1(n+1,&bdiag); /* becomes b->diag */
3233: PetscMalloc1(n+1,&bdiag_rev); /* temporary */
3235: /* allocate row pointers bi */
3236: PetscMalloc1(2*n+2,&bi);
3238: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3239: if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3240: nnz_max = ai[n]+2*n*dtcount+2;
3242: PetscMalloc1(nnz_max+1,&bj);
3243: PetscMalloc1(nnz_max+1,&ba);
3245: /* put together the new matrix */
3246: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
3247: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
3248: b = (Mat_SeqAIJ*)B->data;
3250: b->free_a = PETSC_TRUE;
3251: b->free_ij = PETSC_TRUE;
3252: b->singlemalloc = PETSC_FALSE;
3254: b->a = ba;
3255: b->j = bj;
3256: b->i = bi;
3257: b->diag = bdiag;
3258: b->ilen = 0;
3259: b->imax = 0;
3260: b->row = isrow;
3261: b->col = iscol;
3262: PetscObjectReference((PetscObject)isrow);
3263: PetscObjectReference((PetscObject)iscol);
3264: b->icol = isicol;
3266: PetscMalloc1(n+1,&b->solve_work);
3267: PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
3268: b->maxnz = nnz_max;
3270: B->factortype = MAT_FACTOR_ILUDT;
3271: B->info.factor_mallocs = 0;
3272: B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3273: /* ------- end of symbolic factorization ---------*/
3275: ISGetIndices(isrow,&r);
3276: ISGetIndices(isicol,&ic);
3277: ics = ic;
3279: /* linked list for storing column indices of the active row */
3280: nlnk = n + 1;
3281: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
3283: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3284: PetscMalloc2(n,&im,n,&jtmp);
3285: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3286: PetscMalloc2(n,&rtmp,n,&vtmp);
3287: PetscArrayzero(rtmp,n);
3289: bi[0] = 0;
3290: bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3291: bdiag_rev[n] = bdiag[0];
3292: bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3293: for (i=0; i<n; i++) {
3294: /* copy initial fill into linked list */
3295: nzi = ai[r[i]+1] - ai[r[i]];
3296: if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3297: nzi_al = adiag[r[i]] - ai[r[i]];
3298: nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3299: ajtmp = aj + ai[r[i]];
3300: PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);
3302: /* load in initial (unfactored row) */
3303: aatmp = a->a + ai[r[i]];
3304: for (j=0; j<nzi; j++) {
3305: rtmp[ics[*ajtmp++]] = *aatmp++;
3306: }
3308: /* add pivot rows into linked list */
3309: row = lnk[n];
3310: while (row < i) {
3311: nzi_bl = bi[row+1] - bi[row] + 1;
3312: bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3313: PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);
3314: nzi += nlnk;
3315: row = lnk[row];
3316: }
3318: /* copy data from lnk into jtmp, then initialize lnk */
3319: PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);
3321: /* numerical factorization */
3322: bjtmp = jtmp;
3323: row = *bjtmp++; /* 1st pivot row */
3324: while (row < i) {
3325: pc = rtmp + row;
3326: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3327: multiplier = (*pc) * (*pv);
3328: *pc = multiplier;
3329: if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3330: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3331: pv = ba + bdiag[row+1] + 1;
3332: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3333: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3334: PetscLogFlops(1+2.0*nz);
3335: }
3336: row = *bjtmp++;
3337: }
3339: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3340: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3341: nzi_bl = 0; j = 0;
3342: while (jtmp[j] < i) { /* Note: jtmp is sorted */
3343: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3344: nzi_bl++; j++;
3345: }
3346: nzi_bu = nzi - nzi_bl -1;
3347: while (j < nzi) {
3348: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3349: j++;
3350: }
3352: bjtmp = bj + bi[i];
3353: batmp = ba + bi[i];
3354: /* apply level dropping rule to L part */
3355: ncut = nzi_al + dtcount;
3356: if (ncut < nzi_bl) {
3357: PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);
3358: PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
3359: } else {
3360: ncut = nzi_bl;
3361: }
3362: for (j=0; j<ncut; j++) {
3363: bjtmp[j] = jtmp[j];
3364: batmp[j] = vtmp[j];
3365: }
3366: bi[i+1] = bi[i] + ncut;
3367: nzi = ncut + 1;
3369: /* apply level dropping rule to U part */
3370: ncut = nzi_au + dtcount;
3371: if (ncut < nzi_bu) {
3372: PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);
3373: PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
3374: } else {
3375: ncut = nzi_bu;
3376: }
3377: nzi += ncut;
3379: /* mark bdiagonal */
3380: bdiag[i+1] = bdiag[i] - (ncut + 1);
3381: bdiag_rev[n-i-1] = bdiag[i+1];
3382: bi[2*n - i] = bi[2*n - i +1] - (ncut + 1);
3383: bjtmp = bj + bdiag[i];
3384: batmp = ba + bdiag[i];
3385: *bjtmp = i;
3386: *batmp = diag_tmp; /* rtmp[i]; */
3387: if (*batmp == 0.0) {
3388: *batmp = dt+shift;
3389: }
3390: *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3392: bjtmp = bj + bdiag[i+1]+1;
3393: batmp = ba + bdiag[i+1]+1;
3394: for (k=0; k<ncut; k++) {
3395: bjtmp[k] = jtmp[nzi_bl+1+k];
3396: batmp[k] = vtmp[nzi_bl+1+k];
3397: }
3399: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3400: } /* for (i=0; i<n; i++) */
3401: if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3403: ISRestoreIndices(isrow,&r);
3404: ISRestoreIndices(isicol,&ic);
3406: PetscLLDestroy(lnk,lnkbt);
3407: PetscFree2(im,jtmp);
3408: PetscFree2(rtmp,vtmp);
3409: PetscFree(bdiag_rev);
3411: PetscLogFlops(B->cmap->n);
3412: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3414: ISIdentity(isrow,&row_identity);
3415: ISIdentity(isicol,&icol_identity);
3416: if (row_identity && icol_identity) {
3417: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3418: } else {
3419: B->ops->solve = MatSolve_SeqAIJ;
3420: }
3422: B->ops->solveadd = 0;
3423: B->ops->solvetranspose = 0;
3424: B->ops->solvetransposeadd = 0;
3425: B->ops->matsolve = 0;
3426: B->assembled = PETSC_TRUE;
3427: B->preallocated = PETSC_TRUE;
3428: return(0);
3429: }
3431: /* a wraper of MatILUDTFactor_SeqAIJ() */
3432: /*
3433: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3434: */
3436: PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3437: {
3441: MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);
3442: return(0);
3443: }
3445: /*
3446: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3447: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3448: */
3449: /*
3450: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3451: */
3453: PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3454: {
3455: Mat C =fact;
3456: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3457: IS isrow = b->row,isicol = b->icol;
3459: const PetscInt *r,*ic,*ics;
3460: PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3461: PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3462: MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3463: PetscReal dt=info->dt,shift=info->shiftamount;
3464: PetscBool row_identity, col_identity;
3467: ISGetIndices(isrow,&r);
3468: ISGetIndices(isicol,&ic);
3469: PetscMalloc1(n+1,&rtmp);
3470: ics = ic;
3472: for (i=0; i<n; i++) {
3473: /* initialize rtmp array */
3474: nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */
3475: bjtmp = bj + bi[i];
3476: for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3477: rtmp[i] = 0.0;
3478: nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3479: bjtmp = bj + bdiag[i+1] + 1;
3480: for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3482: /* load in initial unfactored row of A */
3483: nz = ai[r[i]+1] - ai[r[i]];
3484: ajtmp = aj + ai[r[i]];
3485: v = aa + ai[r[i]];
3486: for (j=0; j<nz; j++) {
3487: rtmp[ics[*ajtmp++]] = v[j];
3488: }
3490: /* numerical factorization */
3491: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3492: nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3493: k = 0;
3494: while (k < nzl) {
3495: row = *bjtmp++;
3496: pc = rtmp + row;
3497: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3498: multiplier = (*pc) * (*pv);
3499: *pc = multiplier;
3500: if (PetscAbsScalar(multiplier) > dt) {
3501: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3502: pv = b->a + bdiag[row+1] + 1;
3503: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3504: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3505: PetscLogFlops(1+2.0*nz);
3506: }
3507: k++;
3508: }
3510: /* finished row so stick it into b->a */
3511: /* L-part */
3512: pv = b->a + bi[i];
3513: pj = bj + bi[i];
3514: nzl = bi[i+1] - bi[i];
3515: for (j=0; j<nzl; j++) {
3516: pv[j] = rtmp[pj[j]];
3517: }
3519: /* diagonal: invert diagonal entries for simplier triangular solves */
3520: if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3521: b->a[bdiag[i]] = 1.0/rtmp[i];
3523: /* U-part */
3524: pv = b->a + bdiag[i+1] + 1;
3525: pj = bj + bdiag[i+1] + 1;
3526: nzu = bdiag[i] - bdiag[i+1] - 1;
3527: for (j=0; j<nzu; j++) {
3528: pv[j] = rtmp[pj[j]];
3529: }
3530: }
3532: PetscFree(rtmp);
3533: ISRestoreIndices(isicol,&ic);
3534: ISRestoreIndices(isrow,&r);
3536: ISIdentity(isrow,&row_identity);
3537: ISIdentity(isicol,&col_identity);
3538: if (row_identity && col_identity) {
3539: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3540: } else {
3541: C->ops->solve = MatSolve_SeqAIJ;
3542: }
3543: C->ops->solveadd = 0;
3544: C->ops->solvetranspose = 0;
3545: C->ops->solvetransposeadd = 0;
3546: C->ops->matsolve = 0;
3547: C->assembled = PETSC_TRUE;
3548: C->preallocated = PETSC_TRUE;
3550: PetscLogFlops(C->cmap->n);
3551: return(0);
3552: }