Actual source code: aijfact.c
2: #include <../src/mat/impls/aij/seq/aij.h>
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <petscbt.h>
5: #include <../src/mat/utils/freespace.h>
7: /*
8: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
10: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
11: */
12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
13: {
14: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data;
15: PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
16: const PetscInt *ai = a->i, *aj = a->j;
17: const PetscScalar *aa = a->a;
18: PetscBool *done;
19: PetscReal best,past = 0,future;
21: /* pick initial row */
22: best = -1;
23: for (i=0; i<n; i++) {
24: future = 0.0;
25: for (j=ai[i]; j<ai[i+1]; j++) {
26: if (aj[j] != i) future += PetscAbsScalar(aa[j]);
27: else past = PetscAbsScalar(aa[j]);
28: }
29: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
30: if (past/future > best) {
31: best = past/future;
32: current = i;
33: }
34: }
36: PetscMalloc1(n,&done);
37: PetscArrayzero(done,n);
38: PetscMalloc1(n,&order);
39: order[0] = current;
40: for (i=0; i<n-1; i++) {
41: done[current] = PETSC_TRUE;
42: best = -1;
43: /* loop over all neighbors of current pivot */
44: for (j=ai[current]; j<ai[current+1]; j++) {
45: jj = aj[j];
46: if (done[jj]) continue;
47: /* loop over columns of potential next row computing weights for below and above diagonal */
48: past = future = 0.0;
49: for (k=ai[jj]; k<ai[jj+1]; k++) {
50: kk = aj[k];
51: if (done[kk]) past += PetscAbsScalar(aa[k]);
52: else if (kk != jj) future += PetscAbsScalar(aa[k]);
53: }
54: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
55: if (past/future > best) {
56: best = past/future;
57: newcurrent = jj;
58: }
59: }
60: if (best == -1) { /* no neighbors to select from so select best of all that remain */
61: best = -1;
62: for (k=0; k<n; k++) {
63: if (done[k]) continue;
64: future = 0.0;
65: past = 0.0;
66: for (j=ai[k]; j<ai[k+1]; j++) {
67: kk = aj[j];
68: if (done[kk]) past += PetscAbsScalar(aa[j]);
69: else if (kk != k) future += PetscAbsScalar(aa[j]);
70: }
71: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
72: if (past/future > best) {
73: best = past/future;
74: newcurrent = k;
75: }
76: }
77: }
79: current = newcurrent;
80: order[i+1] = current;
81: }
82: ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);
83: *icol = *irow;
84: PetscObjectReference((PetscObject)*irow);
85: PetscFree(done);
86: PetscFree(order);
87: return 0;
88: }
90: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A,MatSolverType *type)
91: {
92: *type = MATSOLVERPETSC;
93: return 0;
94: }
96: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
97: {
98: PetscInt n = A->rmap->n;
100: #if defined(PETSC_USE_COMPLEX)
102: #endif
103: MatCreate(PetscObjectComm((PetscObject)A),B);
104: MatSetSizes(*B,n,n,n,n);
105: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
106: MatSetType(*B,MATSEQAIJ);
108: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
109: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
111: MatSetBlockSizesFromMats(*B,A,A);
112: PetscStrallocpy(MATORDERINGND,(char**)&(*B)->preferredordering[MAT_FACTOR_LU]);
113: PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ILU]);
114: PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ILUDT]);
115: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
116: MatSetType(*B,MATSEQSBAIJ);
117: MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);
119: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
120: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
121: PetscStrallocpy(MATORDERINGND,(char**)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]);
122: PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ICC]);
123: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
124: (*B)->factortype = ftype;
126: PetscFree((*B)->solvertype);
127: PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);
128: (*B)->canuseordering = PETSC_TRUE;
129: PetscObjectComposeFunction((PetscObject)*B,"MatFactorGetSolverType_C",MatFactorGetSolverType_petsc);
130: return 0;
131: }
133: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
134: {
135: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
136: IS isicol;
137: const PetscInt *r,*ic;
138: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
139: PetscInt *bi,*bj,*ajtmp;
140: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
141: PetscReal f;
142: PetscInt nlnk,*lnk,k,**bi_ptr;
143: PetscFreeSpaceList free_space=NULL,current_space=NULL;
144: PetscBT lnkbt;
145: PetscBool missing;
148: MatMissingDiagonal(A,&missing,&i);
151: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
152: ISGetIndices(isrow,&r);
153: ISGetIndices(isicol,&ic);
155: /* get new row pointers */
156: PetscMalloc1(n+1,&bi);
157: bi[0] = 0;
159: /* bdiag is location of diagonal in factor */
160: PetscMalloc1(n+1,&bdiag);
161: bdiag[0] = 0;
163: /* linked list for storing column indices of the active row */
164: nlnk = n + 1;
165: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
167: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
169: /* initial FreeSpace size is f*(ai[n]+1) */
170: f = info->fill;
171: if (n==1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
172: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
173: current_space = free_space;
175: for (i=0; i<n; i++) {
176: /* copy previous fill into linked list */
177: nzi = 0;
178: nnz = ai[r[i]+1] - ai[r[i]];
179: ajtmp = aj + ai[r[i]];
180: PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt);
181: nzi += nlnk;
183: /* add pivot rows into linked list */
184: row = lnk[n];
185: while (row < i) {
186: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
187: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
188: PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im);
189: nzi += nlnk;
190: row = lnk[row];
191: }
192: bi[i+1] = bi[i] + nzi;
193: im[i] = nzi;
195: /* mark bdiag */
196: nzbd = 0;
197: nnz = nzi;
198: k = lnk[n];
199: while (nnz-- && k < i) {
200: nzbd++;
201: k = lnk[k];
202: }
203: bdiag[i] = bi[i] + nzbd;
205: /* if free space is not available, make more free space */
206: if (current_space->local_remaining<nzi) {
207: nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
208: PetscFreeSpaceGet(nnz,¤t_space);
209: reallocs++;
210: }
212: /* copy data into free space, then initialize lnk */
213: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
215: bi_ptr[i] = current_space->array;
216: current_space->array += nzi;
217: current_space->local_used += nzi;
218: current_space->local_remaining -= nzi;
219: }
220: #if defined(PETSC_USE_INFO)
221: if (ai[n] != 0) {
222: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
223: PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
224: PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
225: PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af);
226: PetscInfo(A,"for best performance.\n");
227: } else {
228: PetscInfo(A,"Empty matrix\n");
229: }
230: #endif
232: ISRestoreIndices(isrow,&r);
233: ISRestoreIndices(isicol,&ic);
235: /* destroy list of free space and other temporary array(s) */
236: PetscMalloc1(bi[n]+1,&bj);
237: PetscFreeSpaceContiguous(&free_space,bj);
238: PetscLLDestroy(lnk,lnkbt);
239: PetscFree2(bi_ptr,im);
241: /* put together the new matrix */
242: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
243: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
244: b = (Mat_SeqAIJ*)(B)->data;
246: b->free_a = PETSC_TRUE;
247: b->free_ij = PETSC_TRUE;
248: b->singlemalloc = PETSC_FALSE;
250: PetscMalloc1(bi[n]+1,&b->a);
251: b->j = bj;
252: b->i = bi;
253: b->diag = bdiag;
254: b->ilen = NULL;
255: b->imax = NULL;
256: b->row = isrow;
257: b->col = iscol;
258: PetscObjectReference((PetscObject)isrow);
259: PetscObjectReference((PetscObject)iscol);
260: b->icol = isicol;
261: PetscMalloc1(n+1,&b->solve_work);
263: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
264: PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
265: b->maxnz = b->nz = bi[n];
267: (B)->factortype = MAT_FACTOR_LU;
268: (B)->info.factor_mallocs = reallocs;
269: (B)->info.fill_ratio_given = f;
271: if (ai[n]) {
272: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
273: } else {
274: (B)->info.fill_ratio_needed = 0.0;
275: }
276: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
277: if (a->inode.size) {
278: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
279: }
280: return 0;
281: }
283: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
284: {
285: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
286: IS isicol;
287: const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
288: PetscInt i,n=A->rmap->n;
289: PetscInt *bi,*bj;
290: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
291: PetscReal f;
292: PetscInt nlnk,*lnk,k,**bi_ptr;
293: PetscFreeSpaceList free_space=NULL,current_space=NULL;
294: PetscBT lnkbt;
295: PetscBool missing;
298: MatMissingDiagonal(A,&missing,&i);
301: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
302: ISGetIndices(isrow,&r);
303: ISGetIndices(isicol,&ic);
305: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
306: PetscMalloc1(n+1,&bi);
307: PetscMalloc1(n+1,&bdiag);
308: bi[0] = bdiag[0] = 0;
310: /* linked list for storing column indices of the active row */
311: nlnk = n + 1;
312: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
314: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
316: /* initial FreeSpace size is f*(ai[n]+1) */
317: f = info->fill;
318: if (n==1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
319: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
320: current_space = free_space;
322: for (i=0; i<n; i++) {
323: /* copy previous fill into linked list */
324: nzi = 0;
325: nnz = ai[r[i]+1] - ai[r[i]];
326: ajtmp = aj + ai[r[i]];
327: PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt);
328: nzi += nlnk;
330: /* add pivot rows into linked list */
331: row = lnk[n];
332: while (row < i) {
333: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
334: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
335: PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im);
336: nzi += nlnk;
337: row = lnk[row];
338: }
339: bi[i+1] = bi[i] + nzi;
340: im[i] = nzi;
342: /* mark bdiag */
343: nzbd = 0;
344: nnz = nzi;
345: k = lnk[n];
346: while (nnz-- && k < i) {
347: nzbd++;
348: k = lnk[k];
349: }
350: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
352: /* if free space is not available, make more free space */
353: if (current_space->local_remaining<nzi) {
354: /* estimated additional space needed */
355: nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi));
356: PetscFreeSpaceGet(nnz,¤t_space);
357: reallocs++;
358: }
360: /* copy data into free space, then initialize lnk */
361: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
363: bi_ptr[i] = current_space->array;
364: current_space->array += nzi;
365: current_space->local_used += nzi;
366: current_space->local_remaining -= nzi;
367: }
369: ISRestoreIndices(isrow,&r);
370: ISRestoreIndices(isicol,&ic);
372: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
373: PetscMalloc1(bi[n]+1,&bj);
374: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
375: PetscLLDestroy(lnk,lnkbt);
376: PetscFree2(bi_ptr,im);
378: /* put together the new matrix */
379: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
380: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
381: b = (Mat_SeqAIJ*)(B)->data;
383: b->free_a = PETSC_TRUE;
384: b->free_ij = PETSC_TRUE;
385: b->singlemalloc = PETSC_FALSE;
387: PetscMalloc1(bdiag[0]+1,&b->a);
389: b->j = bj;
390: b->i = bi;
391: b->diag = bdiag;
392: b->ilen = NULL;
393: b->imax = NULL;
394: b->row = isrow;
395: b->col = iscol;
396: PetscObjectReference((PetscObject)isrow);
397: PetscObjectReference((PetscObject)iscol);
398: b->icol = isicol;
399: PetscMalloc1(n+1,&b->solve_work);
401: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
402: PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
403: b->maxnz = b->nz = bdiag[0]+1;
405: B->factortype = MAT_FACTOR_LU;
406: B->info.factor_mallocs = reallocs;
407: B->info.fill_ratio_given = f;
409: if (ai[n]) {
410: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
411: } else {
412: B->info.fill_ratio_needed = 0.0;
413: }
414: #if defined(PETSC_USE_INFO)
415: if (ai[n] != 0) {
416: PetscReal af = B->info.fill_ratio_needed;
417: PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
418: PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
419: PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af);
420: PetscInfo(A,"for best performance.\n");
421: } else {
422: PetscInfo(A,"Empty matrix\n");
423: }
424: #endif
425: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
426: if (a->inode.size) {
427: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
428: }
429: MatSeqAIJCheckInode_FactorLU(B);
430: return 0;
431: }
433: /*
434: Trouble in factorization, should we dump the original matrix?
435: */
436: PetscErrorCode MatFactorDumpMatrix(Mat A)
437: {
438: PetscBool flg = PETSC_FALSE;
440: PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);
441: if (flg) {
442: PetscViewer viewer;
443: char filename[PETSC_MAX_PATH_LEN];
445: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
446: PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);
447: MatView(A,viewer);
448: PetscViewerDestroy(&viewer);
449: }
450: return 0;
451: }
453: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
454: {
455: Mat C =B;
456: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
457: IS isrow = b->row,isicol = b->icol;
458: const PetscInt *r,*ic,*ics;
459: const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
460: PetscInt i,j,k,nz,nzL,row,*pj;
461: const PetscInt *ajtmp,*bjtmp;
462: MatScalar *rtmp,*pc,multiplier,*pv;
463: const MatScalar *aa=a->a,*v;
464: PetscBool row_identity,col_identity;
465: FactorShiftCtx sctx;
466: const PetscInt *ddiag;
467: PetscReal rs;
468: MatScalar d;
470: /* MatPivotSetUp(): initialize shift context sctx */
471: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
473: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
474: ddiag = a->diag;
475: sctx.shift_top = info->zeropivot;
476: for (i=0; i<n; i++) {
477: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
478: d = (aa)[ddiag[i]];
479: rs = -PetscAbsScalar(d) - PetscRealPart(d);
480: v = aa+ai[i];
481: nz = ai[i+1] - ai[i];
482: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
483: if (rs>sctx.shift_top) sctx.shift_top = rs;
484: }
485: sctx.shift_top *= 1.1;
486: sctx.nshift_max = 5;
487: sctx.shift_lo = 0.;
488: sctx.shift_hi = 1.;
489: }
491: ISGetIndices(isrow,&r);
492: ISGetIndices(isicol,&ic);
493: PetscMalloc1(n+1,&rtmp);
494: ics = ic;
496: do {
497: sctx.newshift = PETSC_FALSE;
498: for (i=0; i<n; i++) {
499: /* zero rtmp */
500: /* L part */
501: nz = bi[i+1] - bi[i];
502: bjtmp = bj + bi[i];
503: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
505: /* U part */
506: nz = bdiag[i]-bdiag[i+1];
507: bjtmp = bj + bdiag[i+1]+1;
508: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
510: /* load in initial (unfactored row) */
511: nz = ai[r[i]+1] - ai[r[i]];
512: ajtmp = aj + ai[r[i]];
513: v = aa + ai[r[i]];
514: for (j=0; j<nz; j++) {
515: rtmp[ics[ajtmp[j]]] = v[j];
516: }
517: /* ZeropivotApply() */
518: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
520: /* elimination */
521: bjtmp = bj + bi[i];
522: row = *bjtmp++;
523: nzL = bi[i+1] - bi[i];
524: for (k=0; k < nzL; k++) {
525: pc = rtmp + row;
526: if (*pc != 0.0) {
527: pv = b->a + bdiag[row];
528: multiplier = *pc * (*pv);
529: *pc = multiplier;
531: pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
532: pv = b->a + bdiag[row+1]+1;
533: nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
535: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
536: PetscLogFlops(1+2.0*nz);
537: }
538: row = *bjtmp++;
539: }
541: /* finished row so stick it into b->a */
542: rs = 0.0;
543: /* L part */
544: pv = b->a + bi[i];
545: pj = b->j + bi[i];
546: nz = bi[i+1] - bi[i];
547: for (j=0; j<nz; j++) {
548: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
549: }
551: /* U part */
552: pv = b->a + bdiag[i+1]+1;
553: pj = b->j + bdiag[i+1]+1;
554: nz = bdiag[i] - bdiag[i+1]-1;
555: for (j=0; j<nz; j++) {
556: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
557: }
559: sctx.rs = rs;
560: sctx.pv = rtmp[i];
561: MatPivotCheck(B,A,info,&sctx,i);
562: if (sctx.newshift) break; /* break for-loop */
563: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
565: /* Mark diagonal and invert diagonal for simpler triangular solves */
566: pv = b->a + bdiag[i];
567: *pv = 1.0/rtmp[i];
569: } /* endof for (i=0; i<n; i++) { */
571: /* MatPivotRefine() */
572: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
573: /*
574: * if no shift in this attempt & shifting & started shifting & can refine,
575: * then try lower shift
576: */
577: sctx.shift_hi = sctx.shift_fraction;
578: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
579: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
580: sctx.newshift = PETSC_TRUE;
581: sctx.nshift++;
582: }
583: } while (sctx.newshift);
585: PetscFree(rtmp);
586: ISRestoreIndices(isicol,&ic);
587: ISRestoreIndices(isrow,&r);
589: ISIdentity(isrow,&row_identity);
590: ISIdentity(isicol,&col_identity);
591: if (b->inode.size) {
592: C->ops->solve = MatSolve_SeqAIJ_Inode;
593: } else if (row_identity && col_identity) {
594: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
595: } else {
596: C->ops->solve = MatSolve_SeqAIJ;
597: }
598: C->ops->solveadd = MatSolveAdd_SeqAIJ;
599: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
600: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
601: C->ops->matsolve = MatMatSolve_SeqAIJ;
602: C->assembled = PETSC_TRUE;
603: C->preallocated = PETSC_TRUE;
605: PetscLogFlops(C->cmap->n);
607: /* MatShiftView(A,info,&sctx) */
608: if (sctx.nshift) {
609: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
610: PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
611: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
612: PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
613: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
614: PetscInfo(A,"number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
615: }
616: }
617: return 0;
618: }
620: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
621: {
622: Mat C =B;
623: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
624: IS isrow = b->row,isicol = b->icol;
625: const PetscInt *r,*ic,*ics;
626: PetscInt nz,row,i,j,n=A->rmap->n,diag;
627: const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
628: const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
629: MatScalar *pv,*rtmp,*pc,multiplier,d;
630: const MatScalar *v,*aa=a->a;
631: PetscReal rs=0.0;
632: FactorShiftCtx sctx;
633: const PetscInt *ddiag;
634: PetscBool row_identity, col_identity;
636: /* MatPivotSetUp(): initialize shift context sctx */
637: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
639: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
640: ddiag = a->diag;
641: sctx.shift_top = info->zeropivot;
642: for (i=0; i<n; i++) {
643: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
644: d = (aa)[ddiag[i]];
645: rs = -PetscAbsScalar(d) - PetscRealPart(d);
646: v = aa+ai[i];
647: nz = ai[i+1] - ai[i];
648: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
649: if (rs>sctx.shift_top) sctx.shift_top = rs;
650: }
651: sctx.shift_top *= 1.1;
652: sctx.nshift_max = 5;
653: sctx.shift_lo = 0.;
654: sctx.shift_hi = 1.;
655: }
657: ISGetIndices(isrow,&r);
658: ISGetIndices(isicol,&ic);
659: PetscMalloc1(n+1,&rtmp);
660: ics = ic;
662: do {
663: sctx.newshift = PETSC_FALSE;
664: for (i=0; i<n; i++) {
665: nz = bi[i+1] - bi[i];
666: bjtmp = bj + bi[i];
667: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
669: /* load in initial (unfactored row) */
670: nz = ai[r[i]+1] - ai[r[i]];
671: ajtmp = aj + ai[r[i]];
672: v = aa + ai[r[i]];
673: for (j=0; j<nz; j++) {
674: rtmp[ics[ajtmp[j]]] = v[j];
675: }
676: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
678: row = *bjtmp++;
679: while (row < i) {
680: pc = rtmp + row;
681: if (*pc != 0.0) {
682: pv = b->a + diag_offset[row];
683: pj = b->j + diag_offset[row] + 1;
684: multiplier = *pc / *pv++;
685: *pc = multiplier;
686: nz = bi[row+1] - diag_offset[row] - 1;
687: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
688: PetscLogFlops(1+2.0*nz);
689: }
690: row = *bjtmp++;
691: }
692: /* finished row so stick it into b->a */
693: pv = b->a + bi[i];
694: pj = b->j + bi[i];
695: nz = bi[i+1] - bi[i];
696: diag = diag_offset[i] - bi[i];
697: rs = 0.0;
698: for (j=0; j<nz; j++) {
699: pv[j] = rtmp[pj[j]];
700: rs += PetscAbsScalar(pv[j]);
701: }
702: rs -= PetscAbsScalar(pv[diag]);
704: sctx.rs = rs;
705: sctx.pv = pv[diag];
706: MatPivotCheck(B,A,info,&sctx,i);
707: if (sctx.newshift) break;
708: pv[diag] = sctx.pv;
709: }
711: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
712: /*
713: * if no shift in this attempt & shifting & started shifting & can refine,
714: * then try lower shift
715: */
716: sctx.shift_hi = sctx.shift_fraction;
717: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
718: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
719: sctx.newshift = PETSC_TRUE;
720: sctx.nshift++;
721: }
722: } while (sctx.newshift);
724: /* invert diagonal entries for simpler triangular solves */
725: for (i=0; i<n; i++) {
726: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
727: }
728: PetscFree(rtmp);
729: ISRestoreIndices(isicol,&ic);
730: ISRestoreIndices(isrow,&r);
732: ISIdentity(isrow,&row_identity);
733: ISIdentity(isicol,&col_identity);
734: if (row_identity && col_identity) {
735: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
736: } else {
737: C->ops->solve = MatSolve_SeqAIJ_inplace;
738: }
739: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
740: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
741: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
742: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
744: C->assembled = PETSC_TRUE;
745: C->preallocated = PETSC_TRUE;
747: PetscLogFlops(C->cmap->n);
748: if (sctx.nshift) {
749: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
750: PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
751: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
752: PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
753: }
754: }
755: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
756: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
758: MatSeqAIJCheckInode(C);
759: return 0;
760: }
762: /*
763: This routine implements inplace ILU(0) with row or/and column permutations.
764: Input:
765: A - original matrix
766: Output;
767: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
768: a->j (col index) is permuted by the inverse of colperm, then sorted
769: a->a reordered accordingly with a->j
770: a->diag (ptr to diagonal elements) is updated.
771: */
772: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
773: {
774: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data;
775: IS isrow = a->row,isicol = a->icol;
776: const PetscInt *r,*ic,*ics;
777: PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
778: PetscInt *ajtmp,nz,row;
779: PetscInt *diag = a->diag,nbdiag,*pj;
780: PetscScalar *rtmp,*pc,multiplier,d;
781: MatScalar *pv,*v;
782: PetscReal rs;
783: FactorShiftCtx sctx;
784: const MatScalar *aa=a->a,*vtmp;
788: /* MatPivotSetUp(): initialize shift context sctx */
789: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
791: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
792: const PetscInt *ddiag = a->diag;
793: sctx.shift_top = info->zeropivot;
794: for (i=0; i<n; i++) {
795: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
796: d = (aa)[ddiag[i]];
797: rs = -PetscAbsScalar(d) - PetscRealPart(d);
798: vtmp = aa+ai[i];
799: nz = ai[i+1] - ai[i];
800: for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
801: if (rs>sctx.shift_top) sctx.shift_top = rs;
802: }
803: sctx.shift_top *= 1.1;
804: sctx.nshift_max = 5;
805: sctx.shift_lo = 0.;
806: sctx.shift_hi = 1.;
807: }
809: ISGetIndices(isrow,&r);
810: ISGetIndices(isicol,&ic);
811: PetscMalloc1(n+1,&rtmp);
812: PetscArrayzero(rtmp,n+1);
813: ics = ic;
815: #if defined(MV)
816: sctx.shift_top = 0.;
817: sctx.nshift_max = 0;
818: sctx.shift_lo = 0.;
819: sctx.shift_hi = 0.;
820: sctx.shift_fraction = 0.;
822: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
823: sctx.shift_top = 0.;
824: for (i=0; i<n; i++) {
825: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
826: d = (a->a)[diag[i]];
827: rs = -PetscAbsScalar(d) - PetscRealPart(d);
828: v = a->a+ai[i];
829: nz = ai[i+1] - ai[i];
830: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
831: if (rs>sctx.shift_top) sctx.shift_top = rs;
832: }
833: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
834: sctx.shift_top *= 1.1;
835: sctx.nshift_max = 5;
836: sctx.shift_lo = 0.;
837: sctx.shift_hi = 1.;
838: }
840: sctx.shift_amount = 0.;
841: sctx.nshift = 0;
842: #endif
844: do {
845: sctx.newshift = PETSC_FALSE;
846: for (i=0; i<n; i++) {
847: /* load in initial unfactored row */
848: nz = ai[r[i]+1] - ai[r[i]];
849: ajtmp = aj + ai[r[i]];
850: v = a->a + ai[r[i]];
851: /* sort permuted ajtmp and values v accordingly */
852: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
853: PetscSortIntWithScalarArray(nz,ajtmp,v);
855: diag[r[i]] = ai[r[i]];
856: for (j=0; j<nz; j++) {
857: rtmp[ajtmp[j]] = v[j];
858: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
859: }
860: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
862: row = *ajtmp++;
863: while (row < i) {
864: pc = rtmp + row;
865: if (*pc != 0.0) {
866: pv = a->a + diag[r[row]];
867: pj = aj + diag[r[row]] + 1;
869: multiplier = *pc / *pv++;
870: *pc = multiplier;
871: nz = ai[r[row]+1] - diag[r[row]] - 1;
872: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
873: PetscLogFlops(1+2.0*nz);
874: }
875: row = *ajtmp++;
876: }
877: /* finished row so overwrite it onto a->a */
878: pv = a->a + ai[r[i]];
879: pj = aj + ai[r[i]];
880: nz = ai[r[i]+1] - ai[r[i]];
881: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
883: rs = 0.0;
884: for (j=0; j<nz; j++) {
885: pv[j] = rtmp[pj[j]];
886: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
887: }
889: sctx.rs = rs;
890: sctx.pv = pv[nbdiag];
891: MatPivotCheck(B,A,info,&sctx,i);
892: if (sctx.newshift) break;
893: pv[nbdiag] = sctx.pv;
894: }
896: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
897: /*
898: * if no shift in this attempt & shifting & started shifting & can refine,
899: * then try lower shift
900: */
901: sctx.shift_hi = sctx.shift_fraction;
902: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
903: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
904: sctx.newshift = PETSC_TRUE;
905: sctx.nshift++;
906: }
907: } while (sctx.newshift);
909: /* invert diagonal entries for simpler triangular solves */
910: for (i=0; i<n; i++) {
911: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
912: }
914: PetscFree(rtmp);
915: ISRestoreIndices(isicol,&ic);
916: ISRestoreIndices(isrow,&r);
918: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
919: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
920: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
921: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
923: A->assembled = PETSC_TRUE;
924: A->preallocated = PETSC_TRUE;
926: PetscLogFlops(A->cmap->n);
927: if (sctx.nshift) {
928: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
929: PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
930: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
931: PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
932: }
933: }
934: return 0;
935: }
937: /* ----------------------------------------------------------- */
938: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
939: {
940: Mat C;
942: MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);
943: MatLUFactorSymbolic(C,A,row,col,info);
944: MatLUFactorNumeric(C,A,info);
946: A->ops->solve = C->ops->solve;
947: A->ops->solvetranspose = C->ops->solvetranspose;
949: MatHeaderMerge(A,&C);
950: PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);
951: return 0;
952: }
953: /* ----------------------------------------------------------- */
955: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
956: {
957: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
958: IS iscol = a->col,isrow = a->row;
959: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
960: PetscInt nz;
961: const PetscInt *rout,*cout,*r,*c;
962: PetscScalar *x,*tmp,*tmps,sum;
963: const PetscScalar *b;
964: const MatScalar *aa = a->a,*v;
966: if (!n) return 0;
968: VecGetArrayRead(bb,&b);
969: VecGetArrayWrite(xx,&x);
970: tmp = a->solve_work;
972: ISGetIndices(isrow,&rout); r = rout;
973: ISGetIndices(iscol,&cout)); c = cout + (n-1;
975: /* forward solve the lower triangular */
976: tmp[0] = b[*r++];
977: tmps = tmp;
978: for (i=1; i<n; i++) {
979: v = aa + ai[i];
980: vi = aj + ai[i];
981: nz = a->diag[i] - ai[i];
982: sum = b[*r++];
983: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
984: tmp[i] = sum;
985: }
987: /* backward solve the upper triangular */
988: for (i=n-1; i>=0; i--) {
989: v = aa + a->diag[i] + 1;
990: vi = aj + a->diag[i] + 1;
991: nz = ai[i+1] - a->diag[i] - 1;
992: sum = tmp[i];
993: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
994: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
995: }
997: ISRestoreIndices(isrow,&rout);
998: ISRestoreIndices(iscol,&cout);
999: VecRestoreArrayRead(bb,&b);
1000: VecRestoreArrayWrite(xx,&x);
1001: PetscLogFlops(2.0*a->nz - A->cmap->n);
1002: return 0;
1003: }
1005: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1006: {
1007: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1008: IS iscol = a->col,isrow = a->row;
1009: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1010: PetscInt nz,neq,ldb,ldx;
1011: const PetscInt *rout,*cout,*r,*c;
1012: PetscScalar *x,*tmp = a->solve_work,*tmps,sum;
1013: const PetscScalar *b,*aa = a->a,*v;
1014: PetscBool isdense;
1016: if (!n) return 0;
1017: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense);
1019: if (X != B) {
1020: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense);
1022: }
1023: MatDenseGetArrayRead(B,&b);
1024: MatDenseGetLDA(B,&ldb);
1025: MatDenseGetArray(X,&x);
1026: MatDenseGetLDA(X,&ldx);
1027: ISGetIndices(isrow,&rout); r = rout;
1028: ISGetIndices(iscol,&cout); c = cout;
1029: for (neq=0; neq<B->cmap->n; neq++) {
1030: /* forward solve the lower triangular */
1031: tmp[0] = b[r[0]];
1032: tmps = tmp;
1033: for (i=1; i<n; i++) {
1034: v = aa + ai[i];
1035: vi = aj + ai[i];
1036: nz = a->diag[i] - ai[i];
1037: sum = b[r[i]];
1038: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1039: tmp[i] = sum;
1040: }
1041: /* backward solve the upper triangular */
1042: for (i=n-1; i>=0; i--) {
1043: v = aa + a->diag[i] + 1;
1044: vi = aj + a->diag[i] + 1;
1045: nz = ai[i+1] - a->diag[i] - 1;
1046: sum = tmp[i];
1047: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1048: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1049: }
1050: b += ldb;
1051: x += ldx;
1052: }
1053: ISRestoreIndices(isrow,&rout);
1054: ISRestoreIndices(iscol,&cout);
1055: MatDenseRestoreArrayRead(B,&b);
1056: MatDenseRestoreArray(X,&x);
1057: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1058: return 0;
1059: }
1061: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1062: {
1063: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1064: IS iscol = a->col,isrow = a->row;
1065: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1066: PetscInt nz,neq,ldb,ldx;
1067: const PetscInt *rout,*cout,*r,*c;
1068: PetscScalar *x,*tmp = a->solve_work,sum;
1069: const PetscScalar *b,*aa = a->a,*v;
1070: PetscBool isdense;
1072: if (!n) return 0;
1073: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense);
1075: if (X != B) {
1076: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense);
1078: }
1079: MatDenseGetArrayRead(B,&b);
1080: MatDenseGetLDA(B,&ldb);
1081: MatDenseGetArray(X,&x);
1082: MatDenseGetLDA(X,&ldx);
1083: ISGetIndices(isrow,&rout); r = rout;
1084: ISGetIndices(iscol,&cout); c = cout;
1085: for (neq=0; neq<B->cmap->n; neq++) {
1086: /* forward solve the lower triangular */
1087: tmp[0] = b[r[0]];
1088: v = aa;
1089: vi = aj;
1090: for (i=1; i<n; i++) {
1091: nz = ai[i+1] - ai[i];
1092: sum = b[r[i]];
1093: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1094: tmp[i] = sum;
1095: v += nz; vi += nz;
1096: }
1097: /* backward solve the upper triangular */
1098: for (i=n-1; i>=0; i--) {
1099: v = aa + adiag[i+1]+1;
1100: vi = aj + adiag[i+1]+1;
1101: nz = adiag[i]-adiag[i+1]-1;
1102: sum = tmp[i];
1103: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1104: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1105: }
1106: b += ldb;
1107: x += ldx;
1108: }
1109: ISRestoreIndices(isrow,&rout);
1110: ISRestoreIndices(iscol,&cout);
1111: MatDenseRestoreArrayRead(B,&b);
1112: MatDenseRestoreArray(X,&x);
1113: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1114: return 0;
1115: }
1117: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1118: {
1119: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1120: IS iscol = a->col,isrow = a->row;
1121: const PetscInt *r,*c,*rout,*cout;
1122: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1123: PetscInt nz,row;
1124: PetscScalar *x,*tmp,*tmps,sum;
1125: const PetscScalar *b;
1126: const MatScalar *aa = a->a,*v;
1128: if (!n) return 0;
1130: VecGetArrayRead(bb,&b);
1131: VecGetArrayWrite(xx,&x);
1132: tmp = a->solve_work;
1134: ISGetIndices(isrow,&rout); r = rout;
1135: ISGetIndices(iscol,&cout)); c = cout + (n-1;
1137: /* forward solve the lower triangular */
1138: tmp[0] = b[*r++];
1139: tmps = tmp;
1140: for (row=1; row<n; row++) {
1141: i = rout[row]; /* permuted row */
1142: v = aa + ai[i];
1143: vi = aj + ai[i];
1144: nz = a->diag[i] - ai[i];
1145: sum = b[*r++];
1146: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1147: tmp[row] = sum;
1148: }
1150: /* backward solve the upper triangular */
1151: for (row=n-1; row>=0; row--) {
1152: i = rout[row]; /* permuted row */
1153: v = aa + a->diag[i] + 1;
1154: vi = aj + a->diag[i] + 1;
1155: nz = ai[i+1] - a->diag[i] - 1;
1156: sum = tmp[row];
1157: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1158: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1159: }
1161: ISRestoreIndices(isrow,&rout);
1162: ISRestoreIndices(iscol,&cout);
1163: VecRestoreArrayRead(bb,&b);
1164: VecRestoreArrayWrite(xx,&x);
1165: PetscLogFlops(2.0*a->nz - A->cmap->n);
1166: return 0;
1167: }
1169: /* ----------------------------------------------------------- */
1170: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1171: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1172: {
1173: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1174: PetscInt n = A->rmap->n;
1175: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag;
1176: PetscScalar *x;
1177: const PetscScalar *b;
1178: const MatScalar *aa = a->a;
1179: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1180: PetscInt adiag_i,i,nz,ai_i;
1181: const PetscInt *vi;
1182: const MatScalar *v;
1183: PetscScalar sum;
1184: #endif
1186: if (!n) return 0;
1188: VecGetArrayRead(bb,&b);
1189: VecGetArrayWrite(xx,&x);
1191: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1192: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1193: #else
1194: /* forward solve the lower triangular */
1195: x[0] = b[0];
1196: for (i=1; i<n; i++) {
1197: ai_i = ai[i];
1198: v = aa + ai_i;
1199: vi = aj + ai_i;
1200: nz = adiag[i] - ai_i;
1201: sum = b[i];
1202: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1203: x[i] = sum;
1204: }
1206: /* backward solve the upper triangular */
1207: for (i=n-1; i>=0; i--) {
1208: adiag_i = adiag[i];
1209: v = aa + adiag_i + 1;
1210: vi = aj + adiag_i + 1;
1211: nz = ai[i+1] - adiag_i - 1;
1212: sum = x[i];
1213: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1214: x[i] = sum*aa[adiag_i];
1215: }
1216: #endif
1217: PetscLogFlops(2.0*a->nz - A->cmap->n);
1218: VecRestoreArrayRead(bb,&b);
1219: VecRestoreArrayWrite(xx,&x);
1220: return 0;
1221: }
1223: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1224: {
1225: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1226: IS iscol = a->col,isrow = a->row;
1227: PetscInt i, n = A->rmap->n,j;
1228: PetscInt nz;
1229: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1230: PetscScalar *x,*tmp,sum;
1231: const PetscScalar *b;
1232: const MatScalar *aa = a->a,*v;
1234: if (yy != xx) VecCopy(yy,xx);
1236: VecGetArrayRead(bb,&b);
1237: VecGetArray(xx,&x);
1238: tmp = a->solve_work;
1240: ISGetIndices(isrow,&rout); r = rout;
1241: ISGetIndices(iscol,&cout)); c = cout + (n-1;
1243: /* forward solve the lower triangular */
1244: tmp[0] = b[*r++];
1245: for (i=1; i<n; i++) {
1246: v = aa + ai[i];
1247: vi = aj + ai[i];
1248: nz = a->diag[i] - ai[i];
1249: sum = b[*r++];
1250: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1251: tmp[i] = sum;
1252: }
1254: /* backward solve the upper triangular */
1255: for (i=n-1; i>=0; i--) {
1256: v = aa + a->diag[i] + 1;
1257: vi = aj + a->diag[i] + 1;
1258: nz = ai[i+1] - a->diag[i] - 1;
1259: sum = tmp[i];
1260: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1261: tmp[i] = sum*aa[a->diag[i]];
1262: x[*c--] += tmp[i];
1263: }
1265: ISRestoreIndices(isrow,&rout);
1266: ISRestoreIndices(iscol,&cout);
1267: VecRestoreArrayRead(bb,&b);
1268: VecRestoreArray(xx,&x);
1269: PetscLogFlops(2.0*a->nz);
1270: return 0;
1271: }
1273: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1274: {
1275: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1276: IS iscol = a->col,isrow = a->row;
1277: PetscInt i, n = A->rmap->n,j;
1278: PetscInt nz;
1279: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1280: PetscScalar *x,*tmp,sum;
1281: const PetscScalar *b;
1282: const MatScalar *aa = a->a,*v;
1284: if (yy != xx) VecCopy(yy,xx);
1286: VecGetArrayRead(bb,&b);
1287: VecGetArray(xx,&x);
1288: tmp = a->solve_work;
1290: ISGetIndices(isrow,&rout); r = rout;
1291: ISGetIndices(iscol,&cout); c = cout;
1293: /* forward solve the lower triangular */
1294: tmp[0] = b[r[0]];
1295: v = aa;
1296: vi = aj;
1297: for (i=1; i<n; i++) {
1298: nz = ai[i+1] - ai[i];
1299: sum = b[r[i]];
1300: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1301: tmp[i] = sum;
1302: v += nz;
1303: vi += nz;
1304: }
1306: /* backward solve the upper triangular */
1307: v = aa + adiag[n-1];
1308: vi = aj + adiag[n-1];
1309: for (i=n-1; i>=0; i--) {
1310: nz = adiag[i] - adiag[i+1] - 1;
1311: sum = tmp[i];
1312: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1313: tmp[i] = sum*v[nz];
1314: x[c[i]] += tmp[i];
1315: v += nz+1; vi += nz+1;
1316: }
1318: ISRestoreIndices(isrow,&rout);
1319: ISRestoreIndices(iscol,&cout);
1320: VecRestoreArrayRead(bb,&b);
1321: VecRestoreArray(xx,&x);
1322: PetscLogFlops(2.0*a->nz);
1323: return 0;
1324: }
1326: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1327: {
1328: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1329: IS iscol = a->col,isrow = a->row;
1330: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1331: PetscInt i,n = A->rmap->n,j;
1332: PetscInt nz;
1333: PetscScalar *x,*tmp,s1;
1334: const MatScalar *aa = a->a,*v;
1335: const PetscScalar *b;
1337: VecGetArrayRead(bb,&b);
1338: VecGetArrayWrite(xx,&x);
1339: tmp = a->solve_work;
1341: ISGetIndices(isrow,&rout); r = rout;
1342: ISGetIndices(iscol,&cout); c = cout;
1344: /* copy the b into temp work space according to permutation */
1345: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1347: /* forward solve the U^T */
1348: for (i=0; i<n; i++) {
1349: v = aa + diag[i];
1350: vi = aj + diag[i] + 1;
1351: nz = ai[i+1] - diag[i] - 1;
1352: s1 = tmp[i];
1353: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1354: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1355: tmp[i] = s1;
1356: }
1358: /* backward solve the L^T */
1359: for (i=n-1; i>=0; i--) {
1360: v = aa + diag[i] - 1;
1361: vi = aj + diag[i] - 1;
1362: nz = diag[i] - ai[i];
1363: s1 = tmp[i];
1364: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1365: }
1367: /* copy tmp into x according to permutation */
1368: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1370: ISRestoreIndices(isrow,&rout);
1371: ISRestoreIndices(iscol,&cout);
1372: VecRestoreArrayRead(bb,&b);
1373: VecRestoreArrayWrite(xx,&x);
1375: PetscLogFlops(2.0*a->nz-A->cmap->n);
1376: return 0;
1377: }
1379: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1380: {
1381: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1382: IS iscol = a->col,isrow = a->row;
1383: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1384: PetscInt i,n = A->rmap->n,j;
1385: PetscInt nz;
1386: PetscScalar *x,*tmp,s1;
1387: const MatScalar *aa = a->a,*v;
1388: const PetscScalar *b;
1390: VecGetArrayRead(bb,&b);
1391: VecGetArrayWrite(xx,&x);
1392: tmp = a->solve_work;
1394: ISGetIndices(isrow,&rout); r = rout;
1395: ISGetIndices(iscol,&cout); c = cout;
1397: /* copy the b into temp work space according to permutation */
1398: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1400: /* forward solve the U^T */
1401: for (i=0; i<n; i++) {
1402: v = aa + adiag[i+1] + 1;
1403: vi = aj + adiag[i+1] + 1;
1404: nz = adiag[i] - adiag[i+1] - 1;
1405: s1 = tmp[i];
1406: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1407: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1408: tmp[i] = s1;
1409: }
1411: /* backward solve the L^T */
1412: for (i=n-1; i>=0; i--) {
1413: v = aa + ai[i];
1414: vi = aj + ai[i];
1415: nz = ai[i+1] - ai[i];
1416: s1 = tmp[i];
1417: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1418: }
1420: /* copy tmp into x according to permutation */
1421: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1423: ISRestoreIndices(isrow,&rout);
1424: ISRestoreIndices(iscol,&cout);
1425: VecRestoreArrayRead(bb,&b);
1426: VecRestoreArrayWrite(xx,&x);
1428: PetscLogFlops(2.0*a->nz-A->cmap->n);
1429: return 0;
1430: }
1432: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1433: {
1434: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1435: IS iscol = a->col,isrow = a->row;
1436: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1437: PetscInt i,n = A->rmap->n,j;
1438: PetscInt nz;
1439: PetscScalar *x,*tmp,s1;
1440: const MatScalar *aa = a->a,*v;
1441: const PetscScalar *b;
1443: if (zz != xx) VecCopy(zz,xx);
1444: VecGetArrayRead(bb,&b);
1445: VecGetArray(xx,&x);
1446: tmp = a->solve_work;
1448: ISGetIndices(isrow,&rout); r = rout;
1449: ISGetIndices(iscol,&cout); c = cout;
1451: /* copy the b into temp work space according to permutation */
1452: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1454: /* forward solve the U^T */
1455: for (i=0; i<n; i++) {
1456: v = aa + diag[i];
1457: vi = aj + diag[i] + 1;
1458: nz = ai[i+1] - diag[i] - 1;
1459: s1 = tmp[i];
1460: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1461: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1462: tmp[i] = s1;
1463: }
1465: /* backward solve the L^T */
1466: for (i=n-1; i>=0; i--) {
1467: v = aa + diag[i] - 1;
1468: vi = aj + diag[i] - 1;
1469: nz = diag[i] - ai[i];
1470: s1 = tmp[i];
1471: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1472: }
1474: /* copy tmp into x according to permutation */
1475: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1477: ISRestoreIndices(isrow,&rout);
1478: ISRestoreIndices(iscol,&cout);
1479: VecRestoreArrayRead(bb,&b);
1480: VecRestoreArray(xx,&x);
1482: PetscLogFlops(2.0*a->nz-A->cmap->n);
1483: return 0;
1484: }
1486: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1487: {
1488: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1489: IS iscol = a->col,isrow = a->row;
1490: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1491: PetscInt i,n = A->rmap->n,j;
1492: PetscInt nz;
1493: PetscScalar *x,*tmp,s1;
1494: const MatScalar *aa = a->a,*v;
1495: const PetscScalar *b;
1497: if (zz != xx) VecCopy(zz,xx);
1498: VecGetArrayRead(bb,&b);
1499: VecGetArray(xx,&x);
1500: tmp = a->solve_work;
1502: ISGetIndices(isrow,&rout); r = rout;
1503: ISGetIndices(iscol,&cout); c = cout;
1505: /* copy the b into temp work space according to permutation */
1506: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1508: /* forward solve the U^T */
1509: for (i=0; i<n; i++) {
1510: v = aa + adiag[i+1] + 1;
1511: vi = aj + adiag[i+1] + 1;
1512: nz = adiag[i] - adiag[i+1] - 1;
1513: s1 = tmp[i];
1514: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1515: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1516: tmp[i] = s1;
1517: }
1519: /* backward solve the L^T */
1520: for (i=n-1; i>=0; i--) {
1521: v = aa + ai[i];
1522: vi = aj + ai[i];
1523: nz = ai[i+1] - ai[i];
1524: s1 = tmp[i];
1525: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1526: }
1528: /* copy tmp into x according to permutation */
1529: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1531: ISRestoreIndices(isrow,&rout);
1532: ISRestoreIndices(iscol,&cout);
1533: VecRestoreArrayRead(bb,&b);
1534: VecRestoreArray(xx,&x);
1536: PetscLogFlops(2.0*a->nz-A->cmap->n);
1537: return 0;
1538: }
1540: /* ----------------------------------------------------------------*/
1542: /*
1543: ilu() under revised new data structure.
1544: Factored arrays bj and ba are stored as
1545: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1547: bi=fact->i is an array of size n+1, in which
1548: bi+
1549: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1550: bi[n]: points to L(n-1,n-1)+1
1552: bdiag=fact->diag is an array of size n+1,in which
1553: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1554: bdiag[n]: points to entry of U(n-1,0)-1
1556: U(i,:) contains bdiag[i] as its last entry, i.e.,
1557: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1558: */
1559: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1560: {
1561: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1562: const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1563: PetscInt i,j,k=0,nz,*bi,*bj,*bdiag;
1564: IS isicol;
1566: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1567: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);
1568: b = (Mat_SeqAIJ*)(fact)->data;
1570: /* allocate matrix arrays for new data structure */
1571: PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);
1572: PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));
1574: b->singlemalloc = PETSC_TRUE;
1575: if (!b->diag) {
1576: PetscMalloc1(n+1,&b->diag);
1577: PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));
1578: }
1579: bdiag = b->diag;
1581: if (n > 0) {
1582: PetscArrayzero(b->a,ai[n]);
1583: }
1585: /* set bi and bj with new data structure */
1586: bi = b->i;
1587: bj = b->j;
1589: /* L part */
1590: bi[0] = 0;
1591: for (i=0; i<n; i++) {
1592: nz = adiag[i] - ai[i];
1593: bi[i+1] = bi[i] + nz;
1594: aj = a->j + ai[i];
1595: for (j=0; j<nz; j++) {
1596: /* *bj = aj[j]; bj++; */
1597: bj[k++] = aj[j];
1598: }
1599: }
1601: /* U part */
1602: bdiag[n] = bi[n]-1;
1603: for (i=n-1; i>=0; i--) {
1604: nz = ai[i+1] - adiag[i] - 1;
1605: aj = a->j + adiag[i] + 1;
1606: for (j=0; j<nz; j++) {
1607: /* *bj = aj[j]; bj++; */
1608: bj[k++] = aj[j];
1609: }
1610: /* diag[i] */
1611: /* *bj = i; bj++; */
1612: bj[k++] = i;
1613: bdiag[i] = bdiag[i+1] + nz + 1;
1614: }
1616: fact->factortype = MAT_FACTOR_ILU;
1617: fact->info.factor_mallocs = 0;
1618: fact->info.fill_ratio_given = info->fill;
1619: fact->info.fill_ratio_needed = 1.0;
1620: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1621: MatSeqAIJCheckInode_FactorLU(fact);
1623: b = (Mat_SeqAIJ*)(fact)->data;
1624: b->row = isrow;
1625: b->col = iscol;
1626: b->icol = isicol;
1627: PetscMalloc1(fact->rmap->n+1,&b->solve_work);
1628: PetscObjectReference((PetscObject)isrow);
1629: PetscObjectReference((PetscObject)iscol);
1630: return 0;
1631: }
1633: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1634: {
1635: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1636: IS isicol;
1637: const PetscInt *r,*ic;
1638: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1639: PetscInt *bi,*cols,nnz,*cols_lvl;
1640: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1641: PetscInt i,levels,diagonal_fill;
1642: PetscBool col_identity,row_identity,missing;
1643: PetscReal f;
1644: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1645: PetscBT lnkbt;
1646: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1647: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1648: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1651: MatMissingDiagonal(A,&missing,&i);
1654: levels = (PetscInt)info->levels;
1655: ISIdentity(isrow,&row_identity);
1656: ISIdentity(iscol,&col_identity);
1657: if (!levels && row_identity && col_identity) {
1658: /* special case: ilu(0) with natural ordering */
1659: MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);
1660: if (a->inode.size) {
1661: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1662: }
1663: return 0;
1664: }
1666: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1667: ISGetIndices(isrow,&r);
1668: ISGetIndices(isicol,&ic);
1670: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1671: PetscMalloc1(n+1,&bi);
1672: PetscMalloc1(n+1,&bdiag);
1673: bi[0] = bdiag[0] = 0;
1674: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1676: /* create a linked list for storing column indices of the active row */
1677: nlnk = n + 1;
1678: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1680: /* initial FreeSpace size is f*(ai[n]+1) */
1681: f = info->fill;
1682: diagonal_fill = (PetscInt)info->diagonal_fill;
1683: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
1684: current_space = free_space;
1685: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);
1686: current_space_lvl = free_space_lvl;
1687: for (i=0; i<n; i++) {
1688: nzi = 0;
1689: /* copy current row into linked list */
1690: nnz = ai[r[i]+1] - ai[r[i]];
1692: cols = aj + ai[r[i]];
1693: lnk[i] = -1; /* marker to indicate if diagonal exists */
1694: PetscIncompleteLLInit(nnz,cols,n,ic,&nlnk,lnk,lnk_lvl,lnkbt);
1695: nzi += nlnk;
1697: /* make sure diagonal entry is included */
1698: if (diagonal_fill && lnk[i] == -1) {
1699: fm = n;
1700: while (lnk[fm] < i) fm = lnk[fm];
1701: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1702: lnk[fm] = i;
1703: lnk_lvl[i] = 0;
1704: nzi++; dcount++;
1705: }
1707: /* add pivot rows into the active row */
1708: nzbd = 0;
1709: prow = lnk[n];
1710: while (prow < i) {
1711: nnz = bdiag[prow];
1712: cols = bj_ptr[prow] + nnz + 1;
1713: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1714: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1715: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,&nlnk,lnk,lnk_lvl,lnkbt,prow);
1716: nzi += nlnk;
1717: prow = lnk[prow];
1718: nzbd++;
1719: }
1720: bdiag[i] = nzbd;
1721: bi[i+1] = bi[i] + nzi;
1722: /* if free space is not available, make more free space */
1723: if (current_space->local_remaining<nzi) {
1724: nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */
1725: PetscFreeSpaceGet(nnz,¤t_space);
1726: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1727: reallocs++;
1728: }
1730: /* copy data into free_space and free_space_lvl, then initialize lnk */
1731: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1732: bj_ptr[i] = current_space->array;
1733: bjlvl_ptr[i] = current_space_lvl->array;
1735: /* make sure the active row i has diagonal entry */
1738: current_space->array += nzi;
1739: current_space->local_used += nzi;
1740: current_space->local_remaining -= nzi;
1741: current_space_lvl->array += nzi;
1742: current_space_lvl->local_used += nzi;
1743: current_space_lvl->local_remaining -= nzi;
1744: }
1746: ISRestoreIndices(isrow,&r);
1747: ISRestoreIndices(isicol,&ic);
1748: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1749: PetscMalloc1(bi[n]+1,&bj);
1750: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
1752: PetscIncompleteLLDestroy(lnk,lnkbt);
1753: PetscFreeSpaceDestroy(free_space_lvl);
1754: PetscFree2(bj_ptr,bjlvl_ptr);
1756: #if defined(PETSC_USE_INFO)
1757: {
1758: PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1759: PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
1760: PetscInfo(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
1761: PetscInfo(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
1762: PetscInfo(A,"for best performance.\n");
1763: if (diagonal_fill) {
1764: PetscInfo(A,"Detected and replaced %" PetscInt_FMT " missing diagonals\n",dcount);
1765: }
1766: }
1767: #endif
1768: /* put together the new matrix */
1769: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
1770: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
1771: b = (Mat_SeqAIJ*)(fact)->data;
1773: b->free_a = PETSC_TRUE;
1774: b->free_ij = PETSC_TRUE;
1775: b->singlemalloc = PETSC_FALSE;
1777: PetscMalloc1(bdiag[0]+1,&b->a);
1779: b->j = bj;
1780: b->i = bi;
1781: b->diag = bdiag;
1782: b->ilen = NULL;
1783: b->imax = NULL;
1784: b->row = isrow;
1785: b->col = iscol;
1786: PetscObjectReference((PetscObject)isrow);
1787: PetscObjectReference((PetscObject)iscol);
1788: b->icol = isicol;
1790: PetscMalloc1(n+1,&b->solve_work);
1791: /* In b structure: Free imax, ilen, old a, old j.
1792: Allocate bdiag, solve_work, new a, new j */
1793: PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
1794: b->maxnz = b->nz = bdiag[0]+1;
1796: (fact)->info.factor_mallocs = reallocs;
1797: (fact)->info.fill_ratio_given = f;
1798: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1799: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1800: if (a->inode.size) {
1801: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1802: }
1803: MatSeqAIJCheckInode_FactorLU(fact);
1804: return 0;
1805: }
1807: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1808: {
1809: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1810: IS isicol;
1811: const PetscInt *r,*ic;
1812: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1813: PetscInt *bi,*cols,nnz,*cols_lvl;
1814: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1815: PetscInt i,levels,diagonal_fill;
1816: PetscBool col_identity,row_identity;
1817: PetscReal f;
1818: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1819: PetscBT lnkbt;
1820: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1821: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1822: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1823: PetscBool missing;
1826: MatMissingDiagonal(A,&missing,&i);
1829: f = info->fill;
1830: levels = (PetscInt)info->levels;
1831: diagonal_fill = (PetscInt)info->diagonal_fill;
1833: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1835: ISIdentity(isrow,&row_identity);
1836: ISIdentity(iscol,&col_identity);
1837: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1838: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);
1840: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1841: if (a->inode.size) {
1842: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1843: }
1844: fact->factortype = MAT_FACTOR_ILU;
1845: (fact)->info.factor_mallocs = 0;
1846: (fact)->info.fill_ratio_given = info->fill;
1847: (fact)->info.fill_ratio_needed = 1.0;
1849: b = (Mat_SeqAIJ*)(fact)->data;
1850: b->row = isrow;
1851: b->col = iscol;
1852: b->icol = isicol;
1853: PetscMalloc1((fact)->rmap->n+1,&b->solve_work);
1854: PetscObjectReference((PetscObject)isrow);
1855: PetscObjectReference((PetscObject)iscol);
1856: return 0;
1857: }
1859: ISGetIndices(isrow,&r);
1860: ISGetIndices(isicol,&ic);
1862: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1863: PetscMalloc1(n+1,&bi);
1864: PetscMalloc1(n+1,&bdiag);
1865: bi[0] = bdiag[0] = 0;
1867: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1869: /* create a linked list for storing column indices of the active row */
1870: nlnk = n + 1;
1871: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1873: /* initial FreeSpace size is f*(ai[n]+1) */
1874: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
1875: current_space = free_space;
1876: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);
1877: current_space_lvl = free_space_lvl;
1879: for (i=0; i<n; i++) {
1880: nzi = 0;
1881: /* copy current row into linked list */
1882: nnz = ai[r[i]+1] - ai[r[i]];
1884: cols = aj + ai[r[i]];
1885: lnk[i] = -1; /* marker to indicate if diagonal exists */
1886: PetscIncompleteLLInit(nnz,cols,n,ic,&nlnk,lnk,lnk_lvl,lnkbt);
1887: nzi += nlnk;
1889: /* make sure diagonal entry is included */
1890: if (diagonal_fill && lnk[i] == -1) {
1891: fm = n;
1892: while (lnk[fm] < i) fm = lnk[fm];
1893: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1894: lnk[fm] = i;
1895: lnk_lvl[i] = 0;
1896: nzi++; dcount++;
1897: }
1899: /* add pivot rows into the active row */
1900: nzbd = 0;
1901: prow = lnk[n];
1902: while (prow < i) {
1903: nnz = bdiag[prow];
1904: cols = bj_ptr[prow] + nnz + 1;
1905: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1906: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1907: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,&nlnk,lnk,lnk_lvl,lnkbt,prow);
1908: nzi += nlnk;
1909: prow = lnk[prow];
1910: nzbd++;
1911: }
1912: bdiag[i] = nzbd;
1913: bi[i+1] = bi[i] + nzi;
1915: /* if free space is not available, make more free space */
1916: if (current_space->local_remaining<nzi) {
1917: nnz = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */
1918: PetscFreeSpaceGet(nnz,¤t_space);
1919: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1920: reallocs++;
1921: }
1923: /* copy data into free_space and free_space_lvl, then initialize lnk */
1924: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1925: bj_ptr[i] = current_space->array;
1926: bjlvl_ptr[i] = current_space_lvl->array;
1928: /* make sure the active row i has diagonal entry */
1931: current_space->array += nzi;
1932: current_space->local_used += nzi;
1933: current_space->local_remaining -= nzi;
1934: current_space_lvl->array += nzi;
1935: current_space_lvl->local_used += nzi;
1936: current_space_lvl->local_remaining -= nzi;
1937: }
1939: ISRestoreIndices(isrow,&r);
1940: ISRestoreIndices(isicol,&ic);
1942: /* destroy list of free space and other temporary arrays */
1943: PetscMalloc1(bi[n]+1,&bj);
1944: PetscFreeSpaceContiguous(&free_space,bj); /* copy free_space -> bj */
1945: PetscIncompleteLLDestroy(lnk,lnkbt);
1946: PetscFreeSpaceDestroy(free_space_lvl);
1947: PetscFree2(bj_ptr,bjlvl_ptr);
1949: #if defined(PETSC_USE_INFO)
1950: {
1951: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1952: PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
1953: PetscInfo(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
1954: PetscInfo(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
1955: PetscInfo(A,"for best performance.\n");
1956: if (diagonal_fill) {
1957: PetscInfo(A,"Detected and replaced %" PetscInt_FMT " missing diagonals\n",dcount);
1958: }
1959: }
1960: #endif
1962: /* put together the new matrix */
1963: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
1964: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
1965: b = (Mat_SeqAIJ*)(fact)->data;
1967: b->free_a = PETSC_TRUE;
1968: b->free_ij = PETSC_TRUE;
1969: b->singlemalloc = PETSC_FALSE;
1971: PetscMalloc1(bi[n],&b->a);
1972: b->j = bj;
1973: b->i = bi;
1974: for (i=0; i<n; i++) bdiag[i] += bi[i];
1975: b->diag = bdiag;
1976: b->ilen = NULL;
1977: b->imax = NULL;
1978: b->row = isrow;
1979: b->col = iscol;
1980: PetscObjectReference((PetscObject)isrow);
1981: PetscObjectReference((PetscObject)iscol);
1982: b->icol = isicol;
1983: PetscMalloc1(n+1,&b->solve_work);
1984: /* In b structure: Free imax, ilen, old a, old j.
1985: Allocate bdiag, solve_work, new a, new j */
1986: PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1987: b->maxnz = b->nz = bi[n];
1989: (fact)->info.factor_mallocs = reallocs;
1990: (fact)->info.fill_ratio_given = f;
1991: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1992: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1993: if (a->inode.size) {
1994: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1995: }
1996: return 0;
1997: }
1999: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2000: {
2001: Mat C = B;
2002: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2003: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2004: IS ip=b->row,iip = b->icol;
2005: const PetscInt *rip,*riip;
2006: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2007: PetscInt *ai=a->i,*aj=a->j;
2008: PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2009: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2010: PetscBool perm_identity;
2011: FactorShiftCtx sctx;
2012: PetscReal rs;
2013: MatScalar d,*v;
2015: /* MatPivotSetUp(): initialize shift context sctx */
2016: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2018: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2019: sctx.shift_top = info->zeropivot;
2020: for (i=0; i<mbs; i++) {
2021: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2022: d = (aa)[a->diag[i]];
2023: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2024: v = aa+ai[i];
2025: nz = ai[i+1] - ai[i];
2026: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2027: if (rs>sctx.shift_top) sctx.shift_top = rs;
2028: }
2029: sctx.shift_top *= 1.1;
2030: sctx.nshift_max = 5;
2031: sctx.shift_lo = 0.;
2032: sctx.shift_hi = 1.;
2033: }
2035: ISGetIndices(ip,&rip);
2036: ISGetIndices(iip,&riip);
2038: /* allocate working arrays
2039: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2040: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2041: */
2042: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);
2044: do {
2045: sctx.newshift = PETSC_FALSE;
2047: for (i=0; i<mbs; i++) c2r[i] = mbs;
2048: if (mbs) il[0] = 0;
2050: for (k = 0; k<mbs; k++) {
2051: /* zero rtmp */
2052: nz = bi[k+1] - bi[k];
2053: bjtmp = bj + bi[k];
2054: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2056: /* load in initial unfactored row */
2057: bval = ba + bi[k];
2058: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2059: for (j = jmin; j < jmax; j++) {
2060: col = riip[aj[j]];
2061: if (col >= k) { /* only take upper triangular entry */
2062: rtmp[col] = aa[j];
2063: *bval++ = 0.0; /* for in-place factorization */
2064: }
2065: }
2066: /* shift the diagonal of the matrix: ZeropivotApply() */
2067: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2069: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2070: dk = rtmp[k];
2071: i = c2r[k]; /* first row to be added to k_th row */
2073: while (i < k) {
2074: nexti = c2r[i]; /* next row to be added to k_th row */
2076: /* compute multiplier, update diag(k) and U(i,k) */
2077: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2078: uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2079: dk += uikdi*ba[ili]; /* update diag[k] */
2080: ba[ili] = uikdi; /* -U(i,k) */
2082: /* add multiple of row i to k-th row */
2083: jmin = ili + 1; jmax = bi[i+1];
2084: if (jmin < jmax) {
2085: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2086: /* update il and c2r for row i */
2087: il[i] = jmin;
2088: j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2089: }
2090: i = nexti;
2091: }
2093: /* copy data into U(k,:) */
2094: rs = 0.0;
2095: jmin = bi[k]; jmax = bi[k+1]-1;
2096: if (jmin < jmax) {
2097: for (j=jmin; j<jmax; j++) {
2098: col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2099: }
2100: /* add the k-th row into il and c2r */
2101: il[k] = jmin;
2102: i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2103: }
2105: /* MatPivotCheck() */
2106: sctx.rs = rs;
2107: sctx.pv = dk;
2108: MatPivotCheck(B,A,info,&sctx,i);
2109: if (sctx.newshift) break;
2110: dk = sctx.pv;
2112: ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2113: }
2114: } while (sctx.newshift);
2116: PetscFree3(rtmp,il,c2r);
2117: ISRestoreIndices(ip,&rip);
2118: ISRestoreIndices(iip,&riip);
2120: ISIdentity(ip,&perm_identity);
2121: if (perm_identity) {
2122: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2123: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2124: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2125: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2126: } else {
2127: B->ops->solve = MatSolve_SeqSBAIJ_1;
2128: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2129: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2130: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2131: }
2133: C->assembled = PETSC_TRUE;
2134: C->preallocated = PETSC_TRUE;
2136: PetscLogFlops(C->rmap->n);
2138: /* MatPivotView() */
2139: if (sctx.nshift) {
2140: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2141: PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
2142: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2143: PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2144: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2145: PetscInfo(A,"number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
2146: }
2147: }
2148: return 0;
2149: }
2151: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2152: {
2153: Mat C = B;
2154: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2155: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2156: IS ip=b->row,iip = b->icol;
2157: const PetscInt *rip,*riip;
2158: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2159: PetscInt *ai=a->i,*aj=a->j;
2160: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2161: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2162: PetscBool perm_identity;
2163: FactorShiftCtx sctx;
2164: PetscReal rs;
2165: MatScalar d,*v;
2167: /* MatPivotSetUp(): initialize shift context sctx */
2168: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2170: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2171: sctx.shift_top = info->zeropivot;
2172: for (i=0; i<mbs; i++) {
2173: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2174: d = (aa)[a->diag[i]];
2175: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2176: v = aa+ai[i];
2177: nz = ai[i+1] - ai[i];
2178: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2179: if (rs>sctx.shift_top) sctx.shift_top = rs;
2180: }
2181: sctx.shift_top *= 1.1;
2182: sctx.nshift_max = 5;
2183: sctx.shift_lo = 0.;
2184: sctx.shift_hi = 1.;
2185: }
2187: ISGetIndices(ip,&rip);
2188: ISGetIndices(iip,&riip);
2190: /* initialization */
2191: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);
2193: do {
2194: sctx.newshift = PETSC_FALSE;
2196: for (i=0; i<mbs; i++) jl[i] = mbs;
2197: il[0] = 0;
2199: for (k = 0; k<mbs; k++) {
2200: /* zero rtmp */
2201: nz = bi[k+1] - bi[k];
2202: bjtmp = bj + bi[k];
2203: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2205: bval = ba + bi[k];
2206: /* initialize k-th row by the perm[k]-th row of A */
2207: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2208: for (j = jmin; j < jmax; j++) {
2209: col = riip[aj[j]];
2210: if (col >= k) { /* only take upper triangular entry */
2211: rtmp[col] = aa[j];
2212: *bval++ = 0.0; /* for in-place factorization */
2213: }
2214: }
2215: /* shift the diagonal of the matrix */
2216: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2218: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2219: dk = rtmp[k];
2220: i = jl[k]; /* first row to be added to k_th row */
2222: while (i < k) {
2223: nexti = jl[i]; /* next row to be added to k_th row */
2225: /* compute multiplier, update diag(k) and U(i,k) */
2226: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2227: uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2228: dk += uikdi*ba[ili];
2229: ba[ili] = uikdi; /* -U(i,k) */
2231: /* add multiple of row i to k-th row */
2232: jmin = ili + 1; jmax = bi[i+1];
2233: if (jmin < jmax) {
2234: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2235: /* update il and jl for row i */
2236: il[i] = jmin;
2237: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2238: }
2239: i = nexti;
2240: }
2242: /* shift the diagonals when zero pivot is detected */
2243: /* compute rs=sum of abs(off-diagonal) */
2244: rs = 0.0;
2245: jmin = bi[k]+1;
2246: nz = bi[k+1] - jmin;
2247: bcol = bj + jmin;
2248: for (j=0; j<nz; j++) {
2249: rs += PetscAbsScalar(rtmp[bcol[j]]);
2250: }
2252: sctx.rs = rs;
2253: sctx.pv = dk;
2254: MatPivotCheck(B,A,info,&sctx,k);
2255: if (sctx.newshift) break;
2256: dk = sctx.pv;
2258: /* copy data into U(k,:) */
2259: ba[bi[k]] = 1.0/dk; /* U(k,k) */
2260: jmin = bi[k]+1; jmax = bi[k+1];
2261: if (jmin < jmax) {
2262: for (j=jmin; j<jmax; j++) {
2263: col = bj[j]; ba[j] = rtmp[col];
2264: }
2265: /* add the k-th row into il and jl */
2266: il[k] = jmin;
2267: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2268: }
2269: }
2270: } while (sctx.newshift);
2272: PetscFree3(rtmp,il,jl);
2273: ISRestoreIndices(ip,&rip);
2274: ISRestoreIndices(iip,&riip);
2276: ISIdentity(ip,&perm_identity);
2277: if (perm_identity) {
2278: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2279: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2280: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2281: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2282: } else {
2283: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2284: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2285: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2286: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2287: }
2289: C->assembled = PETSC_TRUE;
2290: C->preallocated = PETSC_TRUE;
2292: PetscLogFlops(C->rmap->n);
2293: if (sctx.nshift) {
2294: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2295: PetscInfo(A,"number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2296: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2297: PetscInfo(A,"number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2298: }
2299: }
2300: return 0;
2301: }
2303: /*
2304: icc() under revised new data structure.
2305: Factored arrays bj and ba are stored as
2306: U(0,:),...,U(i,:),U(n-1,:)
2308: ui=fact->i is an array of size n+1, in which
2309: ui+
2310: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2311: ui[n]: points to U(n-1,n-1)+1
2313: udiag=fact->diag is an array of size n,in which
2314: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2316: U(i,:) contains udiag[i] as its last entry, i.e.,
2317: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2318: */
2320: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2321: {
2322: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2323: Mat_SeqSBAIJ *b;
2324: PetscBool perm_identity,missing;
2325: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2326: const PetscInt *rip,*riip;
2327: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2328: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2329: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2330: PetscReal fill =info->fill,levels=info->levels;
2331: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2332: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2333: PetscBT lnkbt;
2334: IS iperm;
2337: MatMissingDiagonal(A,&missing,&d);
2339: ISIdentity(perm,&perm_identity);
2340: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2342: PetscMalloc1(am+1,&ui);
2343: PetscMalloc1(am+1,&udiag);
2344: ui[0] = 0;
2346: /* ICC(0) without matrix ordering: simply rearrange column indices */
2347: if (!levels && perm_identity) {
2348: for (i=0; i<am; i++) {
2349: ncols = ai[i+1] - a->diag[i];
2350: ui[i+1] = ui[i] + ncols;
2351: udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2352: }
2353: PetscMalloc1(ui[am]+1,&uj);
2354: cols = uj;
2355: for (i=0; i<am; i++) {
2356: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2357: ncols = ai[i+1] - a->diag[i] -1;
2358: for (j=0; j<ncols; j++) *cols++ = aj[j];
2359: *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2360: }
2361: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2362: ISGetIndices(iperm,&riip);
2363: ISGetIndices(perm,&rip);
2365: /* initialization */
2366: PetscMalloc1(am+1,&ajtmp);
2368: /* jl: linked list for storing indices of the pivot rows
2369: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2370: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2371: for (i=0; i<am; i++) {
2372: jl[i] = am; il[i] = 0;
2373: }
2375: /* create and initialize a linked list for storing column indices of the active row k */
2376: nlnk = am + 1;
2377: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2379: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2380: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);
2381: current_space = free_space;
2382: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);
2383: current_space_lvl = free_space_lvl;
2385: for (k=0; k<am; k++) { /* for each active row k */
2386: /* initialize lnk by the column indices of row rip[k] of A */
2387: nzk = 0;
2388: ncols = ai[rip[k]+1] - ai[rip[k]];
2390: ncols_upper = 0;
2391: for (j=0; j<ncols; j++) {
2392: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2393: if (riip[i] >= k) { /* only take upper triangular entry */
2394: ajtmp[ncols_upper] = i;
2395: ncols_upper++;
2396: }
2397: }
2398: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,&nlnk,lnk,lnk_lvl,lnkbt);
2399: nzk += nlnk;
2401: /* update lnk by computing fill-in for each pivot row to be merged in */
2402: prow = jl[k]; /* 1st pivot row */
2404: while (prow < k) {
2405: nextprow = jl[prow];
2407: /* merge prow into k-th row */
2408: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2409: jmax = ui[prow+1];
2410: ncols = jmax-jmin;
2411: i = jmin - ui[prow];
2412: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2413: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2414: j = *(uj - 1);
2415: PetscICCLLAddSorted(ncols,cols,levels,uj,am,&nlnk,lnk,lnk_lvl,lnkbt,j);
2416: nzk += nlnk;
2418: /* update il and jl for prow */
2419: if (jmin < jmax) {
2420: il[prow] = jmin;
2421: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2422: }
2423: prow = nextprow;
2424: }
2426: /* if free space is not available, make more free space */
2427: if (current_space->local_remaining<nzk) {
2428: i = am - k + 1; /* num of unfactored rows */
2429: i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2430: PetscFreeSpaceGet(i,¤t_space);
2431: PetscFreeSpaceGet(i,¤t_space_lvl);
2432: reallocs++;
2433: }
2435: /* copy data into free_space and free_space_lvl, then initialize lnk */
2437: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2439: /* add the k-th row into il and jl */
2440: if (nzk > 1) {
2441: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2442: jl[k] = jl[i]; jl[i] = k;
2443: il[k] = ui[k] + 1;
2444: }
2445: uj_ptr[k] = current_space->array;
2446: uj_lvl_ptr[k] = current_space_lvl->array;
2448: current_space->array += nzk;
2449: current_space->local_used += nzk;
2450: current_space->local_remaining -= nzk;
2452: current_space_lvl->array += nzk;
2453: current_space_lvl->local_used += nzk;
2454: current_space_lvl->local_remaining -= nzk;
2456: ui[k+1] = ui[k] + nzk;
2457: }
2459: ISRestoreIndices(perm,&rip);
2460: ISRestoreIndices(iperm,&riip);
2461: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2462: PetscFree(ajtmp);
2464: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2465: PetscMalloc1(ui[am]+1,&uj);
2466: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2467: PetscIncompleteLLDestroy(lnk,lnkbt);
2468: PetscFreeSpaceDestroy(free_space_lvl);
2470: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2472: /* put together the new matrix in MATSEQSBAIJ format */
2473: b = (Mat_SeqSBAIJ*)(fact)->data;
2474: b->singlemalloc = PETSC_FALSE;
2476: PetscMalloc1(ui[am]+1,&b->a);
2478: b->j = uj;
2479: b->i = ui;
2480: b->diag = udiag;
2481: b->free_diag = PETSC_TRUE;
2482: b->ilen = NULL;
2483: b->imax = NULL;
2484: b->row = perm;
2485: b->col = perm;
2486: PetscObjectReference((PetscObject)perm);
2487: PetscObjectReference((PetscObject)perm);
2488: b->icol = iperm;
2489: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2491: PetscMalloc1(am+1,&b->solve_work);
2492: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2494: b->maxnz = b->nz = ui[am];
2495: b->free_a = PETSC_TRUE;
2496: b->free_ij = PETSC_TRUE;
2498: fact->info.factor_mallocs = reallocs;
2499: fact->info.fill_ratio_given = fill;
2500: if (ai[am] != 0) {
2501: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2502: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2503: } else {
2504: fact->info.fill_ratio_needed = 0.0;
2505: }
2506: #if defined(PETSC_USE_INFO)
2507: if (ai[am] != 0) {
2508: PetscReal af = fact->info.fill_ratio_needed;
2509: PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2510: PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2511: PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2512: } else {
2513: PetscInfo(A,"Empty matrix\n");
2514: }
2515: #endif
2516: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2517: return 0;
2518: }
2520: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2521: {
2522: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2523: Mat_SeqSBAIJ *b;
2524: PetscBool perm_identity,missing;
2525: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2526: const PetscInt *rip,*riip;
2527: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2528: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2529: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2530: PetscReal fill =info->fill,levels=info->levels;
2531: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2532: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2533: PetscBT lnkbt;
2534: IS iperm;
2537: MatMissingDiagonal(A,&missing,&d);
2539: ISIdentity(perm,&perm_identity);
2540: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2542: PetscMalloc1(am+1,&ui);
2543: PetscMalloc1(am+1,&udiag);
2544: ui[0] = 0;
2546: /* ICC(0) without matrix ordering: simply copies fill pattern */
2547: if (!levels && perm_identity) {
2549: for (i=0; i<am; i++) {
2550: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
2551: udiag[i] = ui[i];
2552: }
2553: PetscMalloc1(ui[am]+1,&uj);
2554: cols = uj;
2555: for (i=0; i<am; i++) {
2556: aj = a->j + a->diag[i];
2557: ncols = ui[i+1] - ui[i];
2558: for (j=0; j<ncols; j++) *cols++ = *aj++;
2559: }
2560: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2561: ISGetIndices(iperm,&riip);
2562: ISGetIndices(perm,&rip);
2564: /* initialization */
2565: PetscMalloc1(am+1,&ajtmp);
2567: /* jl: linked list for storing indices of the pivot rows
2568: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2569: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2570: for (i=0; i<am; i++) {
2571: jl[i] = am; il[i] = 0;
2572: }
2574: /* create and initialize a linked list for storing column indices of the active row k */
2575: nlnk = am + 1;
2576: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2578: /* initial FreeSpace size is fill*(ai[am]+1) */
2579: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);
2580: current_space = free_space;
2581: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);
2582: current_space_lvl = free_space_lvl;
2584: for (k=0; k<am; k++) { /* for each active row k */
2585: /* initialize lnk by the column indices of row rip[k] of A */
2586: nzk = 0;
2587: ncols = ai[rip[k]+1] - ai[rip[k]];
2589: ncols_upper = 0;
2590: for (j=0; j<ncols; j++) {
2591: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2592: if (riip[i] >= k) { /* only take upper triangular entry */
2593: ajtmp[ncols_upper] = i;
2594: ncols_upper++;
2595: }
2596: }
2597: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,&nlnk,lnk,lnk_lvl,lnkbt);
2598: nzk += nlnk;
2600: /* update lnk by computing fill-in for each pivot row to be merged in */
2601: prow = jl[k]; /* 1st pivot row */
2603: while (prow < k) {
2604: nextprow = jl[prow];
2606: /* merge prow into k-th row */
2607: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2608: jmax = ui[prow+1];
2609: ncols = jmax-jmin;
2610: i = jmin - ui[prow];
2611: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2612: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2613: j = *(uj - 1);
2614: PetscICCLLAddSorted(ncols,cols,levels,uj,am,&nlnk,lnk,lnk_lvl,lnkbt,j);
2615: nzk += nlnk;
2617: /* update il and jl for prow */
2618: if (jmin < jmax) {
2619: il[prow] = jmin;
2620: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2621: }
2622: prow = nextprow;
2623: }
2625: /* if free space is not available, make more free space */
2626: if (current_space->local_remaining<nzk) {
2627: i = am - k + 1; /* num of unfactored rows */
2628: i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2629: PetscFreeSpaceGet(i,¤t_space);
2630: PetscFreeSpaceGet(i,¤t_space_lvl);
2631: reallocs++;
2632: }
2634: /* copy data into free_space and free_space_lvl, then initialize lnk */
2636: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2638: /* add the k-th row into il and jl */
2639: if (nzk > 1) {
2640: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2641: jl[k] = jl[i]; jl[i] = k;
2642: il[k] = ui[k] + 1;
2643: }
2644: uj_ptr[k] = current_space->array;
2645: uj_lvl_ptr[k] = current_space_lvl->array;
2647: current_space->array += nzk;
2648: current_space->local_used += nzk;
2649: current_space->local_remaining -= nzk;
2651: current_space_lvl->array += nzk;
2652: current_space_lvl->local_used += nzk;
2653: current_space_lvl->local_remaining -= nzk;
2655: ui[k+1] = ui[k] + nzk;
2656: }
2658: #if defined(PETSC_USE_INFO)
2659: if (ai[am] != 0) {
2660: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2661: PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2662: PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2663: PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2664: } else {
2665: PetscInfo(A,"Empty matrix\n");
2666: }
2667: #endif
2669: ISRestoreIndices(perm,&rip);
2670: ISRestoreIndices(iperm,&riip);
2671: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2672: PetscFree(ajtmp);
2674: /* destroy list of free space and other temporary array(s) */
2675: PetscMalloc1(ui[am]+1,&uj);
2676: PetscFreeSpaceContiguous(&free_space,uj);
2677: PetscIncompleteLLDestroy(lnk,lnkbt);
2678: PetscFreeSpaceDestroy(free_space_lvl);
2680: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2682: /* put together the new matrix in MATSEQSBAIJ format */
2684: b = (Mat_SeqSBAIJ*)fact->data;
2685: b->singlemalloc = PETSC_FALSE;
2687: PetscMalloc1(ui[am]+1,&b->a);
2689: b->j = uj;
2690: b->i = ui;
2691: b->diag = udiag;
2692: b->free_diag = PETSC_TRUE;
2693: b->ilen = NULL;
2694: b->imax = NULL;
2695: b->row = perm;
2696: b->col = perm;
2698: PetscObjectReference((PetscObject)perm);
2699: PetscObjectReference((PetscObject)perm);
2701: b->icol = iperm;
2702: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2703: PetscMalloc1(am+1,&b->solve_work);
2704: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
2705: b->maxnz = b->nz = ui[am];
2706: b->free_a = PETSC_TRUE;
2707: b->free_ij = PETSC_TRUE;
2709: fact->info.factor_mallocs = reallocs;
2710: fact->info.fill_ratio_given = fill;
2711: if (ai[am] != 0) {
2712: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2713: } else {
2714: fact->info.fill_ratio_needed = 0.0;
2715: }
2716: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2717: return 0;
2718: }
2720: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2721: {
2722: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2723: Mat_SeqSBAIJ *b;
2724: PetscBool perm_identity,missing;
2725: PetscReal fill = info->fill;
2726: const PetscInt *rip,*riip;
2727: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2728: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2729: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2730: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2731: PetscBT lnkbt;
2732: IS iperm;
2735: MatMissingDiagonal(A,&missing,&i);
2738: /* check whether perm is the identity mapping */
2739: ISIdentity(perm,&perm_identity);
2740: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2741: ISGetIndices(iperm,&riip);
2742: ISGetIndices(perm,&rip);
2744: /* initialization */
2745: PetscMalloc1(am+1,&ui);
2746: PetscMalloc1(am+1,&udiag);
2747: ui[0] = 0;
2749: /* jl: linked list for storing indices of the pivot rows
2750: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2751: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2752: for (i=0; i<am; i++) {
2753: jl[i] = am; il[i] = 0;
2754: }
2756: /* create and initialize a linked list for storing column indices of the active row k */
2757: nlnk = am + 1;
2758: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2760: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2761: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);
2762: current_space = free_space;
2764: for (k=0; k<am; k++) { /* for each active row k */
2765: /* initialize lnk by the column indices of row rip[k] of A */
2766: nzk = 0;
2767: ncols = ai[rip[k]+1] - ai[rip[k]];
2769: ncols_upper = 0;
2770: for (j=0; j<ncols; j++) {
2771: i = riip[*(aj + ai[rip[k]] + j)];
2772: if (i >= k) { /* only take upper triangular entry */
2773: cols[ncols_upper] = i;
2774: ncols_upper++;
2775: }
2776: }
2777: PetscLLAdd(ncols_upper,cols,am,&nlnk,lnk,lnkbt);
2778: nzk += nlnk;
2780: /* update lnk by computing fill-in for each pivot row to be merged in */
2781: prow = jl[k]; /* 1st pivot row */
2783: while (prow < k) {
2784: nextprow = jl[prow];
2785: /* merge prow into k-th row */
2786: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2787: jmax = ui[prow+1];
2788: ncols = jmax-jmin;
2789: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2790: PetscLLAddSorted(ncols,uj_ptr,am,&nlnk,lnk,lnkbt);
2791: nzk += nlnk;
2793: /* update il and jl for prow */
2794: if (jmin < jmax) {
2795: il[prow] = jmin;
2796: j = *uj_ptr;
2797: jl[prow] = jl[j];
2798: jl[j] = prow;
2799: }
2800: prow = nextprow;
2801: }
2803: /* if free space is not available, make more free space */
2804: if (current_space->local_remaining<nzk) {
2805: i = am - k + 1; /* num of unfactored rows */
2806: i = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2807: PetscFreeSpaceGet(i,¤t_space);
2808: reallocs++;
2809: }
2811: /* copy data into free space, then initialize lnk */
2812: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
2814: /* add the k-th row into il and jl */
2815: if (nzk > 1) {
2816: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2817: jl[k] = jl[i]; jl[i] = k;
2818: il[k] = ui[k] + 1;
2819: }
2820: ui_ptr[k] = current_space->array;
2822: current_space->array += nzk;
2823: current_space->local_used += nzk;
2824: current_space->local_remaining -= nzk;
2826: ui[k+1] = ui[k] + nzk;
2827: }
2829: ISRestoreIndices(perm,&rip);
2830: ISRestoreIndices(iperm,&riip);
2831: PetscFree4(ui_ptr,jl,il,cols);
2833: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2834: PetscMalloc1(ui[am]+1,&uj);
2835: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2836: PetscLLDestroy(lnk,lnkbt);
2838: /* put together the new matrix in MATSEQSBAIJ format */
2840: b = (Mat_SeqSBAIJ*)fact->data;
2841: b->singlemalloc = PETSC_FALSE;
2842: b->free_a = PETSC_TRUE;
2843: b->free_ij = PETSC_TRUE;
2845: PetscMalloc1(ui[am]+1,&b->a);
2847: b->j = uj;
2848: b->i = ui;
2849: b->diag = udiag;
2850: b->free_diag = PETSC_TRUE;
2851: b->ilen = NULL;
2852: b->imax = NULL;
2853: b->row = perm;
2854: b->col = perm;
2856: PetscObjectReference((PetscObject)perm);
2857: PetscObjectReference((PetscObject)perm);
2859: b->icol = iperm;
2860: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2862: PetscMalloc1(am+1,&b->solve_work);
2863: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2865: b->maxnz = b->nz = ui[am];
2867: fact->info.factor_mallocs = reallocs;
2868: fact->info.fill_ratio_given = fill;
2869: if (ai[am] != 0) {
2870: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2871: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2872: } else {
2873: fact->info.fill_ratio_needed = 0.0;
2874: }
2875: #if defined(PETSC_USE_INFO)
2876: if (ai[am] != 0) {
2877: PetscReal af = fact->info.fill_ratio_needed;
2878: PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2879: PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2880: PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2881: } else {
2882: PetscInfo(A,"Empty matrix\n");
2883: }
2884: #endif
2885: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2886: return 0;
2887: }
2889: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2890: {
2891: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2892: Mat_SeqSBAIJ *b;
2893: PetscBool perm_identity,missing;
2894: PetscReal fill = info->fill;
2895: const PetscInt *rip,*riip;
2896: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2897: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2898: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2899: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2900: PetscBT lnkbt;
2901: IS iperm;
2904: MatMissingDiagonal(A,&missing,&i);
2907: /* check whether perm is the identity mapping */
2908: ISIdentity(perm,&perm_identity);
2909: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2910: ISGetIndices(iperm,&riip);
2911: ISGetIndices(perm,&rip);
2913: /* initialization */
2914: PetscMalloc1(am+1,&ui);
2915: ui[0] = 0;
2917: /* jl: linked list for storing indices of the pivot rows
2918: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2919: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2920: for (i=0; i<am; i++) {
2921: jl[i] = am; il[i] = 0;
2922: }
2924: /* create and initialize a linked list for storing column indices of the active row k */
2925: nlnk = am + 1;
2926: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2928: /* initial FreeSpace size is fill*(ai[am]+1) */
2929: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);
2930: current_space = free_space;
2932: for (k=0; k<am; k++) { /* for each active row k */
2933: /* initialize lnk by the column indices of row rip[k] of A */
2934: nzk = 0;
2935: ncols = ai[rip[k]+1] - ai[rip[k]];
2937: ncols_upper = 0;
2938: for (j=0; j<ncols; j++) {
2939: i = riip[*(aj + ai[rip[k]] + j)];
2940: if (i >= k) { /* only take upper triangular entry */
2941: cols[ncols_upper] = i;
2942: ncols_upper++;
2943: }
2944: }
2945: PetscLLAdd(ncols_upper,cols,am,&nlnk,lnk,lnkbt);
2946: nzk += nlnk;
2948: /* update lnk by computing fill-in for each pivot row to be merged in */
2949: prow = jl[k]; /* 1st pivot row */
2951: while (prow < k) {
2952: nextprow = jl[prow];
2953: /* merge prow into k-th row */
2954: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2955: jmax = ui[prow+1];
2956: ncols = jmax-jmin;
2957: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2958: PetscLLAddSorted(ncols,uj_ptr,am,&nlnk,lnk,lnkbt);
2959: nzk += nlnk;
2961: /* update il and jl for prow */
2962: if (jmin < jmax) {
2963: il[prow] = jmin;
2964: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2965: }
2966: prow = nextprow;
2967: }
2969: /* if free space is not available, make more free space */
2970: if (current_space->local_remaining<nzk) {
2971: i = am - k + 1; /* num of unfactored rows */
2972: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2973: PetscFreeSpaceGet(i,¤t_space);
2974: reallocs++;
2975: }
2977: /* copy data into free space, then initialize lnk */
2978: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
2980: /* add the k-th row into il and jl */
2981: if (nzk-1 > 0) {
2982: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2983: jl[k] = jl[i]; jl[i] = k;
2984: il[k] = ui[k] + 1;
2985: }
2986: ui_ptr[k] = current_space->array;
2988: current_space->array += nzk;
2989: current_space->local_used += nzk;
2990: current_space->local_remaining -= nzk;
2992: ui[k+1] = ui[k] + nzk;
2993: }
2995: #if defined(PETSC_USE_INFO)
2996: if (ai[am] != 0) {
2997: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
2998: PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2999: PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
3000: PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
3001: } else {
3002: PetscInfo(A,"Empty matrix\n");
3003: }
3004: #endif
3006: ISRestoreIndices(perm,&rip);
3007: ISRestoreIndices(iperm,&riip);
3008: PetscFree4(ui_ptr,jl,il,cols);
3010: /* destroy list of free space and other temporary array(s) */
3011: PetscMalloc1(ui[am]+1,&uj);
3012: PetscFreeSpaceContiguous(&free_space,uj);
3013: PetscLLDestroy(lnk,lnkbt);
3015: /* put together the new matrix in MATSEQSBAIJ format */
3017: b = (Mat_SeqSBAIJ*)fact->data;
3018: b->singlemalloc = PETSC_FALSE;
3019: b->free_a = PETSC_TRUE;
3020: b->free_ij = PETSC_TRUE;
3022: PetscMalloc1(ui[am]+1,&b->a);
3024: b->j = uj;
3025: b->i = ui;
3026: b->diag = NULL;
3027: b->ilen = NULL;
3028: b->imax = NULL;
3029: b->row = perm;
3030: b->col = perm;
3032: PetscObjectReference((PetscObject)perm);
3033: PetscObjectReference((PetscObject)perm);
3035: b->icol = iperm;
3036: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3038: PetscMalloc1(am+1,&b->solve_work);
3039: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
3040: b->maxnz = b->nz = ui[am];
3042: fact->info.factor_mallocs = reallocs;
3043: fact->info.fill_ratio_given = fill;
3044: if (ai[am] != 0) {
3045: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3046: } else {
3047: fact->info.fill_ratio_needed = 0.0;
3048: }
3049: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3050: return 0;
3051: }
3053: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3054: {
3055: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3056: PetscInt n = A->rmap->n;
3057: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3058: PetscScalar *x,sum;
3059: const PetscScalar *b;
3060: const MatScalar *aa = a->a,*v;
3061: PetscInt i,nz;
3063: if (!n) return 0;
3065: VecGetArrayRead(bb,&b);
3066: VecGetArrayWrite(xx,&x);
3068: /* forward solve the lower triangular */
3069: x[0] = b[0];
3070: v = aa;
3071: vi = aj;
3072: for (i=1; i<n; i++) {
3073: nz = ai[i+1] - ai[i];
3074: sum = b[i];
3075: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3076: v += nz;
3077: vi += nz;
3078: x[i] = sum;
3079: }
3081: /* backward solve the upper triangular */
3082: for (i=n-1; i>=0; i--) {
3083: v = aa + adiag[i+1] + 1;
3084: vi = aj + adiag[i+1] + 1;
3085: nz = adiag[i] - adiag[i+1]-1;
3086: sum = x[i];
3087: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3088: x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3089: }
3091: PetscLogFlops(2.0*a->nz - A->cmap->n);
3092: VecRestoreArrayRead(bb,&b);
3093: VecRestoreArrayWrite(xx,&x);
3094: return 0;
3095: }
3097: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3098: {
3099: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3100: IS iscol = a->col,isrow = a->row;
3101: PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3102: const PetscInt *rout,*cout,*r,*c;
3103: PetscScalar *x,*tmp,sum;
3104: const PetscScalar *b;
3105: const MatScalar *aa = a->a,*v;
3107: if (!n) return 0;
3109: VecGetArrayRead(bb,&b);
3110: VecGetArrayWrite(xx,&x);
3111: tmp = a->solve_work;
3113: ISGetIndices(isrow,&rout); r = rout;
3114: ISGetIndices(iscol,&cout); c = cout;
3116: /* forward solve the lower triangular */
3117: tmp[0] = b[r[0]];
3118: v = aa;
3119: vi = aj;
3120: for (i=1; i<n; i++) {
3121: nz = ai[i+1] - ai[i];
3122: sum = b[r[i]];
3123: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3124: tmp[i] = sum;
3125: v += nz; vi += nz;
3126: }
3128: /* backward solve the upper triangular */
3129: for (i=n-1; i>=0; i--) {
3130: v = aa + adiag[i+1]+1;
3131: vi = aj + adiag[i+1]+1;
3132: nz = adiag[i]-adiag[i+1]-1;
3133: sum = tmp[i];
3134: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3135: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3136: }
3138: ISRestoreIndices(isrow,&rout);
3139: ISRestoreIndices(iscol,&cout);
3140: VecRestoreArrayRead(bb,&b);
3141: VecRestoreArrayWrite(xx,&x);
3142: PetscLogFlops(2.0*a->nz - A->cmap->n);
3143: return 0;
3144: }
3146: /*
3147: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3148: */
3149: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3150: {
3151: Mat B = *fact;
3152: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b;
3153: IS isicol;
3154: const PetscInt *r,*ic;
3155: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3156: PetscInt *bi,*bj,*bdiag,*bdiag_rev;
3157: PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3158: PetscInt nlnk,*lnk;
3159: PetscBT lnkbt;
3160: PetscBool row_identity,icol_identity;
3161: MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3162: const PetscInt *ics;
3163: PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3164: PetscReal dt =info->dt,shift=info->shiftamount;
3165: PetscInt dtcount=(PetscInt)info->dtcount,nnz_max;
3166: PetscBool missing;
3168: if (dt == PETSC_DEFAULT) dt = 0.005;
3169: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3171: /* ------- symbolic factorization, can be reused ---------*/
3172: MatMissingDiagonal(A,&missing,&i);
3174: adiag=a->diag;
3176: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
3178: /* bdiag is location of diagonal in factor */
3179: PetscMalloc1(n+1,&bdiag); /* becomes b->diag */
3180: PetscMalloc1(n+1,&bdiag_rev); /* temporary */
3182: /* allocate row pointers bi */
3183: PetscMalloc1(2*n+2,&bi);
3185: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3186: if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3187: nnz_max = ai[n]+2*n*dtcount+2;
3189: PetscMalloc1(nnz_max+1,&bj);
3190: PetscMalloc1(nnz_max+1,&ba);
3192: /* put together the new matrix */
3193: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
3194: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
3195: b = (Mat_SeqAIJ*)B->data;
3197: b->free_a = PETSC_TRUE;
3198: b->free_ij = PETSC_TRUE;
3199: b->singlemalloc = PETSC_FALSE;
3201: b->a = ba;
3202: b->j = bj;
3203: b->i = bi;
3204: b->diag = bdiag;
3205: b->ilen = NULL;
3206: b->imax = NULL;
3207: b->row = isrow;
3208: b->col = iscol;
3209: PetscObjectReference((PetscObject)isrow);
3210: PetscObjectReference((PetscObject)iscol);
3211: b->icol = isicol;
3213: PetscMalloc1(n+1,&b->solve_work);
3214: PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
3215: b->maxnz = nnz_max;
3217: B->factortype = MAT_FACTOR_ILUDT;
3218: B->info.factor_mallocs = 0;
3219: B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3220: /* ------- end of symbolic factorization ---------*/
3222: ISGetIndices(isrow,&r);
3223: ISGetIndices(isicol,&ic);
3224: ics = ic;
3226: /* linked list for storing column indices of the active row */
3227: nlnk = n + 1;
3228: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
3230: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3231: PetscMalloc2(n,&im,n,&jtmp);
3232: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3233: PetscMalloc2(n,&rtmp,n,&vtmp);
3234: PetscArrayzero(rtmp,n);
3236: bi[0] = 0;
3237: bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3238: bdiag_rev[n] = bdiag[0];
3239: bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3240: for (i=0; i<n; i++) {
3241: /* copy initial fill into linked list */
3242: nzi = ai[r[i]+1] - ai[r[i]];
3244: nzi_al = adiag[r[i]] - ai[r[i]];
3245: nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3246: ajtmp = aj + ai[r[i]];
3247: PetscLLAddPerm(nzi,ajtmp,ic,n,&nlnk,lnk,lnkbt);
3249: /* load in initial (unfactored row) */
3250: aatmp = a->a + ai[r[i]];
3251: for (j=0; j<nzi; j++) {
3252: rtmp[ics[*ajtmp++]] = *aatmp++;
3253: }
3255: /* add pivot rows into linked list */
3256: row = lnk[n];
3257: while (row < i) {
3258: nzi_bl = bi[row+1] - bi[row] + 1;
3259: bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3260: PetscLLAddSortedLU(bjtmp,row,&nlnk,lnk,lnkbt,i,nzi_bl,im);
3261: nzi += nlnk;
3262: row = lnk[row];
3263: }
3265: /* copy data from lnk into jtmp, then initialize lnk */
3266: PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);
3268: /* numerical factorization */
3269: bjtmp = jtmp;
3270: row = *bjtmp++; /* 1st pivot row */
3271: while (row < i) {
3272: pc = rtmp + row;
3273: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3274: multiplier = (*pc) * (*pv);
3275: *pc = multiplier;
3276: if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3277: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3278: pv = ba + bdiag[row+1] + 1;
3279: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3280: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3281: PetscLogFlops(1+2.0*nz);
3282: }
3283: row = *bjtmp++;
3284: }
3286: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3287: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3288: nzi_bl = 0; j = 0;
3289: while (jtmp[j] < i) { /* Note: jtmp is sorted */
3290: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3291: nzi_bl++; j++;
3292: }
3293: nzi_bu = nzi - nzi_bl -1;
3294: while (j < nzi) {
3295: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3296: j++;
3297: }
3299: bjtmp = bj + bi[i];
3300: batmp = ba + bi[i];
3301: /* apply level dropping rule to L part */
3302: ncut = nzi_al + dtcount;
3303: if (ncut < nzi_bl) {
3304: PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);
3305: PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
3306: } else {
3307: ncut = nzi_bl;
3308: }
3309: for (j=0; j<ncut; j++) {
3310: bjtmp[j] = jtmp[j];
3311: batmp[j] = vtmp[j];
3312: }
3313: bi[i+1] = bi[i] + ncut;
3314: nzi = ncut + 1;
3316: /* apply level dropping rule to U part */
3317: ncut = nzi_au + dtcount;
3318: if (ncut < nzi_bu) {
3319: PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);
3320: PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
3321: } else {
3322: ncut = nzi_bu;
3323: }
3324: nzi += ncut;
3326: /* mark bdiagonal */
3327: bdiag[i+1] = bdiag[i] - (ncut + 1);
3328: bdiag_rev[n-i-1] = bdiag[i+1];
3329: bi[2*n - i] = bi[2*n - i +1] - (ncut + 1);
3330: bjtmp = bj + bdiag[i];
3331: batmp = ba + bdiag[i];
3332: *bjtmp = i;
3333: *batmp = diag_tmp; /* rtmp[i]; */
3334: if (*batmp == 0.0) {
3335: *batmp = dt+shift;
3336: }
3337: *batmp = 1.0/(*batmp); /* invert diagonal entries for simpler triangular solves */
3339: bjtmp = bj + bdiag[i+1]+1;
3340: batmp = ba + bdiag[i+1]+1;
3341: for (k=0; k<ncut; k++) {
3342: bjtmp[k] = jtmp[nzi_bl+1+k];
3343: batmp[k] = vtmp[nzi_bl+1+k];
3344: }
3346: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3347: } /* for (i=0; i<n; i++) */
3350: ISRestoreIndices(isrow,&r);
3351: ISRestoreIndices(isicol,&ic);
3353: PetscLLDestroy(lnk,lnkbt);
3354: PetscFree2(im,jtmp);
3355: PetscFree2(rtmp,vtmp);
3356: PetscFree(bdiag_rev);
3358: PetscLogFlops(B->cmap->n);
3359: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3361: ISIdentity(isrow,&row_identity);
3362: ISIdentity(isicol,&icol_identity);
3363: if (row_identity && icol_identity) {
3364: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3365: } else {
3366: B->ops->solve = MatSolve_SeqAIJ;
3367: }
3369: B->ops->solveadd = NULL;
3370: B->ops->solvetranspose = NULL;
3371: B->ops->solvetransposeadd = NULL;
3372: B->ops->matsolve = NULL;
3373: B->assembled = PETSC_TRUE;
3374: B->preallocated = PETSC_TRUE;
3375: return 0;
3376: }
3378: /* a wraper of MatILUDTFactor_SeqAIJ() */
3379: /*
3380: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3381: */
3383: PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3384: {
3385: MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);
3386: return 0;
3387: }
3389: /*
3390: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3391: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3392: */
3393: /*
3394: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3395: */
3397: PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3398: {
3399: Mat C =fact;
3400: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3401: IS isrow = b->row,isicol = b->icol;
3402: const PetscInt *r,*ic,*ics;
3403: PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3404: PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3405: MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3406: PetscReal dt=info->dt,shift=info->shiftamount;
3407: PetscBool row_identity, col_identity;
3409: ISGetIndices(isrow,&r);
3410: ISGetIndices(isicol,&ic);
3411: PetscMalloc1(n+1,&rtmp);
3412: ics = ic;
3414: for (i=0; i<n; i++) {
3415: /* initialize rtmp array */
3416: nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */
3417: bjtmp = bj + bi[i];
3418: for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3419: rtmp[i] = 0.0;
3420: nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3421: bjtmp = bj + bdiag[i+1] + 1;
3422: for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3424: /* load in initial unfactored row of A */
3425: nz = ai[r[i]+1] - ai[r[i]];
3426: ajtmp = aj + ai[r[i]];
3427: v = aa + ai[r[i]];
3428: for (j=0; j<nz; j++) {
3429: rtmp[ics[*ajtmp++]] = v[j];
3430: }
3432: /* numerical factorization */
3433: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3434: nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3435: k = 0;
3436: while (k < nzl) {
3437: row = *bjtmp++;
3438: pc = rtmp + row;
3439: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3440: multiplier = (*pc) * (*pv);
3441: *pc = multiplier;
3442: if (PetscAbsScalar(multiplier) > dt) {
3443: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3444: pv = b->a + bdiag[row+1] + 1;
3445: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3446: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3447: PetscLogFlops(1+2.0*nz);
3448: }
3449: k++;
3450: }
3452: /* finished row so stick it into b->a */
3453: /* L-part */
3454: pv = b->a + bi[i];
3455: pj = bj + bi[i];
3456: nzl = bi[i+1] - bi[i];
3457: for (j=0; j<nzl; j++) {
3458: pv[j] = rtmp[pj[j]];
3459: }
3461: /* diagonal: invert diagonal entries for simpler triangular solves */
3462: if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3463: b->a[bdiag[i]] = 1.0/rtmp[i];
3465: /* U-part */
3466: pv = b->a + bdiag[i+1] + 1;
3467: pj = bj + bdiag[i+1] + 1;
3468: nzu = bdiag[i] - bdiag[i+1] - 1;
3469: for (j=0; j<nzu; j++) {
3470: pv[j] = rtmp[pj[j]];
3471: }
3472: }
3474: PetscFree(rtmp);
3475: ISRestoreIndices(isicol,&ic);
3476: ISRestoreIndices(isrow,&r);
3478: ISIdentity(isrow,&row_identity);
3479: ISIdentity(isicol,&col_identity);
3480: if (row_identity && col_identity) {
3481: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3482: } else {
3483: C->ops->solve = MatSolve_SeqAIJ;
3484: }
3485: C->ops->solveadd = NULL;
3486: C->ops->solvetranspose = NULL;
3487: C->ops->solvetransposeadd = NULL;
3488: C->ops->matsolve = NULL;
3489: C->assembled = PETSC_TRUE;
3490: C->preallocated = PETSC_TRUE;
3492: PetscLogFlops(C->cmap->n);
3493: return 0;
3494: }