Actual source code: aijfact.c
2: #include <../src/mat/impls/aij/seq/aij.h>
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <petscbt.h>
5: #include <../src/mat/utils/freespace.h>
7: /*
8: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
10: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
11: */
12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
13: {
14: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data;
15: PetscErrorCode ierr;
16: PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
17: const PetscInt *ai = a->i, *aj = a->j;
18: const PetscScalar *aa = a->a;
19: PetscBool *done;
20: PetscReal best,past = 0,future;
23: /* pick initial row */
24: best = -1;
25: for (i=0; i<n; i++) {
26: future = 0.0;
27: for (j=ai[i]; j<ai[i+1]; j++) {
28: if (aj[j] != i) future += PetscAbsScalar(aa[j]);
29: else past = PetscAbsScalar(aa[j]);
30: }
31: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
32: if (past/future > best) {
33: best = past/future;
34: current = i;
35: }
36: }
38: PetscMalloc1(n,&done);
39: PetscArrayzero(done,n);
40: PetscMalloc1(n,&order);
41: order[0] = current;
42: for (i=0; i<n-1; i++) {
43: done[current] = PETSC_TRUE;
44: best = -1;
45: /* loop over all neighbors of current pivot */
46: for (j=ai[current]; j<ai[current+1]; j++) {
47: jj = aj[j];
48: if (done[jj]) continue;
49: /* loop over columns of potential next row computing weights for below and above diagonal */
50: past = future = 0.0;
51: for (k=ai[jj]; k<ai[jj+1]; k++) {
52: kk = aj[k];
53: if (done[kk]) past += PetscAbsScalar(aa[k]);
54: else if (kk != jj) future += PetscAbsScalar(aa[k]);
55: }
56: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
57: if (past/future > best) {
58: best = past/future;
59: newcurrent = jj;
60: }
61: }
62: if (best == -1) { /* no neighbors to select from so select best of all that remain */
63: best = -1;
64: for (k=0; k<n; k++) {
65: if (done[k]) continue;
66: future = 0.0;
67: past = 0.0;
68: for (j=ai[k]; j<ai[k+1]; j++) {
69: kk = aj[j];
70: if (done[kk]) past += PetscAbsScalar(aa[j]);
71: else if (kk != k) future += PetscAbsScalar(aa[j]);
72: }
73: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
74: if (past/future > best) {
75: best = past/future;
76: newcurrent = k;
77: }
78: }
79: }
80: if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
81: current = newcurrent;
82: order[i+1] = current;
83: }
84: ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);
85: *icol = *irow;
86: PetscObjectReference((PetscObject)*irow);
87: PetscFree(done);
88: PetscFree(order);
89: return(0);
90: }
92: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A,MatSolverType *type)
93: {
95: *type = MATSOLVERPETSC;
96: return(0);
97: }
99: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
100: {
101: PetscInt n = A->rmap->n;
105: #if defined(PETSC_USE_COMPLEX)
106: if (A->hermitian && !A->symmetric && (ftype == MAT_FACTOR_CHOLESKY||ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY or ICC Factor is not supported");
107: #endif
108: MatCreate(PetscObjectComm((PetscObject)A),B);
109: MatSetSizes(*B,n,n,n,n);
110: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
111: MatSetType(*B,MATSEQAIJ);
113: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
114: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
116: MatSetBlockSizesFromMats(*B,A,A);
117: PetscStrallocpy(MATORDERINGND,(char**)&(*B)->preferredordering[MAT_FACTOR_LU]);
118: PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ILU]);
119: PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ILUDT]);
120: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
121: MatSetType(*B,MATSEQSBAIJ);
122: MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);
124: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
125: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
126: PetscStrallocpy(MATORDERINGND,(char**)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]);
127: PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ICC]);
128: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
129: (*B)->factortype = ftype;
131: PetscFree((*B)->solvertype);
132: PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);
133: (*B)->canuseordering = PETSC_TRUE;
134: PetscObjectComposeFunction((PetscObject)*B,"MatFactorGetSolverType_C",MatFactorGetSolverType_petsc);
135: return(0);
136: }
138: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
139: {
140: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
141: IS isicol;
142: PetscErrorCode ierr;
143: const PetscInt *r,*ic;
144: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
145: PetscInt *bi,*bj,*ajtmp;
146: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
147: PetscReal f;
148: PetscInt nlnk,*lnk,k,**bi_ptr;
149: PetscFreeSpaceList free_space=NULL,current_space=NULL;
150: PetscBT lnkbt;
151: PetscBool missing;
154: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
155: MatMissingDiagonal(A,&missing,&i);
156: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
158: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
159: ISGetIndices(isrow,&r);
160: ISGetIndices(isicol,&ic);
162: /* get new row pointers */
163: PetscMalloc1(n+1,&bi);
164: bi[0] = 0;
166: /* bdiag is location of diagonal in factor */
167: PetscMalloc1(n+1,&bdiag);
168: bdiag[0] = 0;
170: /* linked list for storing column indices of the active row */
171: nlnk = n + 1;
172: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
174: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
176: /* initial FreeSpace size is f*(ai[n]+1) */
177: f = info->fill;
178: if (n==1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
179: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
180: current_space = free_space;
182: for (i=0; i<n; i++) {
183: /* copy previous fill into linked list */
184: nzi = 0;
185: nnz = ai[r[i]+1] - ai[r[i]];
186: ajtmp = aj + ai[r[i]];
187: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
188: nzi += nlnk;
190: /* add pivot rows into linked list */
191: row = lnk[n];
192: while (row < i) {
193: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
194: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
195: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
196: nzi += nlnk;
197: row = lnk[row];
198: }
199: bi[i+1] = bi[i] + nzi;
200: im[i] = nzi;
202: /* mark bdiag */
203: nzbd = 0;
204: nnz = nzi;
205: k = lnk[n];
206: while (nnz-- && k < i) {
207: nzbd++;
208: k = lnk[k];
209: }
210: bdiag[i] = bi[i] + nzbd;
212: /* if free space is not available, make more free space */
213: if (current_space->local_remaining<nzi) {
214: nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
215: PetscFreeSpaceGet(nnz,¤t_space);
216: reallocs++;
217: }
219: /* copy data into free space, then initialize lnk */
220: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
222: bi_ptr[i] = current_space->array;
223: current_space->array += nzi;
224: current_space->local_used += nzi;
225: current_space->local_remaining -= nzi;
226: }
227: #if defined(PETSC_USE_INFO)
228: if (ai[n] != 0) {
229: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
230: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
231: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
232: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
233: PetscInfo(A,"for best performance.\n");
234: } else {
235: PetscInfo(A,"Empty matrix\n");
236: }
237: #endif
239: ISRestoreIndices(isrow,&r);
240: ISRestoreIndices(isicol,&ic);
242: /* destroy list of free space and other temporary array(s) */
243: PetscMalloc1(bi[n]+1,&bj);
244: PetscFreeSpaceContiguous(&free_space,bj);
245: PetscLLDestroy(lnk,lnkbt);
246: PetscFree2(bi_ptr,im);
248: /* put together the new matrix */
249: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
250: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
251: b = (Mat_SeqAIJ*)(B)->data;
253: b->free_a = PETSC_TRUE;
254: b->free_ij = PETSC_TRUE;
255: b->singlemalloc = PETSC_FALSE;
257: PetscMalloc1(bi[n]+1,&b->a);
258: b->j = bj;
259: b->i = bi;
260: b->diag = bdiag;
261: b->ilen = NULL;
262: b->imax = NULL;
263: b->row = isrow;
264: b->col = iscol;
265: PetscObjectReference((PetscObject)isrow);
266: PetscObjectReference((PetscObject)iscol);
267: b->icol = isicol;
268: PetscMalloc1(n+1,&b->solve_work);
270: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
271: PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
272: b->maxnz = b->nz = bi[n];
274: (B)->factortype = MAT_FACTOR_LU;
275: (B)->info.factor_mallocs = reallocs;
276: (B)->info.fill_ratio_given = f;
278: if (ai[n]) {
279: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
280: } else {
281: (B)->info.fill_ratio_needed = 0.0;
282: }
283: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
284: if (a->inode.size) {
285: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
286: }
287: return(0);
288: }
290: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
291: {
292: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
293: IS isicol;
294: PetscErrorCode ierr;
295: const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
296: PetscInt i,n=A->rmap->n;
297: PetscInt *bi,*bj;
298: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
299: PetscReal f;
300: PetscInt nlnk,*lnk,k,**bi_ptr;
301: PetscFreeSpaceList free_space=NULL,current_space=NULL;
302: PetscBT lnkbt;
303: PetscBool missing;
306: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
307: MatMissingDiagonal(A,&missing,&i);
308: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
310: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
311: ISGetIndices(isrow,&r);
312: ISGetIndices(isicol,&ic);
314: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
315: PetscMalloc1(n+1,&bi);
316: PetscMalloc1(n+1,&bdiag);
317: bi[0] = bdiag[0] = 0;
319: /* linked list for storing column indices of the active row */
320: nlnk = n + 1;
321: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
323: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
325: /* initial FreeSpace size is f*(ai[n]+1) */
326: f = info->fill;
327: if (n==1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
328: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
329: current_space = free_space;
331: for (i=0; i<n; i++) {
332: /* copy previous fill into linked list */
333: nzi = 0;
334: nnz = ai[r[i]+1] - ai[r[i]];
335: ajtmp = aj + ai[r[i]];
336: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
337: nzi += nlnk;
339: /* add pivot rows into linked list */
340: row = lnk[n];
341: while (row < i) {
342: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
343: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
344: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
345: nzi += nlnk;
346: row = lnk[row];
347: }
348: bi[i+1] = bi[i] + nzi;
349: im[i] = nzi;
351: /* mark bdiag */
352: nzbd = 0;
353: nnz = nzi;
354: k = lnk[n];
355: while (nnz-- && k < i) {
356: nzbd++;
357: k = lnk[k];
358: }
359: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
361: /* if free space is not available, make more free space */
362: if (current_space->local_remaining<nzi) {
363: /* estimated additional space needed */
364: nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi));
365: PetscFreeSpaceGet(nnz,¤t_space);
366: reallocs++;
367: }
369: /* copy data into free space, then initialize lnk */
370: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
372: bi_ptr[i] = current_space->array;
373: current_space->array += nzi;
374: current_space->local_used += nzi;
375: current_space->local_remaining -= nzi;
376: }
378: ISRestoreIndices(isrow,&r);
379: ISRestoreIndices(isicol,&ic);
381: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
382: PetscMalloc1(bi[n]+1,&bj);
383: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
384: PetscLLDestroy(lnk,lnkbt);
385: PetscFree2(bi_ptr,im);
387: /* put together the new matrix */
388: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
389: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
390: b = (Mat_SeqAIJ*)(B)->data;
392: b->free_a = PETSC_TRUE;
393: b->free_ij = PETSC_TRUE;
394: b->singlemalloc = PETSC_FALSE;
396: PetscMalloc1(bdiag[0]+1,&b->a);
398: b->j = bj;
399: b->i = bi;
400: b->diag = bdiag;
401: b->ilen = NULL;
402: b->imax = NULL;
403: b->row = isrow;
404: b->col = iscol;
405: PetscObjectReference((PetscObject)isrow);
406: PetscObjectReference((PetscObject)iscol);
407: b->icol = isicol;
408: PetscMalloc1(n+1,&b->solve_work);
410: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
411: PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
412: b->maxnz = b->nz = bdiag[0]+1;
414: B->factortype = MAT_FACTOR_LU;
415: B->info.factor_mallocs = reallocs;
416: B->info.fill_ratio_given = f;
418: if (ai[n]) {
419: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
420: } else {
421: B->info.fill_ratio_needed = 0.0;
422: }
423: #if defined(PETSC_USE_INFO)
424: if (ai[n] != 0) {
425: PetscReal af = B->info.fill_ratio_needed;
426: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
427: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
428: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
429: PetscInfo(A,"for best performance.\n");
430: } else {
431: PetscInfo(A,"Empty matrix\n");
432: }
433: #endif
434: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
435: if (a->inode.size) {
436: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
437: }
438: MatSeqAIJCheckInode_FactorLU(B);
439: return(0);
440: }
442: /*
443: Trouble in factorization, should we dump the original matrix?
444: */
445: PetscErrorCode MatFactorDumpMatrix(Mat A)
446: {
448: PetscBool flg = PETSC_FALSE;
451: PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);
452: if (flg) {
453: PetscViewer viewer;
454: char filename[PETSC_MAX_PATH_LEN];
456: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
457: PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);
458: MatView(A,viewer);
459: PetscViewerDestroy(&viewer);
460: }
461: return(0);
462: }
464: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
465: {
466: Mat C =B;
467: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
468: IS isrow = b->row,isicol = b->icol;
469: PetscErrorCode ierr;
470: const PetscInt *r,*ic,*ics;
471: const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
472: PetscInt i,j,k,nz,nzL,row,*pj;
473: const PetscInt *ajtmp,*bjtmp;
474: MatScalar *rtmp,*pc,multiplier,*pv;
475: const MatScalar *aa=a->a,*v;
476: PetscBool row_identity,col_identity;
477: FactorShiftCtx sctx;
478: const PetscInt *ddiag;
479: PetscReal rs;
480: MatScalar d;
483: /* MatPivotSetUp(): initialize shift context sctx */
484: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
486: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
487: ddiag = a->diag;
488: sctx.shift_top = info->zeropivot;
489: for (i=0; i<n; i++) {
490: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
491: d = (aa)[ddiag[i]];
492: rs = -PetscAbsScalar(d) - PetscRealPart(d);
493: v = aa+ai[i];
494: nz = ai[i+1] - ai[i];
495: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
496: if (rs>sctx.shift_top) sctx.shift_top = rs;
497: }
498: sctx.shift_top *= 1.1;
499: sctx.nshift_max = 5;
500: sctx.shift_lo = 0.;
501: sctx.shift_hi = 1.;
502: }
504: ISGetIndices(isrow,&r);
505: ISGetIndices(isicol,&ic);
506: PetscMalloc1(n+1,&rtmp);
507: ics = ic;
509: do {
510: sctx.newshift = PETSC_FALSE;
511: for (i=0; i<n; i++) {
512: /* zero rtmp */
513: /* L part */
514: nz = bi[i+1] - bi[i];
515: bjtmp = bj + bi[i];
516: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
518: /* U part */
519: nz = bdiag[i]-bdiag[i+1];
520: bjtmp = bj + bdiag[i+1]+1;
521: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
523: /* load in initial (unfactored row) */
524: nz = ai[r[i]+1] - ai[r[i]];
525: ajtmp = aj + ai[r[i]];
526: v = aa + ai[r[i]];
527: for (j=0; j<nz; j++) {
528: rtmp[ics[ajtmp[j]]] = v[j];
529: }
530: /* ZeropivotApply() */
531: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
533: /* elimination */
534: bjtmp = bj + bi[i];
535: row = *bjtmp++;
536: nzL = bi[i+1] - bi[i];
537: for (k=0; k < nzL; k++) {
538: pc = rtmp + row;
539: if (*pc != 0.0) {
540: pv = b->a + bdiag[row];
541: multiplier = *pc * (*pv);
542: *pc = multiplier;
544: pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
545: pv = b->a + bdiag[row+1]+1;
546: nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
548: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
549: PetscLogFlops(1+2.0*nz);
550: }
551: row = *bjtmp++;
552: }
554: /* finished row so stick it into b->a */
555: rs = 0.0;
556: /* L part */
557: pv = b->a + bi[i];
558: pj = b->j + bi[i];
559: nz = bi[i+1] - bi[i];
560: for (j=0; j<nz; j++) {
561: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
562: }
564: /* U part */
565: pv = b->a + bdiag[i+1]+1;
566: pj = b->j + bdiag[i+1]+1;
567: nz = bdiag[i] - bdiag[i+1]-1;
568: for (j=0; j<nz; j++) {
569: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
570: }
572: sctx.rs = rs;
573: sctx.pv = rtmp[i];
574: MatPivotCheck(B,A,info,&sctx,i);
575: if (sctx.newshift) break; /* break for-loop */
576: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
578: /* Mark diagonal and invert diagonal for simpler triangular solves */
579: pv = b->a + bdiag[i];
580: *pv = 1.0/rtmp[i];
582: } /* endof for (i=0; i<n; i++) { */
584: /* MatPivotRefine() */
585: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
586: /*
587: * if no shift in this attempt & shifting & started shifting & can refine,
588: * then try lower shift
589: */
590: sctx.shift_hi = sctx.shift_fraction;
591: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
592: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
593: sctx.newshift = PETSC_TRUE;
594: sctx.nshift++;
595: }
596: } while (sctx.newshift);
598: PetscFree(rtmp);
599: ISRestoreIndices(isicol,&ic);
600: ISRestoreIndices(isrow,&r);
602: ISIdentity(isrow,&row_identity);
603: ISIdentity(isicol,&col_identity);
604: if (b->inode.size) {
605: C->ops->solve = MatSolve_SeqAIJ_Inode;
606: } else if (row_identity && col_identity) {
607: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
608: } else {
609: C->ops->solve = MatSolve_SeqAIJ;
610: }
611: C->ops->solveadd = MatSolveAdd_SeqAIJ;
612: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
613: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
614: C->ops->matsolve = MatMatSolve_SeqAIJ;
615: C->assembled = PETSC_TRUE;
616: C->preallocated = PETSC_TRUE;
618: PetscLogFlops(C->cmap->n);
620: /* MatShiftView(A,info,&sctx) */
621: if (sctx.nshift) {
622: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
623: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
624: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
625: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
626: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
627: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
628: }
629: }
630: return(0);
631: }
633: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
634: {
635: Mat C =B;
636: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
637: IS isrow = b->row,isicol = b->icol;
638: PetscErrorCode ierr;
639: const PetscInt *r,*ic,*ics;
640: PetscInt nz,row,i,j,n=A->rmap->n,diag;
641: const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
642: const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
643: MatScalar *pv,*rtmp,*pc,multiplier,d;
644: const MatScalar *v,*aa=a->a;
645: PetscReal rs=0.0;
646: FactorShiftCtx sctx;
647: const PetscInt *ddiag;
648: PetscBool row_identity, col_identity;
651: /* MatPivotSetUp(): initialize shift context sctx */
652: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
654: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
655: ddiag = a->diag;
656: sctx.shift_top = info->zeropivot;
657: for (i=0; i<n; i++) {
658: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
659: d = (aa)[ddiag[i]];
660: rs = -PetscAbsScalar(d) - PetscRealPart(d);
661: v = aa+ai[i];
662: nz = ai[i+1] - ai[i];
663: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
664: if (rs>sctx.shift_top) sctx.shift_top = rs;
665: }
666: sctx.shift_top *= 1.1;
667: sctx.nshift_max = 5;
668: sctx.shift_lo = 0.;
669: sctx.shift_hi = 1.;
670: }
672: ISGetIndices(isrow,&r);
673: ISGetIndices(isicol,&ic);
674: PetscMalloc1(n+1,&rtmp);
675: ics = ic;
677: do {
678: sctx.newshift = PETSC_FALSE;
679: for (i=0; i<n; i++) {
680: nz = bi[i+1] - bi[i];
681: bjtmp = bj + bi[i];
682: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
684: /* load in initial (unfactored row) */
685: nz = ai[r[i]+1] - ai[r[i]];
686: ajtmp = aj + ai[r[i]];
687: v = aa + ai[r[i]];
688: for (j=0; j<nz; j++) {
689: rtmp[ics[ajtmp[j]]] = v[j];
690: }
691: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
693: row = *bjtmp++;
694: while (row < i) {
695: pc = rtmp + row;
696: if (*pc != 0.0) {
697: pv = b->a + diag_offset[row];
698: pj = b->j + diag_offset[row] + 1;
699: multiplier = *pc / *pv++;
700: *pc = multiplier;
701: nz = bi[row+1] - diag_offset[row] - 1;
702: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
703: PetscLogFlops(1+2.0*nz);
704: }
705: row = *bjtmp++;
706: }
707: /* finished row so stick it into b->a */
708: pv = b->a + bi[i];
709: pj = b->j + bi[i];
710: nz = bi[i+1] - bi[i];
711: diag = diag_offset[i] - bi[i];
712: rs = 0.0;
713: for (j=0; j<nz; j++) {
714: pv[j] = rtmp[pj[j]];
715: rs += PetscAbsScalar(pv[j]);
716: }
717: rs -= PetscAbsScalar(pv[diag]);
719: sctx.rs = rs;
720: sctx.pv = pv[diag];
721: MatPivotCheck(B,A,info,&sctx,i);
722: if (sctx.newshift) break;
723: pv[diag] = sctx.pv;
724: }
726: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
727: /*
728: * if no shift in this attempt & shifting & started shifting & can refine,
729: * then try lower shift
730: */
731: sctx.shift_hi = sctx.shift_fraction;
732: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
733: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
734: sctx.newshift = PETSC_TRUE;
735: sctx.nshift++;
736: }
737: } while (sctx.newshift);
739: /* invert diagonal entries for simpler triangular solves */
740: for (i=0; i<n; i++) {
741: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
742: }
743: PetscFree(rtmp);
744: ISRestoreIndices(isicol,&ic);
745: ISRestoreIndices(isrow,&r);
747: ISIdentity(isrow,&row_identity);
748: ISIdentity(isicol,&col_identity);
749: if (row_identity && col_identity) {
750: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
751: } else {
752: C->ops->solve = MatSolve_SeqAIJ_inplace;
753: }
754: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
755: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
756: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
757: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
759: C->assembled = PETSC_TRUE;
760: C->preallocated = PETSC_TRUE;
762: PetscLogFlops(C->cmap->n);
763: if (sctx.nshift) {
764: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
765: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
766: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
767: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
768: }
769: }
770: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
771: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
773: MatSeqAIJCheckInode(C);
774: return(0);
775: }
777: /*
778: This routine implements inplace ILU(0) with row or/and column permutations.
779: Input:
780: A - original matrix
781: Output;
782: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
783: a->j (col index) is permuted by the inverse of colperm, then sorted
784: a->a reordered accordingly with a->j
785: a->diag (ptr to diagonal elements) is updated.
786: */
787: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
788: {
789: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data;
790: IS isrow = a->row,isicol = a->icol;
791: PetscErrorCode ierr;
792: const PetscInt *r,*ic,*ics;
793: PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
794: PetscInt *ajtmp,nz,row;
795: PetscInt *diag = a->diag,nbdiag,*pj;
796: PetscScalar *rtmp,*pc,multiplier,d;
797: MatScalar *pv,*v;
798: PetscReal rs;
799: FactorShiftCtx sctx;
800: const MatScalar *aa=a->a,*vtmp;
803: if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
805: /* MatPivotSetUp(): initialize shift context sctx */
806: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
808: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
809: const PetscInt *ddiag = a->diag;
810: sctx.shift_top = info->zeropivot;
811: for (i=0; i<n; i++) {
812: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
813: d = (aa)[ddiag[i]];
814: rs = -PetscAbsScalar(d) - PetscRealPart(d);
815: vtmp = aa+ai[i];
816: nz = ai[i+1] - ai[i];
817: for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
818: if (rs>sctx.shift_top) sctx.shift_top = rs;
819: }
820: sctx.shift_top *= 1.1;
821: sctx.nshift_max = 5;
822: sctx.shift_lo = 0.;
823: sctx.shift_hi = 1.;
824: }
826: ISGetIndices(isrow,&r);
827: ISGetIndices(isicol,&ic);
828: PetscMalloc1(n+1,&rtmp);
829: PetscArrayzero(rtmp,n+1);
830: ics = ic;
832: #if defined(MV)
833: sctx.shift_top = 0.;
834: sctx.nshift_max = 0;
835: sctx.shift_lo = 0.;
836: sctx.shift_hi = 0.;
837: sctx.shift_fraction = 0.;
839: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
840: sctx.shift_top = 0.;
841: for (i=0; i<n; i++) {
842: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
843: d = (a->a)[diag[i]];
844: rs = -PetscAbsScalar(d) - PetscRealPart(d);
845: v = a->a+ai[i];
846: nz = ai[i+1] - ai[i];
847: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
848: if (rs>sctx.shift_top) sctx.shift_top = rs;
849: }
850: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
851: sctx.shift_top *= 1.1;
852: sctx.nshift_max = 5;
853: sctx.shift_lo = 0.;
854: sctx.shift_hi = 1.;
855: }
857: sctx.shift_amount = 0.;
858: sctx.nshift = 0;
859: #endif
861: do {
862: sctx.newshift = PETSC_FALSE;
863: for (i=0; i<n; i++) {
864: /* load in initial unfactored row */
865: nz = ai[r[i]+1] - ai[r[i]];
866: ajtmp = aj + ai[r[i]];
867: v = a->a + ai[r[i]];
868: /* sort permuted ajtmp and values v accordingly */
869: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
870: PetscSortIntWithScalarArray(nz,ajtmp,v);
872: diag[r[i]] = ai[r[i]];
873: for (j=0; j<nz; j++) {
874: rtmp[ajtmp[j]] = v[j];
875: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
876: }
877: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
879: row = *ajtmp++;
880: while (row < i) {
881: pc = rtmp + row;
882: if (*pc != 0.0) {
883: pv = a->a + diag[r[row]];
884: pj = aj + diag[r[row]] + 1;
886: multiplier = *pc / *pv++;
887: *pc = multiplier;
888: nz = ai[r[row]+1] - diag[r[row]] - 1;
889: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
890: PetscLogFlops(1+2.0*nz);
891: }
892: row = *ajtmp++;
893: }
894: /* finished row so overwrite it onto a->a */
895: pv = a->a + ai[r[i]];
896: pj = aj + ai[r[i]];
897: nz = ai[r[i]+1] - ai[r[i]];
898: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
900: rs = 0.0;
901: for (j=0; j<nz; j++) {
902: pv[j] = rtmp[pj[j]];
903: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
904: }
906: sctx.rs = rs;
907: sctx.pv = pv[nbdiag];
908: MatPivotCheck(B,A,info,&sctx,i);
909: if (sctx.newshift) break;
910: pv[nbdiag] = sctx.pv;
911: }
913: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
914: /*
915: * if no shift in this attempt & shifting & started shifting & can refine,
916: * then try lower shift
917: */
918: sctx.shift_hi = sctx.shift_fraction;
919: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
920: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
921: sctx.newshift = PETSC_TRUE;
922: sctx.nshift++;
923: }
924: } while (sctx.newshift);
926: /* invert diagonal entries for simpler triangular solves */
927: for (i=0; i<n; i++) {
928: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
929: }
931: PetscFree(rtmp);
932: ISRestoreIndices(isicol,&ic);
933: ISRestoreIndices(isrow,&r);
935: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
936: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
937: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
938: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
940: A->assembled = PETSC_TRUE;
941: A->preallocated = PETSC_TRUE;
943: PetscLogFlops(A->cmap->n);
944: if (sctx.nshift) {
945: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
946: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
947: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
948: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
949: }
950: }
951: return(0);
952: }
954: /* ----------------------------------------------------------- */
955: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
956: {
958: Mat C;
961: MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);
962: MatLUFactorSymbolic(C,A,row,col,info);
963: MatLUFactorNumeric(C,A,info);
965: A->ops->solve = C->ops->solve;
966: A->ops->solvetranspose = C->ops->solvetranspose;
968: MatHeaderMerge(A,&C);
969: PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);
970: return(0);
971: }
972: /* ----------------------------------------------------------- */
974: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
975: {
976: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
977: IS iscol = a->col,isrow = a->row;
978: PetscErrorCode ierr;
979: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
980: PetscInt nz;
981: const PetscInt *rout,*cout,*r,*c;
982: PetscScalar *x,*tmp,*tmps,sum;
983: const PetscScalar *b;
984: const MatScalar *aa = a->a,*v;
987: if (!n) return(0);
989: VecGetArrayRead(bb,&b);
990: VecGetArrayWrite(xx,&x);
991: tmp = a->solve_work;
993: ISGetIndices(isrow,&rout); r = rout;
994: ISGetIndices(iscol,&cout); c = cout + (n-1);
996: /* forward solve the lower triangular */
997: tmp[0] = b[*r++];
998: tmps = tmp;
999: for (i=1; i<n; i++) {
1000: v = aa + ai[i];
1001: vi = aj + ai[i];
1002: nz = a->diag[i] - ai[i];
1003: sum = b[*r++];
1004: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1005: tmp[i] = sum;
1006: }
1008: /* backward solve the upper triangular */
1009: for (i=n-1; i>=0; i--) {
1010: v = aa + a->diag[i] + 1;
1011: vi = aj + a->diag[i] + 1;
1012: nz = ai[i+1] - a->diag[i] - 1;
1013: sum = tmp[i];
1014: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1015: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1016: }
1018: ISRestoreIndices(isrow,&rout);
1019: ISRestoreIndices(iscol,&cout);
1020: VecRestoreArrayRead(bb,&b);
1021: VecRestoreArrayWrite(xx,&x);
1022: PetscLogFlops(2.0*a->nz - A->cmap->n);
1023: return(0);
1024: }
1026: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1027: {
1028: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1029: IS iscol = a->col,isrow = a->row;
1030: PetscErrorCode ierr;
1031: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1032: PetscInt nz,neq,ldb,ldx;
1033: const PetscInt *rout,*cout,*r,*c;
1034: PetscScalar *x,*tmp = a->solve_work,*tmps,sum;
1035: const PetscScalar *b,*aa = a->a,*v;
1036: PetscBool isdense;
1039: if (!n) return(0);
1040: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense);
1041: if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1042: if (X != B) {
1043: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense);
1044: if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1045: }
1046: MatDenseGetArrayRead(B,&b);
1047: MatDenseGetLDA(B,&ldb);
1048: MatDenseGetArray(X,&x);
1049: MatDenseGetLDA(X,&ldx);
1050: ISGetIndices(isrow,&rout); r = rout;
1051: ISGetIndices(iscol,&cout); c = cout;
1052: for (neq=0; neq<B->cmap->n; neq++) {
1053: /* forward solve the lower triangular */
1054: tmp[0] = b[r[0]];
1055: tmps = tmp;
1056: for (i=1; i<n; i++) {
1057: v = aa + ai[i];
1058: vi = aj + ai[i];
1059: nz = a->diag[i] - ai[i];
1060: sum = b[r[i]];
1061: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1062: tmp[i] = sum;
1063: }
1064: /* backward solve the upper triangular */
1065: for (i=n-1; i>=0; i--) {
1066: v = aa + a->diag[i] + 1;
1067: vi = aj + a->diag[i] + 1;
1068: nz = ai[i+1] - a->diag[i] - 1;
1069: sum = tmp[i];
1070: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1071: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1072: }
1073: b += ldb;
1074: x += ldx;
1075: }
1076: ISRestoreIndices(isrow,&rout);
1077: ISRestoreIndices(iscol,&cout);
1078: MatDenseRestoreArrayRead(B,&b);
1079: MatDenseRestoreArray(X,&x);
1080: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1081: return(0);
1082: }
1084: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1085: {
1086: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1087: IS iscol = a->col,isrow = a->row;
1088: PetscErrorCode ierr;
1089: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1090: PetscInt nz,neq,ldb,ldx;
1091: const PetscInt *rout,*cout,*r,*c;
1092: PetscScalar *x,*tmp = a->solve_work,sum;
1093: const PetscScalar *b,*aa = a->a,*v;
1094: PetscBool isdense;
1097: if (!n) return(0);
1098: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense);
1099: if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1100: if (X != B) {
1101: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense);
1102: if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1103: }
1104: MatDenseGetArrayRead(B,&b);
1105: MatDenseGetLDA(B,&ldb);
1106: MatDenseGetArray(X,&x);
1107: MatDenseGetLDA(X,&ldx);
1108: ISGetIndices(isrow,&rout); r = rout;
1109: ISGetIndices(iscol,&cout); c = cout;
1110: for (neq=0; neq<B->cmap->n; neq++) {
1111: /* forward solve the lower triangular */
1112: tmp[0] = b[r[0]];
1113: v = aa;
1114: vi = aj;
1115: for (i=1; i<n; i++) {
1116: nz = ai[i+1] - ai[i];
1117: sum = b[r[i]];
1118: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1119: tmp[i] = sum;
1120: v += nz; vi += nz;
1121: }
1122: /* backward solve the upper triangular */
1123: for (i=n-1; i>=0; i--) {
1124: v = aa + adiag[i+1]+1;
1125: vi = aj + adiag[i+1]+1;
1126: nz = adiag[i]-adiag[i+1]-1;
1127: sum = tmp[i];
1128: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1129: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1130: }
1131: b += ldb;
1132: x += ldx;
1133: }
1134: ISRestoreIndices(isrow,&rout);
1135: ISRestoreIndices(iscol,&cout);
1136: MatDenseRestoreArrayRead(B,&b);
1137: MatDenseRestoreArray(X,&x);
1138: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1139: return(0);
1140: }
1142: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1143: {
1144: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1145: IS iscol = a->col,isrow = a->row;
1146: PetscErrorCode ierr;
1147: const PetscInt *r,*c,*rout,*cout;
1148: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1149: PetscInt nz,row;
1150: PetscScalar *x,*tmp,*tmps,sum;
1151: const PetscScalar *b;
1152: const MatScalar *aa = a->a,*v;
1155: if (!n) return(0);
1157: VecGetArrayRead(bb,&b);
1158: VecGetArrayWrite(xx,&x);
1159: tmp = a->solve_work;
1161: ISGetIndices(isrow,&rout); r = rout;
1162: ISGetIndices(iscol,&cout); c = cout + (n-1);
1164: /* forward solve the lower triangular */
1165: tmp[0] = b[*r++];
1166: tmps = tmp;
1167: for (row=1; row<n; row++) {
1168: i = rout[row]; /* permuted row */
1169: v = aa + ai[i];
1170: vi = aj + ai[i];
1171: nz = a->diag[i] - ai[i];
1172: sum = b[*r++];
1173: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1174: tmp[row] = sum;
1175: }
1177: /* backward solve the upper triangular */
1178: for (row=n-1; row>=0; row--) {
1179: i = rout[row]; /* permuted row */
1180: v = aa + a->diag[i] + 1;
1181: vi = aj + a->diag[i] + 1;
1182: nz = ai[i+1] - a->diag[i] - 1;
1183: sum = tmp[row];
1184: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1185: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1186: }
1188: ISRestoreIndices(isrow,&rout);
1189: ISRestoreIndices(iscol,&cout);
1190: VecRestoreArrayRead(bb,&b);
1191: VecRestoreArrayWrite(xx,&x);
1192: PetscLogFlops(2.0*a->nz - A->cmap->n);
1193: return(0);
1194: }
1196: /* ----------------------------------------------------------- */
1197: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1198: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1199: {
1200: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1201: PetscErrorCode ierr;
1202: PetscInt n = A->rmap->n;
1203: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag;
1204: PetscScalar *x;
1205: const PetscScalar *b;
1206: const MatScalar *aa = a->a;
1207: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1208: PetscInt adiag_i,i,nz,ai_i;
1209: const PetscInt *vi;
1210: const MatScalar *v;
1211: PetscScalar sum;
1212: #endif
1215: if (!n) return(0);
1217: VecGetArrayRead(bb,&b);
1218: VecGetArrayWrite(xx,&x);
1220: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1221: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1222: #else
1223: /* forward solve the lower triangular */
1224: x[0] = b[0];
1225: for (i=1; i<n; i++) {
1226: ai_i = ai[i];
1227: v = aa + ai_i;
1228: vi = aj + ai_i;
1229: nz = adiag[i] - ai_i;
1230: sum = b[i];
1231: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1232: x[i] = sum;
1233: }
1235: /* backward solve the upper triangular */
1236: for (i=n-1; i>=0; i--) {
1237: adiag_i = adiag[i];
1238: v = aa + adiag_i + 1;
1239: vi = aj + adiag_i + 1;
1240: nz = ai[i+1] - adiag_i - 1;
1241: sum = x[i];
1242: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1243: x[i] = sum*aa[adiag_i];
1244: }
1245: #endif
1246: PetscLogFlops(2.0*a->nz - A->cmap->n);
1247: VecRestoreArrayRead(bb,&b);
1248: VecRestoreArrayWrite(xx,&x);
1249: return(0);
1250: }
1252: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1253: {
1254: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1255: IS iscol = a->col,isrow = a->row;
1256: PetscErrorCode ierr;
1257: PetscInt i, n = A->rmap->n,j;
1258: PetscInt nz;
1259: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1260: PetscScalar *x,*tmp,sum;
1261: const PetscScalar *b;
1262: const MatScalar *aa = a->a,*v;
1265: if (yy != xx) {VecCopy(yy,xx);}
1267: VecGetArrayRead(bb,&b);
1268: VecGetArray(xx,&x);
1269: tmp = a->solve_work;
1271: ISGetIndices(isrow,&rout); r = rout;
1272: ISGetIndices(iscol,&cout); c = cout + (n-1);
1274: /* forward solve the lower triangular */
1275: tmp[0] = b[*r++];
1276: for (i=1; i<n; i++) {
1277: v = aa + ai[i];
1278: vi = aj + ai[i];
1279: nz = a->diag[i] - ai[i];
1280: sum = b[*r++];
1281: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1282: tmp[i] = sum;
1283: }
1285: /* backward solve the upper triangular */
1286: for (i=n-1; i>=0; i--) {
1287: v = aa + a->diag[i] + 1;
1288: vi = aj + a->diag[i] + 1;
1289: nz = ai[i+1] - a->diag[i] - 1;
1290: sum = tmp[i];
1291: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1292: tmp[i] = sum*aa[a->diag[i]];
1293: x[*c--] += tmp[i];
1294: }
1296: ISRestoreIndices(isrow,&rout);
1297: ISRestoreIndices(iscol,&cout);
1298: VecRestoreArrayRead(bb,&b);
1299: VecRestoreArray(xx,&x);
1300: PetscLogFlops(2.0*a->nz);
1301: return(0);
1302: }
1304: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1305: {
1306: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1307: IS iscol = a->col,isrow = a->row;
1308: PetscErrorCode ierr;
1309: PetscInt i, n = A->rmap->n,j;
1310: PetscInt nz;
1311: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1312: PetscScalar *x,*tmp,sum;
1313: const PetscScalar *b;
1314: const MatScalar *aa = a->a,*v;
1317: if (yy != xx) {VecCopy(yy,xx);}
1319: VecGetArrayRead(bb,&b);
1320: VecGetArray(xx,&x);
1321: tmp = a->solve_work;
1323: ISGetIndices(isrow,&rout); r = rout;
1324: ISGetIndices(iscol,&cout); c = cout;
1326: /* forward solve the lower triangular */
1327: tmp[0] = b[r[0]];
1328: v = aa;
1329: vi = aj;
1330: for (i=1; i<n; i++) {
1331: nz = ai[i+1] - ai[i];
1332: sum = b[r[i]];
1333: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1334: tmp[i] = sum;
1335: v += nz;
1336: vi += nz;
1337: }
1339: /* backward solve the upper triangular */
1340: v = aa + adiag[n-1];
1341: vi = aj + adiag[n-1];
1342: for (i=n-1; i>=0; i--) {
1343: nz = adiag[i] - adiag[i+1] - 1;
1344: sum = tmp[i];
1345: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1346: tmp[i] = sum*v[nz];
1347: x[c[i]] += tmp[i];
1348: v += nz+1; vi += nz+1;
1349: }
1351: ISRestoreIndices(isrow,&rout);
1352: ISRestoreIndices(iscol,&cout);
1353: VecRestoreArrayRead(bb,&b);
1354: VecRestoreArray(xx,&x);
1355: PetscLogFlops(2.0*a->nz);
1356: return(0);
1357: }
1359: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1360: {
1361: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1362: IS iscol = a->col,isrow = a->row;
1363: PetscErrorCode ierr;
1364: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1365: PetscInt i,n = A->rmap->n,j;
1366: PetscInt nz;
1367: PetscScalar *x,*tmp,s1;
1368: const MatScalar *aa = a->a,*v;
1369: const PetscScalar *b;
1372: VecGetArrayRead(bb,&b);
1373: VecGetArrayWrite(xx,&x);
1374: tmp = a->solve_work;
1376: ISGetIndices(isrow,&rout); r = rout;
1377: ISGetIndices(iscol,&cout); c = cout;
1379: /* copy the b into temp work space according to permutation */
1380: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1382: /* forward solve the U^T */
1383: for (i=0; i<n; i++) {
1384: v = aa + diag[i];
1385: vi = aj + diag[i] + 1;
1386: nz = ai[i+1] - diag[i] - 1;
1387: s1 = tmp[i];
1388: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1389: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1390: tmp[i] = s1;
1391: }
1393: /* backward solve the L^T */
1394: for (i=n-1; i>=0; i--) {
1395: v = aa + diag[i] - 1;
1396: vi = aj + diag[i] - 1;
1397: nz = diag[i] - ai[i];
1398: s1 = tmp[i];
1399: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1400: }
1402: /* copy tmp into x according to permutation */
1403: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1405: ISRestoreIndices(isrow,&rout);
1406: ISRestoreIndices(iscol,&cout);
1407: VecRestoreArrayRead(bb,&b);
1408: VecRestoreArrayWrite(xx,&x);
1410: PetscLogFlops(2.0*a->nz-A->cmap->n);
1411: return(0);
1412: }
1414: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1415: {
1416: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1417: IS iscol = a->col,isrow = a->row;
1418: PetscErrorCode ierr;
1419: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1420: PetscInt i,n = A->rmap->n,j;
1421: PetscInt nz;
1422: PetscScalar *x,*tmp,s1;
1423: const MatScalar *aa = a->a,*v;
1424: const PetscScalar *b;
1427: VecGetArrayRead(bb,&b);
1428: VecGetArrayWrite(xx,&x);
1429: tmp = a->solve_work;
1431: ISGetIndices(isrow,&rout); r = rout;
1432: ISGetIndices(iscol,&cout); c = cout;
1434: /* copy the b into temp work space according to permutation */
1435: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1437: /* forward solve the U^T */
1438: for (i=0; i<n; i++) {
1439: v = aa + adiag[i+1] + 1;
1440: vi = aj + adiag[i+1] + 1;
1441: nz = adiag[i] - adiag[i+1] - 1;
1442: s1 = tmp[i];
1443: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1444: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1445: tmp[i] = s1;
1446: }
1448: /* backward solve the L^T */
1449: for (i=n-1; i>=0; i--) {
1450: v = aa + ai[i];
1451: vi = aj + ai[i];
1452: nz = ai[i+1] - ai[i];
1453: s1 = tmp[i];
1454: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1455: }
1457: /* copy tmp into x according to permutation */
1458: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1460: ISRestoreIndices(isrow,&rout);
1461: ISRestoreIndices(iscol,&cout);
1462: VecRestoreArrayRead(bb,&b);
1463: VecRestoreArrayWrite(xx,&x);
1465: PetscLogFlops(2.0*a->nz-A->cmap->n);
1466: return(0);
1467: }
1469: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1470: {
1471: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1472: IS iscol = a->col,isrow = a->row;
1473: PetscErrorCode ierr;
1474: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1475: PetscInt i,n = A->rmap->n,j;
1476: PetscInt nz;
1477: PetscScalar *x,*tmp,s1;
1478: const MatScalar *aa = a->a,*v;
1479: const PetscScalar *b;
1482: if (zz != xx) {VecCopy(zz,xx);}
1483: VecGetArrayRead(bb,&b);
1484: VecGetArray(xx,&x);
1485: tmp = a->solve_work;
1487: ISGetIndices(isrow,&rout); r = rout;
1488: ISGetIndices(iscol,&cout); c = cout;
1490: /* copy the b into temp work space according to permutation */
1491: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1493: /* forward solve the U^T */
1494: for (i=0; i<n; i++) {
1495: v = aa + diag[i];
1496: vi = aj + diag[i] + 1;
1497: nz = ai[i+1] - diag[i] - 1;
1498: s1 = tmp[i];
1499: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1500: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1501: tmp[i] = s1;
1502: }
1504: /* backward solve the L^T */
1505: for (i=n-1; i>=0; i--) {
1506: v = aa + diag[i] - 1;
1507: vi = aj + diag[i] - 1;
1508: nz = diag[i] - ai[i];
1509: s1 = tmp[i];
1510: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1511: }
1513: /* copy tmp into x according to permutation */
1514: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1516: ISRestoreIndices(isrow,&rout);
1517: ISRestoreIndices(iscol,&cout);
1518: VecRestoreArrayRead(bb,&b);
1519: VecRestoreArray(xx,&x);
1521: PetscLogFlops(2.0*a->nz-A->cmap->n);
1522: return(0);
1523: }
1525: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1526: {
1527: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1528: IS iscol = a->col,isrow = a->row;
1529: PetscErrorCode ierr;
1530: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1531: PetscInt i,n = A->rmap->n,j;
1532: PetscInt nz;
1533: PetscScalar *x,*tmp,s1;
1534: const MatScalar *aa = a->a,*v;
1535: const PetscScalar *b;
1538: if (zz != xx) {VecCopy(zz,xx);}
1539: VecGetArrayRead(bb,&b);
1540: VecGetArray(xx,&x);
1541: tmp = a->solve_work;
1543: ISGetIndices(isrow,&rout); r = rout;
1544: ISGetIndices(iscol,&cout); c = cout;
1546: /* copy the b into temp work space according to permutation */
1547: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1549: /* forward solve the U^T */
1550: for (i=0; i<n; i++) {
1551: v = aa + adiag[i+1] + 1;
1552: vi = aj + adiag[i+1] + 1;
1553: nz = adiag[i] - adiag[i+1] - 1;
1554: s1 = tmp[i];
1555: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1556: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1557: tmp[i] = s1;
1558: }
1560: /* backward solve the L^T */
1561: for (i=n-1; i>=0; i--) {
1562: v = aa + ai[i];
1563: vi = aj + ai[i];
1564: nz = ai[i+1] - ai[i];
1565: s1 = tmp[i];
1566: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1567: }
1569: /* copy tmp into x according to permutation */
1570: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1572: ISRestoreIndices(isrow,&rout);
1573: ISRestoreIndices(iscol,&cout);
1574: VecRestoreArrayRead(bb,&b);
1575: VecRestoreArray(xx,&x);
1577: PetscLogFlops(2.0*a->nz-A->cmap->n);
1578: return(0);
1579: }
1581: /* ----------------------------------------------------------------*/
1583: /*
1584: ilu() under revised new data structure.
1585: Factored arrays bj and ba are stored as
1586: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1588: bi=fact->i is an array of size n+1, in which
1589: bi+
1590: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1591: bi[n]: points to L(n-1,n-1)+1
1593: bdiag=fact->diag is an array of size n+1,in which
1594: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1595: bdiag[n]: points to entry of U(n-1,0)-1
1597: U(i,:) contains bdiag[i] as its last entry, i.e.,
1598: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1599: */
1600: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1601: {
1602: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1604: const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1605: PetscInt i,j,k=0,nz,*bi,*bj,*bdiag;
1606: IS isicol;
1609: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1610: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);
1611: b = (Mat_SeqAIJ*)(fact)->data;
1613: /* allocate matrix arrays for new data structure */
1614: PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);
1615: PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));
1617: b->singlemalloc = PETSC_TRUE;
1618: if (!b->diag) {
1619: PetscMalloc1(n+1,&b->diag);
1620: PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));
1621: }
1622: bdiag = b->diag;
1624: if (n > 0) {
1625: PetscArrayzero(b->a,ai[n]);
1626: }
1628: /* set bi and bj with new data structure */
1629: bi = b->i;
1630: bj = b->j;
1632: /* L part */
1633: bi[0] = 0;
1634: for (i=0; i<n; i++) {
1635: nz = adiag[i] - ai[i];
1636: bi[i+1] = bi[i] + nz;
1637: aj = a->j + ai[i];
1638: for (j=0; j<nz; j++) {
1639: /* *bj = aj[j]; bj++; */
1640: bj[k++] = aj[j];
1641: }
1642: }
1644: /* U part */
1645: bdiag[n] = bi[n]-1;
1646: for (i=n-1; i>=0; i--) {
1647: nz = ai[i+1] - adiag[i] - 1;
1648: aj = a->j + adiag[i] + 1;
1649: for (j=0; j<nz; j++) {
1650: /* *bj = aj[j]; bj++; */
1651: bj[k++] = aj[j];
1652: }
1653: /* diag[i] */
1654: /* *bj = i; bj++; */
1655: bj[k++] = i;
1656: bdiag[i] = bdiag[i+1] + nz + 1;
1657: }
1659: fact->factortype = MAT_FACTOR_ILU;
1660: fact->info.factor_mallocs = 0;
1661: fact->info.fill_ratio_given = info->fill;
1662: fact->info.fill_ratio_needed = 1.0;
1663: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1664: MatSeqAIJCheckInode_FactorLU(fact);
1666: b = (Mat_SeqAIJ*)(fact)->data;
1667: b->row = isrow;
1668: b->col = iscol;
1669: b->icol = isicol;
1670: PetscMalloc1(fact->rmap->n+1,&b->solve_work);
1671: PetscObjectReference((PetscObject)isrow);
1672: PetscObjectReference((PetscObject)iscol);
1673: return(0);
1674: }
1676: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1677: {
1678: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1679: IS isicol;
1680: PetscErrorCode ierr;
1681: const PetscInt *r,*ic;
1682: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1683: PetscInt *bi,*cols,nnz,*cols_lvl;
1684: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1685: PetscInt i,levels,diagonal_fill;
1686: PetscBool col_identity,row_identity,missing;
1687: PetscReal f;
1688: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1689: PetscBT lnkbt;
1690: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1691: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1692: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1695: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1696: MatMissingDiagonal(A,&missing,&i);
1697: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1699: levels = (PetscInt)info->levels;
1700: ISIdentity(isrow,&row_identity);
1701: ISIdentity(iscol,&col_identity);
1702: if (!levels && row_identity && col_identity) {
1703: /* special case: ilu(0) with natural ordering */
1704: MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);
1705: if (a->inode.size) {
1706: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1707: }
1708: return(0);
1709: }
1711: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1712: ISGetIndices(isrow,&r);
1713: ISGetIndices(isicol,&ic);
1715: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1716: PetscMalloc1(n+1,&bi);
1717: PetscMalloc1(n+1,&bdiag);
1718: bi[0] = bdiag[0] = 0;
1719: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1721: /* create a linked list for storing column indices of the active row */
1722: nlnk = n + 1;
1723: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1725: /* initial FreeSpace size is f*(ai[n]+1) */
1726: f = info->fill;
1727: diagonal_fill = (PetscInt)info->diagonal_fill;
1728: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
1729: current_space = free_space;
1730: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);
1731: current_space_lvl = free_space_lvl;
1732: for (i=0; i<n; i++) {
1733: nzi = 0;
1734: /* copy current row into linked list */
1735: nnz = ai[r[i]+1] - ai[r[i]];
1736: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1737: cols = aj + ai[r[i]];
1738: lnk[i] = -1; /* marker to indicate if diagonal exists */
1739: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1740: nzi += nlnk;
1742: /* make sure diagonal entry is included */
1743: if (diagonal_fill && lnk[i] == -1) {
1744: fm = n;
1745: while (lnk[fm] < i) fm = lnk[fm];
1746: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1747: lnk[fm] = i;
1748: lnk_lvl[i] = 0;
1749: nzi++; dcount++;
1750: }
1752: /* add pivot rows into the active row */
1753: nzbd = 0;
1754: prow = lnk[n];
1755: while (prow < i) {
1756: nnz = bdiag[prow];
1757: cols = bj_ptr[prow] + nnz + 1;
1758: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1759: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1760: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1761: nzi += nlnk;
1762: prow = lnk[prow];
1763: nzbd++;
1764: }
1765: bdiag[i] = nzbd;
1766: bi[i+1] = bi[i] + nzi;
1767: /* if free space is not available, make more free space */
1768: if (current_space->local_remaining<nzi) {
1769: nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */
1770: PetscFreeSpaceGet(nnz,¤t_space);
1771: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1772: reallocs++;
1773: }
1775: /* copy data into free_space and free_space_lvl, then initialize lnk */
1776: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1777: bj_ptr[i] = current_space->array;
1778: bjlvl_ptr[i] = current_space_lvl->array;
1780: /* make sure the active row i has diagonal entry */
1781: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1783: current_space->array += nzi;
1784: current_space->local_used += nzi;
1785: current_space->local_remaining -= nzi;
1786: current_space_lvl->array += nzi;
1787: current_space_lvl->local_used += nzi;
1788: current_space_lvl->local_remaining -= nzi;
1789: }
1791: ISRestoreIndices(isrow,&r);
1792: ISRestoreIndices(isicol,&ic);
1793: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1794: PetscMalloc1(bi[n]+1,&bj);
1795: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
1797: PetscIncompleteLLDestroy(lnk,lnkbt);
1798: PetscFreeSpaceDestroy(free_space_lvl);
1799: PetscFree2(bj_ptr,bjlvl_ptr);
1801: #if defined(PETSC_USE_INFO)
1802: {
1803: PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1804: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
1805: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
1806: PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
1807: PetscInfo(A,"for best performance.\n");
1808: if (diagonal_fill) {
1809: PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);
1810: }
1811: }
1812: #endif
1813: /* put together the new matrix */
1814: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
1815: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
1816: b = (Mat_SeqAIJ*)(fact)->data;
1818: b->free_a = PETSC_TRUE;
1819: b->free_ij = PETSC_TRUE;
1820: b->singlemalloc = PETSC_FALSE;
1822: PetscMalloc1(bdiag[0]+1,&b->a);
1824: b->j = bj;
1825: b->i = bi;
1826: b->diag = bdiag;
1827: b->ilen = NULL;
1828: b->imax = NULL;
1829: b->row = isrow;
1830: b->col = iscol;
1831: PetscObjectReference((PetscObject)isrow);
1832: PetscObjectReference((PetscObject)iscol);
1833: b->icol = isicol;
1835: PetscMalloc1(n+1,&b->solve_work);
1836: /* In b structure: Free imax, ilen, old a, old j.
1837: Allocate bdiag, solve_work, new a, new j */
1838: PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
1839: b->maxnz = b->nz = bdiag[0]+1;
1841: (fact)->info.factor_mallocs = reallocs;
1842: (fact)->info.fill_ratio_given = f;
1843: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1844: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1845: if (a->inode.size) {
1846: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1847: }
1848: MatSeqAIJCheckInode_FactorLU(fact);
1849: return(0);
1850: }
1852: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1853: {
1854: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1855: IS isicol;
1856: PetscErrorCode ierr;
1857: const PetscInt *r,*ic;
1858: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1859: PetscInt *bi,*cols,nnz,*cols_lvl;
1860: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1861: PetscInt i,levels,diagonal_fill;
1862: PetscBool col_identity,row_identity;
1863: PetscReal f;
1864: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1865: PetscBT lnkbt;
1866: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1867: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1868: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1869: PetscBool missing;
1872: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1873: MatMissingDiagonal(A,&missing,&i);
1874: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1876: f = info->fill;
1877: levels = (PetscInt)info->levels;
1878: diagonal_fill = (PetscInt)info->diagonal_fill;
1880: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1882: ISIdentity(isrow,&row_identity);
1883: ISIdentity(iscol,&col_identity);
1884: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1885: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);
1887: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1888: if (a->inode.size) {
1889: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1890: }
1891: fact->factortype = MAT_FACTOR_ILU;
1892: (fact)->info.factor_mallocs = 0;
1893: (fact)->info.fill_ratio_given = info->fill;
1894: (fact)->info.fill_ratio_needed = 1.0;
1896: b = (Mat_SeqAIJ*)(fact)->data;
1897: b->row = isrow;
1898: b->col = iscol;
1899: b->icol = isicol;
1900: PetscMalloc1((fact)->rmap->n+1,&b->solve_work);
1901: PetscObjectReference((PetscObject)isrow);
1902: PetscObjectReference((PetscObject)iscol);
1903: return(0);
1904: }
1906: ISGetIndices(isrow,&r);
1907: ISGetIndices(isicol,&ic);
1909: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1910: PetscMalloc1(n+1,&bi);
1911: PetscMalloc1(n+1,&bdiag);
1912: bi[0] = bdiag[0] = 0;
1914: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1916: /* create a linked list for storing column indices of the active row */
1917: nlnk = n + 1;
1918: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1920: /* initial FreeSpace size is f*(ai[n]+1) */
1921: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
1922: current_space = free_space;
1923: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);
1924: current_space_lvl = free_space_lvl;
1926: for (i=0; i<n; i++) {
1927: nzi = 0;
1928: /* copy current row into linked list */
1929: nnz = ai[r[i]+1] - ai[r[i]];
1930: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1931: cols = aj + ai[r[i]];
1932: lnk[i] = -1; /* marker to indicate if diagonal exists */
1933: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1934: nzi += nlnk;
1936: /* make sure diagonal entry is included */
1937: if (diagonal_fill && lnk[i] == -1) {
1938: fm = n;
1939: while (lnk[fm] < i) fm = lnk[fm];
1940: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1941: lnk[fm] = i;
1942: lnk_lvl[i] = 0;
1943: nzi++; dcount++;
1944: }
1946: /* add pivot rows into the active row */
1947: nzbd = 0;
1948: prow = lnk[n];
1949: while (prow < i) {
1950: nnz = bdiag[prow];
1951: cols = bj_ptr[prow] + nnz + 1;
1952: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1953: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1954: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1955: nzi += nlnk;
1956: prow = lnk[prow];
1957: nzbd++;
1958: }
1959: bdiag[i] = nzbd;
1960: bi[i+1] = bi[i] + nzi;
1962: /* if free space is not available, make more free space */
1963: if (current_space->local_remaining<nzi) {
1964: nnz = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */
1965: PetscFreeSpaceGet(nnz,¤t_space);
1966: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1967: reallocs++;
1968: }
1970: /* copy data into free_space and free_space_lvl, then initialize lnk */
1971: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1972: bj_ptr[i] = current_space->array;
1973: bjlvl_ptr[i] = current_space_lvl->array;
1975: /* make sure the active row i has diagonal entry */
1976: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1978: current_space->array += nzi;
1979: current_space->local_used += nzi;
1980: current_space->local_remaining -= nzi;
1981: current_space_lvl->array += nzi;
1982: current_space_lvl->local_used += nzi;
1983: current_space_lvl->local_remaining -= nzi;
1984: }
1986: ISRestoreIndices(isrow,&r);
1987: ISRestoreIndices(isicol,&ic);
1989: /* destroy list of free space and other temporary arrays */
1990: PetscMalloc1(bi[n]+1,&bj);
1991: PetscFreeSpaceContiguous(&free_space,bj); /* copy free_space -> bj */
1992: PetscIncompleteLLDestroy(lnk,lnkbt);
1993: PetscFreeSpaceDestroy(free_space_lvl);
1994: PetscFree2(bj_ptr,bjlvl_ptr);
1996: #if defined(PETSC_USE_INFO)
1997: {
1998: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1999: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
2000: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
2001: PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
2002: PetscInfo(A,"for best performance.\n");
2003: if (diagonal_fill) {
2004: PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);
2005: }
2006: }
2007: #endif
2009: /* put together the new matrix */
2010: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
2011: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
2012: b = (Mat_SeqAIJ*)(fact)->data;
2014: b->free_a = PETSC_TRUE;
2015: b->free_ij = PETSC_TRUE;
2016: b->singlemalloc = PETSC_FALSE;
2018: PetscMalloc1(bi[n],&b->a);
2019: b->j = bj;
2020: b->i = bi;
2021: for (i=0; i<n; i++) bdiag[i] += bi[i];
2022: b->diag = bdiag;
2023: b->ilen = NULL;
2024: b->imax = NULL;
2025: b->row = isrow;
2026: b->col = iscol;
2027: PetscObjectReference((PetscObject)isrow);
2028: PetscObjectReference((PetscObject)iscol);
2029: b->icol = isicol;
2030: PetscMalloc1(n+1,&b->solve_work);
2031: /* In b structure: Free imax, ilen, old a, old j.
2032: Allocate bdiag, solve_work, new a, new j */
2033: PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
2034: b->maxnz = b->nz = bi[n];
2036: (fact)->info.factor_mallocs = reallocs;
2037: (fact)->info.fill_ratio_given = f;
2038: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2039: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2040: if (a->inode.size) {
2041: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2042: }
2043: return(0);
2044: }
2046: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2047: {
2048: Mat C = B;
2049: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2050: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2051: IS ip=b->row,iip = b->icol;
2053: const PetscInt *rip,*riip;
2054: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2055: PetscInt *ai=a->i,*aj=a->j;
2056: PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2057: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2058: PetscBool perm_identity;
2059: FactorShiftCtx sctx;
2060: PetscReal rs;
2061: MatScalar d,*v;
2064: /* MatPivotSetUp(): initialize shift context sctx */
2065: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2067: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2068: sctx.shift_top = info->zeropivot;
2069: for (i=0; i<mbs; i++) {
2070: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2071: d = (aa)[a->diag[i]];
2072: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2073: v = aa+ai[i];
2074: nz = ai[i+1] - ai[i];
2075: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2076: if (rs>sctx.shift_top) sctx.shift_top = rs;
2077: }
2078: sctx.shift_top *= 1.1;
2079: sctx.nshift_max = 5;
2080: sctx.shift_lo = 0.;
2081: sctx.shift_hi = 1.;
2082: }
2084: ISGetIndices(ip,&rip);
2085: ISGetIndices(iip,&riip);
2087: /* allocate working arrays
2088: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2089: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2090: */
2091: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);
2093: do {
2094: sctx.newshift = PETSC_FALSE;
2096: for (i=0; i<mbs; i++) c2r[i] = mbs;
2097: if (mbs) il[0] = 0;
2099: for (k = 0; k<mbs; k++) {
2100: /* zero rtmp */
2101: nz = bi[k+1] - bi[k];
2102: bjtmp = bj + bi[k];
2103: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2105: /* load in initial unfactored row */
2106: bval = ba + bi[k];
2107: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2108: for (j = jmin; j < jmax; j++) {
2109: col = riip[aj[j]];
2110: if (col >= k) { /* only take upper triangular entry */
2111: rtmp[col] = aa[j];
2112: *bval++ = 0.0; /* for in-place factorization */
2113: }
2114: }
2115: /* shift the diagonal of the matrix: ZeropivotApply() */
2116: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2118: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2119: dk = rtmp[k];
2120: i = c2r[k]; /* first row to be added to k_th row */
2122: while (i < k) {
2123: nexti = c2r[i]; /* next row to be added to k_th row */
2125: /* compute multiplier, update diag(k) and U(i,k) */
2126: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2127: uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2128: dk += uikdi*ba[ili]; /* update diag[k] */
2129: ba[ili] = uikdi; /* -U(i,k) */
2131: /* add multiple of row i to k-th row */
2132: jmin = ili + 1; jmax = bi[i+1];
2133: if (jmin < jmax) {
2134: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2135: /* update il and c2r for row i */
2136: il[i] = jmin;
2137: j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2138: }
2139: i = nexti;
2140: }
2142: /* copy data into U(k,:) */
2143: rs = 0.0;
2144: jmin = bi[k]; jmax = bi[k+1]-1;
2145: if (jmin < jmax) {
2146: for (j=jmin; j<jmax; j++) {
2147: col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2148: }
2149: /* add the k-th row into il and c2r */
2150: il[k] = jmin;
2151: i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2152: }
2154: /* MatPivotCheck() */
2155: sctx.rs = rs;
2156: sctx.pv = dk;
2157: MatPivotCheck(B,A,info,&sctx,i);
2158: if (sctx.newshift) break;
2159: dk = sctx.pv;
2161: ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2162: }
2163: } while (sctx.newshift);
2165: PetscFree3(rtmp,il,c2r);
2166: ISRestoreIndices(ip,&rip);
2167: ISRestoreIndices(iip,&riip);
2169: ISIdentity(ip,&perm_identity);
2170: if (perm_identity) {
2171: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2172: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2173: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2174: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2175: } else {
2176: B->ops->solve = MatSolve_SeqSBAIJ_1;
2177: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2178: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2179: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2180: }
2182: C->assembled = PETSC_TRUE;
2183: C->preallocated = PETSC_TRUE;
2185: PetscLogFlops(C->rmap->n);
2187: /* MatPivotView() */
2188: if (sctx.nshift) {
2189: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2190: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
2191: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2192: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2193: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2194: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
2195: }
2196: }
2197: return(0);
2198: }
2200: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2201: {
2202: Mat C = B;
2203: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2204: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2205: IS ip=b->row,iip = b->icol;
2207: const PetscInt *rip,*riip;
2208: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2209: PetscInt *ai=a->i,*aj=a->j;
2210: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2211: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2212: PetscBool perm_identity;
2213: FactorShiftCtx sctx;
2214: PetscReal rs;
2215: MatScalar d,*v;
2218: /* MatPivotSetUp(): initialize shift context sctx */
2219: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2221: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2222: sctx.shift_top = info->zeropivot;
2223: for (i=0; i<mbs; i++) {
2224: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2225: d = (aa)[a->diag[i]];
2226: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2227: v = aa+ai[i];
2228: nz = ai[i+1] - ai[i];
2229: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2230: if (rs>sctx.shift_top) sctx.shift_top = rs;
2231: }
2232: sctx.shift_top *= 1.1;
2233: sctx.nshift_max = 5;
2234: sctx.shift_lo = 0.;
2235: sctx.shift_hi = 1.;
2236: }
2238: ISGetIndices(ip,&rip);
2239: ISGetIndices(iip,&riip);
2241: /* initialization */
2242: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);
2244: do {
2245: sctx.newshift = PETSC_FALSE;
2247: for (i=0; i<mbs; i++) jl[i] = mbs;
2248: il[0] = 0;
2250: for (k = 0; k<mbs; k++) {
2251: /* zero rtmp */
2252: nz = bi[k+1] - bi[k];
2253: bjtmp = bj + bi[k];
2254: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2256: bval = ba + bi[k];
2257: /* initialize k-th row by the perm[k]-th row of A */
2258: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2259: for (j = jmin; j < jmax; j++) {
2260: col = riip[aj[j]];
2261: if (col >= k) { /* only take upper triangular entry */
2262: rtmp[col] = aa[j];
2263: *bval++ = 0.0; /* for in-place factorization */
2264: }
2265: }
2266: /* shift the diagonal of the matrix */
2267: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2269: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2270: dk = rtmp[k];
2271: i = jl[k]; /* first row to be added to k_th row */
2273: while (i < k) {
2274: nexti = jl[i]; /* next row to be added to k_th row */
2276: /* compute multiplier, update diag(k) and U(i,k) */
2277: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2278: uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2279: dk += uikdi*ba[ili];
2280: ba[ili] = uikdi; /* -U(i,k) */
2282: /* add multiple of row i to k-th row */
2283: jmin = ili + 1; jmax = bi[i+1];
2284: if (jmin < jmax) {
2285: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2286: /* update il and jl for row i */
2287: il[i] = jmin;
2288: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2289: }
2290: i = nexti;
2291: }
2293: /* shift the diagonals when zero pivot is detected */
2294: /* compute rs=sum of abs(off-diagonal) */
2295: rs = 0.0;
2296: jmin = bi[k]+1;
2297: nz = bi[k+1] - jmin;
2298: bcol = bj + jmin;
2299: for (j=0; j<nz; j++) {
2300: rs += PetscAbsScalar(rtmp[bcol[j]]);
2301: }
2303: sctx.rs = rs;
2304: sctx.pv = dk;
2305: MatPivotCheck(B,A,info,&sctx,k);
2306: if (sctx.newshift) break;
2307: dk = sctx.pv;
2309: /* copy data into U(k,:) */
2310: ba[bi[k]] = 1.0/dk; /* U(k,k) */
2311: jmin = bi[k]+1; jmax = bi[k+1];
2312: if (jmin < jmax) {
2313: for (j=jmin; j<jmax; j++) {
2314: col = bj[j]; ba[j] = rtmp[col];
2315: }
2316: /* add the k-th row into il and jl */
2317: il[k] = jmin;
2318: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2319: }
2320: }
2321: } while (sctx.newshift);
2323: PetscFree3(rtmp,il,jl);
2324: ISRestoreIndices(ip,&rip);
2325: ISRestoreIndices(iip,&riip);
2327: ISIdentity(ip,&perm_identity);
2328: if (perm_identity) {
2329: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2330: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2331: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2332: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2333: } else {
2334: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2335: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2336: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2337: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2338: }
2340: C->assembled = PETSC_TRUE;
2341: C->preallocated = PETSC_TRUE;
2343: PetscLogFlops(C->rmap->n);
2344: if (sctx.nshift) {
2345: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2346: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2347: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2348: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2349: }
2350: }
2351: return(0);
2352: }
2354: /*
2355: icc() under revised new data structure.
2356: Factored arrays bj and ba are stored as
2357: U(0,:),...,U(i,:),U(n-1,:)
2359: ui=fact->i is an array of size n+1, in which
2360: ui+
2361: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2362: ui[n]: points to U(n-1,n-1)+1
2364: udiag=fact->diag is an array of size n,in which
2365: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2367: U(i,:) contains udiag[i] as its last entry, i.e.,
2368: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2369: */
2371: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2372: {
2373: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2374: Mat_SeqSBAIJ *b;
2375: PetscErrorCode ierr;
2376: PetscBool perm_identity,missing;
2377: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2378: const PetscInt *rip,*riip;
2379: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2380: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2381: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2382: PetscReal fill =info->fill,levels=info->levels;
2383: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2384: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2385: PetscBT lnkbt;
2386: IS iperm;
2389: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2390: MatMissingDiagonal(A,&missing,&d);
2391: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2392: ISIdentity(perm,&perm_identity);
2393: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2395: PetscMalloc1(am+1,&ui);
2396: PetscMalloc1(am+1,&udiag);
2397: ui[0] = 0;
2399: /* ICC(0) without matrix ordering: simply rearrange column indices */
2400: if (!levels && perm_identity) {
2401: for (i=0; i<am; i++) {
2402: ncols = ai[i+1] - a->diag[i];
2403: ui[i+1] = ui[i] + ncols;
2404: udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2405: }
2406: PetscMalloc1(ui[am]+1,&uj);
2407: cols = uj;
2408: for (i=0; i<am; i++) {
2409: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2410: ncols = ai[i+1] - a->diag[i] -1;
2411: for (j=0; j<ncols; j++) *cols++ = aj[j];
2412: *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2413: }
2414: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2415: ISGetIndices(iperm,&riip);
2416: ISGetIndices(perm,&rip);
2418: /* initialization */
2419: PetscMalloc1(am+1,&ajtmp);
2421: /* jl: linked list for storing indices of the pivot rows
2422: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2423: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2424: for (i=0; i<am; i++) {
2425: jl[i] = am; il[i] = 0;
2426: }
2428: /* create and initialize a linked list for storing column indices of the active row k */
2429: nlnk = am + 1;
2430: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2432: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2433: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);
2434: current_space = free_space;
2435: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);
2436: current_space_lvl = free_space_lvl;
2438: for (k=0; k<am; k++) { /* for each active row k */
2439: /* initialize lnk by the column indices of row rip[k] of A */
2440: nzk = 0;
2441: ncols = ai[rip[k]+1] - ai[rip[k]];
2442: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2443: ncols_upper = 0;
2444: for (j=0; j<ncols; j++) {
2445: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2446: if (riip[i] >= k) { /* only take upper triangular entry */
2447: ajtmp[ncols_upper] = i;
2448: ncols_upper++;
2449: }
2450: }
2451: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2452: nzk += nlnk;
2454: /* update lnk by computing fill-in for each pivot row to be merged in */
2455: prow = jl[k]; /* 1st pivot row */
2457: while (prow < k) {
2458: nextprow = jl[prow];
2460: /* merge prow into k-th row */
2461: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2462: jmax = ui[prow+1];
2463: ncols = jmax-jmin;
2464: i = jmin - ui[prow];
2465: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2466: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2467: j = *(uj - 1);
2468: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2469: nzk += nlnk;
2471: /* update il and jl for prow */
2472: if (jmin < jmax) {
2473: il[prow] = jmin;
2474: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2475: }
2476: prow = nextprow;
2477: }
2479: /* if free space is not available, make more free space */
2480: if (current_space->local_remaining<nzk) {
2481: i = am - k + 1; /* num of unfactored rows */
2482: i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2483: PetscFreeSpaceGet(i,¤t_space);
2484: PetscFreeSpaceGet(i,¤t_space_lvl);
2485: reallocs++;
2486: }
2488: /* copy data into free_space and free_space_lvl, then initialize lnk */
2489: if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2490: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2492: /* add the k-th row into il and jl */
2493: if (nzk > 1) {
2494: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2495: jl[k] = jl[i]; jl[i] = k;
2496: il[k] = ui[k] + 1;
2497: }
2498: uj_ptr[k] = current_space->array;
2499: uj_lvl_ptr[k] = current_space_lvl->array;
2501: current_space->array += nzk;
2502: current_space->local_used += nzk;
2503: current_space->local_remaining -= nzk;
2505: current_space_lvl->array += nzk;
2506: current_space_lvl->local_used += nzk;
2507: current_space_lvl->local_remaining -= nzk;
2509: ui[k+1] = ui[k] + nzk;
2510: }
2512: ISRestoreIndices(perm,&rip);
2513: ISRestoreIndices(iperm,&riip);
2514: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2515: PetscFree(ajtmp);
2517: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2518: PetscMalloc1(ui[am]+1,&uj);
2519: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2520: PetscIncompleteLLDestroy(lnk,lnkbt);
2521: PetscFreeSpaceDestroy(free_space_lvl);
2523: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2525: /* put together the new matrix in MATSEQSBAIJ format */
2526: b = (Mat_SeqSBAIJ*)(fact)->data;
2527: b->singlemalloc = PETSC_FALSE;
2529: PetscMalloc1(ui[am]+1,&b->a);
2531: b->j = uj;
2532: b->i = ui;
2533: b->diag = udiag;
2534: b->free_diag = PETSC_TRUE;
2535: b->ilen = NULL;
2536: b->imax = NULL;
2537: b->row = perm;
2538: b->col = perm;
2539: PetscObjectReference((PetscObject)perm);
2540: PetscObjectReference((PetscObject)perm);
2541: b->icol = iperm;
2542: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2544: PetscMalloc1(am+1,&b->solve_work);
2545: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2547: b->maxnz = b->nz = ui[am];
2548: b->free_a = PETSC_TRUE;
2549: b->free_ij = PETSC_TRUE;
2551: fact->info.factor_mallocs = reallocs;
2552: fact->info.fill_ratio_given = fill;
2553: if (ai[am] != 0) {
2554: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2555: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2556: } else {
2557: fact->info.fill_ratio_needed = 0.0;
2558: }
2559: #if defined(PETSC_USE_INFO)
2560: if (ai[am] != 0) {
2561: PetscReal af = fact->info.fill_ratio_needed;
2562: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2563: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2564: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2565: } else {
2566: PetscInfo(A,"Empty matrix\n");
2567: }
2568: #endif
2569: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2570: return(0);
2571: }
2573: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2574: {
2575: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2576: Mat_SeqSBAIJ *b;
2577: PetscErrorCode ierr;
2578: PetscBool perm_identity,missing;
2579: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2580: const PetscInt *rip,*riip;
2581: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2582: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2583: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2584: PetscReal fill =info->fill,levels=info->levels;
2585: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2586: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2587: PetscBT lnkbt;
2588: IS iperm;
2591: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2592: MatMissingDiagonal(A,&missing,&d);
2593: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2594: ISIdentity(perm,&perm_identity);
2595: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2597: PetscMalloc1(am+1,&ui);
2598: PetscMalloc1(am+1,&udiag);
2599: ui[0] = 0;
2601: /* ICC(0) without matrix ordering: simply copies fill pattern */
2602: if (!levels && perm_identity) {
2604: for (i=0; i<am; i++) {
2605: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
2606: udiag[i] = ui[i];
2607: }
2608: PetscMalloc1(ui[am]+1,&uj);
2609: cols = uj;
2610: for (i=0; i<am; i++) {
2611: aj = a->j + a->diag[i];
2612: ncols = ui[i+1] - ui[i];
2613: for (j=0; j<ncols; j++) *cols++ = *aj++;
2614: }
2615: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2616: ISGetIndices(iperm,&riip);
2617: ISGetIndices(perm,&rip);
2619: /* initialization */
2620: PetscMalloc1(am+1,&ajtmp);
2622: /* jl: linked list for storing indices of the pivot rows
2623: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2624: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2625: for (i=0; i<am; i++) {
2626: jl[i] = am; il[i] = 0;
2627: }
2629: /* create and initialize a linked list for storing column indices of the active row k */
2630: nlnk = am + 1;
2631: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2633: /* initial FreeSpace size is fill*(ai[am]+1) */
2634: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);
2635: current_space = free_space;
2636: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);
2637: current_space_lvl = free_space_lvl;
2639: for (k=0; k<am; k++) { /* for each active row k */
2640: /* initialize lnk by the column indices of row rip[k] of A */
2641: nzk = 0;
2642: ncols = ai[rip[k]+1] - ai[rip[k]];
2643: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2644: ncols_upper = 0;
2645: for (j=0; j<ncols; j++) {
2646: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2647: if (riip[i] >= k) { /* only take upper triangular entry */
2648: ajtmp[ncols_upper] = i;
2649: ncols_upper++;
2650: }
2651: }
2652: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2653: nzk += nlnk;
2655: /* update lnk by computing fill-in for each pivot row to be merged in */
2656: prow = jl[k]; /* 1st pivot row */
2658: while (prow < k) {
2659: nextprow = jl[prow];
2661: /* merge prow into k-th row */
2662: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2663: jmax = ui[prow+1];
2664: ncols = jmax-jmin;
2665: i = jmin - ui[prow];
2666: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2667: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2668: j = *(uj - 1);
2669: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2670: nzk += nlnk;
2672: /* update il and jl for prow */
2673: if (jmin < jmax) {
2674: il[prow] = jmin;
2675: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2676: }
2677: prow = nextprow;
2678: }
2680: /* if free space is not available, make more free space */
2681: if (current_space->local_remaining<nzk) {
2682: i = am - k + 1; /* num of unfactored rows */
2683: i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2684: PetscFreeSpaceGet(i,¤t_space);
2685: PetscFreeSpaceGet(i,¤t_space_lvl);
2686: reallocs++;
2687: }
2689: /* copy data into free_space and free_space_lvl, then initialize lnk */
2690: if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2691: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2693: /* add the k-th row into il and jl */
2694: if (nzk > 1) {
2695: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2696: jl[k] = jl[i]; jl[i] = k;
2697: il[k] = ui[k] + 1;
2698: }
2699: uj_ptr[k] = current_space->array;
2700: uj_lvl_ptr[k] = current_space_lvl->array;
2702: current_space->array += nzk;
2703: current_space->local_used += nzk;
2704: current_space->local_remaining -= nzk;
2706: current_space_lvl->array += nzk;
2707: current_space_lvl->local_used += nzk;
2708: current_space_lvl->local_remaining -= nzk;
2710: ui[k+1] = ui[k] + nzk;
2711: }
2713: #if defined(PETSC_USE_INFO)
2714: if (ai[am] != 0) {
2715: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2716: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2717: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2718: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2719: } else {
2720: PetscInfo(A,"Empty matrix\n");
2721: }
2722: #endif
2724: ISRestoreIndices(perm,&rip);
2725: ISRestoreIndices(iperm,&riip);
2726: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2727: PetscFree(ajtmp);
2729: /* destroy list of free space and other temporary array(s) */
2730: PetscMalloc1(ui[am]+1,&uj);
2731: PetscFreeSpaceContiguous(&free_space,uj);
2732: PetscIncompleteLLDestroy(lnk,lnkbt);
2733: PetscFreeSpaceDestroy(free_space_lvl);
2735: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2737: /* put together the new matrix in MATSEQSBAIJ format */
2739: b = (Mat_SeqSBAIJ*)fact->data;
2740: b->singlemalloc = PETSC_FALSE;
2742: PetscMalloc1(ui[am]+1,&b->a);
2744: b->j = uj;
2745: b->i = ui;
2746: b->diag = udiag;
2747: b->free_diag = PETSC_TRUE;
2748: b->ilen = NULL;
2749: b->imax = NULL;
2750: b->row = perm;
2751: b->col = perm;
2753: PetscObjectReference((PetscObject)perm);
2754: PetscObjectReference((PetscObject)perm);
2756: b->icol = iperm;
2757: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2758: PetscMalloc1(am+1,&b->solve_work);
2759: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
2760: b->maxnz = b->nz = ui[am];
2761: b->free_a = PETSC_TRUE;
2762: b->free_ij = PETSC_TRUE;
2764: fact->info.factor_mallocs = reallocs;
2765: fact->info.fill_ratio_given = fill;
2766: if (ai[am] != 0) {
2767: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2768: } else {
2769: fact->info.fill_ratio_needed = 0.0;
2770: }
2771: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2772: return(0);
2773: }
2775: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2776: {
2777: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2778: Mat_SeqSBAIJ *b;
2779: PetscErrorCode ierr;
2780: PetscBool perm_identity,missing;
2781: PetscReal fill = info->fill;
2782: const PetscInt *rip,*riip;
2783: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2784: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2785: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2786: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2787: PetscBT lnkbt;
2788: IS iperm;
2791: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2792: MatMissingDiagonal(A,&missing,&i);
2793: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2795: /* check whether perm is the identity mapping */
2796: ISIdentity(perm,&perm_identity);
2797: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2798: ISGetIndices(iperm,&riip);
2799: ISGetIndices(perm,&rip);
2801: /* initialization */
2802: PetscMalloc1(am+1,&ui);
2803: PetscMalloc1(am+1,&udiag);
2804: ui[0] = 0;
2806: /* jl: linked list for storing indices of the pivot rows
2807: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2808: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2809: for (i=0; i<am; i++) {
2810: jl[i] = am; il[i] = 0;
2811: }
2813: /* create and initialize a linked list for storing column indices of the active row k */
2814: nlnk = am + 1;
2815: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2817: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2818: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);
2819: current_space = free_space;
2821: for (k=0; k<am; k++) { /* for each active row k */
2822: /* initialize lnk by the column indices of row rip[k] of A */
2823: nzk = 0;
2824: ncols = ai[rip[k]+1] - ai[rip[k]];
2825: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2826: ncols_upper = 0;
2827: for (j=0; j<ncols; j++) {
2828: i = riip[*(aj + ai[rip[k]] + j)];
2829: if (i >= k) { /* only take upper triangular entry */
2830: cols[ncols_upper] = i;
2831: ncols_upper++;
2832: }
2833: }
2834: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2835: nzk += nlnk;
2837: /* update lnk by computing fill-in for each pivot row to be merged in */
2838: prow = jl[k]; /* 1st pivot row */
2840: while (prow < k) {
2841: nextprow = jl[prow];
2842: /* merge prow into k-th row */
2843: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2844: jmax = ui[prow+1];
2845: ncols = jmax-jmin;
2846: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2847: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
2848: nzk += nlnk;
2850: /* update il and jl for prow */
2851: if (jmin < jmax) {
2852: il[prow] = jmin;
2853: j = *uj_ptr;
2854: jl[prow] = jl[j];
2855: jl[j] = prow;
2856: }
2857: prow = nextprow;
2858: }
2860: /* if free space is not available, make more free space */
2861: if (current_space->local_remaining<nzk) {
2862: i = am - k + 1; /* num of unfactored rows */
2863: i = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2864: PetscFreeSpaceGet(i,¤t_space);
2865: reallocs++;
2866: }
2868: /* copy data into free space, then initialize lnk */
2869: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
2871: /* add the k-th row into il and jl */
2872: if (nzk > 1) {
2873: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2874: jl[k] = jl[i]; jl[i] = k;
2875: il[k] = ui[k] + 1;
2876: }
2877: ui_ptr[k] = current_space->array;
2879: current_space->array += nzk;
2880: current_space->local_used += nzk;
2881: current_space->local_remaining -= nzk;
2883: ui[k+1] = ui[k] + nzk;
2884: }
2886: ISRestoreIndices(perm,&rip);
2887: ISRestoreIndices(iperm,&riip);
2888: PetscFree4(ui_ptr,jl,il,cols);
2890: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2891: PetscMalloc1(ui[am]+1,&uj);
2892: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2893: PetscLLDestroy(lnk,lnkbt);
2895: /* put together the new matrix in MATSEQSBAIJ format */
2897: b = (Mat_SeqSBAIJ*)fact->data;
2898: b->singlemalloc = PETSC_FALSE;
2899: b->free_a = PETSC_TRUE;
2900: b->free_ij = PETSC_TRUE;
2902: PetscMalloc1(ui[am]+1,&b->a);
2904: b->j = uj;
2905: b->i = ui;
2906: b->diag = udiag;
2907: b->free_diag = PETSC_TRUE;
2908: b->ilen = NULL;
2909: b->imax = NULL;
2910: b->row = perm;
2911: b->col = perm;
2913: PetscObjectReference((PetscObject)perm);
2914: PetscObjectReference((PetscObject)perm);
2916: b->icol = iperm;
2917: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2919: PetscMalloc1(am+1,&b->solve_work);
2920: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2922: b->maxnz = b->nz = ui[am];
2924: fact->info.factor_mallocs = reallocs;
2925: fact->info.fill_ratio_given = fill;
2926: if (ai[am] != 0) {
2927: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2928: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2929: } else {
2930: fact->info.fill_ratio_needed = 0.0;
2931: }
2932: #if defined(PETSC_USE_INFO)
2933: if (ai[am] != 0) {
2934: PetscReal af = fact->info.fill_ratio_needed;
2935: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2936: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2937: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2938: } else {
2939: PetscInfo(A,"Empty matrix\n");
2940: }
2941: #endif
2942: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2943: return(0);
2944: }
2946: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2947: {
2948: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2949: Mat_SeqSBAIJ *b;
2950: PetscErrorCode ierr;
2951: PetscBool perm_identity,missing;
2952: PetscReal fill = info->fill;
2953: const PetscInt *rip,*riip;
2954: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2955: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2956: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2957: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2958: PetscBT lnkbt;
2959: IS iperm;
2962: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2963: MatMissingDiagonal(A,&missing,&i);
2964: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2966: /* check whether perm is the identity mapping */
2967: ISIdentity(perm,&perm_identity);
2968: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2969: ISGetIndices(iperm,&riip);
2970: ISGetIndices(perm,&rip);
2972: /* initialization */
2973: PetscMalloc1(am+1,&ui);
2974: ui[0] = 0;
2976: /* jl: linked list for storing indices of the pivot rows
2977: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2978: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2979: for (i=0; i<am; i++) {
2980: jl[i] = am; il[i] = 0;
2981: }
2983: /* create and initialize a linked list for storing column indices of the active row k */
2984: nlnk = am + 1;
2985: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2987: /* initial FreeSpace size is fill*(ai[am]+1) */
2988: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);
2989: current_space = free_space;
2991: for (k=0; k<am; k++) { /* for each active row k */
2992: /* initialize lnk by the column indices of row rip[k] of A */
2993: nzk = 0;
2994: ncols = ai[rip[k]+1] - ai[rip[k]];
2995: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2996: ncols_upper = 0;
2997: for (j=0; j<ncols; j++) {
2998: i = riip[*(aj + ai[rip[k]] + j)];
2999: if (i >= k) { /* only take upper triangular entry */
3000: cols[ncols_upper] = i;
3001: ncols_upper++;
3002: }
3003: }
3004: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
3005: nzk += nlnk;
3007: /* update lnk by computing fill-in for each pivot row to be merged in */
3008: prow = jl[k]; /* 1st pivot row */
3010: while (prow < k) {
3011: nextprow = jl[prow];
3012: /* merge prow into k-th row */
3013: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3014: jmax = ui[prow+1];
3015: ncols = jmax-jmin;
3016: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3017: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
3018: nzk += nlnk;
3020: /* update il and jl for prow */
3021: if (jmin < jmax) {
3022: il[prow] = jmin;
3023: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3024: }
3025: prow = nextprow;
3026: }
3028: /* if free space is not available, make more free space */
3029: if (current_space->local_remaining<nzk) {
3030: i = am - k + 1; /* num of unfactored rows */
3031: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3032: PetscFreeSpaceGet(i,¤t_space);
3033: reallocs++;
3034: }
3036: /* copy data into free space, then initialize lnk */
3037: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
3039: /* add the k-th row into il and jl */
3040: if (nzk-1 > 0) {
3041: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3042: jl[k] = jl[i]; jl[i] = k;
3043: il[k] = ui[k] + 1;
3044: }
3045: ui_ptr[k] = current_space->array;
3047: current_space->array += nzk;
3048: current_space->local_used += nzk;
3049: current_space->local_remaining -= nzk;
3051: ui[k+1] = ui[k] + nzk;
3052: }
3054: #if defined(PETSC_USE_INFO)
3055: if (ai[am] != 0) {
3056: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3057: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
3058: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
3059: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
3060: } else {
3061: PetscInfo(A,"Empty matrix\n");
3062: }
3063: #endif
3065: ISRestoreIndices(perm,&rip);
3066: ISRestoreIndices(iperm,&riip);
3067: PetscFree4(ui_ptr,jl,il,cols);
3069: /* destroy list of free space and other temporary array(s) */
3070: PetscMalloc1(ui[am]+1,&uj);
3071: PetscFreeSpaceContiguous(&free_space,uj);
3072: PetscLLDestroy(lnk,lnkbt);
3074: /* put together the new matrix in MATSEQSBAIJ format */
3076: b = (Mat_SeqSBAIJ*)fact->data;
3077: b->singlemalloc = PETSC_FALSE;
3078: b->free_a = PETSC_TRUE;
3079: b->free_ij = PETSC_TRUE;
3081: PetscMalloc1(ui[am]+1,&b->a);
3083: b->j = uj;
3084: b->i = ui;
3085: b->diag = NULL;
3086: b->ilen = NULL;
3087: b->imax = NULL;
3088: b->row = perm;
3089: b->col = perm;
3091: PetscObjectReference((PetscObject)perm);
3092: PetscObjectReference((PetscObject)perm);
3094: b->icol = iperm;
3095: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3097: PetscMalloc1(am+1,&b->solve_work);
3098: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
3099: b->maxnz = b->nz = ui[am];
3101: fact->info.factor_mallocs = reallocs;
3102: fact->info.fill_ratio_given = fill;
3103: if (ai[am] != 0) {
3104: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3105: } else {
3106: fact->info.fill_ratio_needed = 0.0;
3107: }
3108: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3109: return(0);
3110: }
3112: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3113: {
3114: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3115: PetscErrorCode ierr;
3116: PetscInt n = A->rmap->n;
3117: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3118: PetscScalar *x,sum;
3119: const PetscScalar *b;
3120: const MatScalar *aa = a->a,*v;
3121: PetscInt i,nz;
3124: if (!n) return(0);
3126: VecGetArrayRead(bb,&b);
3127: VecGetArrayWrite(xx,&x);
3129: /* forward solve the lower triangular */
3130: x[0] = b[0];
3131: v = aa;
3132: vi = aj;
3133: for (i=1; i<n; i++) {
3134: nz = ai[i+1] - ai[i];
3135: sum = b[i];
3136: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3137: v += nz;
3138: vi += nz;
3139: x[i] = sum;
3140: }
3142: /* backward solve the upper triangular */
3143: for (i=n-1; i>=0; i--) {
3144: v = aa + adiag[i+1] + 1;
3145: vi = aj + adiag[i+1] + 1;
3146: nz = adiag[i] - adiag[i+1]-1;
3147: sum = x[i];
3148: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3149: x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3150: }
3152: PetscLogFlops(2.0*a->nz - A->cmap->n);
3153: VecRestoreArrayRead(bb,&b);
3154: VecRestoreArrayWrite(xx,&x);
3155: return(0);
3156: }
3158: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3159: {
3160: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3161: IS iscol = a->col,isrow = a->row;
3162: PetscErrorCode ierr;
3163: PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3164: const PetscInt *rout,*cout,*r,*c;
3165: PetscScalar *x,*tmp,sum;
3166: const PetscScalar *b;
3167: const MatScalar *aa = a->a,*v;
3170: if (!n) return(0);
3172: VecGetArrayRead(bb,&b);
3173: VecGetArrayWrite(xx,&x);
3174: tmp = a->solve_work;
3176: ISGetIndices(isrow,&rout); r = rout;
3177: ISGetIndices(iscol,&cout); c = cout;
3179: /* forward solve the lower triangular */
3180: tmp[0] = b[r[0]];
3181: v = aa;
3182: vi = aj;
3183: for (i=1; i<n; i++) {
3184: nz = ai[i+1] - ai[i];
3185: sum = b[r[i]];
3186: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3187: tmp[i] = sum;
3188: v += nz; vi += nz;
3189: }
3191: /* backward solve the upper triangular */
3192: for (i=n-1; i>=0; i--) {
3193: v = aa + adiag[i+1]+1;
3194: vi = aj + adiag[i+1]+1;
3195: nz = adiag[i]-adiag[i+1]-1;
3196: sum = tmp[i];
3197: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3198: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3199: }
3201: ISRestoreIndices(isrow,&rout);
3202: ISRestoreIndices(iscol,&cout);
3203: VecRestoreArrayRead(bb,&b);
3204: VecRestoreArrayWrite(xx,&x);
3205: PetscLogFlops(2.0*a->nz - A->cmap->n);
3206: return(0);
3207: }
3209: /*
3210: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3211: */
3212: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3213: {
3214: Mat B = *fact;
3215: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b;
3216: IS isicol;
3218: const PetscInt *r,*ic;
3219: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3220: PetscInt *bi,*bj,*bdiag,*bdiag_rev;
3221: PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3222: PetscInt nlnk,*lnk;
3223: PetscBT lnkbt;
3224: PetscBool row_identity,icol_identity;
3225: MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3226: const PetscInt *ics;
3227: PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3228: PetscReal dt =info->dt,shift=info->shiftamount;
3229: PetscInt dtcount=(PetscInt)info->dtcount,nnz_max;
3230: PetscBool missing;
3233: if (dt == PETSC_DEFAULT) dt = 0.005;
3234: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3236: /* ------- symbolic factorization, can be reused ---------*/
3237: MatMissingDiagonal(A,&missing,&i);
3238: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3239: adiag=a->diag;
3241: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
3243: /* bdiag is location of diagonal in factor */
3244: PetscMalloc1(n+1,&bdiag); /* becomes b->diag */
3245: PetscMalloc1(n+1,&bdiag_rev); /* temporary */
3247: /* allocate row pointers bi */
3248: PetscMalloc1(2*n+2,&bi);
3250: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3251: if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3252: nnz_max = ai[n]+2*n*dtcount+2;
3254: PetscMalloc1(nnz_max+1,&bj);
3255: PetscMalloc1(nnz_max+1,&ba);
3257: /* put together the new matrix */
3258: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
3259: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
3260: b = (Mat_SeqAIJ*)B->data;
3262: b->free_a = PETSC_TRUE;
3263: b->free_ij = PETSC_TRUE;
3264: b->singlemalloc = PETSC_FALSE;
3266: b->a = ba;
3267: b->j = bj;
3268: b->i = bi;
3269: b->diag = bdiag;
3270: b->ilen = NULL;
3271: b->imax = NULL;
3272: b->row = isrow;
3273: b->col = iscol;
3274: PetscObjectReference((PetscObject)isrow);
3275: PetscObjectReference((PetscObject)iscol);
3276: b->icol = isicol;
3278: PetscMalloc1(n+1,&b->solve_work);
3279: PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
3280: b->maxnz = nnz_max;
3282: B->factortype = MAT_FACTOR_ILUDT;
3283: B->info.factor_mallocs = 0;
3284: B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3285: /* ------- end of symbolic factorization ---------*/
3287: ISGetIndices(isrow,&r);
3288: ISGetIndices(isicol,&ic);
3289: ics = ic;
3291: /* linked list for storing column indices of the active row */
3292: nlnk = n + 1;
3293: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
3295: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3296: PetscMalloc2(n,&im,n,&jtmp);
3297: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3298: PetscMalloc2(n,&rtmp,n,&vtmp);
3299: PetscArrayzero(rtmp,n);
3301: bi[0] = 0;
3302: bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3303: bdiag_rev[n] = bdiag[0];
3304: bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3305: for (i=0; i<n; i++) {
3306: /* copy initial fill into linked list */
3307: nzi = ai[r[i]+1] - ai[r[i]];
3308: if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3309: nzi_al = adiag[r[i]] - ai[r[i]];
3310: nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3311: ajtmp = aj + ai[r[i]];
3312: PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);
3314: /* load in initial (unfactored row) */
3315: aatmp = a->a + ai[r[i]];
3316: for (j=0; j<nzi; j++) {
3317: rtmp[ics[*ajtmp++]] = *aatmp++;
3318: }
3320: /* add pivot rows into linked list */
3321: row = lnk[n];
3322: while (row < i) {
3323: nzi_bl = bi[row+1] - bi[row] + 1;
3324: bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3325: PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);
3326: nzi += nlnk;
3327: row = lnk[row];
3328: }
3330: /* copy data from lnk into jtmp, then initialize lnk */
3331: PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);
3333: /* numerical factorization */
3334: bjtmp = jtmp;
3335: row = *bjtmp++; /* 1st pivot row */
3336: while (row < i) {
3337: pc = rtmp + row;
3338: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3339: multiplier = (*pc) * (*pv);
3340: *pc = multiplier;
3341: if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3342: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3343: pv = ba + bdiag[row+1] + 1;
3344: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3345: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3346: PetscLogFlops(1+2.0*nz);
3347: }
3348: row = *bjtmp++;
3349: }
3351: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3352: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3353: nzi_bl = 0; j = 0;
3354: while (jtmp[j] < i) { /* Note: jtmp is sorted */
3355: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3356: nzi_bl++; j++;
3357: }
3358: nzi_bu = nzi - nzi_bl -1;
3359: while (j < nzi) {
3360: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3361: j++;
3362: }
3364: bjtmp = bj + bi[i];
3365: batmp = ba + bi[i];
3366: /* apply level dropping rule to L part */
3367: ncut = nzi_al + dtcount;
3368: if (ncut < nzi_bl) {
3369: PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);
3370: PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
3371: } else {
3372: ncut = nzi_bl;
3373: }
3374: for (j=0; j<ncut; j++) {
3375: bjtmp[j] = jtmp[j];
3376: batmp[j] = vtmp[j];
3377: }
3378: bi[i+1] = bi[i] + ncut;
3379: nzi = ncut + 1;
3381: /* apply level dropping rule to U part */
3382: ncut = nzi_au + dtcount;
3383: if (ncut < nzi_bu) {
3384: PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);
3385: PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
3386: } else {
3387: ncut = nzi_bu;
3388: }
3389: nzi += ncut;
3391: /* mark bdiagonal */
3392: bdiag[i+1] = bdiag[i] - (ncut + 1);
3393: bdiag_rev[n-i-1] = bdiag[i+1];
3394: bi[2*n - i] = bi[2*n - i +1] - (ncut + 1);
3395: bjtmp = bj + bdiag[i];
3396: batmp = ba + bdiag[i];
3397: *bjtmp = i;
3398: *batmp = diag_tmp; /* rtmp[i]; */
3399: if (*batmp == 0.0) {
3400: *batmp = dt+shift;
3401: }
3402: *batmp = 1.0/(*batmp); /* invert diagonal entries for simpler triangular solves */
3404: bjtmp = bj + bdiag[i+1]+1;
3405: batmp = ba + bdiag[i+1]+1;
3406: for (k=0; k<ncut; k++) {
3407: bjtmp[k] = jtmp[nzi_bl+1+k];
3408: batmp[k] = vtmp[nzi_bl+1+k];
3409: }
3411: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3412: } /* for (i=0; i<n; i++) */
3413: if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3415: ISRestoreIndices(isrow,&r);
3416: ISRestoreIndices(isicol,&ic);
3418: PetscLLDestroy(lnk,lnkbt);
3419: PetscFree2(im,jtmp);
3420: PetscFree2(rtmp,vtmp);
3421: PetscFree(bdiag_rev);
3423: PetscLogFlops(B->cmap->n);
3424: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3426: ISIdentity(isrow,&row_identity);
3427: ISIdentity(isicol,&icol_identity);
3428: if (row_identity && icol_identity) {
3429: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3430: } else {
3431: B->ops->solve = MatSolve_SeqAIJ;
3432: }
3434: B->ops->solveadd = NULL;
3435: B->ops->solvetranspose = NULL;
3436: B->ops->solvetransposeadd = NULL;
3437: B->ops->matsolve = NULL;
3438: B->assembled = PETSC_TRUE;
3439: B->preallocated = PETSC_TRUE;
3440: return(0);
3441: }
3443: /* a wraper of MatILUDTFactor_SeqAIJ() */
3444: /*
3445: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3446: */
3448: PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3449: {
3453: MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);
3454: return(0);
3455: }
3457: /*
3458: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3459: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3460: */
3461: /*
3462: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3463: */
3465: PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3466: {
3467: Mat C =fact;
3468: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3469: IS isrow = b->row,isicol = b->icol;
3471: const PetscInt *r,*ic,*ics;
3472: PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3473: PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3474: MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3475: PetscReal dt=info->dt,shift=info->shiftamount;
3476: PetscBool row_identity, col_identity;
3479: ISGetIndices(isrow,&r);
3480: ISGetIndices(isicol,&ic);
3481: PetscMalloc1(n+1,&rtmp);
3482: ics = ic;
3484: for (i=0; i<n; i++) {
3485: /* initialize rtmp array */
3486: nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */
3487: bjtmp = bj + bi[i];
3488: for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3489: rtmp[i] = 0.0;
3490: nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3491: bjtmp = bj + bdiag[i+1] + 1;
3492: for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3494: /* load in initial unfactored row of A */
3495: nz = ai[r[i]+1] - ai[r[i]];
3496: ajtmp = aj + ai[r[i]];
3497: v = aa + ai[r[i]];
3498: for (j=0; j<nz; j++) {
3499: rtmp[ics[*ajtmp++]] = v[j];
3500: }
3502: /* numerical factorization */
3503: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3504: nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3505: k = 0;
3506: while (k < nzl) {
3507: row = *bjtmp++;
3508: pc = rtmp + row;
3509: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3510: multiplier = (*pc) * (*pv);
3511: *pc = multiplier;
3512: if (PetscAbsScalar(multiplier) > dt) {
3513: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3514: pv = b->a + bdiag[row+1] + 1;
3515: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3516: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3517: PetscLogFlops(1+2.0*nz);
3518: }
3519: k++;
3520: }
3522: /* finished row so stick it into b->a */
3523: /* L-part */
3524: pv = b->a + bi[i];
3525: pj = bj + bi[i];
3526: nzl = bi[i+1] - bi[i];
3527: for (j=0; j<nzl; j++) {
3528: pv[j] = rtmp[pj[j]];
3529: }
3531: /* diagonal: invert diagonal entries for simpler triangular solves */
3532: if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3533: b->a[bdiag[i]] = 1.0/rtmp[i];
3535: /* U-part */
3536: pv = b->a + bdiag[i+1] + 1;
3537: pj = bj + bdiag[i+1] + 1;
3538: nzu = bdiag[i] - bdiag[i+1] - 1;
3539: for (j=0; j<nzu; j++) {
3540: pv[j] = rtmp[pj[j]];
3541: }
3542: }
3544: PetscFree(rtmp);
3545: ISRestoreIndices(isicol,&ic);
3546: ISRestoreIndices(isrow,&r);
3548: ISIdentity(isrow,&row_identity);
3549: ISIdentity(isicol,&col_identity);
3550: if (row_identity && col_identity) {
3551: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3552: } else {
3553: C->ops->solve = MatSolve_SeqAIJ;
3554: }
3555: C->ops->solveadd = NULL;
3556: C->ops->solvetranspose = NULL;
3557: C->ops->solvetransposeadd = NULL;
3558: C->ops->matsolve = NULL;
3559: C->assembled = PETSC_TRUE;
3560: C->preallocated = PETSC_TRUE;
3562: PetscLogFlops(C->cmap->n);
3563: return(0);
3564: }