Actual source code: aijfact.c
petsc-3.5.4 2015-05-23
2: #include <../src/mat/impls/aij/seq/aij.h>
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <petscbt.h>
5: #include <../src/mat/utils/freespace.h>
9: /*
10: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
12: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
13: */
14: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
15: {
16: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data;
17: PetscErrorCode ierr;
18: PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
19: const PetscInt *ai = a->i, *aj = a->j;
20: const PetscScalar *aa = a->a;
21: PetscBool *done;
22: PetscReal best,past = 0,future;
25: /* pick initial row */
26: best = -1;
27: for (i=0; i<n; i++) {
28: future = 0.0;
29: for (j=ai[i]; j<ai[i+1]; j++) {
30: if (aj[j] != i) future += PetscAbsScalar(aa[j]);
31: else past = PetscAbsScalar(aa[j]);
32: }
33: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34: if (past/future > best) {
35: best = past/future;
36: current = i;
37: }
38: }
40: PetscMalloc1(n,&done);
41: PetscMemzero(done,n*sizeof(PetscBool));
42: PetscMalloc1(n,&order);
43: order[0] = current;
44: for (i=0; i<n-1; i++) {
45: done[current] = PETSC_TRUE;
46: best = -1;
47: /* loop over all neighbors of current pivot */
48: for (j=ai[current]; j<ai[current+1]; j++) {
49: jj = aj[j];
50: if (done[jj]) continue;
51: /* loop over columns of potential next row computing weights for below and above diagonal */
52: past = future = 0.0;
53: for (k=ai[jj]; k<ai[jj+1]; k++) {
54: kk = aj[k];
55: if (done[kk]) past += PetscAbsScalar(aa[k]);
56: else if (kk != jj) future += PetscAbsScalar(aa[k]);
57: }
58: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59: if (past/future > best) {
60: best = past/future;
61: newcurrent = jj;
62: }
63: }
64: if (best == -1) { /* no neighbors to select from so select best of all that remain */
65: best = -1;
66: for (k=0; k<n; k++) {
67: if (done[k]) continue;
68: future = 0.0;
69: past = 0.0;
70: for (j=ai[k]; j<ai[k+1]; j++) {
71: kk = aj[j];
72: if (done[kk]) past += PetscAbsScalar(aa[j]);
73: else if (kk != k) future += PetscAbsScalar(aa[j]);
74: }
75: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76: if (past/future > best) {
77: best = past/future;
78: newcurrent = k;
79: }
80: }
81: }
82: if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
83: current = newcurrent;
84: order[i+1] = current;
85: }
86: ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);
87: *icol = *irow;
88: PetscObjectReference((PetscObject)*irow);
89: PetscFree(done);
90: PetscFree(order);
91: return(0);
92: }
96: PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg)
97: {
99: *flg = PETSC_TRUE;
100: return(0);
101: }
105: PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
106: {
107: PetscInt n = A->rmap->n;
111: #if defined(PETSC_USE_COMPLEX)
112: if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
113: #endif
114: MatCreate(PetscObjectComm((PetscObject)A),B);
115: MatSetSizes(*B,n,n,n,n);
116: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
117: MatSetType(*B,MATSEQAIJ);
119: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
120: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
122: MatSetBlockSizesFromMats(*B,A,A);
123: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
124: MatSetType(*B,MATSEQSBAIJ);
125: MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);
127: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
128: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
129: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
130: (*B)->factortype = ftype;
131: return(0);
132: }
136: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
137: {
138: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
139: IS isicol;
140: PetscErrorCode ierr;
141: const PetscInt *r,*ic;
142: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
143: PetscInt *bi,*bj,*ajtmp;
144: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
145: PetscReal f;
146: PetscInt nlnk,*lnk,k,**bi_ptr;
147: PetscFreeSpaceList free_space=NULL,current_space=NULL;
148: PetscBT lnkbt;
149: PetscBool missing;
152: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
153: MatMissingDiagonal(A,&missing,&i);
154: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
156: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
157: ISGetIndices(isrow,&r);
158: ISGetIndices(isicol,&ic);
160: /* get new row pointers */
161: PetscMalloc1((n+1),&bi);
162: bi[0] = 0;
164: /* bdiag is location of diagonal in factor */
165: PetscMalloc1((n+1),&bdiag);
166: bdiag[0] = 0;
168: /* linked list for storing column indices of the active row */
169: nlnk = n + 1;
170: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
172: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
174: /* initial FreeSpace size is f*(ai[n]+1) */
175: f = info->fill;
176: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
177: current_space = free_space;
179: for (i=0; i<n; i++) {
180: /* copy previous fill into linked list */
181: nzi = 0;
182: nnz = ai[r[i]+1] - ai[r[i]];
183: ajtmp = aj + ai[r[i]];
184: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
185: nzi += nlnk;
187: /* add pivot rows into linked list */
188: row = lnk[n];
189: while (row < i) {
190: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
191: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
192: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
193: nzi += nlnk;
194: row = lnk[row];
195: }
196: bi[i+1] = bi[i] + nzi;
197: im[i] = nzi;
199: /* mark bdiag */
200: nzbd = 0;
201: nnz = nzi;
202: k = lnk[n];
203: while (nnz-- && k < i) {
204: nzbd++;
205: k = lnk[k];
206: }
207: bdiag[i] = bi[i] + nzbd;
209: /* if free space is not available, make more free space */
210: if (current_space->local_remaining<nzi) {
211: nnz = (n - i)*nzi; /* estimated and max additional space needed */
212: PetscFreeSpaceGet(nnz,¤t_space);
213: reallocs++;
214: }
216: /* copy data into free space, then initialize lnk */
217: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
219: bi_ptr[i] = current_space->array;
220: current_space->array += nzi;
221: current_space->local_used += nzi;
222: current_space->local_remaining -= nzi;
223: }
224: #if defined(PETSC_USE_INFO)
225: if (ai[n] != 0) {
226: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
227: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
228: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
229: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
230: PetscInfo(A,"for best performance.\n");
231: } else {
232: PetscInfo(A,"Empty matrix\n");
233: }
234: #endif
236: ISRestoreIndices(isrow,&r);
237: ISRestoreIndices(isicol,&ic);
239: /* destroy list of free space and other temporary array(s) */
240: PetscMalloc1((bi[n]+1),&bj);
241: PetscFreeSpaceContiguous(&free_space,bj);
242: PetscLLDestroy(lnk,lnkbt);
243: PetscFree2(bi_ptr,im);
245: /* put together the new matrix */
246: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
247: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
248: b = (Mat_SeqAIJ*)(B)->data;
250: b->free_a = PETSC_TRUE;
251: b->free_ij = PETSC_TRUE;
252: b->singlemalloc = PETSC_FALSE;
254: PetscMalloc1((bi[n]+1),&b->a);
255: b->j = bj;
256: b->i = bi;
257: b->diag = bdiag;
258: b->ilen = 0;
259: b->imax = 0;
260: b->row = isrow;
261: b->col = iscol;
262: PetscObjectReference((PetscObject)isrow);
263: PetscObjectReference((PetscObject)iscol);
264: b->icol = isicol;
265: PetscMalloc1((n+1),&b->solve_work);
267: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
268: PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
269: b->maxnz = b->nz = bi[n];
271: (B)->factortype = MAT_FACTOR_LU;
272: (B)->info.factor_mallocs = reallocs;
273: (B)->info.fill_ratio_given = f;
275: if (ai[n]) {
276: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
277: } else {
278: (B)->info.fill_ratio_needed = 0.0;
279: }
280: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
281: if (a->inode.size) {
282: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
283: }
284: return(0);
285: }
289: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
290: {
291: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
292: IS isicol;
293: PetscErrorCode ierr;
294: const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
295: PetscInt i,n=A->rmap->n;
296: PetscInt *bi,*bj;
297: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
298: PetscReal f;
299: PetscInt nlnk,*lnk,k,**bi_ptr;
300: PetscFreeSpaceList free_space=NULL,current_space=NULL;
301: PetscBT lnkbt;
302: PetscBool missing;
305: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
306: MatMissingDiagonal(A,&missing,&i);
307: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
308:
309: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
310: ISGetIndices(isrow,&r);
311: ISGetIndices(isicol,&ic);
313: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
314: PetscMalloc1((n+1),&bi);
315: PetscMalloc1((n+1),&bdiag);
316: bi[0] = bdiag[0] = 0;
318: /* linked list for storing column indices of the active row */
319: nlnk = n + 1;
320: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
322: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
324: /* initial FreeSpace size is f*(ai[n]+1) */
325: f = info->fill;
326: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
327: current_space = free_space;
329: for (i=0; i<n; i++) {
330: /* copy previous fill into linked list */
331: nzi = 0;
332: nnz = ai[r[i]+1] - ai[r[i]];
333: ajtmp = aj + ai[r[i]];
334: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
335: nzi += nlnk;
337: /* add pivot rows into linked list */
338: row = lnk[n];
339: while (row < i) {
340: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
341: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
342: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
343: nzi += nlnk;
344: row = lnk[row];
345: }
346: bi[i+1] = bi[i] + nzi;
347: im[i] = nzi;
349: /* mark bdiag */
350: nzbd = 0;
351: nnz = nzi;
352: k = lnk[n];
353: while (nnz-- && k < i) {
354: nzbd++;
355: k = lnk[k];
356: }
357: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
359: /* if free space is not available, make more free space */
360: if (current_space->local_remaining<nzi) {
361: nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
362: PetscFreeSpaceGet(nnz,¤t_space);
363: reallocs++;
364: }
366: /* copy data into free space, then initialize lnk */
367: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
369: bi_ptr[i] = current_space->array;
370: current_space->array += nzi;
371: current_space->local_used += nzi;
372: current_space->local_remaining -= nzi;
373: }
375: ISRestoreIndices(isrow,&r);
376: ISRestoreIndices(isicol,&ic);
378: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
379: PetscMalloc1((bi[n]+1),&bj);
380: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
381: PetscLLDestroy(lnk,lnkbt);
382: PetscFree2(bi_ptr,im);
384: /* put together the new matrix */
385: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
386: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
387: b = (Mat_SeqAIJ*)(B)->data;
389: b->free_a = PETSC_TRUE;
390: b->free_ij = PETSC_TRUE;
391: b->singlemalloc = PETSC_FALSE;
393: PetscMalloc1((bdiag[0]+1),&b->a);
395: b->j = bj;
396: b->i = bi;
397: b->diag = bdiag;
398: b->ilen = 0;
399: b->imax = 0;
400: b->row = isrow;
401: b->col = iscol;
402: PetscObjectReference((PetscObject)isrow);
403: PetscObjectReference((PetscObject)iscol);
404: b->icol = isicol;
405: PetscMalloc1((n+1),&b->solve_work);
407: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
408: PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
409: b->maxnz = b->nz = bdiag[0]+1;
411: B->factortype = MAT_FACTOR_LU;
412: B->info.factor_mallocs = reallocs;
413: B->info.fill_ratio_given = f;
415: if (ai[n]) {
416: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
417: } else {
418: B->info.fill_ratio_needed = 0.0;
419: }
420: #if defined(PETSC_USE_INFO)
421: if (ai[n] != 0) {
422: PetscReal af = B->info.fill_ratio_needed;
423: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
424: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
425: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
426: PetscInfo(A,"for best performance.\n");
427: } else {
428: PetscInfo(A,"Empty matrix\n");
429: }
430: #endif
431: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
432: if (a->inode.size) {
433: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
434: }
435: Mat_CheckInode_FactorLU(B);
436: return(0);
437: }
439: /*
440: Trouble in factorization, should we dump the original matrix?
441: */
444: PetscErrorCode MatFactorDumpMatrix(Mat A)
445: {
447: PetscBool flg = PETSC_FALSE;
450: PetscOptionsGetBool(NULL,"-mat_factor_dump_on_error",&flg,NULL);
451: if (flg) {
452: PetscViewer viewer;
453: char filename[PETSC_MAX_PATH_LEN];
455: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
456: PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);
457: MatView(A,viewer);
458: PetscViewerDestroy(&viewer);
459: }
460: return(0);
461: }
465: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
466: {
467: Mat C =B;
468: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
469: IS isrow = b->row,isicol = b->icol;
470: PetscErrorCode ierr;
471: const PetscInt *r,*ic,*ics;
472: const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
473: PetscInt i,j,k,nz,nzL,row,*pj;
474: const PetscInt *ajtmp,*bjtmp;
475: MatScalar *rtmp,*pc,multiplier,*pv;
476: const MatScalar *aa=a->a,*v;
477: PetscBool row_identity,col_identity;
478: FactorShiftCtx sctx;
479: const PetscInt *ddiag;
480: PetscReal rs;
481: MatScalar d;
484: /* MatPivotSetUp(): initialize shift context sctx */
485: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
487: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
488: ddiag = a->diag;
489: sctx.shift_top = info->zeropivot;
490: for (i=0; i<n; i++) {
491: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
492: d = (aa)[ddiag[i]];
493: rs = -PetscAbsScalar(d) - PetscRealPart(d);
494: v = aa+ai[i];
495: nz = ai[i+1] - ai[i];
496: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
497: if (rs>sctx.shift_top) sctx.shift_top = rs;
498: }
499: sctx.shift_top *= 1.1;
500: sctx.nshift_max = 5;
501: sctx.shift_lo = 0.;
502: sctx.shift_hi = 1.;
503: }
505: ISGetIndices(isrow,&r);
506: ISGetIndices(isicol,&ic);
507: PetscMalloc1((n+1),&rtmp);
508: ics = ic;
510: do {
511: sctx.newshift = PETSC_FALSE;
512: for (i=0; i<n; i++) {
513: /* zero rtmp */
514: /* L part */
515: nz = bi[i+1] - bi[i];
516: bjtmp = bj + bi[i];
517: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
519: /* U part */
520: nz = bdiag[i]-bdiag[i+1];
521: bjtmp = bj + bdiag[i+1]+1;
522: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
524: /* load in initial (unfactored row) */
525: nz = ai[r[i]+1] - ai[r[i]];
526: ajtmp = aj + ai[r[i]];
527: v = aa + ai[r[i]];
528: for (j=0; j<nz; j++) {
529: rtmp[ics[ajtmp[j]]] = v[j];
530: }
531: /* ZeropivotApply() */
532: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
534: /* elimination */
535: bjtmp = bj + bi[i];
536: row = *bjtmp++;
537: nzL = bi[i+1] - bi[i];
538: for (k=0; k < nzL; k++) {
539: pc = rtmp + row;
540: if (*pc != 0.0) {
541: pv = b->a + bdiag[row];
542: multiplier = *pc * (*pv);
543: *pc = multiplier;
545: pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
546: pv = b->a + bdiag[row+1]+1;
547: nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
549: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
550: PetscLogFlops(1+2*nz);
551: }
552: row = *bjtmp++;
553: }
555: /* finished row so stick it into b->a */
556: rs = 0.0;
557: /* L part */
558: pv = b->a + bi[i];
559: pj = b->j + bi[i];
560: nz = bi[i+1] - bi[i];
561: for (j=0; j<nz; j++) {
562: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
563: }
565: /* U part */
566: pv = b->a + bdiag[i+1]+1;
567: pj = b->j + bdiag[i+1]+1;
568: nz = bdiag[i] - bdiag[i+1]-1;
569: for (j=0; j<nz; j++) {
570: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
571: }
573: sctx.rs = rs;
574: sctx.pv = rtmp[i];
575: MatPivotCheck(A,info,&sctx,i);
576: if (sctx.newshift) break; /* break for-loop */
577: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
579: /* Mark diagonal and invert diagonal for simplier triangular solves */
580: pv = b->a + bdiag[i];
581: *pv = 1.0/rtmp[i];
583: } /* endof for (i=0; i<n; i++) { */
585: /* MatPivotRefine() */
586: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
587: /*
588: * if no shift in this attempt & shifting & started shifting & can refine,
589: * then try lower shift
590: */
591: sctx.shift_hi = sctx.shift_fraction;
592: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
593: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
594: sctx.newshift = PETSC_TRUE;
595: sctx.nshift++;
596: }
597: } while (sctx.newshift);
599: PetscFree(rtmp);
600: ISRestoreIndices(isicol,&ic);
601: ISRestoreIndices(isrow,&r);
603: ISIdentity(isrow,&row_identity);
604: ISIdentity(isicol,&col_identity);
605: if (b->inode.size) {
606: C->ops->solve = MatSolve_SeqAIJ_Inode;
607: } else if (row_identity && col_identity) {
608: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
609: } else {
610: C->ops->solve = MatSolve_SeqAIJ;
611: }
612: C->ops->solveadd = MatSolveAdd_SeqAIJ;
613: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
614: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
615: C->ops->matsolve = MatMatSolve_SeqAIJ;
616: C->assembled = PETSC_TRUE;
617: C->preallocated = PETSC_TRUE;
619: PetscLogFlops(C->cmap->n);
621: /* MatShiftView(A,info,&sctx) */
622: if (sctx.nshift) {
623: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
624: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
625: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
626: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
627: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
628: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
629: }
630: }
631: return(0);
632: }
636: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
637: {
638: Mat C =B;
639: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
640: IS isrow = b->row,isicol = b->icol;
641: PetscErrorCode ierr;
642: const PetscInt *r,*ic,*ics;
643: PetscInt nz,row,i,j,n=A->rmap->n,diag;
644: const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
645: const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
646: MatScalar *pv,*rtmp,*pc,multiplier,d;
647: const MatScalar *v,*aa=a->a;
648: PetscReal rs=0.0;
649: FactorShiftCtx sctx;
650: const PetscInt *ddiag;
651: PetscBool row_identity, col_identity;
654: /* MatPivotSetUp(): initialize shift context sctx */
655: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
657: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
658: ddiag = a->diag;
659: sctx.shift_top = info->zeropivot;
660: for (i=0; i<n; i++) {
661: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
662: d = (aa)[ddiag[i]];
663: rs = -PetscAbsScalar(d) - PetscRealPart(d);
664: v = aa+ai[i];
665: nz = ai[i+1] - ai[i];
666: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
667: if (rs>sctx.shift_top) sctx.shift_top = rs;
668: }
669: sctx.shift_top *= 1.1;
670: sctx.nshift_max = 5;
671: sctx.shift_lo = 0.;
672: sctx.shift_hi = 1.;
673: }
675: ISGetIndices(isrow,&r);
676: ISGetIndices(isicol,&ic);
677: PetscMalloc1((n+1),&rtmp);
678: ics = ic;
680: do {
681: sctx.newshift = PETSC_FALSE;
682: for (i=0; i<n; i++) {
683: nz = bi[i+1] - bi[i];
684: bjtmp = bj + bi[i];
685: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
687: /* load in initial (unfactored row) */
688: nz = ai[r[i]+1] - ai[r[i]];
689: ajtmp = aj + ai[r[i]];
690: v = aa + ai[r[i]];
691: for (j=0; j<nz; j++) {
692: rtmp[ics[ajtmp[j]]] = v[j];
693: }
694: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
696: row = *bjtmp++;
697: while (row < i) {
698: pc = rtmp + row;
699: if (*pc != 0.0) {
700: pv = b->a + diag_offset[row];
701: pj = b->j + diag_offset[row] + 1;
702: multiplier = *pc / *pv++;
703: *pc = multiplier;
704: nz = bi[row+1] - diag_offset[row] - 1;
705: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
706: PetscLogFlops(1+2*nz);
707: }
708: row = *bjtmp++;
709: }
710: /* finished row so stick it into b->a */
711: pv = b->a + bi[i];
712: pj = b->j + bi[i];
713: nz = bi[i+1] - bi[i];
714: diag = diag_offset[i] - bi[i];
715: rs = 0.0;
716: for (j=0; j<nz; j++) {
717: pv[j] = rtmp[pj[j]];
718: rs += PetscAbsScalar(pv[j]);
719: }
720: rs -= PetscAbsScalar(pv[diag]);
722: sctx.rs = rs;
723: sctx.pv = pv[diag];
724: MatPivotCheck(A,info,&sctx,i);
725: if (sctx.newshift) break;
726: pv[diag] = sctx.pv;
727: }
729: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
730: /*
731: * if no shift in this attempt & shifting & started shifting & can refine,
732: * then try lower shift
733: */
734: sctx.shift_hi = sctx.shift_fraction;
735: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
736: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
737: sctx.newshift = PETSC_TRUE;
738: sctx.nshift++;
739: }
740: } while (sctx.newshift);
742: /* invert diagonal entries for simplier triangular solves */
743: for (i=0; i<n; i++) {
744: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
745: }
746: PetscFree(rtmp);
747: ISRestoreIndices(isicol,&ic);
748: ISRestoreIndices(isrow,&r);
750: ISIdentity(isrow,&row_identity);
751: ISIdentity(isicol,&col_identity);
752: if (row_identity && col_identity) {
753: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
754: } else {
755: C->ops->solve = MatSolve_SeqAIJ_inplace;
756: }
757: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
758: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
759: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
760: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
762: C->assembled = PETSC_TRUE;
763: C->preallocated = PETSC_TRUE;
765: PetscLogFlops(C->cmap->n);
766: if (sctx.nshift) {
767: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
768: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
769: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
770: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
771: }
772: }
773: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
774: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
776: Mat_CheckInode(C,PETSC_FALSE);
777: return(0);
778: }
780: /*
781: This routine implements inplace ILU(0) with row or/and column permutations.
782: Input:
783: A - original matrix
784: Output;
785: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
786: a->j (col index) is permuted by the inverse of colperm, then sorted
787: a->a reordered accordingly with a->j
788: a->diag (ptr to diagonal elements) is updated.
789: */
792: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
793: {
794: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data;
795: IS isrow = a->row,isicol = a->icol;
796: PetscErrorCode ierr;
797: const PetscInt *r,*ic,*ics;
798: PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
799: PetscInt *ajtmp,nz,row;
800: PetscInt *diag = a->diag,nbdiag,*pj;
801: PetscScalar *rtmp,*pc,multiplier,d;
802: MatScalar *pv,*v;
803: PetscReal rs;
804: FactorShiftCtx sctx;
805: const MatScalar *aa=a->a,*vtmp;
808: if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
810: /* MatPivotSetUp(): initialize shift context sctx */
811: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
813: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
814: const PetscInt *ddiag = a->diag;
815: sctx.shift_top = info->zeropivot;
816: for (i=0; i<n; i++) {
817: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
818: d = (aa)[ddiag[i]];
819: rs = -PetscAbsScalar(d) - PetscRealPart(d);
820: vtmp = aa+ai[i];
821: nz = ai[i+1] - ai[i];
822: for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
823: if (rs>sctx.shift_top) sctx.shift_top = rs;
824: }
825: sctx.shift_top *= 1.1;
826: sctx.nshift_max = 5;
827: sctx.shift_lo = 0.;
828: sctx.shift_hi = 1.;
829: }
831: ISGetIndices(isrow,&r);
832: ISGetIndices(isicol,&ic);
833: PetscMalloc1((n+1),&rtmp);
834: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
835: ics = ic;
837: #if defined(MV)
838: sctx.shift_top = 0.;
839: sctx.nshift_max = 0;
840: sctx.shift_lo = 0.;
841: sctx.shift_hi = 0.;
842: sctx.shift_fraction = 0.;
844: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
845: sctx.shift_top = 0.;
846: for (i=0; i<n; i++) {
847: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
848: d = (a->a)[diag[i]];
849: rs = -PetscAbsScalar(d) - PetscRealPart(d);
850: v = a->a+ai[i];
851: nz = ai[i+1] - ai[i];
852: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
853: if (rs>sctx.shift_top) sctx.shift_top = rs;
854: }
855: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
856: sctx.shift_top *= 1.1;
857: sctx.nshift_max = 5;
858: sctx.shift_lo = 0.;
859: sctx.shift_hi = 1.;
860: }
862: sctx.shift_amount = 0.;
863: sctx.nshift = 0;
864: #endif
866: do {
867: sctx.newshift = PETSC_FALSE;
868: for (i=0; i<n; i++) {
869: /* load in initial unfactored row */
870: nz = ai[r[i]+1] - ai[r[i]];
871: ajtmp = aj + ai[r[i]];
872: v = a->a + ai[r[i]];
873: /* sort permuted ajtmp and values v accordingly */
874: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
875: PetscSortIntWithScalarArray(nz,ajtmp,v);
877: diag[r[i]] = ai[r[i]];
878: for (j=0; j<nz; j++) {
879: rtmp[ajtmp[j]] = v[j];
880: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
881: }
882: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
884: row = *ajtmp++;
885: while (row < i) {
886: pc = rtmp + row;
887: if (*pc != 0.0) {
888: pv = a->a + diag[r[row]];
889: pj = aj + diag[r[row]] + 1;
891: multiplier = *pc / *pv++;
892: *pc = multiplier;
893: nz = ai[r[row]+1] - diag[r[row]] - 1;
894: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
895: PetscLogFlops(1+2*nz);
896: }
897: row = *ajtmp++;
898: }
899: /* finished row so overwrite it onto a->a */
900: pv = a->a + ai[r[i]];
901: pj = aj + ai[r[i]];
902: nz = ai[r[i]+1] - ai[r[i]];
903: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
905: rs = 0.0;
906: for (j=0; j<nz; j++) {
907: pv[j] = rtmp[pj[j]];
908: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
909: }
911: sctx.rs = rs;
912: sctx.pv = pv[nbdiag];
913: MatPivotCheck(A,info,&sctx,i);
914: if (sctx.newshift) break;
915: pv[nbdiag] = sctx.pv;
916: }
918: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
919: /*
920: * if no shift in this attempt & shifting & started shifting & can refine,
921: * then try lower shift
922: */
923: sctx.shift_hi = sctx.shift_fraction;
924: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
925: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
926: sctx.newshift = PETSC_TRUE;
927: sctx.nshift++;
928: }
929: } while (sctx.newshift);
931: /* invert diagonal entries for simplier triangular solves */
932: for (i=0; i<n; i++) {
933: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
934: }
936: PetscFree(rtmp);
937: ISRestoreIndices(isicol,&ic);
938: ISRestoreIndices(isrow,&r);
940: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
941: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
942: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
943: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
945: A->assembled = PETSC_TRUE;
946: A->preallocated = PETSC_TRUE;
948: PetscLogFlops(A->cmap->n);
949: if (sctx.nshift) {
950: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
951: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
952: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
953: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
954: }
955: }
956: return(0);
957: }
959: /* ----------------------------------------------------------- */
962: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
963: {
965: Mat C;
968: MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);
969: MatLUFactorSymbolic(C,A,row,col,info);
970: MatLUFactorNumeric(C,A,info);
972: A->ops->solve = C->ops->solve;
973: A->ops->solvetranspose = C->ops->solvetranspose;
975: MatHeaderMerge(A,C);
976: PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);
977: return(0);
978: }
979: /* ----------------------------------------------------------- */
984: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
985: {
986: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
987: IS iscol = a->col,isrow = a->row;
988: PetscErrorCode ierr;
989: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
990: PetscInt nz;
991: const PetscInt *rout,*cout,*r,*c;
992: PetscScalar *x,*tmp,*tmps,sum;
993: const PetscScalar *b;
994: const MatScalar *aa = a->a,*v;
997: if (!n) return(0);
999: VecGetArrayRead(bb,&b);
1000: VecGetArray(xx,&x);
1001: tmp = a->solve_work;
1003: ISGetIndices(isrow,&rout); r = rout;
1004: ISGetIndices(iscol,&cout); c = cout + (n-1);
1006: /* forward solve the lower triangular */
1007: tmp[0] = b[*r++];
1008: tmps = tmp;
1009: for (i=1; i<n; i++) {
1010: v = aa + ai[i];
1011: vi = aj + ai[i];
1012: nz = a->diag[i] - ai[i];
1013: sum = b[*r++];
1014: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1015: tmp[i] = sum;
1016: }
1018: /* backward solve the upper triangular */
1019: for (i=n-1; i>=0; i--) {
1020: v = aa + a->diag[i] + 1;
1021: vi = aj + a->diag[i] + 1;
1022: nz = ai[i+1] - a->diag[i] - 1;
1023: sum = tmp[i];
1024: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1025: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1026: }
1028: ISRestoreIndices(isrow,&rout);
1029: ISRestoreIndices(iscol,&cout);
1030: VecRestoreArrayRead(bb,&b);
1031: VecRestoreArray(xx,&x);
1032: PetscLogFlops(2.0*a->nz - A->cmap->n);
1033: return(0);
1034: }
1038: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1039: {
1040: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1041: IS iscol = a->col,isrow = a->row;
1042: PetscErrorCode ierr;
1043: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1044: PetscInt nz,neq;
1045: const PetscInt *rout,*cout,*r,*c;
1046: PetscScalar *x,*b,*tmp,*tmps,sum;
1047: const MatScalar *aa = a->a,*v;
1048: PetscBool bisdense,xisdense;
1051: if (!n) return(0);
1053: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1054: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1055: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1056: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1058: MatDenseGetArray(B,&b);
1059: MatDenseGetArray(X,&x);
1061: tmp = a->solve_work;
1062: ISGetIndices(isrow,&rout); r = rout;
1063: ISGetIndices(iscol,&cout); c = cout;
1065: for (neq=0; neq<B->cmap->n; neq++) {
1066: /* forward solve the lower triangular */
1067: tmp[0] = b[r[0]];
1068: tmps = tmp;
1069: for (i=1; i<n; i++) {
1070: v = aa + ai[i];
1071: vi = aj + ai[i];
1072: nz = a->diag[i] - ai[i];
1073: sum = b[r[i]];
1074: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1075: tmp[i] = sum;
1076: }
1077: /* backward solve the upper triangular */
1078: for (i=n-1; i>=0; i--) {
1079: v = aa + a->diag[i] + 1;
1080: vi = aj + a->diag[i] + 1;
1081: nz = ai[i+1] - a->diag[i] - 1;
1082: sum = tmp[i];
1083: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1084: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1085: }
1087: b += n;
1088: x += n;
1089: }
1090: ISRestoreIndices(isrow,&rout);
1091: ISRestoreIndices(iscol,&cout);
1092: MatDenseRestoreArray(B,&b);
1093: MatDenseRestoreArray(X,&x);
1094: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1095: return(0);
1096: }
1100: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1101: {
1102: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1103: IS iscol = a->col,isrow = a->row;
1104: PetscErrorCode ierr;
1105: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1106: PetscInt nz,neq;
1107: const PetscInt *rout,*cout,*r,*c;
1108: PetscScalar *x,*b,*tmp,sum;
1109: const MatScalar *aa = a->a,*v;
1110: PetscBool bisdense,xisdense;
1113: if (!n) return(0);
1115: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1116: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1117: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1118: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1120: MatDenseGetArray(B,&b);
1121: MatDenseGetArray(X,&x);
1123: tmp = a->solve_work;
1124: ISGetIndices(isrow,&rout); r = rout;
1125: ISGetIndices(iscol,&cout); c = cout;
1127: for (neq=0; neq<B->cmap->n; neq++) {
1128: /* forward solve the lower triangular */
1129: tmp[0] = b[r[0]];
1130: v = aa;
1131: vi = aj;
1132: for (i=1; i<n; i++) {
1133: nz = ai[i+1] - ai[i];
1134: sum = b[r[i]];
1135: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1136: tmp[i] = sum;
1137: v += nz; vi += nz;
1138: }
1140: /* backward solve the upper triangular */
1141: for (i=n-1; i>=0; i--) {
1142: v = aa + adiag[i+1]+1;
1143: vi = aj + adiag[i+1]+1;
1144: nz = adiag[i]-adiag[i+1]-1;
1145: sum = tmp[i];
1146: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1147: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1148: }
1150: b += n;
1151: x += n;
1152: }
1153: ISRestoreIndices(isrow,&rout);
1154: ISRestoreIndices(iscol,&cout);
1155: MatDenseRestoreArray(B,&b);
1156: MatDenseRestoreArray(X,&x);
1157: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1158: return(0);
1159: }
1163: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1164: {
1165: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1166: IS iscol = a->col,isrow = a->row;
1167: PetscErrorCode ierr;
1168: const PetscInt *r,*c,*rout,*cout;
1169: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1170: PetscInt nz,row;
1171: PetscScalar *x,*b,*tmp,*tmps,sum;
1172: const MatScalar *aa = a->a,*v;
1175: if (!n) return(0);
1177: VecGetArray(bb,&b);
1178: VecGetArray(xx,&x);
1179: tmp = a->solve_work;
1181: ISGetIndices(isrow,&rout); r = rout;
1182: ISGetIndices(iscol,&cout); c = cout + (n-1);
1184: /* forward solve the lower triangular */
1185: tmp[0] = b[*r++];
1186: tmps = tmp;
1187: for (row=1; row<n; row++) {
1188: i = rout[row]; /* permuted row */
1189: v = aa + ai[i];
1190: vi = aj + ai[i];
1191: nz = a->diag[i] - ai[i];
1192: sum = b[*r++];
1193: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1194: tmp[row] = sum;
1195: }
1197: /* backward solve the upper triangular */
1198: for (row=n-1; row>=0; row--) {
1199: i = rout[row]; /* permuted row */
1200: v = aa + a->diag[i] + 1;
1201: vi = aj + a->diag[i] + 1;
1202: nz = ai[i+1] - a->diag[i] - 1;
1203: sum = tmp[row];
1204: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1205: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1206: }
1208: ISRestoreIndices(isrow,&rout);
1209: ISRestoreIndices(iscol,&cout);
1210: VecRestoreArray(bb,&b);
1211: VecRestoreArray(xx,&x);
1212: PetscLogFlops(2.0*a->nz - A->cmap->n);
1213: return(0);
1214: }
1216: /* ----------------------------------------------------------- */
1217: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1220: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1221: {
1222: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1223: PetscErrorCode ierr;
1224: PetscInt n = A->rmap->n;
1225: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag;
1226: PetscScalar *x;
1227: const PetscScalar *b;
1228: const MatScalar *aa = a->a;
1229: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1230: PetscInt adiag_i,i,nz,ai_i;
1231: const PetscInt *vi;
1232: const MatScalar *v;
1233: PetscScalar sum;
1234: #endif
1237: if (!n) return(0);
1239: VecGetArrayRead(bb,&b);
1240: VecGetArray(xx,&x);
1242: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1243: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1244: #else
1245: /* forward solve the lower triangular */
1246: x[0] = b[0];
1247: for (i=1; i<n; i++) {
1248: ai_i = ai[i];
1249: v = aa + ai_i;
1250: vi = aj + ai_i;
1251: nz = adiag[i] - ai_i;
1252: sum = b[i];
1253: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1254: x[i] = sum;
1255: }
1257: /* backward solve the upper triangular */
1258: for (i=n-1; i>=0; i--) {
1259: adiag_i = adiag[i];
1260: v = aa + adiag_i + 1;
1261: vi = aj + adiag_i + 1;
1262: nz = ai[i+1] - adiag_i - 1;
1263: sum = x[i];
1264: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1265: x[i] = sum*aa[adiag_i];
1266: }
1267: #endif
1268: PetscLogFlops(2.0*a->nz - A->cmap->n);
1269: VecRestoreArrayRead(bb,&b);
1270: VecRestoreArray(xx,&x);
1271: return(0);
1272: }
1276: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1277: {
1278: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1279: IS iscol = a->col,isrow = a->row;
1280: PetscErrorCode ierr;
1281: PetscInt i, n = A->rmap->n,j;
1282: PetscInt nz;
1283: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1284: PetscScalar *x,*tmp,sum;
1285: const PetscScalar *b;
1286: const MatScalar *aa = a->a,*v;
1289: if (yy != xx) {VecCopy(yy,xx);}
1291: VecGetArrayRead(bb,&b);
1292: VecGetArray(xx,&x);
1293: tmp = a->solve_work;
1295: ISGetIndices(isrow,&rout); r = rout;
1296: ISGetIndices(iscol,&cout); c = cout + (n-1);
1298: /* forward solve the lower triangular */
1299: tmp[0] = b[*r++];
1300: for (i=1; i<n; i++) {
1301: v = aa + ai[i];
1302: vi = aj + ai[i];
1303: nz = a->diag[i] - ai[i];
1304: sum = b[*r++];
1305: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1306: tmp[i] = sum;
1307: }
1309: /* backward solve the upper triangular */
1310: for (i=n-1; i>=0; i--) {
1311: v = aa + a->diag[i] + 1;
1312: vi = aj + a->diag[i] + 1;
1313: nz = ai[i+1] - a->diag[i] - 1;
1314: sum = tmp[i];
1315: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1316: tmp[i] = sum*aa[a->diag[i]];
1317: x[*c--] += tmp[i];
1318: }
1320: ISRestoreIndices(isrow,&rout);
1321: ISRestoreIndices(iscol,&cout);
1322: VecRestoreArrayRead(bb,&b);
1323: VecRestoreArray(xx,&x);
1324: PetscLogFlops(2.0*a->nz);
1325: return(0);
1326: }
1330: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1331: {
1332: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1333: IS iscol = a->col,isrow = a->row;
1334: PetscErrorCode ierr;
1335: PetscInt i, n = A->rmap->n,j;
1336: PetscInt nz;
1337: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1338: PetscScalar *x,*tmp,sum;
1339: const PetscScalar *b;
1340: const MatScalar *aa = a->a,*v;
1343: if (yy != xx) {VecCopy(yy,xx);}
1345: VecGetArrayRead(bb,&b);
1346: VecGetArray(xx,&x);
1347: tmp = a->solve_work;
1349: ISGetIndices(isrow,&rout); r = rout;
1350: ISGetIndices(iscol,&cout); c = cout;
1352: /* forward solve the lower triangular */
1353: tmp[0] = b[r[0]];
1354: v = aa;
1355: vi = aj;
1356: for (i=1; i<n; i++) {
1357: nz = ai[i+1] - ai[i];
1358: sum = b[r[i]];
1359: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1360: tmp[i] = sum;
1361: v += nz;
1362: vi += nz;
1363: }
1365: /* backward solve the upper triangular */
1366: v = aa + adiag[n-1];
1367: vi = aj + adiag[n-1];
1368: for (i=n-1; i>=0; i--) {
1369: nz = adiag[i] - adiag[i+1] - 1;
1370: sum = tmp[i];
1371: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1372: tmp[i] = sum*v[nz];
1373: x[c[i]] += tmp[i];
1374: v += nz+1; vi += nz+1;
1375: }
1377: ISRestoreIndices(isrow,&rout);
1378: ISRestoreIndices(iscol,&cout);
1379: VecRestoreArrayRead(bb,&b);
1380: VecRestoreArray(xx,&x);
1381: PetscLogFlops(2.0*a->nz);
1382: return(0);
1383: }
1387: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1388: {
1389: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1390: IS iscol = a->col,isrow = a->row;
1391: PetscErrorCode ierr;
1392: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1393: PetscInt i,n = A->rmap->n,j;
1394: PetscInt nz;
1395: PetscScalar *x,*tmp,s1;
1396: const MatScalar *aa = a->a,*v;
1397: const PetscScalar *b;
1400: VecGetArrayRead(bb,&b);
1401: VecGetArray(xx,&x);
1402: tmp = a->solve_work;
1404: ISGetIndices(isrow,&rout); r = rout;
1405: ISGetIndices(iscol,&cout); c = cout;
1407: /* copy the b into temp work space according to permutation */
1408: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1410: /* forward solve the U^T */
1411: for (i=0; i<n; i++) {
1412: v = aa + diag[i];
1413: vi = aj + diag[i] + 1;
1414: nz = ai[i+1] - diag[i] - 1;
1415: s1 = tmp[i];
1416: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1417: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1418: tmp[i] = s1;
1419: }
1421: /* backward solve the L^T */
1422: for (i=n-1; i>=0; i--) {
1423: v = aa + diag[i] - 1;
1424: vi = aj + diag[i] - 1;
1425: nz = diag[i] - ai[i];
1426: s1 = tmp[i];
1427: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1428: }
1430: /* copy tmp into x according to permutation */
1431: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1433: ISRestoreIndices(isrow,&rout);
1434: ISRestoreIndices(iscol,&cout);
1435: VecRestoreArrayRead(bb,&b);
1436: VecRestoreArray(xx,&x);
1438: PetscLogFlops(2.0*a->nz-A->cmap->n);
1439: return(0);
1440: }
1444: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1445: {
1446: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1447: IS iscol = a->col,isrow = a->row;
1448: PetscErrorCode ierr;
1449: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1450: PetscInt i,n = A->rmap->n,j;
1451: PetscInt nz;
1452: PetscScalar *x,*tmp,s1;
1453: const MatScalar *aa = a->a,*v;
1454: const PetscScalar *b;
1457: VecGetArrayRead(bb,&b);
1458: VecGetArray(xx,&x);
1459: tmp = a->solve_work;
1461: ISGetIndices(isrow,&rout); r = rout;
1462: ISGetIndices(iscol,&cout); c = cout;
1464: /* copy the b into temp work space according to permutation */
1465: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1467: /* forward solve the U^T */
1468: for (i=0; i<n; i++) {
1469: v = aa + adiag[i+1] + 1;
1470: vi = aj + adiag[i+1] + 1;
1471: nz = adiag[i] - adiag[i+1] - 1;
1472: s1 = tmp[i];
1473: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1474: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1475: tmp[i] = s1;
1476: }
1478: /* backward solve the L^T */
1479: for (i=n-1; i>=0; i--) {
1480: v = aa + ai[i];
1481: vi = aj + ai[i];
1482: nz = ai[i+1] - ai[i];
1483: s1 = tmp[i];
1484: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1485: }
1487: /* copy tmp into x according to permutation */
1488: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1490: ISRestoreIndices(isrow,&rout);
1491: ISRestoreIndices(iscol,&cout);
1492: VecRestoreArrayRead(bb,&b);
1493: VecRestoreArray(xx,&x);
1495: PetscLogFlops(2.0*a->nz-A->cmap->n);
1496: return(0);
1497: }
1501: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1502: {
1503: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1504: IS iscol = a->col,isrow = a->row;
1505: PetscErrorCode ierr;
1506: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1507: PetscInt i,n = A->rmap->n,j;
1508: PetscInt nz;
1509: PetscScalar *x,*tmp,s1;
1510: const MatScalar *aa = a->a,*v;
1511: const PetscScalar *b;
1514: if (zz != xx) {VecCopy(zz,xx);}
1515: VecGetArrayRead(bb,&b);
1516: VecGetArray(xx,&x);
1517: tmp = a->solve_work;
1519: ISGetIndices(isrow,&rout); r = rout;
1520: ISGetIndices(iscol,&cout); c = cout;
1522: /* copy the b into temp work space according to permutation */
1523: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1525: /* forward solve the U^T */
1526: for (i=0; i<n; i++) {
1527: v = aa + diag[i];
1528: vi = aj + diag[i] + 1;
1529: nz = ai[i+1] - diag[i] - 1;
1530: s1 = tmp[i];
1531: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1532: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1533: tmp[i] = s1;
1534: }
1536: /* backward solve the L^T */
1537: for (i=n-1; i>=0; i--) {
1538: v = aa + diag[i] - 1;
1539: vi = aj + diag[i] - 1;
1540: nz = diag[i] - ai[i];
1541: s1 = tmp[i];
1542: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1543: }
1545: /* copy tmp into x according to permutation */
1546: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1548: ISRestoreIndices(isrow,&rout);
1549: ISRestoreIndices(iscol,&cout);
1550: VecRestoreArrayRead(bb,&b);
1551: VecRestoreArray(xx,&x);
1553: PetscLogFlops(2.0*a->nz-A->cmap->n);
1554: return(0);
1555: }
1559: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1560: {
1561: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1562: IS iscol = a->col,isrow = a->row;
1563: PetscErrorCode ierr;
1564: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1565: PetscInt i,n = A->rmap->n,j;
1566: PetscInt nz;
1567: PetscScalar *x,*tmp,s1;
1568: const MatScalar *aa = a->a,*v;
1569: const PetscScalar *b;
1572: if (zz != xx) {VecCopy(zz,xx);}
1573: VecGetArrayRead(bb,&b);
1574: VecGetArray(xx,&x);
1575: tmp = a->solve_work;
1577: ISGetIndices(isrow,&rout); r = rout;
1578: ISGetIndices(iscol,&cout); c = cout;
1580: /* copy the b into temp work space according to permutation */
1581: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1583: /* forward solve the U^T */
1584: for (i=0; i<n; i++) {
1585: v = aa + adiag[i+1] + 1;
1586: vi = aj + adiag[i+1] + 1;
1587: nz = adiag[i] - adiag[i+1] - 1;
1588: s1 = tmp[i];
1589: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1590: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1591: tmp[i] = s1;
1592: }
1595: /* backward solve the L^T */
1596: for (i=n-1; i>=0; i--) {
1597: v = aa + ai[i];
1598: vi = aj + ai[i];
1599: nz = ai[i+1] - ai[i];
1600: s1 = tmp[i];
1601: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1602: }
1604: /* copy tmp into x according to permutation */
1605: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1607: ISRestoreIndices(isrow,&rout);
1608: ISRestoreIndices(iscol,&cout);
1609: VecRestoreArrayRead(bb,&b);
1610: VecRestoreArray(xx,&x);
1612: PetscLogFlops(2.0*a->nz-A->cmap->n);
1613: return(0);
1614: }
1616: /* ----------------------------------------------------------------*/
1618: extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool);
1620: /*
1621: ilu() under revised new data structure.
1622: Factored arrays bj and ba are stored as
1623: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1625: bi=fact->i is an array of size n+1, in which
1626: bi+
1627: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1628: bi[n]: points to L(n-1,n-1)+1
1630: bdiag=fact->diag is an array of size n+1,in which
1631: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1632: bdiag[n]: points to entry of U(n-1,0)-1
1634: U(i,:) contains bdiag[i] as its last entry, i.e.,
1635: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1636: */
1639: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1640: {
1641: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1643: const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1644: PetscInt i,j,k=0,nz,*bi,*bj,*bdiag;
1645: IS isicol;
1648: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1649: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);
1650: b = (Mat_SeqAIJ*)(fact)->data;
1652: /* allocate matrix arrays for new data structure */
1653: PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);
1654: PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));
1656: b->singlemalloc = PETSC_TRUE;
1657: if (!b->diag) {
1658: PetscMalloc1((n+1),&b->diag);
1659: PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));
1660: }
1661: bdiag = b->diag;
1663: if (n > 0) {
1664: PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));
1665: }
1667: /* set bi and bj with new data structure */
1668: bi = b->i;
1669: bj = b->j;
1671: /* L part */
1672: bi[0] = 0;
1673: for (i=0; i<n; i++) {
1674: nz = adiag[i] - ai[i];
1675: bi[i+1] = bi[i] + nz;
1676: aj = a->j + ai[i];
1677: for (j=0; j<nz; j++) {
1678: /* *bj = aj[j]; bj++; */
1679: bj[k++] = aj[j];
1680: }
1681: }
1683: /* U part */
1684: bdiag[n] = bi[n]-1;
1685: for (i=n-1; i>=0; i--) {
1686: nz = ai[i+1] - adiag[i] - 1;
1687: aj = a->j + adiag[i] + 1;
1688: for (j=0; j<nz; j++) {
1689: /* *bj = aj[j]; bj++; */
1690: bj[k++] = aj[j];
1691: }
1692: /* diag[i] */
1693: /* *bj = i; bj++; */
1694: bj[k++] = i;
1695: bdiag[i] = bdiag[i+1] + nz + 1;
1696: }
1698: fact->factortype = MAT_FACTOR_ILU;
1699: fact->info.factor_mallocs = 0;
1700: fact->info.fill_ratio_given = info->fill;
1701: fact->info.fill_ratio_needed = 1.0;
1702: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1703: Mat_CheckInode_FactorLU(fact);
1705: b = (Mat_SeqAIJ*)(fact)->data;
1706: b->row = isrow;
1707: b->col = iscol;
1708: b->icol = isicol;
1709: PetscMalloc1((fact->rmap->n+1),&b->solve_work);
1710: PetscObjectReference((PetscObject)isrow);
1711: PetscObjectReference((PetscObject)iscol);
1712: return(0);
1713: }
1717: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1718: {
1719: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1720: IS isicol;
1721: PetscErrorCode ierr;
1722: const PetscInt *r,*ic;
1723: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1724: PetscInt *bi,*cols,nnz,*cols_lvl;
1725: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1726: PetscInt i,levels,diagonal_fill;
1727: PetscBool col_identity,row_identity,missing;
1728: PetscReal f;
1729: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1730: PetscBT lnkbt;
1731: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1732: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1733: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1736: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1737: MatMissingDiagonal(A,&missing,&i);
1738: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1739:
1740: levels = (PetscInt)info->levels;
1741: ISIdentity(isrow,&row_identity);
1742: ISIdentity(iscol,&col_identity);
1743: if (!levels && row_identity && col_identity) {
1744: /* special case: ilu(0) with natural ordering */
1745: MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);
1746: if (a->inode.size) {
1747: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1748: }
1749: return(0);
1750: }
1752: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1753: ISGetIndices(isrow,&r);
1754: ISGetIndices(isicol,&ic);
1756: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1757: PetscMalloc1((n+1),&bi);
1758: PetscMalloc1((n+1),&bdiag);
1759: bi[0] = bdiag[0] = 0;
1760: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1762: /* create a linked list for storing column indices of the active row */
1763: nlnk = n + 1;
1764: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1766: /* initial FreeSpace size is f*(ai[n]+1) */
1767: f = info->fill;
1768: diagonal_fill = (PetscInt)info->diagonal_fill;
1769: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1770: current_space = free_space;
1771: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1772: current_space_lvl = free_space_lvl;
1773: for (i=0; i<n; i++) {
1774: nzi = 0;
1775: /* copy current row into linked list */
1776: nnz = ai[r[i]+1] - ai[r[i]];
1777: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1778: cols = aj + ai[r[i]];
1779: lnk[i] = -1; /* marker to indicate if diagonal exists */
1780: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1781: nzi += nlnk;
1783: /* make sure diagonal entry is included */
1784: if (diagonal_fill && lnk[i] == -1) {
1785: fm = n;
1786: while (lnk[fm] < i) fm = lnk[fm];
1787: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1788: lnk[fm] = i;
1789: lnk_lvl[i] = 0;
1790: nzi++; dcount++;
1791: }
1793: /* add pivot rows into the active row */
1794: nzbd = 0;
1795: prow = lnk[n];
1796: while (prow < i) {
1797: nnz = bdiag[prow];
1798: cols = bj_ptr[prow] + nnz + 1;
1799: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1800: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1801: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1802: nzi += nlnk;
1803: prow = lnk[prow];
1804: nzbd++;
1805: }
1806: bdiag[i] = nzbd;
1807: bi[i+1] = bi[i] + nzi;
1808: /* if free space is not available, make more free space */
1809: if (current_space->local_remaining<nzi) {
1810: nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1811: PetscFreeSpaceGet(nnz,¤t_space);
1812: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1813: reallocs++;
1814: }
1816: /* copy data into free_space and free_space_lvl, then initialize lnk */
1817: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1818: bj_ptr[i] = current_space->array;
1819: bjlvl_ptr[i] = current_space_lvl->array;
1821: /* make sure the active row i has diagonal entry */
1822: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1824: current_space->array += nzi;
1825: current_space->local_used += nzi;
1826: current_space->local_remaining -= nzi;
1827: current_space_lvl->array += nzi;
1828: current_space_lvl->local_used += nzi;
1829: current_space_lvl->local_remaining -= nzi;
1830: }
1832: ISRestoreIndices(isrow,&r);
1833: ISRestoreIndices(isicol,&ic);
1834: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1835: PetscMalloc1((bi[n]+1),&bj);
1836: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
1838: PetscIncompleteLLDestroy(lnk,lnkbt);
1839: PetscFreeSpaceDestroy(free_space_lvl);
1840: PetscFree2(bj_ptr,bjlvl_ptr);
1842: #if defined(PETSC_USE_INFO)
1843: {
1844: PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1845: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
1846: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
1847: PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
1848: PetscInfo(A,"for best performance.\n");
1849: if (diagonal_fill) {
1850: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
1851: }
1852: }
1853: #endif
1854: /* put together the new matrix */
1855: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
1856: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
1857: b = (Mat_SeqAIJ*)(fact)->data;
1859: b->free_a = PETSC_TRUE;
1860: b->free_ij = PETSC_TRUE;
1861: b->singlemalloc = PETSC_FALSE;
1863: PetscMalloc1((bdiag[0]+1),&b->a);
1865: b->j = bj;
1866: b->i = bi;
1867: b->diag = bdiag;
1868: b->ilen = 0;
1869: b->imax = 0;
1870: b->row = isrow;
1871: b->col = iscol;
1872: PetscObjectReference((PetscObject)isrow);
1873: PetscObjectReference((PetscObject)iscol);
1874: b->icol = isicol;
1876: PetscMalloc1((n+1),&b->solve_work);
1877: /* In b structure: Free imax, ilen, old a, old j.
1878: Allocate bdiag, solve_work, new a, new j */
1879: PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
1880: b->maxnz = b->nz = bdiag[0]+1;
1882: (fact)->info.factor_mallocs = reallocs;
1883: (fact)->info.fill_ratio_given = f;
1884: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1885: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1886: if (a->inode.size) {
1887: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1888: }
1889: Mat_CheckInode_FactorLU(fact);
1890: return(0);
1891: }
1895: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1896: {
1897: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1898: IS isicol;
1899: PetscErrorCode ierr;
1900: const PetscInt *r,*ic;
1901: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1902: PetscInt *bi,*cols,nnz,*cols_lvl;
1903: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1904: PetscInt i,levels,diagonal_fill;
1905: PetscBool col_identity,row_identity;
1906: PetscReal f;
1907: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1908: PetscBT lnkbt;
1909: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1910: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1911: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1912: PetscBool missing;
1915: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1916: MatMissingDiagonal(A,&missing,&i);
1917: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1919: f = info->fill;
1920: levels = (PetscInt)info->levels;
1921: diagonal_fill = (PetscInt)info->diagonal_fill;
1923: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1925: ISIdentity(isrow,&row_identity);
1926: ISIdentity(iscol,&col_identity);
1927: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1928: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);
1930: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1931: if (a->inode.size) {
1932: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1933: }
1934: fact->factortype = MAT_FACTOR_ILU;
1935: (fact)->info.factor_mallocs = 0;
1936: (fact)->info.fill_ratio_given = info->fill;
1937: (fact)->info.fill_ratio_needed = 1.0;
1939: b = (Mat_SeqAIJ*)(fact)->data;
1940: b->row = isrow;
1941: b->col = iscol;
1942: b->icol = isicol;
1943: PetscMalloc1(((fact)->rmap->n+1),&b->solve_work);
1944: PetscObjectReference((PetscObject)isrow);
1945: PetscObjectReference((PetscObject)iscol);
1946: return(0);
1947: }
1949: ISGetIndices(isrow,&r);
1950: ISGetIndices(isicol,&ic);
1952: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1953: PetscMalloc1((n+1),&bi);
1954: PetscMalloc1((n+1),&bdiag);
1955: bi[0] = bdiag[0] = 0;
1957: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1959: /* create a linked list for storing column indices of the active row */
1960: nlnk = n + 1;
1961: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1963: /* initial FreeSpace size is f*(ai[n]+1) */
1964: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1965: current_space = free_space;
1966: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1967: current_space_lvl = free_space_lvl;
1969: for (i=0; i<n; i++) {
1970: nzi = 0;
1971: /* copy current row into linked list */
1972: nnz = ai[r[i]+1] - ai[r[i]];
1973: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1974: cols = aj + ai[r[i]];
1975: lnk[i] = -1; /* marker to indicate if diagonal exists */
1976: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1977: nzi += nlnk;
1979: /* make sure diagonal entry is included */
1980: if (diagonal_fill && lnk[i] == -1) {
1981: fm = n;
1982: while (lnk[fm] < i) fm = lnk[fm];
1983: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1984: lnk[fm] = i;
1985: lnk_lvl[i] = 0;
1986: nzi++; dcount++;
1987: }
1989: /* add pivot rows into the active row */
1990: nzbd = 0;
1991: prow = lnk[n];
1992: while (prow < i) {
1993: nnz = bdiag[prow];
1994: cols = bj_ptr[prow] + nnz + 1;
1995: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1996: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1997: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1998: nzi += nlnk;
1999: prow = lnk[prow];
2000: nzbd++;
2001: }
2002: bdiag[i] = nzbd;
2003: bi[i+1] = bi[i] + nzi;
2005: /* if free space is not available, make more free space */
2006: if (current_space->local_remaining<nzi) {
2007: nnz = nzi*(n - i); /* estimated and max additional space needed */
2008: PetscFreeSpaceGet(nnz,¤t_space);
2009: PetscFreeSpaceGet(nnz,¤t_space_lvl);
2010: reallocs++;
2011: }
2013: /* copy data into free_space and free_space_lvl, then initialize lnk */
2014: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2015: bj_ptr[i] = current_space->array;
2016: bjlvl_ptr[i] = current_space_lvl->array;
2018: /* make sure the active row i has diagonal entry */
2019: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2021: current_space->array += nzi;
2022: current_space->local_used += nzi;
2023: current_space->local_remaining -= nzi;
2024: current_space_lvl->array += nzi;
2025: current_space_lvl->local_used += nzi;
2026: current_space_lvl->local_remaining -= nzi;
2027: }
2029: ISRestoreIndices(isrow,&r);
2030: ISRestoreIndices(isicol,&ic);
2032: /* destroy list of free space and other temporary arrays */
2033: PetscMalloc1((bi[n]+1),&bj);
2034: PetscFreeSpaceContiguous(&free_space,bj); /* copy free_space -> bj */
2035: PetscIncompleteLLDestroy(lnk,lnkbt);
2036: PetscFreeSpaceDestroy(free_space_lvl);
2037: PetscFree2(bj_ptr,bjlvl_ptr);
2039: #if defined(PETSC_USE_INFO)
2040: {
2041: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2042: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
2043: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
2044: PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
2045: PetscInfo(A,"for best performance.\n");
2046: if (diagonal_fill) {
2047: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
2048: }
2049: }
2050: #endif
2052: /* put together the new matrix */
2053: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
2054: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
2055: b = (Mat_SeqAIJ*)(fact)->data;
2057: b->free_a = PETSC_TRUE;
2058: b->free_ij = PETSC_TRUE;
2059: b->singlemalloc = PETSC_FALSE;
2061: PetscMalloc1(bi[n],&b->a);
2062: b->j = bj;
2063: b->i = bi;
2064: for (i=0; i<n; i++) bdiag[i] += bi[i];
2065: b->diag = bdiag;
2066: b->ilen = 0;
2067: b->imax = 0;
2068: b->row = isrow;
2069: b->col = iscol;
2070: PetscObjectReference((PetscObject)isrow);
2071: PetscObjectReference((PetscObject)iscol);
2072: b->icol = isicol;
2073: PetscMalloc1((n+1),&b->solve_work);
2074: /* In b structure: Free imax, ilen, old a, old j.
2075: Allocate bdiag, solve_work, new a, new j */
2076: PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
2077: b->maxnz = b->nz = bi[n];
2079: (fact)->info.factor_mallocs = reallocs;
2080: (fact)->info.fill_ratio_given = f;
2081: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2082: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2083: if (a->inode.size) {
2084: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2085: }
2086: return(0);
2087: }
2091: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2092: {
2093: Mat C = B;
2094: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2095: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2096: IS ip=b->row,iip = b->icol;
2098: const PetscInt *rip,*riip;
2099: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2100: PetscInt *ai=a->i,*aj=a->j;
2101: PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2102: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2103: PetscBool perm_identity;
2104: FactorShiftCtx sctx;
2105: PetscReal rs;
2106: MatScalar d,*v;
2109: /* MatPivotSetUp(): initialize shift context sctx */
2110: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2112: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2113: sctx.shift_top = info->zeropivot;
2114: for (i=0; i<mbs; i++) {
2115: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2116: d = (aa)[a->diag[i]];
2117: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2118: v = aa+ai[i];
2119: nz = ai[i+1] - ai[i];
2120: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2121: if (rs>sctx.shift_top) sctx.shift_top = rs;
2122: }
2123: sctx.shift_top *= 1.1;
2124: sctx.nshift_max = 5;
2125: sctx.shift_lo = 0.;
2126: sctx.shift_hi = 1.;
2127: }
2129: ISGetIndices(ip,&rip);
2130: ISGetIndices(iip,&riip);
2132: /* allocate working arrays
2133: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2134: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2135: */
2136: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);
2138: do {
2139: sctx.newshift = PETSC_FALSE;
2141: for (i=0; i<mbs; i++) c2r[i] = mbs;
2142: if (mbs) il[0] = 0;
2144: for (k = 0; k<mbs; k++) {
2145: /* zero rtmp */
2146: nz = bi[k+1] - bi[k];
2147: bjtmp = bj + bi[k];
2148: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2150: /* load in initial unfactored row */
2151: bval = ba + bi[k];
2152: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2153: for (j = jmin; j < jmax; j++) {
2154: col = riip[aj[j]];
2155: if (col >= k) { /* only take upper triangular entry */
2156: rtmp[col] = aa[j];
2157: *bval++ = 0.0; /* for in-place factorization */
2158: }
2159: }
2160: /* shift the diagonal of the matrix: ZeropivotApply() */
2161: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2163: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2164: dk = rtmp[k];
2165: i = c2r[k]; /* first row to be added to k_th row */
2167: while (i < k) {
2168: nexti = c2r[i]; /* next row to be added to k_th row */
2170: /* compute multiplier, update diag(k) and U(i,k) */
2171: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2172: uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2173: dk += uikdi*ba[ili]; /* update diag[k] */
2174: ba[ili] = uikdi; /* -U(i,k) */
2176: /* add multiple of row i to k-th row */
2177: jmin = ili + 1; jmax = bi[i+1];
2178: if (jmin < jmax) {
2179: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2180: /* update il and c2r for row i */
2181: il[i] = jmin;
2182: j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2183: }
2184: i = nexti;
2185: }
2187: /* copy data into U(k,:) */
2188: rs = 0.0;
2189: jmin = bi[k]; jmax = bi[k+1]-1;
2190: if (jmin < jmax) {
2191: for (j=jmin; j<jmax; j++) {
2192: col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2193: }
2194: /* add the k-th row into il and c2r */
2195: il[k] = jmin;
2196: i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2197: }
2199: /* MatPivotCheck() */
2200: sctx.rs = rs;
2201: sctx.pv = dk;
2202: MatPivotCheck(A,info,&sctx,i);
2203: if (sctx.newshift) break;
2204: dk = sctx.pv;
2206: ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2207: }
2208: } while (sctx.newshift);
2210: PetscFree3(rtmp,il,c2r);
2211: ISRestoreIndices(ip,&rip);
2212: ISRestoreIndices(iip,&riip);
2214: ISIdentity(ip,&perm_identity);
2215: if (perm_identity) {
2216: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2217: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2218: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2219: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2220: } else {
2221: B->ops->solve = MatSolve_SeqSBAIJ_1;
2222: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2223: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2224: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2225: }
2227: C->assembled = PETSC_TRUE;
2228: C->preallocated = PETSC_TRUE;
2230: PetscLogFlops(C->rmap->n);
2232: /* MatPivotView() */
2233: if (sctx.nshift) {
2234: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2235: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
2236: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2237: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2238: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2239: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
2240: }
2241: }
2242: return(0);
2243: }
2247: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2248: {
2249: Mat C = B;
2250: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2251: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2252: IS ip=b->row,iip = b->icol;
2254: const PetscInt *rip,*riip;
2255: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2256: PetscInt *ai=a->i,*aj=a->j;
2257: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2258: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2259: PetscBool perm_identity;
2260: FactorShiftCtx sctx;
2261: PetscReal rs;
2262: MatScalar d,*v;
2265: /* MatPivotSetUp(): initialize shift context sctx */
2266: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2268: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2269: sctx.shift_top = info->zeropivot;
2270: for (i=0; i<mbs; i++) {
2271: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2272: d = (aa)[a->diag[i]];
2273: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2274: v = aa+ai[i];
2275: nz = ai[i+1] - ai[i];
2276: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2277: if (rs>sctx.shift_top) sctx.shift_top = rs;
2278: }
2279: sctx.shift_top *= 1.1;
2280: sctx.nshift_max = 5;
2281: sctx.shift_lo = 0.;
2282: sctx.shift_hi = 1.;
2283: }
2285: ISGetIndices(ip,&rip);
2286: ISGetIndices(iip,&riip);
2288: /* initialization */
2289: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);
2291: do {
2292: sctx.newshift = PETSC_FALSE;
2294: for (i=0; i<mbs; i++) jl[i] = mbs;
2295: il[0] = 0;
2297: for (k = 0; k<mbs; k++) {
2298: /* zero rtmp */
2299: nz = bi[k+1] - bi[k];
2300: bjtmp = bj + bi[k];
2301: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2303: bval = ba + bi[k];
2304: /* initialize k-th row by the perm[k]-th row of A */
2305: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2306: for (j = jmin; j < jmax; j++) {
2307: col = riip[aj[j]];
2308: if (col >= k) { /* only take upper triangular entry */
2309: rtmp[col] = aa[j];
2310: *bval++ = 0.0; /* for in-place factorization */
2311: }
2312: }
2313: /* shift the diagonal of the matrix */
2314: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2316: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2317: dk = rtmp[k];
2318: i = jl[k]; /* first row to be added to k_th row */
2320: while (i < k) {
2321: nexti = jl[i]; /* next row to be added to k_th row */
2323: /* compute multiplier, update diag(k) and U(i,k) */
2324: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2325: uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2326: dk += uikdi*ba[ili];
2327: ba[ili] = uikdi; /* -U(i,k) */
2329: /* add multiple of row i to k-th row */
2330: jmin = ili + 1; jmax = bi[i+1];
2331: if (jmin < jmax) {
2332: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2333: /* update il and jl for row i */
2334: il[i] = jmin;
2335: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2336: }
2337: i = nexti;
2338: }
2340: /* shift the diagonals when zero pivot is detected */
2341: /* compute rs=sum of abs(off-diagonal) */
2342: rs = 0.0;
2343: jmin = bi[k]+1;
2344: nz = bi[k+1] - jmin;
2345: bcol = bj + jmin;
2346: for (j=0; j<nz; j++) {
2347: rs += PetscAbsScalar(rtmp[bcol[j]]);
2348: }
2350: sctx.rs = rs;
2351: sctx.pv = dk;
2352: MatPivotCheck(A,info,&sctx,k);
2353: if (sctx.newshift) break;
2354: dk = sctx.pv;
2356: /* copy data into U(k,:) */
2357: ba[bi[k]] = 1.0/dk; /* U(k,k) */
2358: jmin = bi[k]+1; jmax = bi[k+1];
2359: if (jmin < jmax) {
2360: for (j=jmin; j<jmax; j++) {
2361: col = bj[j]; ba[j] = rtmp[col];
2362: }
2363: /* add the k-th row into il and jl */
2364: il[k] = jmin;
2365: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2366: }
2367: }
2368: } while (sctx.newshift);
2370: PetscFree3(rtmp,il,jl);
2371: ISRestoreIndices(ip,&rip);
2372: ISRestoreIndices(iip,&riip);
2374: ISIdentity(ip,&perm_identity);
2375: if (perm_identity) {
2376: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2377: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2378: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2379: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2380: } else {
2381: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2382: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2383: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2384: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2385: }
2387: C->assembled = PETSC_TRUE;
2388: C->preallocated = PETSC_TRUE;
2390: PetscLogFlops(C->rmap->n);
2391: if (sctx.nshift) {
2392: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2393: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2394: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2395: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2396: }
2397: }
2398: return(0);
2399: }
2401: /*
2402: icc() under revised new data structure.
2403: Factored arrays bj and ba are stored as
2404: U(0,:),...,U(i,:),U(n-1,:)
2406: ui=fact->i is an array of size n+1, in which
2407: ui+
2408: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2409: ui[n]: points to U(n-1,n-1)+1
2411: udiag=fact->diag is an array of size n,in which
2412: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2414: U(i,:) contains udiag[i] as its last entry, i.e.,
2415: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2416: */
2420: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2421: {
2422: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2423: Mat_SeqSBAIJ *b;
2424: PetscErrorCode ierr;
2425: PetscBool perm_identity,missing;
2426: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2427: const PetscInt *rip,*riip;
2428: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2429: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2430: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2431: PetscReal fill =info->fill,levels=info->levels;
2432: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2433: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2434: PetscBT lnkbt;
2435: IS iperm;
2438: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2439: MatMissingDiagonal(A,&missing,&d);
2440: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2441: ISIdentity(perm,&perm_identity);
2442: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2444: PetscMalloc1((am+1),&ui);
2445: PetscMalloc1((am+1),&udiag);
2446: ui[0] = 0;
2448: /* ICC(0) without matrix ordering: simply rearrange column indices */
2449: if (!levels && perm_identity) {
2450: for (i=0; i<am; i++) {
2451: ncols = ai[i+1] - a->diag[i];
2452: ui[i+1] = ui[i] + ncols;
2453: udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2454: }
2455: PetscMalloc1((ui[am]+1),&uj);
2456: cols = uj;
2457: for (i=0; i<am; i++) {
2458: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2459: ncols = ai[i+1] - a->diag[i] -1;
2460: for (j=0; j<ncols; j++) *cols++ = aj[j];
2461: *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2462: }
2463: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2464: ISGetIndices(iperm,&riip);
2465: ISGetIndices(perm,&rip);
2467: /* initialization */
2468: PetscMalloc1((am+1),&ajtmp);
2470: /* jl: linked list for storing indices of the pivot rows
2471: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2472: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2473: for (i=0; i<am; i++) {
2474: jl[i] = am; il[i] = 0;
2475: }
2477: /* create and initialize a linked list for storing column indices of the active row k */
2478: nlnk = am + 1;
2479: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2481: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2482: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);
2483: current_space = free_space;
2484: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);
2485: current_space_lvl = free_space_lvl;
2487: for (k=0; k<am; k++) { /* for each active row k */
2488: /* initialize lnk by the column indices of row rip[k] of A */
2489: nzk = 0;
2490: ncols = ai[rip[k]+1] - ai[rip[k]];
2491: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2492: ncols_upper = 0;
2493: for (j=0; j<ncols; j++) {
2494: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2495: if (riip[i] >= k) { /* only take upper triangular entry */
2496: ajtmp[ncols_upper] = i;
2497: ncols_upper++;
2498: }
2499: }
2500: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2501: nzk += nlnk;
2503: /* update lnk by computing fill-in for each pivot row to be merged in */
2504: prow = jl[k]; /* 1st pivot row */
2506: while (prow < k) {
2507: nextprow = jl[prow];
2509: /* merge prow into k-th row */
2510: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2511: jmax = ui[prow+1];
2512: ncols = jmax-jmin;
2513: i = jmin - ui[prow];
2514: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2515: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2516: j = *(uj - 1);
2517: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2518: nzk += nlnk;
2520: /* update il and jl for prow */
2521: if (jmin < jmax) {
2522: il[prow] = jmin;
2523: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2524: }
2525: prow = nextprow;
2526: }
2528: /* if free space is not available, make more free space */
2529: if (current_space->local_remaining<nzk) {
2530: i = am - k + 1; /* num of unfactored rows */
2531: i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2532: PetscFreeSpaceGet(i,¤t_space);
2533: PetscFreeSpaceGet(i,¤t_space_lvl);
2534: reallocs++;
2535: }
2537: /* copy data into free_space and free_space_lvl, then initialize lnk */
2538: if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2539: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2541: /* add the k-th row into il and jl */
2542: if (nzk > 1) {
2543: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2544: jl[k] = jl[i]; jl[i] = k;
2545: il[k] = ui[k] + 1;
2546: }
2547: uj_ptr[k] = current_space->array;
2548: uj_lvl_ptr[k] = current_space_lvl->array;
2550: current_space->array += nzk;
2551: current_space->local_used += nzk;
2552: current_space->local_remaining -= nzk;
2554: current_space_lvl->array += nzk;
2555: current_space_lvl->local_used += nzk;
2556: current_space_lvl->local_remaining -= nzk;
2558: ui[k+1] = ui[k] + nzk;
2559: }
2561: ISRestoreIndices(perm,&rip);
2562: ISRestoreIndices(iperm,&riip);
2563: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2564: PetscFree(ajtmp);
2566: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2567: PetscMalloc1((ui[am]+1),&uj);
2568: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2569: PetscIncompleteLLDestroy(lnk,lnkbt);
2570: PetscFreeSpaceDestroy(free_space_lvl);
2572: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2574: /* put together the new matrix in MATSEQSBAIJ format */
2575: b = (Mat_SeqSBAIJ*)(fact)->data;
2576: b->singlemalloc = PETSC_FALSE;
2578: PetscMalloc1((ui[am]+1),&b->a);
2580: b->j = uj;
2581: b->i = ui;
2582: b->diag = udiag;
2583: b->free_diag = PETSC_TRUE;
2584: b->ilen = 0;
2585: b->imax = 0;
2586: b->row = perm;
2587: b->col = perm;
2588: PetscObjectReference((PetscObject)perm);
2589: PetscObjectReference((PetscObject)perm);
2590: b->icol = iperm;
2591: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2593: PetscMalloc1((am+1),&b->solve_work);
2594: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2596: b->maxnz = b->nz = ui[am];
2597: b->free_a = PETSC_TRUE;
2598: b->free_ij = PETSC_TRUE;
2600: fact->info.factor_mallocs = reallocs;
2601: fact->info.fill_ratio_given = fill;
2602: if (ai[am] != 0) {
2603: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2604: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2605: } else {
2606: fact->info.fill_ratio_needed = 0.0;
2607: }
2608: #if defined(PETSC_USE_INFO)
2609: if (ai[am] != 0) {
2610: PetscReal af = fact->info.fill_ratio_needed;
2611: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2612: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2613: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2614: } else {
2615: PetscInfo(A,"Empty matrix.\n");
2616: }
2617: #endif
2618: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2619: return(0);
2620: }
2624: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2625: {
2626: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2627: Mat_SeqSBAIJ *b;
2628: PetscErrorCode ierr;
2629: PetscBool perm_identity,missing;
2630: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2631: const PetscInt *rip,*riip;
2632: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2633: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2634: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2635: PetscReal fill =info->fill,levels=info->levels;
2636: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2637: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2638: PetscBT lnkbt;
2639: IS iperm;
2642: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2643: MatMissingDiagonal(A,&missing,&d);
2644: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2645: ISIdentity(perm,&perm_identity);
2646: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2648: PetscMalloc1((am+1),&ui);
2649: PetscMalloc1((am+1),&udiag);
2650: ui[0] = 0;
2652: /* ICC(0) without matrix ordering: simply copies fill pattern */
2653: if (!levels && perm_identity) {
2655: for (i=0; i<am; i++) {
2656: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
2657: udiag[i] = ui[i];
2658: }
2659: PetscMalloc1((ui[am]+1),&uj);
2660: cols = uj;
2661: for (i=0; i<am; i++) {
2662: aj = a->j + a->diag[i];
2663: ncols = ui[i+1] - ui[i];
2664: for (j=0; j<ncols; j++) *cols++ = *aj++;
2665: }
2666: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2667: ISGetIndices(iperm,&riip);
2668: ISGetIndices(perm,&rip);
2670: /* initialization */
2671: PetscMalloc1((am+1),&ajtmp);
2673: /* jl: linked list for storing indices of the pivot rows
2674: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2675: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2676: for (i=0; i<am; i++) {
2677: jl[i] = am; il[i] = 0;
2678: }
2680: /* create and initialize a linked list for storing column indices of the active row k */
2681: nlnk = am + 1;
2682: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2684: /* initial FreeSpace size is fill*(ai[am]+1) */
2685: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
2686: current_space = free_space;
2687: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
2688: current_space_lvl = free_space_lvl;
2690: for (k=0; k<am; k++) { /* for each active row k */
2691: /* initialize lnk by the column indices of row rip[k] of A */
2692: nzk = 0;
2693: ncols = ai[rip[k]+1] - ai[rip[k]];
2694: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2695: ncols_upper = 0;
2696: for (j=0; j<ncols; j++) {
2697: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2698: if (riip[i] >= k) { /* only take upper triangular entry */
2699: ajtmp[ncols_upper] = i;
2700: ncols_upper++;
2701: }
2702: }
2703: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2704: nzk += nlnk;
2706: /* update lnk by computing fill-in for each pivot row to be merged in */
2707: prow = jl[k]; /* 1st pivot row */
2709: while (prow < k) {
2710: nextprow = jl[prow];
2712: /* merge prow into k-th row */
2713: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2714: jmax = ui[prow+1];
2715: ncols = jmax-jmin;
2716: i = jmin - ui[prow];
2717: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2718: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2719: j = *(uj - 1);
2720: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2721: nzk += nlnk;
2723: /* update il and jl for prow */
2724: if (jmin < jmax) {
2725: il[prow] = jmin;
2726: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2727: }
2728: prow = nextprow;
2729: }
2731: /* if free space is not available, make more free space */
2732: if (current_space->local_remaining<nzk) {
2733: i = am - k + 1; /* num of unfactored rows */
2734: i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2735: PetscFreeSpaceGet(i,¤t_space);
2736: PetscFreeSpaceGet(i,¤t_space_lvl);
2737: reallocs++;
2738: }
2740: /* copy data into free_space and free_space_lvl, then initialize lnk */
2741: if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2742: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2744: /* add the k-th row into il and jl */
2745: if (nzk > 1) {
2746: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2747: jl[k] = jl[i]; jl[i] = k;
2748: il[k] = ui[k] + 1;
2749: }
2750: uj_ptr[k] = current_space->array;
2751: uj_lvl_ptr[k] = current_space_lvl->array;
2753: current_space->array += nzk;
2754: current_space->local_used += nzk;
2755: current_space->local_remaining -= nzk;
2757: current_space_lvl->array += nzk;
2758: current_space_lvl->local_used += nzk;
2759: current_space_lvl->local_remaining -= nzk;
2761: ui[k+1] = ui[k] + nzk;
2762: }
2764: #if defined(PETSC_USE_INFO)
2765: if (ai[am] != 0) {
2766: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2767: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2768: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2769: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2770: } else {
2771: PetscInfo(A,"Empty matrix.\n");
2772: }
2773: #endif
2775: ISRestoreIndices(perm,&rip);
2776: ISRestoreIndices(iperm,&riip);
2777: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2778: PetscFree(ajtmp);
2780: /* destroy list of free space and other temporary array(s) */
2781: PetscMalloc1((ui[am]+1),&uj);
2782: PetscFreeSpaceContiguous(&free_space,uj);
2783: PetscIncompleteLLDestroy(lnk,lnkbt);
2784: PetscFreeSpaceDestroy(free_space_lvl);
2786: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2788: /* put together the new matrix in MATSEQSBAIJ format */
2790: b = (Mat_SeqSBAIJ*)fact->data;
2791: b->singlemalloc = PETSC_FALSE;
2793: PetscMalloc1((ui[am]+1),&b->a);
2795: b->j = uj;
2796: b->i = ui;
2797: b->diag = udiag;
2798: b->free_diag = PETSC_TRUE;
2799: b->ilen = 0;
2800: b->imax = 0;
2801: b->row = perm;
2802: b->col = perm;
2804: PetscObjectReference((PetscObject)perm);
2805: PetscObjectReference((PetscObject)perm);
2807: b->icol = iperm;
2808: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2809: PetscMalloc1((am+1),&b->solve_work);
2810: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
2811: b->maxnz = b->nz = ui[am];
2812: b->free_a = PETSC_TRUE;
2813: b->free_ij = PETSC_TRUE;
2815: fact->info.factor_mallocs = reallocs;
2816: fact->info.fill_ratio_given = fill;
2817: if (ai[am] != 0) {
2818: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2819: } else {
2820: fact->info.fill_ratio_needed = 0.0;
2821: }
2822: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2823: return(0);
2824: }
2828: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2829: {
2830: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2831: Mat_SeqSBAIJ *b;
2832: PetscErrorCode ierr;
2833: PetscBool perm_identity,missing;
2834: PetscReal fill = info->fill;
2835: const PetscInt *rip,*riip;
2836: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2837: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2838: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2839: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2840: PetscBT lnkbt;
2841: IS iperm;
2844: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2845: MatMissingDiagonal(A,&missing,&i);
2846: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2848: /* check whether perm is the identity mapping */
2849: ISIdentity(perm,&perm_identity);
2850: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2851: ISGetIndices(iperm,&riip);
2852: ISGetIndices(perm,&rip);
2854: /* initialization */
2855: PetscMalloc1((am+1),&ui);
2856: PetscMalloc1((am+1),&udiag);
2857: ui[0] = 0;
2859: /* jl: linked list for storing indices of the pivot rows
2860: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2861: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2862: for (i=0; i<am; i++) {
2863: jl[i] = am; il[i] = 0;
2864: }
2866: /* create and initialize a linked list for storing column indices of the active row k */
2867: nlnk = am + 1;
2868: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2870: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2871: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);
2872: current_space = free_space;
2874: for (k=0; k<am; k++) { /* for each active row k */
2875: /* initialize lnk by the column indices of row rip[k] of A */
2876: nzk = 0;
2877: ncols = ai[rip[k]+1] - ai[rip[k]];
2878: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2879: ncols_upper = 0;
2880: for (j=0; j<ncols; j++) {
2881: i = riip[*(aj + ai[rip[k]] + j)];
2882: if (i >= k) { /* only take upper triangular entry */
2883: cols[ncols_upper] = i;
2884: ncols_upper++;
2885: }
2886: }
2887: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2888: nzk += nlnk;
2890: /* update lnk by computing fill-in for each pivot row to be merged in */
2891: prow = jl[k]; /* 1st pivot row */
2893: while (prow < k) {
2894: nextprow = jl[prow];
2895: /* merge prow into k-th row */
2896: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2897: jmax = ui[prow+1];
2898: ncols = jmax-jmin;
2899: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2900: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
2901: nzk += nlnk;
2903: /* update il and jl for prow */
2904: if (jmin < jmax) {
2905: il[prow] = jmin;
2906: j = *uj_ptr;
2907: jl[prow] = jl[j];
2908: jl[j] = prow;
2909: }
2910: prow = nextprow;
2911: }
2913: /* if free space is not available, make more free space */
2914: if (current_space->local_remaining<nzk) {
2915: i = am - k + 1; /* num of unfactored rows */
2916: i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2917: PetscFreeSpaceGet(i,¤t_space);
2918: reallocs++;
2919: }
2921: /* copy data into free space, then initialize lnk */
2922: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
2924: /* add the k-th row into il and jl */
2925: if (nzk > 1) {
2926: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2927: jl[k] = jl[i]; jl[i] = k;
2928: il[k] = ui[k] + 1;
2929: }
2930: ui_ptr[k] = current_space->array;
2932: current_space->array += nzk;
2933: current_space->local_used += nzk;
2934: current_space->local_remaining -= nzk;
2936: ui[k+1] = ui[k] + nzk;
2937: }
2939: ISRestoreIndices(perm,&rip);
2940: ISRestoreIndices(iperm,&riip);
2941: PetscFree4(ui_ptr,jl,il,cols);
2943: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2944: PetscMalloc1((ui[am]+1),&uj);
2945: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2946: PetscLLDestroy(lnk,lnkbt);
2948: /* put together the new matrix in MATSEQSBAIJ format */
2950: b = (Mat_SeqSBAIJ*)fact->data;
2951: b->singlemalloc = PETSC_FALSE;
2952: b->free_a = PETSC_TRUE;
2953: b->free_ij = PETSC_TRUE;
2955: PetscMalloc1((ui[am]+1),&b->a);
2957: b->j = uj;
2958: b->i = ui;
2959: b->diag = udiag;
2960: b->free_diag = PETSC_TRUE;
2961: b->ilen = 0;
2962: b->imax = 0;
2963: b->row = perm;
2964: b->col = perm;
2966: PetscObjectReference((PetscObject)perm);
2967: PetscObjectReference((PetscObject)perm);
2969: b->icol = iperm;
2970: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2972: PetscMalloc1((am+1),&b->solve_work);
2973: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2975: b->maxnz = b->nz = ui[am];
2977: fact->info.factor_mallocs = reallocs;
2978: fact->info.fill_ratio_given = fill;
2979: if (ai[am] != 0) {
2980: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2981: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2982: } else {
2983: fact->info.fill_ratio_needed = 0.0;
2984: }
2985: #if defined(PETSC_USE_INFO)
2986: if (ai[am] != 0) {
2987: PetscReal af = fact->info.fill_ratio_needed;
2988: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2989: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2990: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2991: } else {
2992: PetscInfo(A,"Empty matrix.\n");
2993: }
2994: #endif
2995: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2996: return(0);
2997: }
3001: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
3002: {
3003: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3004: Mat_SeqSBAIJ *b;
3005: PetscErrorCode ierr;
3006: PetscBool perm_identity,missing;
3007: PetscReal fill = info->fill;
3008: const PetscInt *rip,*riip;
3009: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
3010: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
3011: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
3012: PetscFreeSpaceList free_space=NULL,current_space=NULL;
3013: PetscBT lnkbt;
3014: IS iperm;
3017: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
3018: MatMissingDiagonal(A,&missing,&i);
3019: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3021: /* check whether perm is the identity mapping */
3022: ISIdentity(perm,&perm_identity);
3023: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
3024: ISGetIndices(iperm,&riip);
3025: ISGetIndices(perm,&rip);
3027: /* initialization */
3028: PetscMalloc1((am+1),&ui);
3029: ui[0] = 0;
3031: /* jl: linked list for storing indices of the pivot rows
3032: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3033: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
3034: for (i=0; i<am; i++) {
3035: jl[i] = am; il[i] = 0;
3036: }
3038: /* create and initialize a linked list for storing column indices of the active row k */
3039: nlnk = am + 1;
3040: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
3042: /* initial FreeSpace size is fill*(ai[am]+1) */
3043: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
3044: current_space = free_space;
3046: for (k=0; k<am; k++) { /* for each active row k */
3047: /* initialize lnk by the column indices of row rip[k] of A */
3048: nzk = 0;
3049: ncols = ai[rip[k]+1] - ai[rip[k]];
3050: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3051: ncols_upper = 0;
3052: for (j=0; j<ncols; j++) {
3053: i = riip[*(aj + ai[rip[k]] + j)];
3054: if (i >= k) { /* only take upper triangular entry */
3055: cols[ncols_upper] = i;
3056: ncols_upper++;
3057: }
3058: }
3059: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
3060: nzk += nlnk;
3062: /* update lnk by computing fill-in for each pivot row to be merged in */
3063: prow = jl[k]; /* 1st pivot row */
3065: while (prow < k) {
3066: nextprow = jl[prow];
3067: /* merge prow into k-th row */
3068: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3069: jmax = ui[prow+1];
3070: ncols = jmax-jmin;
3071: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3072: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
3073: nzk += nlnk;
3075: /* update il and jl for prow */
3076: if (jmin < jmax) {
3077: il[prow] = jmin;
3078: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3079: }
3080: prow = nextprow;
3081: }
3083: /* if free space is not available, make more free space */
3084: if (current_space->local_remaining<nzk) {
3085: i = am - k + 1; /* num of unfactored rows */
3086: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3087: PetscFreeSpaceGet(i,¤t_space);
3088: reallocs++;
3089: }
3091: /* copy data into free space, then initialize lnk */
3092: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
3094: /* add the k-th row into il and jl */
3095: if (nzk-1 > 0) {
3096: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3097: jl[k] = jl[i]; jl[i] = k;
3098: il[k] = ui[k] + 1;
3099: }
3100: ui_ptr[k] = current_space->array;
3102: current_space->array += nzk;
3103: current_space->local_used += nzk;
3104: current_space->local_remaining -= nzk;
3106: ui[k+1] = ui[k] + nzk;
3107: }
3109: #if defined(PETSC_USE_INFO)
3110: if (ai[am] != 0) {
3111: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3112: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
3113: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
3114: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
3115: } else {
3116: PetscInfo(A,"Empty matrix.\n");
3117: }
3118: #endif
3120: ISRestoreIndices(perm,&rip);
3121: ISRestoreIndices(iperm,&riip);
3122: PetscFree4(ui_ptr,jl,il,cols);
3124: /* destroy list of free space and other temporary array(s) */
3125: PetscMalloc1((ui[am]+1),&uj);
3126: PetscFreeSpaceContiguous(&free_space,uj);
3127: PetscLLDestroy(lnk,lnkbt);
3129: /* put together the new matrix in MATSEQSBAIJ format */
3131: b = (Mat_SeqSBAIJ*)fact->data;
3132: b->singlemalloc = PETSC_FALSE;
3133: b->free_a = PETSC_TRUE;
3134: b->free_ij = PETSC_TRUE;
3136: PetscMalloc1((ui[am]+1),&b->a);
3138: b->j = uj;
3139: b->i = ui;
3140: b->diag = 0;
3141: b->ilen = 0;
3142: b->imax = 0;
3143: b->row = perm;
3144: b->col = perm;
3146: PetscObjectReference((PetscObject)perm);
3147: PetscObjectReference((PetscObject)perm);
3149: b->icol = iperm;
3150: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3152: PetscMalloc1((am+1),&b->solve_work);
3153: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
3154: b->maxnz = b->nz = ui[am];
3156: fact->info.factor_mallocs = reallocs;
3157: fact->info.fill_ratio_given = fill;
3158: if (ai[am] != 0) {
3159: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3160: } else {
3161: fact->info.fill_ratio_needed = 0.0;
3162: }
3163: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3164: return(0);
3165: }
3169: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3170: {
3171: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3172: PetscErrorCode ierr;
3173: PetscInt n = A->rmap->n;
3174: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3175: PetscScalar *x,sum;
3176: const PetscScalar *b;
3177: const MatScalar *aa = a->a,*v;
3178: PetscInt i,nz;
3181: if (!n) return(0);
3183: VecGetArrayRead(bb,&b);
3184: VecGetArray(xx,&x);
3186: /* forward solve the lower triangular */
3187: x[0] = b[0];
3188: v = aa;
3189: vi = aj;
3190: for (i=1; i<n; i++) {
3191: nz = ai[i+1] - ai[i];
3192: sum = b[i];
3193: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3194: v += nz;
3195: vi += nz;
3196: x[i] = sum;
3197: }
3199: /* backward solve the upper triangular */
3200: for (i=n-1; i>=0; i--) {
3201: v = aa + adiag[i+1] + 1;
3202: vi = aj + adiag[i+1] + 1;
3203: nz = adiag[i] - adiag[i+1]-1;
3204: sum = x[i];
3205: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3206: x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3207: }
3209: PetscLogFlops(2.0*a->nz - A->cmap->n);
3210: VecRestoreArrayRead(bb,&b);
3211: VecRestoreArray(xx,&x);
3212: return(0);
3213: }
3217: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3218: {
3219: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3220: IS iscol = a->col,isrow = a->row;
3221: PetscErrorCode ierr;
3222: PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3223: const PetscInt *rout,*cout,*r,*c;
3224: PetscScalar *x,*tmp,sum;
3225: const PetscScalar *b;
3226: const MatScalar *aa = a->a,*v;
3229: if (!n) return(0);
3231: VecGetArrayRead(bb,&b);
3232: VecGetArray(xx,&x);
3233: tmp = a->solve_work;
3235: ISGetIndices(isrow,&rout); r = rout;
3236: ISGetIndices(iscol,&cout); c = cout;
3238: /* forward solve the lower triangular */
3239: tmp[0] = b[r[0]];
3240: v = aa;
3241: vi = aj;
3242: for (i=1; i<n; i++) {
3243: nz = ai[i+1] - ai[i];
3244: sum = b[r[i]];
3245: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3246: tmp[i] = sum;
3247: v += nz; vi += nz;
3248: }
3250: /* backward solve the upper triangular */
3251: for (i=n-1; i>=0; i--) {
3252: v = aa + adiag[i+1]+1;
3253: vi = aj + adiag[i+1]+1;
3254: nz = adiag[i]-adiag[i+1]-1;
3255: sum = tmp[i];
3256: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3257: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3258: }
3260: ISRestoreIndices(isrow,&rout);
3261: ISRestoreIndices(iscol,&cout);
3262: VecRestoreArrayRead(bb,&b);
3263: VecRestoreArray(xx,&x);
3264: PetscLogFlops(2*a->nz - A->cmap->n);
3265: return(0);
3266: }
3270: /*
3271: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3272: */
3273: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3274: {
3275: Mat B = *fact;
3276: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b;
3277: IS isicol;
3279: const PetscInt *r,*ic;
3280: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3281: PetscInt *bi,*bj,*bdiag,*bdiag_rev;
3282: PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3283: PetscInt nlnk,*lnk;
3284: PetscBT lnkbt;
3285: PetscBool row_identity,icol_identity;
3286: MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3287: const PetscInt *ics;
3288: PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3289: PetscReal dt =info->dt,shift=info->shiftamount;
3290: PetscInt dtcount=(PetscInt)info->dtcount,nnz_max;
3291: PetscBool missing;
3294: if (dt == PETSC_DEFAULT) dt = 0.005;
3295: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3297: /* ------- symbolic factorization, can be reused ---------*/
3298: MatMissingDiagonal(A,&missing,&i);
3299: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3300: adiag=a->diag;
3302: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
3304: /* bdiag is location of diagonal in factor */
3305: PetscMalloc1((n+1),&bdiag); /* becomes b->diag */
3306: PetscMalloc1((n+1),&bdiag_rev); /* temporary */
3308: /* allocate row pointers bi */
3309: PetscMalloc1((2*n+2),&bi);
3311: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3312: if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3313: nnz_max = ai[n]+2*n*dtcount+2;
3315: PetscMalloc1((nnz_max+1),&bj);
3316: PetscMalloc1((nnz_max+1),&ba);
3318: /* put together the new matrix */
3319: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
3320: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
3321: b = (Mat_SeqAIJ*)B->data;
3323: b->free_a = PETSC_TRUE;
3324: b->free_ij = PETSC_TRUE;
3325: b->singlemalloc = PETSC_FALSE;
3327: b->a = ba;
3328: b->j = bj;
3329: b->i = bi;
3330: b->diag = bdiag;
3331: b->ilen = 0;
3332: b->imax = 0;
3333: b->row = isrow;
3334: b->col = iscol;
3335: PetscObjectReference((PetscObject)isrow);
3336: PetscObjectReference((PetscObject)iscol);
3337: b->icol = isicol;
3339: PetscMalloc1((n+1),&b->solve_work);
3340: PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
3341: b->maxnz = nnz_max;
3343: B->factortype = MAT_FACTOR_ILUDT;
3344: B->info.factor_mallocs = 0;
3345: B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3346: /* ------- end of symbolic factorization ---------*/
3348: ISGetIndices(isrow,&r);
3349: ISGetIndices(isicol,&ic);
3350: ics = ic;
3352: /* linked list for storing column indices of the active row */
3353: nlnk = n + 1;
3354: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
3356: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3357: PetscMalloc2(n,&im,n,&jtmp);
3358: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3359: PetscMalloc2(n,&rtmp,n,&vtmp);
3360: PetscMemzero(rtmp,n*sizeof(MatScalar));
3362: bi[0] = 0;
3363: bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3364: bdiag_rev[n] = bdiag[0];
3365: bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3366: for (i=0; i<n; i++) {
3367: /* copy initial fill into linked list */
3368: nzi = ai[r[i]+1] - ai[r[i]];
3369: if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3370: nzi_al = adiag[r[i]] - ai[r[i]];
3371: nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3372: ajtmp = aj + ai[r[i]];
3373: PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);
3375: /* load in initial (unfactored row) */
3376: aatmp = a->a + ai[r[i]];
3377: for (j=0; j<nzi; j++) {
3378: rtmp[ics[*ajtmp++]] = *aatmp++;
3379: }
3381: /* add pivot rows into linked list */
3382: row = lnk[n];
3383: while (row < i) {
3384: nzi_bl = bi[row+1] - bi[row] + 1;
3385: bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3386: PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);
3387: nzi += nlnk;
3388: row = lnk[row];
3389: }
3391: /* copy data from lnk into jtmp, then initialize lnk */
3392: PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);
3394: /* numerical factorization */
3395: bjtmp = jtmp;
3396: row = *bjtmp++; /* 1st pivot row */
3397: while (row < i) {
3398: pc = rtmp + row;
3399: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3400: multiplier = (*pc) * (*pv);
3401: *pc = multiplier;
3402: if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3403: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3404: pv = ba + bdiag[row+1] + 1;
3405: /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3406: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3407: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3408: PetscLogFlops(1+2*nz);
3409: }
3410: row = *bjtmp++;
3411: }
3413: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3414: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3415: nzi_bl = 0; j = 0;
3416: while (jtmp[j] < i) { /* Note: jtmp is sorted */
3417: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3418: nzi_bl++; j++;
3419: }
3420: nzi_bu = nzi - nzi_bl -1;
3421: while (j < nzi) {
3422: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3423: j++;
3424: }
3426: bjtmp = bj + bi[i];
3427: batmp = ba + bi[i];
3428: /* apply level dropping rule to L part */
3429: ncut = nzi_al + dtcount;
3430: if (ncut < nzi_bl) {
3431: PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);
3432: PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
3433: } else {
3434: ncut = nzi_bl;
3435: }
3436: for (j=0; j<ncut; j++) {
3437: bjtmp[j] = jtmp[j];
3438: batmp[j] = vtmp[j];
3439: /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3440: }
3441: bi[i+1] = bi[i] + ncut;
3442: nzi = ncut + 1;
3444: /* apply level dropping rule to U part */
3445: ncut = nzi_au + dtcount;
3446: if (ncut < nzi_bu) {
3447: PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);
3448: PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
3449: } else {
3450: ncut = nzi_bu;
3451: }
3452: nzi += ncut;
3454: /* mark bdiagonal */
3455: bdiag[i+1] = bdiag[i] - (ncut + 1);
3456: bdiag_rev[n-i-1] = bdiag[i+1];
3457: bi[2*n - i] = bi[2*n - i +1] - (ncut + 1);
3458: bjtmp = bj + bdiag[i];
3459: batmp = ba + bdiag[i];
3460: *bjtmp = i;
3461: *batmp = diag_tmp; /* rtmp[i]; */
3462: if (*batmp == 0.0) {
3463: *batmp = dt+shift;
3464: /* printf(" row %d add shift %g\n",i,shift); */
3465: }
3466: *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3467: /* printf(" (%d,%g),",*bjtmp,*batmp); */
3469: bjtmp = bj + bdiag[i+1]+1;
3470: batmp = ba + bdiag[i+1]+1;
3471: for (k=0; k<ncut; k++) {
3472: bjtmp[k] = jtmp[nzi_bl+1+k];
3473: batmp[k] = vtmp[nzi_bl+1+k];
3474: /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3475: }
3476: /* printf("\n"); */
3478: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3479: /*
3480: printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3481: printf(" ----------------------------\n");
3482: */
3483: } /* for (i=0; i<n; i++) */
3484: /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3485: if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3487: ISRestoreIndices(isrow,&r);
3488: ISRestoreIndices(isicol,&ic);
3490: PetscLLDestroy(lnk,lnkbt);
3491: PetscFree2(im,jtmp);
3492: PetscFree2(rtmp,vtmp);
3493: PetscFree(bdiag_rev);
3495: PetscLogFlops(B->cmap->n);
3496: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3498: ISIdentity(isrow,&row_identity);
3499: ISIdentity(isicol,&icol_identity);
3500: if (row_identity && icol_identity) {
3501: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3502: } else {
3503: B->ops->solve = MatSolve_SeqAIJ;
3504: }
3506: B->ops->solveadd = 0;
3507: B->ops->solvetranspose = 0;
3508: B->ops->solvetransposeadd = 0;
3509: B->ops->matsolve = 0;
3510: B->assembled = PETSC_TRUE;
3511: B->preallocated = PETSC_TRUE;
3512: return(0);
3513: }
3515: /* a wraper of MatILUDTFactor_SeqAIJ() */
3518: /*
3519: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3520: */
3522: PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3523: {
3527: MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);
3528: return(0);
3529: }
3531: /*
3532: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3533: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3534: */
3537: /*
3538: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3539: */
3541: PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3542: {
3543: Mat C =fact;
3544: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3545: IS isrow = b->row,isicol = b->icol;
3547: const PetscInt *r,*ic,*ics;
3548: PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3549: PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3550: MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3551: PetscReal dt=info->dt,shift=info->shiftamount;
3552: PetscBool row_identity, col_identity;
3555: ISGetIndices(isrow,&r);
3556: ISGetIndices(isicol,&ic);
3557: PetscMalloc1((n+1),&rtmp);
3558: ics = ic;
3560: for (i=0; i<n; i++) {
3561: /* initialize rtmp array */
3562: nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */
3563: bjtmp = bj + bi[i];
3564: for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3565: rtmp[i] = 0.0;
3566: nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3567: bjtmp = bj + bdiag[i+1] + 1;
3568: for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3570: /* load in initial unfactored row of A */
3571: /* printf("row %d\n",i); */
3572: nz = ai[r[i]+1] - ai[r[i]];
3573: ajtmp = aj + ai[r[i]];
3574: v = aa + ai[r[i]];
3575: for (j=0; j<nz; j++) {
3576: rtmp[ics[*ajtmp++]] = v[j];
3577: /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3578: }
3579: /* printf("\n"); */
3581: /* numerical factorization */
3582: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3583: nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3584: k = 0;
3585: while (k < nzl) {
3586: row = *bjtmp++;
3587: /* printf(" prow %d\n",row); */
3588: pc = rtmp + row;
3589: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3590: multiplier = (*pc) * (*pv);
3591: *pc = multiplier;
3592: if (PetscAbsScalar(multiplier) > dt) {
3593: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3594: pv = b->a + bdiag[row+1] + 1;
3595: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3596: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3597: PetscLogFlops(1+2*nz);
3598: }
3599: k++;
3600: }
3602: /* finished row so stick it into b->a */
3603: /* L-part */
3604: pv = b->a + bi[i];
3605: pj = bj + bi[i];
3606: nzl = bi[i+1] - bi[i];
3607: for (j=0; j<nzl; j++) {
3608: pv[j] = rtmp[pj[j]];
3609: /* printf(" (%d,%g),",pj[j],pv[j]); */
3610: }
3612: /* diagonal: invert diagonal entries for simplier triangular solves */
3613: if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3614: b->a[bdiag[i]] = 1.0/rtmp[i];
3615: /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3617: /* U-part */
3618: pv = b->a + bdiag[i+1] + 1;
3619: pj = bj + bdiag[i+1] + 1;
3620: nzu = bdiag[i] - bdiag[i+1] - 1;
3621: for (j=0; j<nzu; j++) {
3622: pv[j] = rtmp[pj[j]];
3623: /* printf(" (%d,%g),",pj[j],pv[j]); */
3624: }
3625: /* printf("\n"); */
3626: }
3628: PetscFree(rtmp);
3629: ISRestoreIndices(isicol,&ic);
3630: ISRestoreIndices(isrow,&r);
3632: ISIdentity(isrow,&row_identity);
3633: ISIdentity(isicol,&col_identity);
3634: if (row_identity && col_identity) {
3635: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3636: } else {
3637: C->ops->solve = MatSolve_SeqAIJ;
3638: }
3639: C->ops->solveadd = 0;
3640: C->ops->solvetranspose = 0;
3641: C->ops->solvetransposeadd = 0;
3642: C->ops->matsolve = 0;
3643: C->assembled = PETSC_TRUE;
3644: C->preallocated = PETSC_TRUE;
3646: PetscLogFlops(C->cmap->n);
3647: return(0);
3648: }