Actual source code: aijfact.c
petsc-3.3-p7 2013-05-11
2: #include <../src/mat/impls/aij/seq/aij.h>
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <petscbt.h>
5: #include <../src/mat/utils/freespace.h>
7: EXTERN_C_BEGIN
10: /*
11: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
13: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
14: */
15: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
16: {
17: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data;
18: PetscErrorCode ierr;
19: PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
20: const PetscInt *ai = a->i, *aj = a->j;
21: const PetscScalar *aa = a->a;
22: PetscBool *done;
23: PetscReal best,past = 0,future;
26: /* pick initial row */
27: best = -1;
28: for (i=0; i<n; i++) {
29: future = 0.0;
30: for (j=ai[i]; j<ai[i+1]; j++) {
31: if (aj[j] != i) future += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]);
32: }
33: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34: if (past/future > best) {
35: best = past/future;
36: current = i;
37: }
38: }
40: PetscMalloc(n*sizeof(PetscBool),&done);
41: PetscMemzero(done,n*sizeof(PetscBool));
42: PetscMalloc(n*sizeof(PetscInt),&order);
43: order[0] = current;
44: for (i=0; i<n-1; i++) {
45: done[current] = PETSC_TRUE;
46: best = -1;
47: /* loop over all neighbors of current pivot */
48: for (j=ai[current]; j<ai[current+1]; j++) {
49: jj = aj[j];
50: if (done[jj]) continue;
51: /* loop over columns of potential next row computing weights for below and above diagonal */
52: past = future = 0.0;
53: for (k=ai[jj]; k<ai[jj+1]; k++) {
54: kk = aj[k];
55: if (done[kk]) past += PetscAbsScalar(aa[k]);
56: else if (kk != jj) future += PetscAbsScalar(aa[k]);
57: }
58: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59: if (past/future > best) {
60: best = past/future;
61: newcurrent = jj;
62: }
63: }
64: if (best == -1) { /* no neighbors to select from so select best of all that remain */
65: best = -1;
66: for (k=0; k<n; k++) {
67: if (done[k]) continue;
68: future = 0.0;
69: past = 0.0;
70: for (j=ai[k]; j<ai[k+1]; j++) {
71: kk = aj[j];
72: if (done[kk]) past += PetscAbsScalar(aa[j]);
73: else if (kk != k) future += PetscAbsScalar(aa[j]);
74: }
75: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76: if (past/future > best) {
77: best = past/future;
78: newcurrent = k;
79: }
80: }
81: }
82: if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
83: current = newcurrent;
84: order[i+1] = current;
85: }
86: ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);
87: *icol = *irow;
88: PetscObjectReference((PetscObject)*irow);
89: PetscFree(done);
90: PetscFree(order);
91: return(0);
92: }
93: EXTERN_C_END
95: EXTERN_C_BEGIN
98: PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg)
99: {
101: *flg = PETSC_TRUE;
102: return(0);
103: }
104: EXTERN_C_END
106: EXTERN_C_BEGIN
109: PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
110: {
111: PetscInt n = A->rmap->n;
112: PetscErrorCode ierr;
115: #if defined(PETSC_USE_COMPLEX)
116: if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC))SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
117: #endif
118: MatCreate(((PetscObject)A)->comm,B);
119: MatSetSizes(*B,n,n,n,n);
120: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
121: MatSetType(*B,MATSEQAIJ);
122: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
123: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
124: MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);
125: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
126: MatSetType(*B,MATSEQSBAIJ);
127: MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
128: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
129: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
130: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
131: (*B)->factortype = ftype;
132: return(0);
133: }
134: EXTERN_C_END
138: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
139: {
140: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
141: IS isicol;
142: PetscErrorCode ierr;
143: const PetscInt *r,*ic;
144: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
145: PetscInt *bi,*bj,*ajtmp;
146: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
147: PetscReal f;
148: PetscInt nlnk,*lnk,k,**bi_ptr;
149: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
150: PetscBT lnkbt;
151:
153: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
154: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
155: ISGetIndices(isrow,&r);
156: ISGetIndices(isicol,&ic);
158: /* get new row pointers */
159: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
160: bi[0] = 0;
162: /* bdiag is location of diagonal in factor */
163: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
164: bdiag[0] = 0;
166: /* linked list for storing column indices of the active row */
167: nlnk = n + 1;
168: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
170: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
172: /* initial FreeSpace size is f*(ai[n]+1) */
173: f = info->fill;
174: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
175: current_space = free_space;
177: for (i=0; i<n; i++) {
178: /* copy previous fill into linked list */
179: nzi = 0;
180: nnz = ai[r[i]+1] - ai[r[i]];
181: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
182: ajtmp = aj + ai[r[i]];
183: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
184: nzi += nlnk;
186: /* add pivot rows into linked list */
187: row = lnk[n];
188: while (row < i) {
189: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
190: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
191: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
192: nzi += nlnk;
193: row = lnk[row];
194: }
195: bi[i+1] = bi[i] + nzi;
196: im[i] = nzi;
198: /* mark bdiag */
199: nzbd = 0;
200: nnz = nzi;
201: k = lnk[n];
202: while (nnz-- && k < i){
203: nzbd++;
204: k = lnk[k];
205: }
206: bdiag[i] = bi[i] + nzbd;
208: /* if free space is not available, make more free space */
209: if (current_space->local_remaining<nzi) {
210: nnz = (n - i)*nzi; /* estimated and max additional space needed */
211: PetscFreeSpaceGet(nnz,¤t_space);
212: reallocs++;
213: }
215: /* copy data into free space, then initialize lnk */
216: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
217: bi_ptr[i] = current_space->array;
218: current_space->array += nzi;
219: current_space->local_used += nzi;
220: current_space->local_remaining -= nzi;
221: }
222: #if defined(PETSC_USE_INFO)
223: if (ai[n] != 0) {
224: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
225: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
226: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
227: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
228: PetscInfo(A,"for best performance.\n");
229: } else {
230: PetscInfo(A,"Empty matrix\n");
231: }
232: #endif
234: ISRestoreIndices(isrow,&r);
235: ISRestoreIndices(isicol,&ic);
237: /* destroy list of free space and other temporary array(s) */
238: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
239: PetscFreeSpaceContiguous(&free_space,bj);
240: PetscLLDestroy(lnk,lnkbt);
241: PetscFree2(bi_ptr,im);
243: /* put together the new matrix */
244: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);
245: PetscLogObjectParent(B,isicol);
246: b = (Mat_SeqAIJ*)(B)->data;
247: b->free_a = PETSC_TRUE;
248: b->free_ij = PETSC_TRUE;
249: b->singlemalloc = PETSC_FALSE;
250: PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
251: b->j = bj;
252: b->i = bi;
253: b->diag = bdiag;
254: b->ilen = 0;
255: b->imax = 0;
256: b->row = isrow;
257: b->col = iscol;
258: PetscObjectReference((PetscObject)isrow);
259: PetscObjectReference((PetscObject)iscol);
260: b->icol = isicol;
261: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
263: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
264: PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
265: b->maxnz = b->nz = bi[n] ;
267: (B)->factortype = MAT_FACTOR_LU;
268: (B)->info.factor_mallocs = reallocs;
269: (B)->info.fill_ratio_given = f;
271: if (ai[n]) {
272: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
273: } else {
274: (B)->info.fill_ratio_needed = 0.0;
275: }
276: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
277: if (a->inode.size) {
278: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
279: }
280: return(0);
281: }
285: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
286: {
287: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
288: IS isicol;
289: PetscErrorCode ierr;
290: const PetscInt *r,*ic;
291: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
292: PetscInt *bi,*bj,*ajtmp;
293: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
294: PetscReal f;
295: PetscInt nlnk,*lnk,k,**bi_ptr;
296: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
297: PetscBT lnkbt;
300: /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */
301: /*
302: PetscBool olddatastruct=PETSC_FALSE;
303: PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);
304: if(olddatastruct){
305: MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);
306: return(0);
307: }
308: */
309: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
310: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
311: ISGetIndices(isrow,&r);
312: ISGetIndices(isicol,&ic);
314: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
315: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
316: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
317: bi[0] = bdiag[0] = 0;
319: /* linked list for storing column indices of the active row */
320: nlnk = n + 1;
321: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
323: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
325: /* initial FreeSpace size is f*(ai[n]+1) */
326: f = info->fill;
327: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
328: current_space = free_space;
330: for (i=0; i<n; i++) {
331: /* copy previous fill into linked list */
332: nzi = 0;
333: nnz = ai[r[i]+1] - ai[r[i]];
334: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
335: ajtmp = aj + ai[r[i]];
336: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
337: nzi += nlnk;
339: /* add pivot rows into linked list */
340: row = lnk[n];
341: while (row < i){
342: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
343: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
344: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
345: nzi += nlnk;
346: row = lnk[row];
347: }
348: bi[i+1] = bi[i] + nzi;
349: im[i] = nzi;
351: /* mark bdiag */
352: nzbd = 0;
353: nnz = nzi;
354: k = lnk[n];
355: while (nnz-- && k < i){
356: nzbd++;
357: k = lnk[k];
358: }
359: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
361: /* if free space is not available, make more free space */
362: if (current_space->local_remaining<nzi) {
363: nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
364: PetscFreeSpaceGet(nnz,¤t_space);
365: reallocs++;
366: }
368: /* copy data into free space, then initialize lnk */
369: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
370: bi_ptr[i] = current_space->array;
371: current_space->array += nzi;
372: current_space->local_used += nzi;
373: current_space->local_remaining -= nzi;
374: }
376: ISRestoreIndices(isrow,&r);
377: ISRestoreIndices(isicol,&ic);
379: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
380: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
381: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
382: PetscLLDestroy(lnk,lnkbt);
383: PetscFree2(bi_ptr,im);
385: /* put together the new matrix */
386: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);
387: PetscLogObjectParent(B,isicol);
388: b = (Mat_SeqAIJ*)(B)->data;
389: b->free_a = PETSC_TRUE;
390: b->free_ij = PETSC_TRUE;
391: b->singlemalloc = PETSC_FALSE;
392: PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);
393: b->j = bj;
394: b->i = bi;
395: b->diag = bdiag;
396: b->ilen = 0;
397: b->imax = 0;
398: b->row = isrow;
399: b->col = iscol;
400: PetscObjectReference((PetscObject)isrow);
401: PetscObjectReference((PetscObject)iscol);
402: b->icol = isicol;
403: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
405: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
406: PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
407: b->maxnz = b->nz = bdiag[0]+1;
408: B->factortype = MAT_FACTOR_LU;
409: B->info.factor_mallocs = reallocs;
410: B->info.fill_ratio_given = f;
412: if (ai[n]) {
413: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
414: } else {
415: B->info.fill_ratio_needed = 0.0;
416: }
417: #if defined(PETSC_USE_INFO)
418: if (ai[n] != 0) {
419: PetscReal af = B->info.fill_ratio_needed;
420: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
421: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
422: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
423: PetscInfo(A,"for best performance.\n");
424: } else {
425: PetscInfo(A,"Empty matrix\n");
426: }
427: #endif
428: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
429: if (a->inode.size) {
430: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
431: }
432: return(0);
433: }
435: /*
436: Trouble in factorization, should we dump the original matrix?
437: */
440: PetscErrorCode MatFactorDumpMatrix(Mat A)
441: {
443: PetscBool flg = PETSC_FALSE;
446: PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);
447: if (flg) {
448: PetscViewer viewer;
449: char filename[PETSC_MAX_PATH_LEN];
451: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
452: PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);
453: MatView(A,viewer);
454: PetscViewerDestroy(&viewer);
455: }
456: return(0);
457: }
461: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
462: {
463: Mat C=B;
464: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
465: IS isrow = b->row,isicol = b->icol;
466: PetscErrorCode ierr;
467: const PetscInt *r,*ic,*ics;
468: const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
469: PetscInt i,j,k,nz,nzL,row,*pj;
470: const PetscInt *ajtmp,*bjtmp;
471: MatScalar *rtmp,*pc,multiplier,*pv;
472: const MatScalar *aa=a->a,*v;
473: PetscBool row_identity,col_identity;
474: FactorShiftCtx sctx;
475: const PetscInt *ddiag;
476: PetscReal rs;
477: MatScalar d;
480: /* MatPivotSetUp(): initialize shift context sctx */
481: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
483: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
484: ddiag = a->diag;
485: sctx.shift_top = info->zeropivot;
486: for (i=0; i<n; i++) {
487: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
488: d = (aa)[ddiag[i]];
489: rs = -PetscAbsScalar(d) - PetscRealPart(d);
490: v = aa+ai[i];
491: nz = ai[i+1] - ai[i];
492: for (j=0; j<nz; j++)
493: rs += PetscAbsScalar(v[j]);
494: if (rs>sctx.shift_top) sctx.shift_top = rs;
495: }
496: sctx.shift_top *= 1.1;
497: sctx.nshift_max = 5;
498: sctx.shift_lo = 0.;
499: sctx.shift_hi = 1.;
500: }
502: ISGetIndices(isrow,&r);
503: ISGetIndices(isicol,&ic);
504: PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
505: ics = ic;
507: do {
508: sctx.newshift = PETSC_FALSE;
509: for (i=0; i<n; i++){
510: /* zero rtmp */
511: /* L part */
512: nz = bi[i+1] - bi[i];
513: bjtmp = bj + bi[i];
514: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
516: /* U part */
517: nz = bdiag[i]-bdiag[i+1];
518: bjtmp = bj + bdiag[i+1]+1;
519: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
520:
521: /* load in initial (unfactored row) */
522: nz = ai[r[i]+1] - ai[r[i]];
523: ajtmp = aj + ai[r[i]];
524: v = aa + ai[r[i]];
525: for (j=0; j<nz; j++) {
526: rtmp[ics[ajtmp[j]]] = v[j];
527: }
528: /* ZeropivotApply() */
529: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
530:
531: /* elimination */
532: bjtmp = bj + bi[i];
533: row = *bjtmp++;
534: nzL = bi[i+1] - bi[i];
535: for(k=0; k < nzL;k++) {
536: pc = rtmp + row;
537: if (*pc != 0.0) {
538: pv = b->a + bdiag[row];
539: multiplier = *pc * (*pv);
540: *pc = multiplier;
541: pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
542: pv = b->a + bdiag[row+1]+1;
543: nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
544: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
545: PetscLogFlops(1+2*nz);
546: }
547: row = *bjtmp++;
548: }
550: /* finished row so stick it into b->a */
551: rs = 0.0;
552: /* L part */
553: pv = b->a + bi[i] ;
554: pj = b->j + bi[i] ;
555: nz = bi[i+1] - bi[i];
556: for (j=0; j<nz; j++) {
557: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
558: }
560: /* U part */
561: pv = b->a + bdiag[i+1]+1;
562: pj = b->j + bdiag[i+1]+1;
563: nz = bdiag[i] - bdiag[i+1]-1;
564: for (j=0; j<nz; j++) {
565: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
566: }
568: sctx.rs = rs;
569: sctx.pv = rtmp[i];
570: MatPivotCheck(A,info,&sctx,i);
571: if(sctx.newshift) break; /* break for-loop */
572: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
574: /* Mark diagonal and invert diagonal for simplier triangular solves */
575: pv = b->a + bdiag[i];
576: *pv = 1.0/rtmp[i];
578: } /* endof for (i=0; i<n; i++){ */
580: /* MatPivotRefine() */
581: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){
582: /*
583: * if no shift in this attempt & shifting & started shifting & can refine,
584: * then try lower shift
585: */
586: sctx.shift_hi = sctx.shift_fraction;
587: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
588: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
589: sctx.newshift = PETSC_TRUE;
590: sctx.nshift++;
591: }
592: } while (sctx.newshift);
594: PetscFree(rtmp);
595: ISRestoreIndices(isicol,&ic);
596: ISRestoreIndices(isrow,&r);
597:
598: ISIdentity(isrow,&row_identity);
599: ISIdentity(isicol,&col_identity);
600: if (row_identity && col_identity) {
601: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
602: } else {
603: C->ops->solve = MatSolve_SeqAIJ;
604: }
605: C->ops->solveadd = MatSolveAdd_SeqAIJ;
606: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
607: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
608: C->ops->matsolve = MatMatSolve_SeqAIJ;
609: C->assembled = PETSC_TRUE;
610: C->preallocated = PETSC_TRUE;
611: PetscLogFlops(C->cmap->n);
613: /* MatShiftView(A,info,&sctx) */
614: if (sctx.nshift){
615: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
616: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
617: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
618: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
619: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
620: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);
621: }
622: }
623: Mat_CheckInode_FactorLU(C,PETSC_FALSE);
624: return(0);
625: }
629: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
630: {
631: Mat C=B;
632: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
633: IS isrow = b->row,isicol = b->icol;
634: PetscErrorCode ierr;
635: const PetscInt *r,*ic,*ics;
636: PetscInt nz,row,i,j,n=A->rmap->n,diag;
637: const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
638: const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
639: MatScalar *pv,*rtmp,*pc,multiplier,d;
640: const MatScalar *v,*aa=a->a;
641: PetscReal rs=0.0;
642: FactorShiftCtx sctx;
643: const PetscInt *ddiag;
644: PetscBool row_identity, col_identity;
647: /* MatPivotSetUp(): initialize shift context sctx */
648: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
650: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
651: ddiag = a->diag;
652: sctx.shift_top = info->zeropivot;
653: for (i=0; i<n; i++) {
654: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
655: d = (aa)[ddiag[i]];
656: rs = -PetscAbsScalar(d) - PetscRealPart(d);
657: v = aa+ai[i];
658: nz = ai[i+1] - ai[i];
659: for (j=0; j<nz; j++)
660: rs += PetscAbsScalar(v[j]);
661: if (rs>sctx.shift_top) sctx.shift_top = rs;
662: }
663: sctx.shift_top *= 1.1;
664: sctx.nshift_max = 5;
665: sctx.shift_lo = 0.;
666: sctx.shift_hi = 1.;
667: }
669: ISGetIndices(isrow,&r);
670: ISGetIndices(isicol,&ic);
671: PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
672: ics = ic;
674: do {
675: sctx.newshift = PETSC_FALSE;
676: for (i=0; i<n; i++){
677: nz = bi[i+1] - bi[i];
678: bjtmp = bj + bi[i];
679: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
681: /* load in initial (unfactored row) */
682: nz = ai[r[i]+1] - ai[r[i]];
683: ajtmp = aj + ai[r[i]];
684: v = aa + ai[r[i]];
685: for (j=0; j<nz; j++) {
686: rtmp[ics[ajtmp[j]]] = v[j];
687: }
688: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
690: row = *bjtmp++;
691: while (row < i) {
692: pc = rtmp + row;
693: if (*pc != 0.0) {
694: pv = b->a + diag_offset[row];
695: pj = b->j + diag_offset[row] + 1;
696: multiplier = *pc / *pv++;
697: *pc = multiplier;
698: nz = bi[row+1] - diag_offset[row] - 1;
699: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
700: PetscLogFlops(1+2*nz);
701: }
702: row = *bjtmp++;
703: }
704: /* finished row so stick it into b->a */
705: pv = b->a + bi[i] ;
706: pj = b->j + bi[i] ;
707: nz = bi[i+1] - bi[i];
708: diag = diag_offset[i] - bi[i];
709: rs = 0.0;
710: for (j=0; j<nz; j++) {
711: pv[j] = rtmp[pj[j]];
712: rs += PetscAbsScalar(pv[j]);
713: }
714: rs -= PetscAbsScalar(pv[diag]);
716: sctx.rs = rs;
717: sctx.pv = pv[diag];
718: MatPivotCheck(A,info,&sctx,i);
719: if (sctx.newshift) break;
720: pv[diag] = sctx.pv;
721: }
723: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
724: /*
725: * if no shift in this attempt & shifting & started shifting & can refine,
726: * then try lower shift
727: */
728: sctx.shift_hi = sctx.shift_fraction;
729: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
730: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
731: sctx.newshift = PETSC_TRUE;
732: sctx.nshift++;
733: }
734: } while (sctx.newshift);
736: /* invert diagonal entries for simplier triangular solves */
737: for (i=0; i<n; i++) {
738: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
739: }
740: PetscFree(rtmp);
741: ISRestoreIndices(isicol,&ic);
742: ISRestoreIndices(isrow,&r);
744: ISIdentity(isrow,&row_identity);
745: ISIdentity(isicol,&col_identity);
746: if (row_identity && col_identity) {
747: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
748: } else {
749: C->ops->solve = MatSolve_SeqAIJ_inplace;
750: }
751: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
752: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
753: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
754: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
755: C->assembled = PETSC_TRUE;
756: C->preallocated = PETSC_TRUE;
757: PetscLogFlops(C->cmap->n);
758: if (sctx.nshift){
759: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
760: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
761: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
762: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
763: }
764: }
765: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
766: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
767: Mat_CheckInode(C,PETSC_FALSE);
768: return(0);
769: }
771: /*
772: This routine implements inplace ILU(0) with row or/and column permutations.
773: Input:
774: A - original matrix
775: Output;
776: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
777: a->j (col index) is permuted by the inverse of colperm, then sorted
778: a->a reordered accordingly with a->j
779: a->diag (ptr to diagonal elements) is updated.
780: */
783: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
784: {
785: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
786: IS isrow = a->row,isicol = a->icol;
788: const PetscInt *r,*ic,*ics;
789: PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
790: PetscInt *ajtmp,nz,row;
791: PetscInt *diag = a->diag,nbdiag,*pj;
792: PetscScalar *rtmp,*pc,multiplier,d;
793: MatScalar *pv,*v;
794: PetscReal rs;
795: FactorShiftCtx sctx;
796: const MatScalar *aa=a->a,*vtmp;
799: if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
801: /* MatPivotSetUp(): initialize shift context sctx */
802: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
804: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
805: const PetscInt *ddiag = a->diag;
806: sctx.shift_top = info->zeropivot;
807: for (i=0; i<n; i++) {
808: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
809: d = (aa)[ddiag[i]];
810: rs = -PetscAbsScalar(d) - PetscRealPart(d);
811: vtmp = aa+ai[i];
812: nz = ai[i+1] - ai[i];
813: for (j=0; j<nz; j++)
814: rs += PetscAbsScalar(vtmp[j]);
815: if (rs>sctx.shift_top) sctx.shift_top = rs;
816: }
817: sctx.shift_top *= 1.1;
818: sctx.nshift_max = 5;
819: sctx.shift_lo = 0.;
820: sctx.shift_hi = 1.;
821: }
823: ISGetIndices(isrow,&r);
824: ISGetIndices(isicol,&ic);
825: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
826: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
827: ics = ic;
829: #if defined(MV)
830: sctx.shift_top = 0.;
831: sctx.nshift_max = 0;
832: sctx.shift_lo = 0.;
833: sctx.shift_hi = 0.;
834: sctx.shift_fraction = 0.;
836: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
837: sctx.shift_top = 0.;
838: for (i=0; i<n; i++) {
839: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
840: d = (a->a)[diag[i]];
841: rs = -PetscAbsScalar(d) - PetscRealPart(d);
842: v = a->a+ai[i];
843: nz = ai[i+1] - ai[i];
844: for (j=0; j<nz; j++)
845: rs += PetscAbsScalar(v[j]);
846: if (rs>sctx.shift_top) sctx.shift_top = rs;
847: }
848: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
849: sctx.shift_top *= 1.1;
850: sctx.nshift_max = 5;
851: sctx.shift_lo = 0.;
852: sctx.shift_hi = 1.;
853: }
855: sctx.shift_amount = 0.;
856: sctx.nshift = 0;
857: #endif
859: do {
860: sctx.newshift = PETSC_FALSE;
861: for (i=0; i<n; i++){
862: /* load in initial unfactored row */
863: nz = ai[r[i]+1] - ai[r[i]];
864: ajtmp = aj + ai[r[i]];
865: v = a->a + ai[r[i]];
866: /* sort permuted ajtmp and values v accordingly */
867: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
868: PetscSortIntWithScalarArray(nz,ajtmp,v);
870: diag[r[i]] = ai[r[i]];
871: for (j=0; j<nz; j++) {
872: rtmp[ajtmp[j]] = v[j];
873: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
874: }
875: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
877: row = *ajtmp++;
878: while (row < i) {
879: pc = rtmp + row;
880: if (*pc != 0.0) {
881: pv = a->a + diag[r[row]];
882: pj = aj + diag[r[row]] + 1;
884: multiplier = *pc / *pv++;
885: *pc = multiplier;
886: nz = ai[r[row]+1] - diag[r[row]] - 1;
887: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
888: PetscLogFlops(1+2*nz);
889: }
890: row = *ajtmp++;
891: }
892: /* finished row so overwrite it onto a->a */
893: pv = a->a + ai[r[i]] ;
894: pj = aj + ai[r[i]] ;
895: nz = ai[r[i]+1] - ai[r[i]];
896: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
897:
898: rs = 0.0;
899: for (j=0; j<nz; j++) {
900: pv[j] = rtmp[pj[j]];
901: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
902: }
904: sctx.rs = rs;
905: sctx.pv = pv[nbdiag];
906: MatPivotCheck(A,info,&sctx,i);
907: if (sctx.newshift) break;
908: pv[nbdiag] = sctx.pv;
909: }
911: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
912: /*
913: * if no shift in this attempt & shifting & started shifting & can refine,
914: * then try lower shift
915: */
916: sctx.shift_hi = sctx.shift_fraction;
917: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
918: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
919: sctx.newshift = PETSC_TRUE;
920: sctx.nshift++;
921: }
922: } while (sctx.newshift);
924: /* invert diagonal entries for simplier triangular solves */
925: for (i=0; i<n; i++) {
926: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
927: }
929: PetscFree(rtmp);
930: ISRestoreIndices(isicol,&ic);
931: ISRestoreIndices(isrow,&r);
932: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
933: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
934: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
935: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
936: A->assembled = PETSC_TRUE;
937: A->preallocated = PETSC_TRUE;
938: PetscLogFlops(A->cmap->n);
939: if (sctx.nshift){
940: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
941: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
942: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
943: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
944: }
945: }
946: return(0);
947: }
949: /* ----------------------------------------------------------- */
952: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
953: {
955: Mat C;
958: MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);
959: MatLUFactorSymbolic(C,A,row,col,info);
960: MatLUFactorNumeric(C,A,info);
961: A->ops->solve = C->ops->solve;
962: A->ops->solvetranspose = C->ops->solvetranspose;
963: MatHeaderMerge(A,C);
964: PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
965: return(0);
966: }
967: /* ----------------------------------------------------------- */
972: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
973: {
974: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
975: IS iscol = a->col,isrow = a->row;
976: PetscErrorCode ierr;
977: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
978: PetscInt nz;
979: const PetscInt *rout,*cout,*r,*c;
980: PetscScalar *x,*tmp,*tmps,sum;
981: const PetscScalar *b;
982: const MatScalar *aa = a->a,*v;
983:
985: if (!n) return(0);
987: VecGetArrayRead(bb,&b);
988: VecGetArray(xx,&x);
989: tmp = a->solve_work;
991: ISGetIndices(isrow,&rout); r = rout;
992: ISGetIndices(iscol,&cout); c = cout + (n-1);
994: /* forward solve the lower triangular */
995: tmp[0] = b[*r++];
996: tmps = tmp;
997: for (i=1; i<n; i++) {
998: v = aa + ai[i] ;
999: vi = aj + ai[i] ;
1000: nz = a->diag[i] - ai[i];
1001: sum = b[*r++];
1002: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1003: tmp[i] = sum;
1004: }
1006: /* backward solve the upper triangular */
1007: for (i=n-1; i>=0; i--){
1008: v = aa + a->diag[i] + 1;
1009: vi = aj + a->diag[i] + 1;
1010: nz = ai[i+1] - a->diag[i] - 1;
1011: sum = tmp[i];
1012: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1013: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1014: }
1016: ISRestoreIndices(isrow,&rout);
1017: ISRestoreIndices(iscol,&cout);
1018: VecRestoreArrayRead(bb,&b);
1019: VecRestoreArray(xx,&x);
1020: PetscLogFlops(2.0*a->nz - A->cmap->n);
1021: return(0);
1022: }
1026: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1027: {
1028: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1029: IS iscol = a->col,isrow = a->row;
1030: PetscErrorCode ierr;
1031: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1032: PetscInt nz,neq;
1033: const PetscInt *rout,*cout,*r,*c;
1034: PetscScalar *x,*b,*tmp,*tmps,sum;
1035: const MatScalar *aa = a->a,*v;
1036: PetscBool bisdense,xisdense;
1039: if (!n) return(0);
1041: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1042: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1043: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1044: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1046: MatGetArray(B,&b);
1047: MatGetArray(X,&x);
1048:
1049: tmp = a->solve_work;
1050: ISGetIndices(isrow,&rout); r = rout;
1051: ISGetIndices(iscol,&cout); c = cout;
1053: for (neq=0; neq<B->cmap->n; neq++){
1054: /* forward solve the lower triangular */
1055: tmp[0] = b[r[0]];
1056: tmps = tmp;
1057: for (i=1; i<n; i++) {
1058: v = aa + ai[i] ;
1059: vi = aj + ai[i] ;
1060: nz = a->diag[i] - ai[i];
1061: sum = b[r[i]];
1062: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1063: tmp[i] = sum;
1064: }
1065: /* backward solve the upper triangular */
1066: for (i=n-1; i>=0; i--){
1067: v = aa + a->diag[i] + 1;
1068: vi = aj + a->diag[i] + 1;
1069: nz = ai[i+1] - a->diag[i] - 1;
1070: sum = tmp[i];
1071: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1072: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1073: }
1075: b += n;
1076: x += n;
1077: }
1078: ISRestoreIndices(isrow,&rout);
1079: ISRestoreIndices(iscol,&cout);
1080: MatRestoreArray(B,&b);
1081: MatRestoreArray(X,&x);
1082: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1083: return(0);
1084: }
1088: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1089: {
1090: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1091: IS iscol = a->col,isrow = a->row;
1092: PetscErrorCode ierr;
1093: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1094: PetscInt nz,neq;
1095: const PetscInt *rout,*cout,*r,*c;
1096: PetscScalar *x,*b,*tmp,sum;
1097: const MatScalar *aa = a->a,*v;
1098: PetscBool bisdense,xisdense;
1101: if (!n) return(0);
1103: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1104: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1105: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1106: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1108: MatGetArray(B,&b);
1109: MatGetArray(X,&x);
1110:
1111: tmp = a->solve_work;
1112: ISGetIndices(isrow,&rout); r = rout;
1113: ISGetIndices(iscol,&cout); c = cout;
1115: for (neq=0; neq<B->cmap->n; neq++){
1116: /* forward solve the lower triangular */
1117: tmp[0] = b[r[0]];
1118: v = aa;
1119: vi = aj;
1120: for (i=1; i<n; i++) {
1121: nz = ai[i+1] - ai[i];
1122: sum = b[r[i]];
1123: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1124: tmp[i] = sum;
1125: v += nz; vi += nz;
1126: }
1128: /* backward solve the upper triangular */
1129: for (i=n-1; i>=0; i--){
1130: v = aa + adiag[i+1]+1;
1131: vi = aj + adiag[i+1]+1;
1132: nz = adiag[i]-adiag[i+1]-1;
1133: sum = tmp[i];
1134: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1135: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1136: }
1137:
1138: b += n;
1139: x += n;
1140: }
1141: ISRestoreIndices(isrow,&rout);
1142: ISRestoreIndices(iscol,&cout);
1143: MatRestoreArray(B,&b);
1144: MatRestoreArray(X,&x);
1145: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1146: return(0);
1147: }
1151: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1152: {
1153: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1154: IS iscol = a->col,isrow = a->row;
1155: PetscErrorCode ierr;
1156: const PetscInt *r,*c,*rout,*cout;
1157: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1158: PetscInt nz,row;
1159: PetscScalar *x,*b,*tmp,*tmps,sum;
1160: const MatScalar *aa = a->a,*v;
1163: if (!n) return(0);
1165: VecGetArray(bb,&b);
1166: VecGetArray(xx,&x);
1167: tmp = a->solve_work;
1169: ISGetIndices(isrow,&rout); r = rout;
1170: ISGetIndices(iscol,&cout); c = cout + (n-1);
1172: /* forward solve the lower triangular */
1173: tmp[0] = b[*r++];
1174: tmps = tmp;
1175: for (row=1; row<n; row++) {
1176: i = rout[row]; /* permuted row */
1177: v = aa + ai[i] ;
1178: vi = aj + ai[i] ;
1179: nz = a->diag[i] - ai[i];
1180: sum = b[*r++];
1181: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1182: tmp[row] = sum;
1183: }
1185: /* backward solve the upper triangular */
1186: for (row=n-1; row>=0; row--){
1187: i = rout[row]; /* permuted row */
1188: v = aa + a->diag[i] + 1;
1189: vi = aj + a->diag[i] + 1;
1190: nz = ai[i+1] - a->diag[i] - 1;
1191: sum = tmp[row];
1192: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1193: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1194: }
1196: ISRestoreIndices(isrow,&rout);
1197: ISRestoreIndices(iscol,&cout);
1198: VecRestoreArray(bb,&b);
1199: VecRestoreArray(xx,&x);
1200: PetscLogFlops(2.0*a->nz - A->cmap->n);
1201: return(0);
1202: }
1204: /* ----------------------------------------------------------- */
1205: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1208: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1209: {
1210: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1211: PetscErrorCode ierr;
1212: PetscInt n = A->rmap->n;
1213: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag;
1214: PetscScalar *x;
1215: const PetscScalar *b;
1216: const MatScalar *aa = a->a;
1217: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1218: PetscInt adiag_i,i,nz,ai_i;
1219: const PetscInt *vi;
1220: const MatScalar *v;
1221: PetscScalar sum;
1222: #endif
1225: if (!n) return(0);
1227: VecGetArrayRead(bb,&b);
1228: VecGetArray(xx,&x);
1230: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1231: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1232: #else
1233: /* forward solve the lower triangular */
1234: x[0] = b[0];
1235: for (i=1; i<n; i++) {
1236: ai_i = ai[i];
1237: v = aa + ai_i;
1238: vi = aj + ai_i;
1239: nz = adiag[i] - ai_i;
1240: sum = b[i];
1241: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1242: x[i] = sum;
1243: }
1245: /* backward solve the upper triangular */
1246: for (i=n-1; i>=0; i--){
1247: adiag_i = adiag[i];
1248: v = aa + adiag_i + 1;
1249: vi = aj + adiag_i + 1;
1250: nz = ai[i+1] - adiag_i - 1;
1251: sum = x[i];
1252: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1253: x[i] = sum*aa[adiag_i];
1254: }
1255: #endif
1256: PetscLogFlops(2.0*a->nz - A->cmap->n);
1257: VecRestoreArrayRead(bb,&b);
1258: VecRestoreArray(xx,&x);
1259: return(0);
1260: }
1264: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1265: {
1266: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1267: IS iscol = a->col,isrow = a->row;
1268: PetscErrorCode ierr;
1269: PetscInt i, n = A->rmap->n,j;
1270: PetscInt nz;
1271: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1272: PetscScalar *x,*tmp,sum;
1273: const PetscScalar *b;
1274: const MatScalar *aa = a->a,*v;
1277: if (yy != xx) {VecCopy(yy,xx);}
1279: VecGetArrayRead(bb,&b);
1280: VecGetArray(xx,&x);
1281: tmp = a->solve_work;
1283: ISGetIndices(isrow,&rout); r = rout;
1284: ISGetIndices(iscol,&cout); c = cout + (n-1);
1286: /* forward solve the lower triangular */
1287: tmp[0] = b[*r++];
1288: for (i=1; i<n; i++) {
1289: v = aa + ai[i] ;
1290: vi = aj + ai[i] ;
1291: nz = a->diag[i] - ai[i];
1292: sum = b[*r++];
1293: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1294: tmp[i] = sum;
1295: }
1297: /* backward solve the upper triangular */
1298: for (i=n-1; i>=0; i--){
1299: v = aa + a->diag[i] + 1;
1300: vi = aj + a->diag[i] + 1;
1301: nz = ai[i+1] - a->diag[i] - 1;
1302: sum = tmp[i];
1303: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1304: tmp[i] = sum*aa[a->diag[i]];
1305: x[*c--] += tmp[i];
1306: }
1308: ISRestoreIndices(isrow,&rout);
1309: ISRestoreIndices(iscol,&cout);
1310: VecRestoreArrayRead(bb,&b);
1311: VecRestoreArray(xx,&x);
1312: PetscLogFlops(2.0*a->nz);
1314: return(0);
1315: }
1319: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1320: {
1321: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1322: IS iscol = a->col,isrow = a->row;
1323: PetscErrorCode ierr;
1324: PetscInt i, n = A->rmap->n,j;
1325: PetscInt nz;
1326: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1327: PetscScalar *x,*tmp,sum;
1328: const PetscScalar *b;
1329: const MatScalar *aa = a->a,*v;
1332: if (yy != xx) {VecCopy(yy,xx);}
1334: VecGetArrayRead(bb,&b);
1335: VecGetArray(xx,&x);
1336: tmp = a->solve_work;
1338: ISGetIndices(isrow,&rout); r = rout;
1339: ISGetIndices(iscol,&cout); c = cout;
1341: /* forward solve the lower triangular */
1342: tmp[0] = b[r[0]];
1343: v = aa;
1344: vi = aj;
1345: for (i=1; i<n; i++) {
1346: nz = ai[i+1] - ai[i];
1347: sum = b[r[i]];
1348: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1349: tmp[i] = sum;
1350: v += nz; vi += nz;
1351: }
1353: /* backward solve the upper triangular */
1354: v = aa + adiag[n-1];
1355: vi = aj + adiag[n-1];
1356: for (i=n-1; i>=0; i--){
1357: nz = adiag[i] - adiag[i+1] - 1;
1358: sum = tmp[i];
1359: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1360: tmp[i] = sum*v[nz];
1361: x[c[i]] += tmp[i];
1362: v += nz+1; vi += nz+1;
1363: }
1365: ISRestoreIndices(isrow,&rout);
1366: ISRestoreIndices(iscol,&cout);
1367: VecRestoreArrayRead(bb,&b);
1368: VecRestoreArray(xx,&x);
1369: PetscLogFlops(2.0*a->nz);
1371: return(0);
1372: }
1376: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1377: {
1378: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1379: IS iscol = a->col,isrow = a->row;
1380: PetscErrorCode ierr;
1381: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1382: PetscInt i,n = A->rmap->n,j;
1383: PetscInt nz;
1384: PetscScalar *x,*tmp,s1;
1385: const MatScalar *aa = a->a,*v;
1386: const PetscScalar *b;
1389: VecGetArrayRead(bb,&b);
1390: VecGetArray(xx,&x);
1391: tmp = a->solve_work;
1393: ISGetIndices(isrow,&rout); r = rout;
1394: ISGetIndices(iscol,&cout); c = cout;
1396: /* copy the b into temp work space according to permutation */
1397: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1399: /* forward solve the U^T */
1400: for (i=0; i<n; i++) {
1401: v = aa + diag[i] ;
1402: vi = aj + diag[i] + 1;
1403: nz = ai[i+1] - diag[i] - 1;
1404: s1 = tmp[i];
1405: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1406: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1407: tmp[i] = s1;
1408: }
1410: /* backward solve the L^T */
1411: for (i=n-1; i>=0; i--){
1412: v = aa + diag[i] - 1 ;
1413: vi = aj + diag[i] - 1 ;
1414: nz = diag[i] - ai[i];
1415: s1 = tmp[i];
1416: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1417: }
1419: /* copy tmp into x according to permutation */
1420: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1422: ISRestoreIndices(isrow,&rout);
1423: ISRestoreIndices(iscol,&cout);
1424: VecRestoreArrayRead(bb,&b);
1425: VecRestoreArray(xx,&x);
1427: PetscLogFlops(2.0*a->nz-A->cmap->n);
1428: return(0);
1429: }
1433: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1434: {
1435: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1436: IS iscol = a->col,isrow = a->row;
1437: PetscErrorCode ierr;
1438: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1439: PetscInt i,n = A->rmap->n,j;
1440: PetscInt nz;
1441: PetscScalar *x,*tmp,s1;
1442: const MatScalar *aa = a->a,*v;
1443: const PetscScalar *b;
1446: VecGetArrayRead(bb,&b);
1447: VecGetArray(xx,&x);
1448: tmp = a->solve_work;
1450: ISGetIndices(isrow,&rout); r = rout;
1451: ISGetIndices(iscol,&cout); c = cout;
1453: /* copy the b into temp work space according to permutation */
1454: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1456: /* forward solve the U^T */
1457: for (i=0; i<n; i++) {
1458: v = aa + adiag[i+1] + 1;
1459: vi = aj + adiag[i+1] + 1;
1460: nz = adiag[i] - adiag[i+1] - 1;
1461: s1 = tmp[i];
1462: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1463: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1464: tmp[i] = s1;
1465: }
1467: /* backward solve the L^T */
1468: for (i=n-1; i>=0; i--){
1469: v = aa + ai[i];
1470: vi = aj + ai[i];
1471: nz = ai[i+1] - ai[i];
1472: s1 = tmp[i];
1473: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1474: }
1476: /* copy tmp into x according to permutation */
1477: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1479: ISRestoreIndices(isrow,&rout);
1480: ISRestoreIndices(iscol,&cout);
1481: VecRestoreArrayRead(bb,&b);
1482: VecRestoreArray(xx,&x);
1484: PetscLogFlops(2.0*a->nz-A->cmap->n);
1485: return(0);
1486: }
1490: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1491: {
1492: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1493: IS iscol = a->col,isrow = a->row;
1494: PetscErrorCode ierr;
1495: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1496: PetscInt i,n = A->rmap->n,j;
1497: PetscInt nz;
1498: PetscScalar *x,*tmp,s1;
1499: const MatScalar *aa = a->a,*v;
1500: const PetscScalar *b;
1503: if (zz != xx) {VecCopy(zz,xx);}
1504: VecGetArrayRead(bb,&b);
1505: VecGetArray(xx,&x);
1506: tmp = a->solve_work;
1508: ISGetIndices(isrow,&rout); r = rout;
1509: ISGetIndices(iscol,&cout); c = cout;
1511: /* copy the b into temp work space according to permutation */
1512: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1514: /* forward solve the U^T */
1515: for (i=0; i<n; i++) {
1516: v = aa + diag[i] ;
1517: vi = aj + diag[i] + 1;
1518: nz = ai[i+1] - diag[i] - 1;
1519: s1 = tmp[i];
1520: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1521: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1522: tmp[i] = s1;
1523: }
1525: /* backward solve the L^T */
1526: for (i=n-1; i>=0; i--){
1527: v = aa + diag[i] - 1 ;
1528: vi = aj + diag[i] - 1 ;
1529: nz = diag[i] - ai[i];
1530: s1 = tmp[i];
1531: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1532: }
1534: /* copy tmp into x according to permutation */
1535: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1537: ISRestoreIndices(isrow,&rout);
1538: ISRestoreIndices(iscol,&cout);
1539: VecRestoreArrayRead(bb,&b);
1540: VecRestoreArray(xx,&x);
1542: PetscLogFlops(2.0*a->nz-A->cmap->n);
1543: return(0);
1544: }
1548: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1549: {
1550: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1551: IS iscol = a->col,isrow = a->row;
1552: PetscErrorCode ierr;
1553: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1554: PetscInt i,n = A->rmap->n,j;
1555: PetscInt nz;
1556: PetscScalar *x,*tmp,s1;
1557: const MatScalar *aa = a->a,*v;
1558: const PetscScalar *b;
1561: if (zz != xx) {VecCopy(zz,xx);}
1562: VecGetArrayRead(bb,&b);
1563: VecGetArray(xx,&x);
1564: tmp = a->solve_work;
1566: ISGetIndices(isrow,&rout); r = rout;
1567: ISGetIndices(iscol,&cout); c = cout;
1569: /* copy the b into temp work space according to permutation */
1570: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1572: /* forward solve the U^T */
1573: for (i=0; i<n; i++) {
1574: v = aa + adiag[i+1] + 1;
1575: vi = aj + adiag[i+1] + 1;
1576: nz = adiag[i] - adiag[i+1] - 1;
1577: s1 = tmp[i];
1578: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1579: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1580: tmp[i] = s1;
1581: }
1584: /* backward solve the L^T */
1585: for (i=n-1; i>=0; i--){
1586: v = aa + ai[i] ;
1587: vi = aj + ai[i];
1588: nz = ai[i+1] - ai[i];
1589: s1 = tmp[i];
1590: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1591: }
1593: /* copy tmp into x according to permutation */
1594: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1596: ISRestoreIndices(isrow,&rout);
1597: ISRestoreIndices(iscol,&cout);
1598: VecRestoreArrayRead(bb,&b);
1599: VecRestoreArray(xx,&x);
1601: PetscLogFlops(2.0*a->nz-A->cmap->n);
1602: return(0);
1603: }
1605: /* ----------------------------------------------------------------*/
1607: extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool );
1609: /*
1610: ilu() under revised new data structure.
1611: Factored arrays bj and ba are stored as
1612: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1614: bi=fact->i is an array of size n+1, in which
1615: bi+
1616: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1617: bi[n]: points to L(n-1,n-1)+1
1618:
1619: bdiag=fact->diag is an array of size n+1,in which
1620: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1621: bdiag[n]: points to entry of U(n-1,0)-1
1623: U(i,:) contains bdiag[i] as its last entry, i.e.,
1624: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1625: */
1628: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1629: {
1630:
1631: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1632: PetscErrorCode ierr;
1633: const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1634: PetscInt i,j,k=0,nz,*bi,*bj,*bdiag;
1635: PetscBool missing;
1636: IS isicol;
1639: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1640: MatMissingDiagonal(A,&missing,&i);
1641: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1642: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1644: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);
1645: b = (Mat_SeqAIJ*)(fact)->data;
1647: /* allocate matrix arrays for new data structure */
1648: PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);
1649: PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));
1650: b->singlemalloc = PETSC_TRUE;
1651: if (!b->diag){
1652: PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);
1653: PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));
1654: }
1655: bdiag = b->diag;
1656:
1657: if (n > 0) {
1658: PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));
1659: }
1660:
1661: /* set bi and bj with new data structure */
1662: bi = b->i;
1663: bj = b->j;
1665: /* L part */
1666: bi[0] = 0;
1667: for (i=0; i<n; i++){
1668: nz = adiag[i] - ai[i];
1669: bi[i+1] = bi[i] + nz;
1670: aj = a->j + ai[i];
1671: for (j=0; j<nz; j++){
1672: /* *bj = aj[j]; bj++; */
1673: bj[k++] = aj[j];
1674: }
1675: }
1676:
1677: /* U part */
1678: bdiag[n] = bi[n]-1;
1679: for (i=n-1; i>=0; i--){
1680: nz = ai[i+1] - adiag[i] - 1;
1681: aj = a->j + adiag[i] + 1;
1682: for (j=0; j<nz; j++){
1683: /* *bj = aj[j]; bj++; */
1684: bj[k++] = aj[j];
1685: }
1686: /* diag[i] */
1687: /* *bj = i; bj++; */
1688: bj[k++] = i;
1689: bdiag[i] = bdiag[i+1] + nz + 1;
1690: }
1692: fact->factortype = MAT_FACTOR_ILU;
1693: fact->info.factor_mallocs = 0;
1694: fact->info.fill_ratio_given = info->fill;
1695: fact->info.fill_ratio_needed = 1.0;
1696: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1698: b = (Mat_SeqAIJ*)(fact)->data;
1699: b->row = isrow;
1700: b->col = iscol;
1701: b->icol = isicol;
1702: PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);
1703: PetscObjectReference((PetscObject)isrow);
1704: PetscObjectReference((PetscObject)iscol);
1705: return(0);
1706: }
1710: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1711: {
1712: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1713: IS isicol;
1714: PetscErrorCode ierr;
1715: const PetscInt *r,*ic;
1716: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1717: PetscInt *bi,*cols,nnz,*cols_lvl;
1718: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1719: PetscInt i,levels,diagonal_fill;
1720: PetscBool col_identity,row_identity;
1721: PetscReal f;
1722: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1723: PetscBT lnkbt;
1724: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1725: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1726: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1727:
1729: /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1730: /*
1731: PetscBool olddatastruct=PETSC_FALSE;
1732: PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);
1733: if(olddatastruct){
1734: MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);
1735: return(0);
1736: }
1737: */
1738:
1739: levels = (PetscInt)info->levels;
1740: ISIdentity(isrow,&row_identity);
1741: ISIdentity(iscol,&col_identity);
1743: if (!levels && row_identity && col_identity) {
1744: /* special case: ilu(0) with natural ordering */
1745: MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);
1746: if (a->inode.size) {
1747: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1748: }
1749: return(0);
1750: }
1752: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1753: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1754: ISGetIndices(isrow,&r);
1755: ISGetIndices(isicol,&ic);
1757: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1758: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1759: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1760: bi[0] = bdiag[0] = 0;
1762: PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);
1764: /* create a linked list for storing column indices of the active row */
1765: nlnk = n + 1;
1766: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1768: /* initial FreeSpace size is f*(ai[n]+1) */
1769: f = info->fill;
1770: diagonal_fill = (PetscInt)info->diagonal_fill;
1771: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1772: current_space = free_space;
1773: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1774: current_space_lvl = free_space_lvl;
1775:
1776: for (i=0; i<n; i++) {
1777: nzi = 0;
1778: /* copy current row into linked list */
1779: nnz = ai[r[i]+1] - ai[r[i]];
1780: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1781: cols = aj + ai[r[i]];
1782: lnk[i] = -1; /* marker to indicate if diagonal exists */
1783: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1784: nzi += nlnk;
1786: /* make sure diagonal entry is included */
1787: if (diagonal_fill && lnk[i] == -1) {
1788: fm = n;
1789: while (lnk[fm] < i) fm = lnk[fm];
1790: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1791: lnk[fm] = i;
1792: lnk_lvl[i] = 0;
1793: nzi++; dcount++;
1794: }
1796: /* add pivot rows into the active row */
1797: nzbd = 0;
1798: prow = lnk[n];
1799: while (prow < i) {
1800: nnz = bdiag[prow];
1801: cols = bj_ptr[prow] + nnz + 1;
1802: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1803: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1804: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1805: nzi += nlnk;
1806: prow = lnk[prow];
1807: nzbd++;
1808: }
1809: bdiag[i] = nzbd;
1810: bi[i+1] = bi[i] + nzi;
1812: /* if free space is not available, make more free space */
1813: if (current_space->local_remaining<nzi) {
1814: nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1815: PetscFreeSpaceGet(nnz,¤t_space);
1816: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1817: reallocs++;
1818: }
1820: /* copy data into free_space and free_space_lvl, then initialize lnk */
1821: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1822: bj_ptr[i] = current_space->array;
1823: bjlvl_ptr[i] = current_space_lvl->array;
1825: /* make sure the active row i has diagonal entry */
1826: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1828: current_space->array += nzi;
1829: current_space->local_used += nzi;
1830: current_space->local_remaining -= nzi;
1831: current_space_lvl->array += nzi;
1832: current_space_lvl->local_used += nzi;
1833: current_space_lvl->local_remaining -= nzi;
1834: }
1836: ISRestoreIndices(isrow,&r);
1837: ISRestoreIndices(isicol,&ic);
1839: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1840: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
1841: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
1842:
1843: PetscIncompleteLLDestroy(lnk,lnkbt);
1844: PetscFreeSpaceDestroy(free_space_lvl);
1845: PetscFree2(bj_ptr,bjlvl_ptr);
1847: #if defined(PETSC_USE_INFO)
1848: {
1849: PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1850: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
1851: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
1852: PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
1853: PetscInfo(A,"for best performance.\n");
1854: if (diagonal_fill) {
1855: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
1856: }
1857: }
1858: #endif
1860: /* put together the new matrix */
1861: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
1862: PetscLogObjectParent(fact,isicol);
1863: b = (Mat_SeqAIJ*)(fact)->data;
1864: b->free_a = PETSC_TRUE;
1865: b->free_ij = PETSC_TRUE;
1866: b->singlemalloc = PETSC_FALSE;
1867: PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);
1868: b->j = bj;
1869: b->i = bi;
1870: b->diag = bdiag;
1871: b->ilen = 0;
1872: b->imax = 0;
1873: b->row = isrow;
1874: b->col = iscol;
1875: PetscObjectReference((PetscObject)isrow);
1876: PetscObjectReference((PetscObject)iscol);
1877: b->icol = isicol;
1878: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1879: /* In b structure: Free imax, ilen, old a, old j.
1880: Allocate bdiag, solve_work, new a, new j */
1881: PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
1882: b->maxnz = b->nz = bdiag[0]+1;
1883: (fact)->info.factor_mallocs = reallocs;
1884: (fact)->info.fill_ratio_given = f;
1885: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1886: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1887: if (a->inode.size) {
1888: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1889: }
1890: return(0);
1891: }
1895: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1896: {
1897: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1898: IS isicol;
1899: PetscErrorCode ierr;
1900: const PetscInt *r,*ic;
1901: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1902: PetscInt *bi,*cols,nnz,*cols_lvl;
1903: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1904: PetscInt i,levels,diagonal_fill;
1905: PetscBool col_identity,row_identity;
1906: PetscReal f;
1907: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1908: PetscBT lnkbt;
1909: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1910: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1911: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1912: PetscBool missing;
1913:
1915: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1916: f = info->fill;
1917: levels = (PetscInt)info->levels;
1918: diagonal_fill = (PetscInt)info->diagonal_fill;
1919: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1921: ISIdentity(isrow,&row_identity);
1922: ISIdentity(iscol,&col_identity);
1923: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1924: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);
1925: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1926: if (a->inode.size) {
1927: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1928: }
1929: fact->factortype = MAT_FACTOR_ILU;
1930: (fact)->info.factor_mallocs = 0;
1931: (fact)->info.fill_ratio_given = info->fill;
1932: (fact)->info.fill_ratio_needed = 1.0;
1933: b = (Mat_SeqAIJ*)(fact)->data;
1934: MatMissingDiagonal(A,&missing,&d);
1935: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1936: b->row = isrow;
1937: b->col = iscol;
1938: b->icol = isicol;
1939: PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);
1940: PetscObjectReference((PetscObject)isrow);
1941: PetscObjectReference((PetscObject)iscol);
1942: return(0);
1943: }
1945: ISGetIndices(isrow,&r);
1946: ISGetIndices(isicol,&ic);
1948: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1949: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1950: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1951: bi[0] = bdiag[0] = 0;
1953: PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);
1955: /* create a linked list for storing column indices of the active row */
1956: nlnk = n + 1;
1957: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1959: /* initial FreeSpace size is f*(ai[n]+1) */
1960: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1961: current_space = free_space;
1962: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1963: current_space_lvl = free_space_lvl;
1964:
1965: for (i=0; i<n; i++) {
1966: nzi = 0;
1967: /* copy current row into linked list */
1968: nnz = ai[r[i]+1] - ai[r[i]];
1969: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1970: cols = aj + ai[r[i]];
1971: lnk[i] = -1; /* marker to indicate if diagonal exists */
1972: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1973: nzi += nlnk;
1975: /* make sure diagonal entry is included */
1976: if (diagonal_fill && lnk[i] == -1) {
1977: fm = n;
1978: while (lnk[fm] < i) fm = lnk[fm];
1979: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1980: lnk[fm] = i;
1981: lnk_lvl[i] = 0;
1982: nzi++; dcount++;
1983: }
1985: /* add pivot rows into the active row */
1986: nzbd = 0;
1987: prow = lnk[n];
1988: while (prow < i) {
1989: nnz = bdiag[prow];
1990: cols = bj_ptr[prow] + nnz + 1;
1991: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1992: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1993: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1994: nzi += nlnk;
1995: prow = lnk[prow];
1996: nzbd++;
1997: }
1998: bdiag[i] = nzbd;
1999: bi[i+1] = bi[i] + nzi;
2001: /* if free space is not available, make more free space */
2002: if (current_space->local_remaining<nzi) {
2003: nnz = nzi*(n - i); /* estimated and max additional space needed */
2004: PetscFreeSpaceGet(nnz,¤t_space);
2005: PetscFreeSpaceGet(nnz,¤t_space_lvl);
2006: reallocs++;
2007: }
2009: /* copy data into free_space and free_space_lvl, then initialize lnk */
2010: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2011: bj_ptr[i] = current_space->array;
2012: bjlvl_ptr[i] = current_space_lvl->array;
2014: /* make sure the active row i has diagonal entry */
2015: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2017: current_space->array += nzi;
2018: current_space->local_used += nzi;
2019: current_space->local_remaining -= nzi;
2020: current_space_lvl->array += nzi;
2021: current_space_lvl->local_used += nzi;
2022: current_space_lvl->local_remaining -= nzi;
2023: }
2025: ISRestoreIndices(isrow,&r);
2026: ISRestoreIndices(isicol,&ic);
2028: /* destroy list of free space and other temporary arrays */
2029: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
2030: PetscFreeSpaceContiguous(&free_space,bj); /* copy free_space -> bj */
2031: PetscIncompleteLLDestroy(lnk,lnkbt);
2032: PetscFreeSpaceDestroy(free_space_lvl);
2033: PetscFree2(bj_ptr,bjlvl_ptr);
2035: #if defined(PETSC_USE_INFO)
2036: {
2037: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2038: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
2039: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
2040: PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
2041: PetscInfo(A,"for best performance.\n");
2042: if (diagonal_fill) {
2043: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
2044: }
2045: }
2046: #endif
2048: /* put together the new matrix */
2049: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
2050: PetscLogObjectParent(fact,isicol);
2051: b = (Mat_SeqAIJ*)(fact)->data;
2052: b->free_a = PETSC_TRUE;
2053: b->free_ij = PETSC_TRUE;
2054: b->singlemalloc = PETSC_FALSE;
2055: PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);
2056: b->j = bj;
2057: b->i = bi;
2058: for (i=0; i<n; i++) bdiag[i] += bi[i];
2059: b->diag = bdiag;
2060: b->ilen = 0;
2061: b->imax = 0;
2062: b->row = isrow;
2063: b->col = iscol;
2064: PetscObjectReference((PetscObject)isrow);
2065: PetscObjectReference((PetscObject)iscol);
2066: b->icol = isicol;
2067: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
2068: /* In b structure: Free imax, ilen, old a, old j.
2069: Allocate bdiag, solve_work, new a, new j */
2070: PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
2071: b->maxnz = b->nz = bi[n] ;
2072: (fact)->info.factor_mallocs = reallocs;
2073: (fact)->info.fill_ratio_given = f;
2074: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2075: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2076: if (a->inode.size) {
2077: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2078: }
2079: return(0);
2080: }
2084: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2085: {
2086: Mat C = B;
2087: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2088: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2089: IS ip=b->row,iip = b->icol;
2091: const PetscInt *rip,*riip;
2092: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2093: PetscInt *ai=a->i,*aj=a->j;
2094: PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2095: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2096: PetscBool perm_identity;
2097: FactorShiftCtx sctx;
2098: PetscReal rs;
2099: MatScalar d,*v;
2102: /* MatPivotSetUp(): initialize shift context sctx */
2103: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2105: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2106: sctx.shift_top = info->zeropivot;
2107: for (i=0; i<mbs; i++) {
2108: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2109: d = (aa)[a->diag[i]];
2110: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2111: v = aa+ai[i];
2112: nz = ai[i+1] - ai[i];
2113: for (j=0; j<nz; j++)
2114: rs += PetscAbsScalar(v[j]);
2115: if (rs>sctx.shift_top) sctx.shift_top = rs;
2116: }
2117: sctx.shift_top *= 1.1;
2118: sctx.nshift_max = 5;
2119: sctx.shift_lo = 0.;
2120: sctx.shift_hi = 1.;
2121: }
2123: ISGetIndices(ip,&rip);
2124: ISGetIndices(iip,&riip);
2125:
2126: /* allocate working arrays
2127: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2128: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2129: */
2130: PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);
2131:
2132: do {
2133: sctx.newshift = PETSC_FALSE;
2135: for (i=0; i<mbs; i++) c2r[i] = mbs;
2136: if (mbs) il[0] = 0;
2137:
2138: for (k = 0; k<mbs; k++){
2139: /* zero rtmp */
2140: nz = bi[k+1] - bi[k];
2141: bjtmp = bj + bi[k];
2142: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2143:
2144: /* load in initial unfactored row */
2145: bval = ba + bi[k];
2146: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2147: for (j = jmin; j < jmax; j++){
2148: col = riip[aj[j]];
2149: if (col >= k){ /* only take upper triangular entry */
2150: rtmp[col] = aa[j];
2151: *bval++ = 0.0; /* for in-place factorization */
2152: }
2153: }
2154: /* shift the diagonal of the matrix: ZeropivotApply() */
2155: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2156:
2157: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2158: dk = rtmp[k];
2159: i = c2r[k]; /* first row to be added to k_th row */
2161: while (i < k){
2162: nexti = c2r[i]; /* next row to be added to k_th row */
2163:
2164: /* compute multiplier, update diag(k) and U(i,k) */
2165: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2166: uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2167: dk += uikdi*ba[ili]; /* update diag[k] */
2168: ba[ili] = uikdi; /* -U(i,k) */
2170: /* add multiple of row i to k-th row */
2171: jmin = ili + 1; jmax = bi[i+1];
2172: if (jmin < jmax){
2173: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2174: /* update il and c2r for row i */
2175: il[i] = jmin;
2176: j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2177: }
2178: i = nexti;
2179: }
2181: /* copy data into U(k,:) */
2182: rs = 0.0;
2183: jmin = bi[k]; jmax = bi[k+1]-1;
2184: if (jmin < jmax) {
2185: for (j=jmin; j<jmax; j++){
2186: col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2187: }
2188: /* add the k-th row into il and c2r */
2189: il[k] = jmin;
2190: i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2191: }
2193: /* MatPivotCheck() */
2194: sctx.rs = rs;
2195: sctx.pv = dk;
2196: MatPivotCheck(A,info,&sctx,i);
2197: if(sctx.newshift) break;
2198: dk = sctx.pv;
2199:
2200: ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2201: }
2202: } while (sctx.newshift);
2203:
2204: PetscFree3(rtmp,il,c2r);
2205: ISRestoreIndices(ip,&rip);
2206: ISRestoreIndices(iip,&riip);
2208: ISIdentity(ip,&perm_identity);
2209: if (perm_identity){
2210: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2211: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2212: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2213: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2214: } else {
2215: B->ops->solve = MatSolve_SeqSBAIJ_1;
2216: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2217: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2218: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2219: }
2221: C->assembled = PETSC_TRUE;
2222: C->preallocated = PETSC_TRUE;
2223: PetscLogFlops(C->rmap->n);
2225: /* MatPivotView() */
2226: if (sctx.nshift){
2227: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2228: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
2229: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2230: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
2231: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
2232: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);
2233: }
2234: }
2235: return(0);
2236: }
2240: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2241: {
2242: Mat C = B;
2243: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2244: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2245: IS ip=b->row,iip = b->icol;
2247: const PetscInt *rip,*riip;
2248: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2249: PetscInt *ai=a->i,*aj=a->j;
2250: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2251: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2252: PetscBool perm_identity;
2253: FactorShiftCtx sctx;
2254: PetscReal rs;
2255: MatScalar d,*v;
2258: /* MatPivotSetUp(): initialize shift context sctx */
2259: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2261: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2262: sctx.shift_top = info->zeropivot;
2263: for (i=0; i<mbs; i++) {
2264: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2265: d = (aa)[a->diag[i]];
2266: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2267: v = aa+ai[i];
2268: nz = ai[i+1] - ai[i];
2269: for (j=0; j<nz; j++)
2270: rs += PetscAbsScalar(v[j]);
2271: if (rs>sctx.shift_top) sctx.shift_top = rs;
2272: }
2273: sctx.shift_top *= 1.1;
2274: sctx.nshift_max = 5;
2275: sctx.shift_lo = 0.;
2276: sctx.shift_hi = 1.;
2277: }
2279: ISGetIndices(ip,&rip);
2280: ISGetIndices(iip,&riip);
2281:
2282: /* initialization */
2283: PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);
2284:
2285: do {
2286: sctx.newshift = PETSC_FALSE;
2288: for (i=0; i<mbs; i++) jl[i] = mbs;
2289: il[0] = 0;
2290:
2291: for (k = 0; k<mbs; k++){
2292: /* zero rtmp */
2293: nz = bi[k+1] - bi[k];
2294: bjtmp = bj + bi[k];
2295: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2297: bval = ba + bi[k];
2298: /* initialize k-th row by the perm[k]-th row of A */
2299: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2300: for (j = jmin; j < jmax; j++){
2301: col = riip[aj[j]];
2302: if (col >= k){ /* only take upper triangular entry */
2303: rtmp[col] = aa[j];
2304: *bval++ = 0.0; /* for in-place factorization */
2305: }
2306: }
2307: /* shift the diagonal of the matrix */
2308: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2310: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2311: dk = rtmp[k];
2312: i = jl[k]; /* first row to be added to k_th row */
2314: while (i < k){
2315: nexti = jl[i]; /* next row to be added to k_th row */
2316:
2317: /* compute multiplier, update diag(k) and U(i,k) */
2318: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2319: uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */
2320: dk += uikdi*ba[ili];
2321: ba[ili] = uikdi; /* -U(i,k) */
2323: /* add multiple of row i to k-th row */
2324: jmin = ili + 1; jmax = bi[i+1];
2325: if (jmin < jmax){
2326: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2327: /* update il and jl for row i */
2328: il[i] = jmin;
2329: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2330: }
2331: i = nexti;
2332: }
2334: /* shift the diagonals when zero pivot is detected */
2335: /* compute rs=sum of abs(off-diagonal) */
2336: rs = 0.0;
2337: jmin = bi[k]+1;
2338: nz = bi[k+1] - jmin;
2339: bcol = bj + jmin;
2340: for (j=0; j<nz; j++) {
2341: rs += PetscAbsScalar(rtmp[bcol[j]]);
2342: }
2344: sctx.rs = rs;
2345: sctx.pv = dk;
2346: MatPivotCheck(A,info,&sctx,k);
2347: if (sctx.newshift) break;
2348: dk = sctx.pv;
2349:
2350: /* copy data into U(k,:) */
2351: ba[bi[k]] = 1.0/dk; /* U(k,k) */
2352: jmin = bi[k]+1; jmax = bi[k+1];
2353: if (jmin < jmax) {
2354: for (j=jmin; j<jmax; j++){
2355: col = bj[j]; ba[j] = rtmp[col];
2356: }
2357: /* add the k-th row into il and jl */
2358: il[k] = jmin;
2359: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2360: }
2361: }
2362: } while (sctx.newshift);
2364: PetscFree3(rtmp,il,jl);
2365: ISRestoreIndices(ip,&rip);
2366: ISRestoreIndices(iip,&riip);
2368: ISIdentity(ip,&perm_identity);
2369: if (perm_identity){
2370: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2371: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2372: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2373: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2374: } else {
2375: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2376: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2377: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2378: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2379: }
2381: C->assembled = PETSC_TRUE;
2382: C->preallocated = PETSC_TRUE;
2383: PetscLogFlops(C->rmap->n);
2384: if (sctx.nshift){
2385: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2386: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
2387: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2388: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
2389: }
2390: }
2391: return(0);
2392: }
2394: /*
2395: icc() under revised new data structure.
2396: Factored arrays bj and ba are stored as
2397: U(0,:),...,U(i,:),U(n-1,:)
2399: ui=fact->i is an array of size n+1, in which
2400: ui+
2401: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2402: ui[n]: points to U(n-1,n-1)+1
2403:
2404: udiag=fact->diag is an array of size n,in which
2405: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2407: U(i,:) contains udiag[i] as its last entry, i.e.,
2408: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2409: */
2413: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2414: {
2415: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2416: Mat_SeqSBAIJ *b;
2417: PetscErrorCode ierr;
2418: PetscBool perm_identity,missing;
2419: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2420: const PetscInt *rip,*riip;
2421: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2422: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2423: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2424: PetscReal fill=info->fill,levels=info->levels;
2425: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2426: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2427: PetscBT lnkbt;
2428: IS iperm;
2429:
2431: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2432: MatMissingDiagonal(A,&missing,&d);
2433: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2434: ISIdentity(perm,&perm_identity);
2435: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2437: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2438: PetscMalloc((am+1)*sizeof(PetscInt),&udiag);
2439: ui[0] = 0;
2441: /* ICC(0) without matrix ordering: simply rearrange column indices */
2442: if (!levels && perm_identity) {
2443: for (i=0; i<am; i++) {
2444: ncols = ai[i+1] - a->diag[i];
2445: ui[i+1] = ui[i] + ncols;
2446: udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2447: }
2448: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2449: cols = uj;
2450: for (i=0; i<am; i++) {
2451: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2452: ncols = ai[i+1] - a->diag[i] -1;
2453: for (j=0; j<ncols; j++) *cols++ = aj[j];
2454: *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2455: }
2456: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2457: ISGetIndices(iperm,&riip);
2458: ISGetIndices(perm,&rip);
2460: /* initialization */
2461: PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);
2463: /* jl: linked list for storing indices of the pivot rows
2464: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2465: PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);
2466: for (i=0; i<am; i++){
2467: jl[i] = am; il[i] = 0;
2468: }
2470: /* create and initialize a linked list for storing column indices of the active row k */
2471: nlnk = am + 1;
2472: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2474: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2475: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);
2476: current_space = free_space;
2477: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);
2478: current_space_lvl = free_space_lvl;
2480: for (k=0; k<am; k++){ /* for each active row k */
2481: /* initialize lnk by the column indices of row rip[k] of A */
2482: nzk = 0;
2483: ncols = ai[rip[k]+1] - ai[rip[k]];
2484: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2485: ncols_upper = 0;
2486: for (j=0; j<ncols; j++){
2487: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2488: if (riip[i] >= k){ /* only take upper triangular entry */
2489: ajtmp[ncols_upper] = i;
2490: ncols_upper++;
2491: }
2492: }
2493: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2494: nzk += nlnk;
2496: /* update lnk by computing fill-in for each pivot row to be merged in */
2497: prow = jl[k]; /* 1st pivot row */
2498:
2499: while (prow < k){
2500: nextprow = jl[prow];
2501:
2502: /* merge prow into k-th row */
2503: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2504: jmax = ui[prow+1];
2505: ncols = jmax-jmin;
2506: i = jmin - ui[prow];
2507: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2508: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2509: j = *(uj - 1);
2510: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2511: nzk += nlnk;
2513: /* update il and jl for prow */
2514: if (jmin < jmax){
2515: il[prow] = jmin;
2516: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2517: }
2518: prow = nextprow;
2519: }
2521: /* if free space is not available, make more free space */
2522: if (current_space->local_remaining<nzk) {
2523: i = am - k + 1; /* num of unfactored rows */
2524: i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2525: PetscFreeSpaceGet(i,¤t_space);
2526: PetscFreeSpaceGet(i,¤t_space_lvl);
2527: reallocs++;
2528: }
2530: /* copy data into free_space and free_space_lvl, then initialize lnk */
2531: if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2532: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2534: /* add the k-th row into il and jl */
2535: if (nzk > 1){
2536: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2537: jl[k] = jl[i]; jl[i] = k;
2538: il[k] = ui[k] + 1;
2539: }
2540: uj_ptr[k] = current_space->array;
2541: uj_lvl_ptr[k] = current_space_lvl->array;
2543: current_space->array += nzk;
2544: current_space->local_used += nzk;
2545: current_space->local_remaining -= nzk;
2547: current_space_lvl->array += nzk;
2548: current_space_lvl->local_used += nzk;
2549: current_space_lvl->local_remaining -= nzk;
2551: ui[k+1] = ui[k] + nzk;
2552: }
2554: ISRestoreIndices(perm,&rip);
2555: ISRestoreIndices(iperm,&riip);
2556: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2557: PetscFree(ajtmp);
2559: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2560: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2561: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2562: PetscIncompleteLLDestroy(lnk,lnkbt);
2563: PetscFreeSpaceDestroy(free_space_lvl);
2565: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2567: /* put together the new matrix in MATSEQSBAIJ format */
2568: b = (Mat_SeqSBAIJ*)(fact)->data;
2569: b->singlemalloc = PETSC_FALSE;
2570: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
2571: b->j = uj;
2572: b->i = ui;
2573: b->diag = udiag;
2574: b->free_diag = PETSC_TRUE;
2575: b->ilen = 0;
2576: b->imax = 0;
2577: b->row = perm;
2578: b->col = perm;
2579: PetscObjectReference((PetscObject)perm);
2580: PetscObjectReference((PetscObject)perm);
2581: b->icol = iperm;
2582: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2583: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2584: PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2585: b->maxnz = b->nz = ui[am];
2586: b->free_a = PETSC_TRUE;
2587: b->free_ij = PETSC_TRUE;
2588:
2589: fact->info.factor_mallocs = reallocs;
2590: fact->info.fill_ratio_given = fill;
2591: if (ai[am] != 0) {
2592: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2593: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2594: } else {
2595: fact->info.fill_ratio_needed = 0.0;
2596: }
2597: #if defined(PETSC_USE_INFO)
2598: if (ai[am] != 0) {
2599: PetscReal af = fact->info.fill_ratio_needed;
2600: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2601: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2602: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2603: } else {
2604: PetscInfo(A,"Empty matrix.\n");
2605: }
2606: #endif
2607: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2608: return(0);
2609: }
2613: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2614: {
2615: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2616: Mat_SeqSBAIJ *b;
2617: PetscErrorCode ierr;
2618: PetscBool perm_identity,missing;
2619: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2620: const PetscInt *rip,*riip;
2621: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2622: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2623: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2624: PetscReal fill=info->fill,levels=info->levels;
2625: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2626: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2627: PetscBT lnkbt;
2628: IS iperm;
2629:
2631: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2632: MatMissingDiagonal(A,&missing,&d);
2633: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2634: ISIdentity(perm,&perm_identity);
2635: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2637: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2638: PetscMalloc((am+1)*sizeof(PetscInt),&udiag);
2639: ui[0] = 0;
2641: /* ICC(0) without matrix ordering: simply copies fill pattern */
2642: if (!levels && perm_identity) {
2644: for (i=0; i<am; i++) {
2645: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
2646: udiag[i] = ui[i];
2647: }
2648: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2649: cols = uj;
2650: for (i=0; i<am; i++) {
2651: aj = a->j + a->diag[i];
2652: ncols = ui[i+1] - ui[i];
2653: for (j=0; j<ncols; j++) *cols++ = *aj++;
2654: }
2655: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2656: ISGetIndices(iperm,&riip);
2657: ISGetIndices(perm,&rip);
2659: /* initialization */
2660: PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);
2662: /* jl: linked list for storing indices of the pivot rows
2663: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2664: PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);
2665: for (i=0; i<am; i++){
2666: jl[i] = am; il[i] = 0;
2667: }
2669: /* create and initialize a linked list for storing column indices of the active row k */
2670: nlnk = am + 1;
2671: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2673: /* initial FreeSpace size is fill*(ai[am]+1) */
2674: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
2675: current_space = free_space;
2676: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
2677: current_space_lvl = free_space_lvl;
2679: for (k=0; k<am; k++){ /* for each active row k */
2680: /* initialize lnk by the column indices of row rip[k] of A */
2681: nzk = 0;
2682: ncols = ai[rip[k]+1] - ai[rip[k]];
2683: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2684: ncols_upper = 0;
2685: for (j=0; j<ncols; j++){
2686: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2687: if (riip[i] >= k){ /* only take upper triangular entry */
2688: ajtmp[ncols_upper] = i;
2689: ncols_upper++;
2690: }
2691: }
2692: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2693: nzk += nlnk;
2695: /* update lnk by computing fill-in for each pivot row to be merged in */
2696: prow = jl[k]; /* 1st pivot row */
2697:
2698: while (prow < k){
2699: nextprow = jl[prow];
2700:
2701: /* merge prow into k-th row */
2702: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2703: jmax = ui[prow+1];
2704: ncols = jmax-jmin;
2705: i = jmin - ui[prow];
2706: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2707: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2708: j = *(uj - 1);
2709: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2710: nzk += nlnk;
2712: /* update il and jl for prow */
2713: if (jmin < jmax){
2714: il[prow] = jmin;
2715: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2716: }
2717: prow = nextprow;
2718: }
2720: /* if free space is not available, make more free space */
2721: if (current_space->local_remaining<nzk) {
2722: i = am - k + 1; /* num of unfactored rows */
2723: i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2724: PetscFreeSpaceGet(i,¤t_space);
2725: PetscFreeSpaceGet(i,¤t_space_lvl);
2726: reallocs++;
2727: }
2729: /* copy data into free_space and free_space_lvl, then initialize lnk */
2730: if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2731: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2733: /* add the k-th row into il and jl */
2734: if (nzk > 1){
2735: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2736: jl[k] = jl[i]; jl[i] = k;
2737: il[k] = ui[k] + 1;
2738: }
2739: uj_ptr[k] = current_space->array;
2740: uj_lvl_ptr[k] = current_space_lvl->array;
2742: current_space->array += nzk;
2743: current_space->local_used += nzk;
2744: current_space->local_remaining -= nzk;
2746: current_space_lvl->array += nzk;
2747: current_space_lvl->local_used += nzk;
2748: current_space_lvl->local_remaining -= nzk;
2750: ui[k+1] = ui[k] + nzk;
2751: }
2753: #if defined(PETSC_USE_INFO)
2754: if (ai[am] != 0) {
2755: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2756: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2757: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2758: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2759: } else {
2760: PetscInfo(A,"Empty matrix.\n");
2761: }
2762: #endif
2764: ISRestoreIndices(perm,&rip);
2765: ISRestoreIndices(iperm,&riip);
2766: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2767: PetscFree(ajtmp);
2769: /* destroy list of free space and other temporary array(s) */
2770: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2771: PetscFreeSpaceContiguous(&free_space,uj);
2772: PetscIncompleteLLDestroy(lnk,lnkbt);
2773: PetscFreeSpaceDestroy(free_space_lvl);
2775: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2777: /* put together the new matrix in MATSEQSBAIJ format */
2779: b = (Mat_SeqSBAIJ*)fact->data;
2780: b->singlemalloc = PETSC_FALSE;
2781: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
2782: b->j = uj;
2783: b->i = ui;
2784: b->diag = udiag;
2785: b->free_diag = PETSC_TRUE;
2786: b->ilen = 0;
2787: b->imax = 0;
2788: b->row = perm;
2789: b->col = perm;
2790: PetscObjectReference((PetscObject)perm);
2791: PetscObjectReference((PetscObject)perm);
2792: b->icol = iperm;
2793: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2794: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2795: PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
2796: b->maxnz = b->nz = ui[am];
2797: b->free_a = PETSC_TRUE;
2798: b->free_ij = PETSC_TRUE;
2799:
2800: fact->info.factor_mallocs = reallocs;
2801: fact->info.fill_ratio_given = fill;
2802: if (ai[am] != 0) {
2803: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2804: } else {
2805: fact->info.fill_ratio_needed = 0.0;
2806: }
2807: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2808: return(0);
2809: }
2813: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2814: {
2815: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2816: Mat_SeqSBAIJ *b;
2817: PetscErrorCode ierr;
2818: PetscBool perm_identity;
2819: PetscReal fill = info->fill;
2820: const PetscInt *rip,*riip;
2821: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2822: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2823: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2824: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2825: PetscBT lnkbt;
2826: IS iperm;
2829: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2830: /* check whether perm is the identity mapping */
2831: ISIdentity(perm,&perm_identity);
2832: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2833: ISGetIndices(iperm,&riip);
2834: ISGetIndices(perm,&rip);
2836: /* initialization */
2837: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2838: PetscMalloc((am+1)*sizeof(PetscInt),&udiag);
2839: ui[0] = 0;
2841: /* jl: linked list for storing indices of the pivot rows
2842: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2843: PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);
2844: for (i=0; i<am; i++){
2845: jl[i] = am; il[i] = 0;
2846: }
2848: /* create and initialize a linked list for storing column indices of the active row k */
2849: nlnk = am + 1;
2850: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2852: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2853: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);
2854: current_space = free_space;
2856: for (k=0; k<am; k++){ /* for each active row k */
2857: /* initialize lnk by the column indices of row rip[k] of A */
2858: nzk = 0;
2859: ncols = ai[rip[k]+1] - ai[rip[k]];
2860: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2861: ncols_upper = 0;
2862: for (j=0; j<ncols; j++){
2863: i = riip[*(aj + ai[rip[k]] + j)];
2864: if (i >= k){ /* only take upper triangular entry */
2865: cols[ncols_upper] = i;
2866: ncols_upper++;
2867: }
2868: }
2869: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2870: nzk += nlnk;
2872: /* update lnk by computing fill-in for each pivot row to be merged in */
2873: prow = jl[k]; /* 1st pivot row */
2874:
2875: while (prow < k){
2876: nextprow = jl[prow];
2877: /* merge prow into k-th row */
2878: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2879: jmax = ui[prow+1];
2880: ncols = jmax-jmin;
2881: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2882: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
2883: nzk += nlnk;
2885: /* update il and jl for prow */
2886: if (jmin < jmax){
2887: il[prow] = jmin;
2888: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2889: }
2890: prow = nextprow;
2891: }
2893: /* if free space is not available, make more free space */
2894: if (current_space->local_remaining<nzk) {
2895: i = am - k + 1; /* num of unfactored rows */
2896: i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2897: PetscFreeSpaceGet(i,¤t_space);
2898: reallocs++;
2899: }
2901: /* copy data into free space, then initialize lnk */
2902: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
2904: /* add the k-th row into il and jl */
2905: if (nzk > 1){
2906: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2907: jl[k] = jl[i]; jl[i] = k;
2908: il[k] = ui[k] + 1;
2909: }
2910: ui_ptr[k] = current_space->array;
2911: current_space->array += nzk;
2912: current_space->local_used += nzk;
2913: current_space->local_remaining -= nzk;
2915: ui[k+1] = ui[k] + nzk;
2916: }
2918: ISRestoreIndices(perm,&rip);
2919: ISRestoreIndices(iperm,&riip);
2920: PetscFree4(ui_ptr,jl,il,cols);
2922: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2923: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2924: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2925: PetscLLDestroy(lnk,lnkbt);
2927: /* put together the new matrix in MATSEQSBAIJ format */
2929: b = (Mat_SeqSBAIJ*)fact->data;
2930: b->singlemalloc = PETSC_FALSE;
2931: b->free_a = PETSC_TRUE;
2932: b->free_ij = PETSC_TRUE;
2933: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
2934: b->j = uj;
2935: b->i = ui;
2936: b->diag = udiag;
2937: b->free_diag = PETSC_TRUE;
2938: b->ilen = 0;
2939: b->imax = 0;
2940: b->row = perm;
2941: b->col = perm;
2942: PetscObjectReference((PetscObject)perm);
2943: PetscObjectReference((PetscObject)perm);
2944: b->icol = iperm;
2945: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2946: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2947: PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2948: b->maxnz = b->nz = ui[am];
2949:
2950: fact->info.factor_mallocs = reallocs;
2951: fact->info.fill_ratio_given = fill;
2952: if (ai[am] != 0) {
2953: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2954: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2955: } else {
2956: fact->info.fill_ratio_needed = 0.0;
2957: }
2958: #if defined(PETSC_USE_INFO)
2959: if (ai[am] != 0) {
2960: PetscReal af = fact->info.fill_ratio_needed;
2961: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2962: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2963: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2964: } else {
2965: PetscInfo(A,"Empty matrix.\n");
2966: }
2967: #endif
2968: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2969: return(0);
2970: }
2974: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2975: {
2976: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2977: Mat_SeqSBAIJ *b;
2978: PetscErrorCode ierr;
2979: PetscBool perm_identity;
2980: PetscReal fill = info->fill;
2981: const PetscInt *rip,*riip;
2982: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2983: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2984: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2985: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2986: PetscBT lnkbt;
2987: IS iperm;
2990: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2991: /* check whether perm is the identity mapping */
2992: ISIdentity(perm,&perm_identity);
2993: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2994: ISGetIndices(iperm,&riip);
2995: ISGetIndices(perm,&rip);
2997: /* initialization */
2998: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2999: ui[0] = 0;
3001: /* jl: linked list for storing indices of the pivot rows
3002: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3003: PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);
3004: for (i=0; i<am; i++){
3005: jl[i] = am; il[i] = 0;
3006: }
3008: /* create and initialize a linked list for storing column indices of the active row k */
3009: nlnk = am + 1;
3010: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
3012: /* initial FreeSpace size is fill*(ai[am]+1) */
3013: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
3014: current_space = free_space;
3016: for (k=0; k<am; k++){ /* for each active row k */
3017: /* initialize lnk by the column indices of row rip[k] of A */
3018: nzk = 0;
3019: ncols = ai[rip[k]+1] - ai[rip[k]];
3020: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3021: ncols_upper = 0;
3022: for (j=0; j<ncols; j++){
3023: i = riip[*(aj + ai[rip[k]] + j)];
3024: if (i >= k){ /* only take upper triangular entry */
3025: cols[ncols_upper] = i;
3026: ncols_upper++;
3027: }
3028: }
3029: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
3030: nzk += nlnk;
3032: /* update lnk by computing fill-in for each pivot row to be merged in */
3033: prow = jl[k]; /* 1st pivot row */
3034:
3035: while (prow < k){
3036: nextprow = jl[prow];
3037: /* merge prow into k-th row */
3038: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3039: jmax = ui[prow+1];
3040: ncols = jmax-jmin;
3041: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3042: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
3043: nzk += nlnk;
3045: /* update il and jl for prow */
3046: if (jmin < jmax){
3047: il[prow] = jmin;
3048: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3049: }
3050: prow = nextprow;
3051: }
3053: /* if free space is not available, make more free space */
3054: if (current_space->local_remaining<nzk) {
3055: i = am - k + 1; /* num of unfactored rows */
3056: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3057: PetscFreeSpaceGet(i,¤t_space);
3058: reallocs++;
3059: }
3061: /* copy data into free space, then initialize lnk */
3062: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
3064: /* add the k-th row into il and jl */
3065: if (nzk-1 > 0){
3066: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3067: jl[k] = jl[i]; jl[i] = k;
3068: il[k] = ui[k] + 1;
3069: }
3070: ui_ptr[k] = current_space->array;
3071: current_space->array += nzk;
3072: current_space->local_used += nzk;
3073: current_space->local_remaining -= nzk;
3075: ui[k+1] = ui[k] + nzk;
3076: }
3078: #if defined(PETSC_USE_INFO)
3079: if (ai[am] != 0) {
3080: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3081: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
3082: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
3083: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
3084: } else {
3085: PetscInfo(A,"Empty matrix.\n");
3086: }
3087: #endif
3089: ISRestoreIndices(perm,&rip);
3090: ISRestoreIndices(iperm,&riip);
3091: PetscFree4(ui_ptr,jl,il,cols);
3093: /* destroy list of free space and other temporary array(s) */
3094: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
3095: PetscFreeSpaceContiguous(&free_space,uj);
3096: PetscLLDestroy(lnk,lnkbt);
3098: /* put together the new matrix in MATSEQSBAIJ format */
3100: b = (Mat_SeqSBAIJ*)fact->data;
3101: b->singlemalloc = PETSC_FALSE;
3102: b->free_a = PETSC_TRUE;
3103: b->free_ij = PETSC_TRUE;
3104: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
3105: b->j = uj;
3106: b->i = ui;
3107: b->diag = 0;
3108: b->ilen = 0;
3109: b->imax = 0;
3110: b->row = perm;
3111: b->col = perm;
3112: PetscObjectReference((PetscObject)perm);
3113: PetscObjectReference((PetscObject)perm);
3114: b->icol = iperm;
3115: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3116: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
3117: PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
3118: b->maxnz = b->nz = ui[am];
3119:
3120: fact->info.factor_mallocs = reallocs;
3121: fact->info.fill_ratio_given = fill;
3122: if (ai[am] != 0) {
3123: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3124: } else {
3125: fact->info.fill_ratio_needed = 0.0;
3126: }
3127: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3128: return(0);
3129: }
3133: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3134: {
3135: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3136: PetscErrorCode ierr;
3137: PetscInt n = A->rmap->n;
3138: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3139: PetscScalar *x,sum;
3140: const PetscScalar *b;
3141: const MatScalar *aa = a->a,*v;
3142: PetscInt i,nz;
3145: if (!n) return(0);
3147: VecGetArrayRead(bb,&b);
3148: VecGetArray(xx,&x);
3150: /* forward solve the lower triangular */
3151: x[0] = b[0];
3152: v = aa;
3153: vi = aj;
3154: for (i=1; i<n; i++) {
3155: nz = ai[i+1] - ai[i];
3156: sum = b[i];
3157: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3158: v += nz;
3159: vi += nz;
3160: x[i] = sum;
3161: }
3162:
3163: /* backward solve the upper triangular */
3164: for (i=n-1; i>=0; i--){
3165: v = aa + adiag[i+1] + 1;
3166: vi = aj + adiag[i+1] + 1;
3167: nz = adiag[i] - adiag[i+1]-1;
3168: sum = x[i];
3169: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3170: x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3171: }
3172:
3173: PetscLogFlops(2.0*a->nz - A->cmap->n);
3174: VecRestoreArrayRead(bb,&b);
3175: VecRestoreArray(xx,&x);
3176: return(0);
3177: }
3181: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3182: {
3183: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3184: IS iscol = a->col,isrow = a->row;
3185: PetscErrorCode ierr;
3186: PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3187: const PetscInt *rout,*cout,*r,*c;
3188: PetscScalar *x,*tmp,sum;
3189: const PetscScalar *b;
3190: const MatScalar *aa = a->a,*v;
3193: if (!n) return(0);
3195: VecGetArrayRead(bb,&b);
3196: VecGetArray(xx,&x);
3197: tmp = a->solve_work;
3199: ISGetIndices(isrow,&rout); r = rout;
3200: ISGetIndices(iscol,&cout); c = cout;
3202: /* forward solve the lower triangular */
3203: tmp[0] = b[r[0]];
3204: v = aa;
3205: vi = aj;
3206: for (i=1; i<n; i++) {
3207: nz = ai[i+1] - ai[i];
3208: sum = b[r[i]];
3209: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3210: tmp[i] = sum;
3211: v += nz; vi += nz;
3212: }
3214: /* backward solve the upper triangular */
3215: for (i=n-1; i>=0; i--){
3216: v = aa + adiag[i+1]+1;
3217: vi = aj + adiag[i+1]+1;
3218: nz = adiag[i]-adiag[i+1]-1;
3219: sum = tmp[i];
3220: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3221: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3222: }
3224: ISRestoreIndices(isrow,&rout);
3225: ISRestoreIndices(iscol,&cout);
3226: VecRestoreArrayRead(bb,&b);
3227: VecRestoreArray(xx,&x);
3228: PetscLogFlops(2*a->nz - A->cmap->n);
3229: return(0);
3230: }
3234: /*
3235: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3236: */
3237: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3238: {
3239: Mat B = *fact;
3240: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b;
3241: IS isicol;
3242: PetscErrorCode ierr;
3243: const PetscInt *r,*ic;
3244: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3245: PetscInt *bi,*bj,*bdiag,*bdiag_rev;
3246: PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3247: PetscInt nlnk,*lnk;
3248: PetscBT lnkbt;
3249: PetscBool row_identity,icol_identity;
3250: MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3251: const PetscInt *ics;
3252: PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3253: PetscReal dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount;
3254: PetscInt dtcount=(PetscInt)info->dtcount,nnz_max;
3255: PetscBool missing;
3259: if (dt == PETSC_DEFAULT) dt = 0.005;
3260: if (dtcol == PETSC_DEFAULT) dtcol = 0.01; /* XXX unused! */
3261: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3263: /* ------- symbolic factorization, can be reused ---------*/
3264: MatMissingDiagonal(A,&missing,&i);
3265: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3266: adiag=a->diag;
3268: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
3270: /* bdiag is location of diagonal in factor */
3271: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag); /* becomes b->diag */
3272: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev); /* temporary */
3274: /* allocate row pointers bi */
3275: PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);
3277: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3278: if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3279: nnz_max = ai[n]+2*n*dtcount+2;
3281: PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);
3282: PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);
3284: /* put together the new matrix */
3285: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);
3286: PetscLogObjectParent(B,isicol);
3287: b = (Mat_SeqAIJ*)B->data;
3288: b->free_a = PETSC_TRUE;
3289: b->free_ij = PETSC_TRUE;
3290: b->singlemalloc = PETSC_FALSE;
3291: b->a = ba;
3292: b->j = bj;
3293: b->i = bi;
3294: b->diag = bdiag;
3295: b->ilen = 0;
3296: b->imax = 0;
3297: b->row = isrow;
3298: b->col = iscol;
3299: PetscObjectReference((PetscObject)isrow);
3300: PetscObjectReference((PetscObject)iscol);
3301: b->icol = isicol;
3302: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
3304: PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
3305: b->maxnz = nnz_max;
3307: B->factortype = MAT_FACTOR_ILUDT;
3308: B->info.factor_mallocs = 0;
3309: B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3310: CHKMEMQ;
3311: /* ------- end of symbolic factorization ---------*/
3313: ISGetIndices(isrow,&r);
3314: ISGetIndices(isicol,&ic);
3315: ics = ic;
3317: /* linked list for storing column indices of the active row */
3318: nlnk = n + 1;
3319: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
3321: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3322: PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);
3323: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3324: PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);
3325: PetscMemzero(rtmp,n*sizeof(MatScalar));
3327: bi[0] = 0;
3328: bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3329: bdiag_rev[n] = bdiag[0];
3330: bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3331: for (i=0; i<n; i++) {
3332: /* copy initial fill into linked list */
3333: nzi = 0; /* nonzeros for active row i */
3334: nzi = ai[r[i]+1] - ai[r[i]];
3335: if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3336: nzi_al = adiag[r[i]] - ai[r[i]];
3337: nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3338: ajtmp = aj + ai[r[i]];
3339: PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);
3340:
3341: /* load in initial (unfactored row) */
3342: aatmp = a->a + ai[r[i]];
3343: for (j=0; j<nzi; j++) {
3344: rtmp[ics[*ajtmp++]] = *aatmp++;
3345: }
3346:
3347: /* add pivot rows into linked list */
3348: row = lnk[n];
3349: while (row < i ) {
3350: nzi_bl = bi[row+1] - bi[row] + 1;
3351: bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3352: PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);
3353: nzi += nlnk;
3354: row = lnk[row];
3355: }
3356:
3357: /* copy data from lnk into jtmp, then initialize lnk */
3358: PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);
3360: /* numerical factorization */
3361: bjtmp = jtmp;
3362: row = *bjtmp++; /* 1st pivot row */
3363: while ( row < i ) {
3364: pc = rtmp + row;
3365: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3366: multiplier = (*pc) * (*pv);
3367: *pc = multiplier;
3368: if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */
3369: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3370: pv = ba + bdiag[row+1] + 1;
3371: /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3372: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3373: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3374: PetscLogFlops(1+2*nz);
3375: }
3376: row = *bjtmp++;
3377: }
3379: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3380: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3381: nzi_bl = 0; j = 0;
3382: while (jtmp[j] < i){ /* Note: jtmp is sorted */
3383: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3384: nzi_bl++; j++;
3385: }
3386: nzi_bu = nzi - nzi_bl -1;
3387: while (j < nzi){
3388: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3389: j++;
3390: }
3391:
3392: bjtmp = bj + bi[i];
3393: batmp = ba + bi[i];
3394: /* apply level dropping rule to L part */
3395: ncut = nzi_al + dtcount;
3396: if (ncut < nzi_bl){
3397: PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);
3398: PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
3399: } else {
3400: ncut = nzi_bl;
3401: }
3402: for (j=0; j<ncut; j++){
3403: bjtmp[j] = jtmp[j];
3404: batmp[j] = vtmp[j];
3405: /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3406: }
3407: bi[i+1] = bi[i] + ncut;
3408: nzi = ncut + 1;
3409:
3410: /* apply level dropping rule to U part */
3411: ncut = nzi_au + dtcount;
3412: if (ncut < nzi_bu){
3413: PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);
3414: PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
3415: } else {
3416: ncut = nzi_bu;
3417: }
3418: nzi += ncut;
3420: /* mark bdiagonal */
3421: bdiag[i+1] = bdiag[i] - (ncut + 1);
3422: bdiag_rev[n-i-1] = bdiag[i+1];
3423: bi[2*n - i] = bi[2*n - i +1] - (ncut + 1);
3424: bjtmp = bj + bdiag[i];
3425: batmp = ba + bdiag[i];
3426: *bjtmp = i;
3427: *batmp = diag_tmp; /* rtmp[i]; */
3428: if (*batmp == 0.0) {
3429: *batmp = dt+shift;
3430: /* printf(" row %d add shift %g\n",i,shift); */
3431: }
3432: *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3433: /* printf(" (%d,%g),",*bjtmp,*batmp); */
3434:
3435: bjtmp = bj + bdiag[i+1]+1;
3436: batmp = ba + bdiag[i+1]+1;
3437: for (k=0; k<ncut; k++){
3438: bjtmp[k] = jtmp[nzi_bl+1+k];
3439: batmp[k] = vtmp[nzi_bl+1+k];
3440: /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3441: }
3442: /* printf("\n"); */
3443:
3444: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3445: /*
3446: printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3447: printf(" ----------------------------\n");
3448: */
3449: } /* for (i=0; i<n; i++) */
3450: /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3451: if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3453: ISRestoreIndices(isrow,&r);
3454: ISRestoreIndices(isicol,&ic);
3456: PetscLLDestroy(lnk,lnkbt);
3457: PetscFree2(im,jtmp);
3458: PetscFree2(rtmp,vtmp);
3459: PetscFree(bdiag_rev);
3461: PetscLogFlops(B->cmap->n);
3462: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3464: ISIdentity(isrow,&row_identity);
3465: ISIdentity(isicol,&icol_identity);
3466: if (row_identity && icol_identity) {
3467: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3468: } else {
3469: B->ops->solve = MatSolve_SeqAIJ;
3470: }
3471:
3472: B->ops->solveadd = 0;
3473: B->ops->solvetranspose = 0;
3474: B->ops->solvetransposeadd = 0;
3475: B->ops->matsolve = 0;
3476: B->assembled = PETSC_TRUE;
3477: B->preallocated = PETSC_TRUE;
3478: return(0);
3479: }
3481: /* a wraper of MatILUDTFactor_SeqAIJ() */
3484: /*
3485: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3486: */
3488: PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3489: {
3490: PetscErrorCode ierr;
3493: MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);
3494: return(0);
3495: }
3497: /*
3498: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3499: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3500: */
3503: /*
3504: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3505: */
3507: PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3508: {
3509: Mat C=fact;
3510: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
3511: IS isrow = b->row,isicol = b->icol;
3513: const PetscInt *r,*ic,*ics;
3514: PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3515: PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3516: MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3517: PetscReal dt=info->dt,shift=info->shiftamount;
3518: PetscBool row_identity, col_identity;
3521: ISGetIndices(isrow,&r);
3522: ISGetIndices(isicol,&ic);
3523: PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
3524: ics = ic;
3526: for (i=0; i<n; i++){
3527: /* initialize rtmp array */
3528: nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */
3529: bjtmp = bj + bi[i];
3530: for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3531: rtmp[i] = 0.0;
3532: nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3533: bjtmp = bj + bdiag[i+1] + 1;
3534: for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3536: /* load in initial unfactored row of A */
3537: /* printf("row %d\n",i); */
3538: nz = ai[r[i]+1] - ai[r[i]];
3539: ajtmp = aj + ai[r[i]];
3540: v = aa + ai[r[i]];
3541: for (j=0; j<nz; j++) {
3542: rtmp[ics[*ajtmp++]] = v[j];
3543: /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3544: }
3545: /* printf("\n"); */
3547: /* numerical factorization */
3548: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3549: nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3550: k = 0;
3551: while (k < nzl){
3552: row = *bjtmp++;
3553: /* printf(" prow %d\n",row); */
3554: pc = rtmp + row;
3555: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3556: multiplier = (*pc) * (*pv);
3557: *pc = multiplier;
3558: if (PetscAbsScalar(multiplier) > dt){
3559: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3560: pv = b->a + bdiag[row+1] + 1;
3561: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3562: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3563: PetscLogFlops(1+2*nz);
3564: }
3565: k++;
3566: }
3567:
3568: /* finished row so stick it into b->a */
3569: /* L-part */
3570: pv = b->a + bi[i] ;
3571: pj = bj + bi[i] ;
3572: nzl = bi[i+1] - bi[i];
3573: for (j=0; j<nzl; j++) {
3574: pv[j] = rtmp[pj[j]];
3575: /* printf(" (%d,%g),",pj[j],pv[j]); */
3576: }
3578: /* diagonal: invert diagonal entries for simplier triangular solves */
3579: if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3580: b->a[bdiag[i]] = 1.0/rtmp[i];
3581: /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3583: /* U-part */
3584: pv = b->a + bdiag[i+1] + 1;
3585: pj = bj + bdiag[i+1] + 1;
3586: nzu = bdiag[i] - bdiag[i+1] - 1;
3587: for (j=0; j<nzu; j++) {
3588: pv[j] = rtmp[pj[j]];
3589: /* printf(" (%d,%g),",pj[j],pv[j]); */
3590: }
3591: /* printf("\n"); */
3592: }
3594: PetscFree(rtmp);
3595: ISRestoreIndices(isicol,&ic);
3596: ISRestoreIndices(isrow,&r);
3597:
3598: ISIdentity(isrow,&row_identity);
3599: ISIdentity(isicol,&col_identity);
3600: if (row_identity && col_identity) {
3601: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3602: } else {
3603: C->ops->solve = MatSolve_SeqAIJ;
3604: }
3605: C->ops->solveadd = 0;
3606: C->ops->solvetranspose = 0;
3607: C->ops->solvetransposeadd = 0;
3608: C->ops->matsolve = 0;
3609: C->assembled = PETSC_TRUE;
3610: C->preallocated = PETSC_TRUE;
3611: PetscLogFlops(C->cmap->n);
3612: return(0);
3613: }