Actual source code: aijfact.c
petsc-3.3-p1 2012-06-15
2: #include <../src/mat/impls/aij/seq/aij.h>
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <petscbt.h>
5: #include <../src/mat/utils/freespace.h>
7: EXTERN_C_BEGIN
10: /*
11: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
13: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
14: */
15: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
16: {
17: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data;
18: PetscErrorCode ierr;
19: PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
20: const PetscInt *ai = a->i, *aj = a->j;
21: const PetscScalar *aa = a->a;
22: PetscBool *done;
23: PetscReal best,past = 0,future;
26: /* pick initial row */
27: best = -1;
28: for (i=0; i<n; i++) {
29: future = 0.0;
30: for (j=ai[i]; j<ai[i+1]; j++) {
31: if (aj[j] != i) future += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]);
32: }
33: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34: if (past/future > best) {
35: best = past/future;
36: current = i;
37: }
38: }
40: PetscMalloc(n*sizeof(PetscBool),&done);
41: PetscMemzero(done,n*sizeof(PetscBool));
42: PetscMalloc(n*sizeof(PetscInt),&order);
43: order[0] = current;
44: for (i=0; i<n-1; i++) {
45: done[current] = PETSC_TRUE;
46: best = -1;
47: /* loop over all neighbors of current pivot */
48: for (j=ai[current]; j<ai[current+1]; j++) {
49: jj = aj[j];
50: if (done[jj]) continue;
51: /* loop over columns of potential next row computing weights for below and above diagonal */
52: past = future = 0.0;
53: for (k=ai[jj]; k<ai[jj+1]; k++) {
54: kk = aj[k];
55: if (done[kk]) past += PetscAbsScalar(aa[k]);
56: else if (kk != jj) future += PetscAbsScalar(aa[k]);
57: }
58: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59: if (past/future > best) {
60: best = past/future;
61: newcurrent = jj;
62: }
63: }
64: if (best == -1) { /* no neighbors to select from so select best of all that remain */
65: best = -1;
66: for (k=0; k<n; k++) {
67: if (done[k]) continue;
68: future = 0.0;
69: past = 0.0;
70: for (j=ai[k]; j<ai[k+1]; j++) {
71: kk = aj[j];
72: if (done[kk]) past += PetscAbsScalar(aa[j]);
73: else if (kk != k) future += PetscAbsScalar(aa[j]);
74: }
75: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76: if (past/future > best) {
77: best = past/future;
78: newcurrent = k;
79: }
80: }
81: }
82: if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
83: current = newcurrent;
84: order[i+1] = current;
85: }
86: ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);
87: *icol = *irow;
88: PetscObjectReference((PetscObject)*irow);
89: PetscFree(done);
90: PetscFree(order);
91: return(0);
92: }
93: EXTERN_C_END
95: EXTERN_C_BEGIN
98: PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg)
99: {
101: *flg = PETSC_TRUE;
102: return(0);
103: }
104: EXTERN_C_END
106: EXTERN_C_BEGIN
109: PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
110: {
111: PetscInt n = A->rmap->n;
112: PetscErrorCode ierr;
115: MatCreate(((PetscObject)A)->comm,B);
116: MatSetSizes(*B,n,n,n,n);
117: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
118: MatSetType(*B,MATSEQAIJ);
119: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
120: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
121: MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);
122: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
123: MatSetType(*B,MATSEQSBAIJ);
124: MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
125: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
126: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
127: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
128: (*B)->factortype = ftype;
129: return(0);
130: }
131: EXTERN_C_END
135: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
136: {
137: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
138: IS isicol;
139: PetscErrorCode ierr;
140: const PetscInt *r,*ic;
141: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
142: PetscInt *bi,*bj,*ajtmp;
143: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
144: PetscReal f;
145: PetscInt nlnk,*lnk,k,**bi_ptr;
146: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
147: PetscBT lnkbt;
148:
150: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
151: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
152: ISGetIndices(isrow,&r);
153: ISGetIndices(isicol,&ic);
155: /* get new row pointers */
156: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
157: bi[0] = 0;
159: /* bdiag is location of diagonal in factor */
160: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
161: bdiag[0] = 0;
163: /* linked list for storing column indices of the active row */
164: nlnk = n + 1;
165: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
167: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
169: /* initial FreeSpace size is f*(ai[n]+1) */
170: f = info->fill;
171: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
172: current_space = free_space;
174: for (i=0; i<n; i++) {
175: /* copy previous fill into linked list */
176: nzi = 0;
177: nnz = ai[r[i]+1] - ai[r[i]];
178: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
179: ajtmp = aj + ai[r[i]];
180: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
181: nzi += nlnk;
183: /* add pivot rows into linked list */
184: row = lnk[n];
185: while (row < i) {
186: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
187: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
188: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
189: nzi += nlnk;
190: row = lnk[row];
191: }
192: bi[i+1] = bi[i] + nzi;
193: im[i] = nzi;
195: /* mark bdiag */
196: nzbd = 0;
197: nnz = nzi;
198: k = lnk[n];
199: while (nnz-- && k < i){
200: nzbd++;
201: k = lnk[k];
202: }
203: bdiag[i] = bi[i] + nzbd;
205: /* if free space is not available, make more free space */
206: if (current_space->local_remaining<nzi) {
207: nnz = (n - i)*nzi; /* estimated and max additional space needed */
208: PetscFreeSpaceGet(nnz,¤t_space);
209: reallocs++;
210: }
212: /* copy data into free space, then initialize lnk */
213: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
214: bi_ptr[i] = current_space->array;
215: current_space->array += nzi;
216: current_space->local_used += nzi;
217: current_space->local_remaining -= nzi;
218: }
219: #if defined(PETSC_USE_INFO)
220: if (ai[n] != 0) {
221: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
222: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
223: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
224: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
225: PetscInfo(A,"for best performance.\n");
226: } else {
227: PetscInfo(A,"Empty matrix\n");
228: }
229: #endif
231: ISRestoreIndices(isrow,&r);
232: ISRestoreIndices(isicol,&ic);
234: /* destroy list of free space and other temporary array(s) */
235: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
236: PetscFreeSpaceContiguous(&free_space,bj);
237: PetscLLDestroy(lnk,lnkbt);
238: PetscFree2(bi_ptr,im);
240: /* put together the new matrix */
241: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);
242: PetscLogObjectParent(B,isicol);
243: b = (Mat_SeqAIJ*)(B)->data;
244: b->free_a = PETSC_TRUE;
245: b->free_ij = PETSC_TRUE;
246: b->singlemalloc = PETSC_FALSE;
247: PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
248: b->j = bj;
249: b->i = bi;
250: b->diag = bdiag;
251: b->ilen = 0;
252: b->imax = 0;
253: b->row = isrow;
254: b->col = iscol;
255: PetscObjectReference((PetscObject)isrow);
256: PetscObjectReference((PetscObject)iscol);
257: b->icol = isicol;
258: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
260: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
261: PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
262: b->maxnz = b->nz = bi[n] ;
264: (B)->factortype = MAT_FACTOR_LU;
265: (B)->info.factor_mallocs = reallocs;
266: (B)->info.fill_ratio_given = f;
268: if (ai[n]) {
269: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
270: } else {
271: (B)->info.fill_ratio_needed = 0.0;
272: }
273: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
274: if (a->inode.size) {
275: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
276: }
277: return(0);
278: }
282: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
283: {
284: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
285: IS isicol;
286: PetscErrorCode ierr;
287: const PetscInt *r,*ic;
288: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
289: PetscInt *bi,*bj,*ajtmp;
290: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
291: PetscReal f;
292: PetscInt nlnk,*lnk,k,**bi_ptr;
293: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
294: PetscBT lnkbt;
297: /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */
298: /*
299: PetscBool olddatastruct=PETSC_FALSE;
300: PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);
301: if(olddatastruct){
302: MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);
303: return(0);
304: }
305: */
306: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
307: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
308: ISGetIndices(isrow,&r);
309: ISGetIndices(isicol,&ic);
311: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
312: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
313: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
314: bi[0] = bdiag[0] = 0;
316: /* linked list for storing column indices of the active row */
317: nlnk = n + 1;
318: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
320: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
322: /* initial FreeSpace size is f*(ai[n]+1) */
323: f = info->fill;
324: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
325: current_space = free_space;
327: for (i=0; i<n; i++) {
328: /* copy previous fill into linked list */
329: nzi = 0;
330: nnz = ai[r[i]+1] - ai[r[i]];
331: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
332: ajtmp = aj + ai[r[i]];
333: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
334: nzi += nlnk;
336: /* add pivot rows into linked list */
337: row = lnk[n];
338: while (row < i){
339: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
340: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
341: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
342: nzi += nlnk;
343: row = lnk[row];
344: }
345: bi[i+1] = bi[i] + nzi;
346: im[i] = nzi;
348: /* mark bdiag */
349: nzbd = 0;
350: nnz = nzi;
351: k = lnk[n];
352: while (nnz-- && k < i){
353: nzbd++;
354: k = lnk[k];
355: }
356: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
358: /* if free space is not available, make more free space */
359: if (current_space->local_remaining<nzi) {
360: nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
361: PetscFreeSpaceGet(nnz,¤t_space);
362: reallocs++;
363: }
365: /* copy data into free space, then initialize lnk */
366: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
367: bi_ptr[i] = current_space->array;
368: current_space->array += nzi;
369: current_space->local_used += nzi;
370: current_space->local_remaining -= nzi;
371: }
373: ISRestoreIndices(isrow,&r);
374: ISRestoreIndices(isicol,&ic);
376: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
377: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
378: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
379: PetscLLDestroy(lnk,lnkbt);
380: PetscFree2(bi_ptr,im);
382: /* put together the new matrix */
383: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);
384: PetscLogObjectParent(B,isicol);
385: b = (Mat_SeqAIJ*)(B)->data;
386: b->free_a = PETSC_TRUE;
387: b->free_ij = PETSC_TRUE;
388: b->singlemalloc = PETSC_FALSE;
389: PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);
390: b->j = bj;
391: b->i = bi;
392: b->diag = bdiag;
393: b->ilen = 0;
394: b->imax = 0;
395: b->row = isrow;
396: b->col = iscol;
397: PetscObjectReference((PetscObject)isrow);
398: PetscObjectReference((PetscObject)iscol);
399: b->icol = isicol;
400: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
402: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
403: PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
404: b->maxnz = b->nz = bdiag[0]+1;
405: B->factortype = MAT_FACTOR_LU;
406: B->info.factor_mallocs = reallocs;
407: B->info.fill_ratio_given = f;
409: if (ai[n]) {
410: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
411: } else {
412: B->info.fill_ratio_needed = 0.0;
413: }
414: #if defined(PETSC_USE_INFO)
415: if (ai[n] != 0) {
416: PetscReal af = B->info.fill_ratio_needed;
417: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
418: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
419: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
420: PetscInfo(A,"for best performance.\n");
421: } else {
422: PetscInfo(A,"Empty matrix\n");
423: }
424: #endif
425: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
426: if (a->inode.size) {
427: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
428: }
429: return(0);
430: }
432: /*
433: Trouble in factorization, should we dump the original matrix?
434: */
437: PetscErrorCode MatFactorDumpMatrix(Mat A)
438: {
440: PetscBool flg = PETSC_FALSE;
443: PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);
444: if (flg) {
445: PetscViewer viewer;
446: char filename[PETSC_MAX_PATH_LEN];
448: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
449: PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);
450: MatView(A,viewer);
451: PetscViewerDestroy(&viewer);
452: }
453: return(0);
454: }
458: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
459: {
460: Mat C=B;
461: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
462: IS isrow = b->row,isicol = b->icol;
463: PetscErrorCode ierr;
464: const PetscInt *r,*ic,*ics;
465: const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
466: PetscInt i,j,k,nz,nzL,row,*pj;
467: const PetscInt *ajtmp,*bjtmp;
468: MatScalar *rtmp,*pc,multiplier,*pv;
469: const MatScalar *aa=a->a,*v;
470: PetscBool row_identity,col_identity;
471: FactorShiftCtx sctx;
472: const PetscInt *ddiag;
473: PetscReal rs;
474: MatScalar d;
477: /* MatPivotSetUp(): initialize shift context sctx */
478: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
480: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
481: ddiag = a->diag;
482: sctx.shift_top = info->zeropivot;
483: for (i=0; i<n; i++) {
484: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
485: d = (aa)[ddiag[i]];
486: rs = -PetscAbsScalar(d) - PetscRealPart(d);
487: v = aa+ai[i];
488: nz = ai[i+1] - ai[i];
489: for (j=0; j<nz; j++)
490: rs += PetscAbsScalar(v[j]);
491: if (rs>sctx.shift_top) sctx.shift_top = rs;
492: }
493: sctx.shift_top *= 1.1;
494: sctx.nshift_max = 5;
495: sctx.shift_lo = 0.;
496: sctx.shift_hi = 1.;
497: }
499: ISGetIndices(isrow,&r);
500: ISGetIndices(isicol,&ic);
501: PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
502: ics = ic;
504: do {
505: sctx.newshift = PETSC_FALSE;
506: for (i=0; i<n; i++){
507: /* zero rtmp */
508: /* L part */
509: nz = bi[i+1] - bi[i];
510: bjtmp = bj + bi[i];
511: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
513: /* U part */
514: nz = bdiag[i]-bdiag[i+1];
515: bjtmp = bj + bdiag[i+1]+1;
516: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
517:
518: /* load in initial (unfactored row) */
519: nz = ai[r[i]+1] - ai[r[i]];
520: ajtmp = aj + ai[r[i]];
521: v = aa + ai[r[i]];
522: for (j=0; j<nz; j++) {
523: rtmp[ics[ajtmp[j]]] = v[j];
524: }
525: /* ZeropivotApply() */
526: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
527:
528: /* elimination */
529: bjtmp = bj + bi[i];
530: row = *bjtmp++;
531: nzL = bi[i+1] - bi[i];
532: for(k=0; k < nzL;k++) {
533: pc = rtmp + row;
534: if (*pc != 0.0) {
535: pv = b->a + bdiag[row];
536: multiplier = *pc * (*pv);
537: *pc = multiplier;
538: pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
539: pv = b->a + bdiag[row+1]+1;
540: nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
541: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
542: PetscLogFlops(1+2*nz);
543: }
544: row = *bjtmp++;
545: }
547: /* finished row so stick it into b->a */
548: rs = 0.0;
549: /* L part */
550: pv = b->a + bi[i] ;
551: pj = b->j + bi[i] ;
552: nz = bi[i+1] - bi[i];
553: for (j=0; j<nz; j++) {
554: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
555: }
557: /* U part */
558: pv = b->a + bdiag[i+1]+1;
559: pj = b->j + bdiag[i+1]+1;
560: nz = bdiag[i] - bdiag[i+1]-1;
561: for (j=0; j<nz; j++) {
562: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
563: }
565: sctx.rs = rs;
566: sctx.pv = rtmp[i];
567: MatPivotCheck(A,info,&sctx,i);
568: if(sctx.newshift) break; /* break for-loop */
569: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
571: /* Mark diagonal and invert diagonal for simplier triangular solves */
572: pv = b->a + bdiag[i];
573: *pv = 1.0/rtmp[i];
575: } /* endof for (i=0; i<n; i++){ */
577: /* MatPivotRefine() */
578: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){
579: /*
580: * if no shift in this attempt & shifting & started shifting & can refine,
581: * then try lower shift
582: */
583: sctx.shift_hi = sctx.shift_fraction;
584: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
585: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
586: sctx.newshift = PETSC_TRUE;
587: sctx.nshift++;
588: }
589: } while (sctx.newshift);
591: PetscFree(rtmp);
592: ISRestoreIndices(isicol,&ic);
593: ISRestoreIndices(isrow,&r);
594:
595: ISIdentity(isrow,&row_identity);
596: ISIdentity(isicol,&col_identity);
597: if (row_identity && col_identity) {
598: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
599: } else {
600: C->ops->solve = MatSolve_SeqAIJ;
601: }
602: C->ops->solveadd = MatSolveAdd_SeqAIJ;
603: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
604: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
605: C->ops->matsolve = MatMatSolve_SeqAIJ;
606: C->assembled = PETSC_TRUE;
607: C->preallocated = PETSC_TRUE;
608: PetscLogFlops(C->cmap->n);
610: /* MatShiftView(A,info,&sctx) */
611: if (sctx.nshift){
612: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
613: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
614: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
615: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
616: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
617: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);
618: }
619: }
620: Mat_CheckInode_FactorLU(C,PETSC_FALSE);
621: return(0);
622: }
626: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
627: {
628: Mat C=B;
629: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
630: IS isrow = b->row,isicol = b->icol;
631: PetscErrorCode ierr;
632: const PetscInt *r,*ic,*ics;
633: PetscInt nz,row,i,j,n=A->rmap->n,diag;
634: const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
635: const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
636: MatScalar *pv,*rtmp,*pc,multiplier,d;
637: const MatScalar *v,*aa=a->a;
638: PetscReal rs=0.0;
639: FactorShiftCtx sctx;
640: const PetscInt *ddiag;
641: PetscBool row_identity, col_identity;
644: /* MatPivotSetUp(): initialize shift context sctx */
645: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
647: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
648: ddiag = a->diag;
649: sctx.shift_top = info->zeropivot;
650: for (i=0; i<n; i++) {
651: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
652: d = (aa)[ddiag[i]];
653: rs = -PetscAbsScalar(d) - PetscRealPart(d);
654: v = aa+ai[i];
655: nz = ai[i+1] - ai[i];
656: for (j=0; j<nz; j++)
657: rs += PetscAbsScalar(v[j]);
658: if (rs>sctx.shift_top) sctx.shift_top = rs;
659: }
660: sctx.shift_top *= 1.1;
661: sctx.nshift_max = 5;
662: sctx.shift_lo = 0.;
663: sctx.shift_hi = 1.;
664: }
666: ISGetIndices(isrow,&r);
667: ISGetIndices(isicol,&ic);
668: PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
669: ics = ic;
671: do {
672: sctx.newshift = PETSC_FALSE;
673: for (i=0; i<n; i++){
674: nz = bi[i+1] - bi[i];
675: bjtmp = bj + bi[i];
676: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
678: /* load in initial (unfactored row) */
679: nz = ai[r[i]+1] - ai[r[i]];
680: ajtmp = aj + ai[r[i]];
681: v = aa + ai[r[i]];
682: for (j=0; j<nz; j++) {
683: rtmp[ics[ajtmp[j]]] = v[j];
684: }
685: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
687: row = *bjtmp++;
688: while (row < i) {
689: pc = rtmp + row;
690: if (*pc != 0.0) {
691: pv = b->a + diag_offset[row];
692: pj = b->j + diag_offset[row] + 1;
693: multiplier = *pc / *pv++;
694: *pc = multiplier;
695: nz = bi[row+1] - diag_offset[row] - 1;
696: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
697: PetscLogFlops(1+2*nz);
698: }
699: row = *bjtmp++;
700: }
701: /* finished row so stick it into b->a */
702: pv = b->a + bi[i] ;
703: pj = b->j + bi[i] ;
704: nz = bi[i+1] - bi[i];
705: diag = diag_offset[i] - bi[i];
706: rs = 0.0;
707: for (j=0; j<nz; j++) {
708: pv[j] = rtmp[pj[j]];
709: rs += PetscAbsScalar(pv[j]);
710: }
711: rs -= PetscAbsScalar(pv[diag]);
713: sctx.rs = rs;
714: sctx.pv = pv[diag];
715: MatPivotCheck(A,info,&sctx,i);
716: if (sctx.newshift) break;
717: pv[diag] = sctx.pv;
718: }
720: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
721: /*
722: * if no shift in this attempt & shifting & started shifting & can refine,
723: * then try lower shift
724: */
725: sctx.shift_hi = sctx.shift_fraction;
726: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
727: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
728: sctx.newshift = PETSC_TRUE;
729: sctx.nshift++;
730: }
731: } while (sctx.newshift);
733: /* invert diagonal entries for simplier triangular solves */
734: for (i=0; i<n; i++) {
735: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
736: }
737: PetscFree(rtmp);
738: ISRestoreIndices(isicol,&ic);
739: ISRestoreIndices(isrow,&r);
741: ISIdentity(isrow,&row_identity);
742: ISIdentity(isicol,&col_identity);
743: if (row_identity && col_identity) {
744: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
745: } else {
746: C->ops->solve = MatSolve_SeqAIJ_inplace;
747: }
748: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
749: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
750: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
751: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
752: C->assembled = PETSC_TRUE;
753: C->preallocated = PETSC_TRUE;
754: PetscLogFlops(C->cmap->n);
755: if (sctx.nshift){
756: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
757: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
758: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
759: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
760: }
761: }
762: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
763: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
764: Mat_CheckInode(C,PETSC_FALSE);
765: return(0);
766: }
768: /*
769: This routine implements inplace ILU(0) with row or/and column permutations.
770: Input:
771: A - original matrix
772: Output;
773: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
774: a->j (col index) is permuted by the inverse of colperm, then sorted
775: a->a reordered accordingly with a->j
776: a->diag (ptr to diagonal elements) is updated.
777: */
780: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
781: {
782: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
783: IS isrow = a->row,isicol = a->icol;
785: const PetscInt *r,*ic,*ics;
786: PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
787: PetscInt *ajtmp,nz,row;
788: PetscInt *diag = a->diag,nbdiag,*pj;
789: PetscScalar *rtmp,*pc,multiplier,d;
790: MatScalar *pv,*v;
791: PetscReal rs;
792: FactorShiftCtx sctx;
793: const MatScalar *aa=a->a,*vtmp;
796: if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
798: /* MatPivotSetUp(): initialize shift context sctx */
799: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
801: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
802: const PetscInt *ddiag = a->diag;
803: sctx.shift_top = info->zeropivot;
804: for (i=0; i<n; i++) {
805: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
806: d = (aa)[ddiag[i]];
807: rs = -PetscAbsScalar(d) - PetscRealPart(d);
808: vtmp = aa+ai[i];
809: nz = ai[i+1] - ai[i];
810: for (j=0; j<nz; j++)
811: rs += PetscAbsScalar(vtmp[j]);
812: if (rs>sctx.shift_top) sctx.shift_top = rs;
813: }
814: sctx.shift_top *= 1.1;
815: sctx.nshift_max = 5;
816: sctx.shift_lo = 0.;
817: sctx.shift_hi = 1.;
818: }
820: ISGetIndices(isrow,&r);
821: ISGetIndices(isicol,&ic);
822: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
823: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
824: ics = ic;
826: #if defined(MV)
827: sctx.shift_top = 0.;
828: sctx.nshift_max = 0;
829: sctx.shift_lo = 0.;
830: sctx.shift_hi = 0.;
831: sctx.shift_fraction = 0.;
833: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
834: sctx.shift_top = 0.;
835: for (i=0; i<n; i++) {
836: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
837: d = (a->a)[diag[i]];
838: rs = -PetscAbsScalar(d) - PetscRealPart(d);
839: v = a->a+ai[i];
840: nz = ai[i+1] - ai[i];
841: for (j=0; j<nz; j++)
842: rs += PetscAbsScalar(v[j]);
843: if (rs>sctx.shift_top) sctx.shift_top = rs;
844: }
845: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
846: sctx.shift_top *= 1.1;
847: sctx.nshift_max = 5;
848: sctx.shift_lo = 0.;
849: sctx.shift_hi = 1.;
850: }
852: sctx.shift_amount = 0.;
853: sctx.nshift = 0;
854: #endif
856: do {
857: sctx.newshift = PETSC_FALSE;
858: for (i=0; i<n; i++){
859: /* load in initial unfactored row */
860: nz = ai[r[i]+1] - ai[r[i]];
861: ajtmp = aj + ai[r[i]];
862: v = a->a + ai[r[i]];
863: /* sort permuted ajtmp and values v accordingly */
864: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
865: PetscSortIntWithScalarArray(nz,ajtmp,v);
867: diag[r[i]] = ai[r[i]];
868: for (j=0; j<nz; j++) {
869: rtmp[ajtmp[j]] = v[j];
870: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
871: }
872: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
874: row = *ajtmp++;
875: while (row < i) {
876: pc = rtmp + row;
877: if (*pc != 0.0) {
878: pv = a->a + diag[r[row]];
879: pj = aj + diag[r[row]] + 1;
881: multiplier = *pc / *pv++;
882: *pc = multiplier;
883: nz = ai[r[row]+1] - diag[r[row]] - 1;
884: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
885: PetscLogFlops(1+2*nz);
886: }
887: row = *ajtmp++;
888: }
889: /* finished row so overwrite it onto a->a */
890: pv = a->a + ai[r[i]] ;
891: pj = aj + ai[r[i]] ;
892: nz = ai[r[i]+1] - ai[r[i]];
893: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
894:
895: rs = 0.0;
896: for (j=0; j<nz; j++) {
897: pv[j] = rtmp[pj[j]];
898: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
899: }
901: sctx.rs = rs;
902: sctx.pv = pv[nbdiag];
903: MatPivotCheck(A,info,&sctx,i);
904: if (sctx.newshift) break;
905: pv[nbdiag] = sctx.pv;
906: }
908: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
909: /*
910: * if no shift in this attempt & shifting & started shifting & can refine,
911: * then try lower shift
912: */
913: sctx.shift_hi = sctx.shift_fraction;
914: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
915: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
916: sctx.newshift = PETSC_TRUE;
917: sctx.nshift++;
918: }
919: } while (sctx.newshift);
921: /* invert diagonal entries for simplier triangular solves */
922: for (i=0; i<n; i++) {
923: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
924: }
926: PetscFree(rtmp);
927: ISRestoreIndices(isicol,&ic);
928: ISRestoreIndices(isrow,&r);
929: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
930: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
931: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
932: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
933: A->assembled = PETSC_TRUE;
934: A->preallocated = PETSC_TRUE;
935: PetscLogFlops(A->cmap->n);
936: if (sctx.nshift){
937: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
938: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
939: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
940: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
941: }
942: }
943: return(0);
944: }
946: /* ----------------------------------------------------------- */
949: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
950: {
952: Mat C;
955: MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);
956: MatLUFactorSymbolic(C,A,row,col,info);
957: MatLUFactorNumeric(C,A,info);
958: A->ops->solve = C->ops->solve;
959: A->ops->solvetranspose = C->ops->solvetranspose;
960: MatHeaderMerge(A,C);
961: PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
962: return(0);
963: }
964: /* ----------------------------------------------------------- */
969: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
970: {
971: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
972: IS iscol = a->col,isrow = a->row;
973: PetscErrorCode ierr;
974: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
975: PetscInt nz;
976: const PetscInt *rout,*cout,*r,*c;
977: PetscScalar *x,*tmp,*tmps,sum;
978: const PetscScalar *b;
979: const MatScalar *aa = a->a,*v;
980:
982: if (!n) return(0);
984: VecGetArrayRead(bb,&b);
985: VecGetArray(xx,&x);
986: tmp = a->solve_work;
988: ISGetIndices(isrow,&rout); r = rout;
989: ISGetIndices(iscol,&cout); c = cout + (n-1);
991: /* forward solve the lower triangular */
992: tmp[0] = b[*r++];
993: tmps = tmp;
994: for (i=1; i<n; i++) {
995: v = aa + ai[i] ;
996: vi = aj + ai[i] ;
997: nz = a->diag[i] - ai[i];
998: sum = b[*r++];
999: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1000: tmp[i] = sum;
1001: }
1003: /* backward solve the upper triangular */
1004: for (i=n-1; i>=0; i--){
1005: v = aa + a->diag[i] + 1;
1006: vi = aj + a->diag[i] + 1;
1007: nz = ai[i+1] - a->diag[i] - 1;
1008: sum = tmp[i];
1009: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1010: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1011: }
1013: ISRestoreIndices(isrow,&rout);
1014: ISRestoreIndices(iscol,&cout);
1015: VecRestoreArrayRead(bb,&b);
1016: VecRestoreArray(xx,&x);
1017: PetscLogFlops(2.0*a->nz - A->cmap->n);
1018: return(0);
1019: }
1023: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1024: {
1025: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1026: IS iscol = a->col,isrow = a->row;
1027: PetscErrorCode ierr;
1028: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1029: PetscInt nz,neq;
1030: const PetscInt *rout,*cout,*r,*c;
1031: PetscScalar *x,*b,*tmp,*tmps,sum;
1032: const MatScalar *aa = a->a,*v;
1033: PetscBool bisdense,xisdense;
1036: if (!n) return(0);
1038: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1039: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1040: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1041: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1043: MatGetArray(B,&b);
1044: MatGetArray(X,&x);
1045:
1046: tmp = a->solve_work;
1047: ISGetIndices(isrow,&rout); r = rout;
1048: ISGetIndices(iscol,&cout); c = cout;
1050: for (neq=0; neq<B->cmap->n; neq++){
1051: /* forward solve the lower triangular */
1052: tmp[0] = b[r[0]];
1053: tmps = tmp;
1054: for (i=1; i<n; i++) {
1055: v = aa + ai[i] ;
1056: vi = aj + ai[i] ;
1057: nz = a->diag[i] - ai[i];
1058: sum = b[r[i]];
1059: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1060: tmp[i] = sum;
1061: }
1062: /* backward solve the upper triangular */
1063: for (i=n-1; i>=0; i--){
1064: v = aa + a->diag[i] + 1;
1065: vi = aj + a->diag[i] + 1;
1066: nz = ai[i+1] - a->diag[i] - 1;
1067: sum = tmp[i];
1068: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1069: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1070: }
1072: b += n;
1073: x += n;
1074: }
1075: ISRestoreIndices(isrow,&rout);
1076: ISRestoreIndices(iscol,&cout);
1077: MatRestoreArray(B,&b);
1078: MatRestoreArray(X,&x);
1079: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1080: return(0);
1081: }
1085: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1086: {
1087: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1088: IS iscol = a->col,isrow = a->row;
1089: PetscErrorCode ierr;
1090: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1091: PetscInt nz,neq;
1092: const PetscInt *rout,*cout,*r,*c;
1093: PetscScalar *x,*b,*tmp,sum;
1094: const MatScalar *aa = a->a,*v;
1095: PetscBool bisdense,xisdense;
1098: if (!n) return(0);
1100: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1101: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1102: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1103: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1105: MatGetArray(B,&b);
1106: MatGetArray(X,&x);
1107:
1108: tmp = a->solve_work;
1109: ISGetIndices(isrow,&rout); r = rout;
1110: ISGetIndices(iscol,&cout); c = cout;
1112: for (neq=0; neq<B->cmap->n; neq++){
1113: /* forward solve the lower triangular */
1114: tmp[0] = b[r[0]];
1115: v = aa;
1116: vi = aj;
1117: for (i=1; i<n; i++) {
1118: nz = ai[i+1] - ai[i];
1119: sum = b[r[i]];
1120: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1121: tmp[i] = sum;
1122: v += nz; vi += nz;
1123: }
1125: /* backward solve the upper triangular */
1126: for (i=n-1; i>=0; i--){
1127: v = aa + adiag[i+1]+1;
1128: vi = aj + adiag[i+1]+1;
1129: nz = adiag[i]-adiag[i+1]-1;
1130: sum = tmp[i];
1131: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1132: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1133: }
1134:
1135: b += n;
1136: x += n;
1137: }
1138: ISRestoreIndices(isrow,&rout);
1139: ISRestoreIndices(iscol,&cout);
1140: MatRestoreArray(B,&b);
1141: MatRestoreArray(X,&x);
1142: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1143: return(0);
1144: }
1148: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1149: {
1150: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1151: IS iscol = a->col,isrow = a->row;
1152: PetscErrorCode ierr;
1153: const PetscInt *r,*c,*rout,*cout;
1154: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1155: PetscInt nz,row;
1156: PetscScalar *x,*b,*tmp,*tmps,sum;
1157: const MatScalar *aa = a->a,*v;
1160: if (!n) return(0);
1162: VecGetArray(bb,&b);
1163: VecGetArray(xx,&x);
1164: tmp = a->solve_work;
1166: ISGetIndices(isrow,&rout); r = rout;
1167: ISGetIndices(iscol,&cout); c = cout + (n-1);
1169: /* forward solve the lower triangular */
1170: tmp[0] = b[*r++];
1171: tmps = tmp;
1172: for (row=1; row<n; row++) {
1173: i = rout[row]; /* permuted row */
1174: v = aa + ai[i] ;
1175: vi = aj + ai[i] ;
1176: nz = a->diag[i] - ai[i];
1177: sum = b[*r++];
1178: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1179: tmp[row] = sum;
1180: }
1182: /* backward solve the upper triangular */
1183: for (row=n-1; row>=0; row--){
1184: i = rout[row]; /* permuted row */
1185: v = aa + a->diag[i] + 1;
1186: vi = aj + a->diag[i] + 1;
1187: nz = ai[i+1] - a->diag[i] - 1;
1188: sum = tmp[row];
1189: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1190: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1191: }
1193: ISRestoreIndices(isrow,&rout);
1194: ISRestoreIndices(iscol,&cout);
1195: VecRestoreArray(bb,&b);
1196: VecRestoreArray(xx,&x);
1197: PetscLogFlops(2.0*a->nz - A->cmap->n);
1198: return(0);
1199: }
1201: /* ----------------------------------------------------------- */
1202: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1205: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1206: {
1207: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1208: PetscErrorCode ierr;
1209: PetscInt n = A->rmap->n;
1210: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag;
1211: PetscScalar *x;
1212: const PetscScalar *b;
1213: const MatScalar *aa = a->a;
1214: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1215: PetscInt adiag_i,i,nz,ai_i;
1216: const PetscInt *vi;
1217: const MatScalar *v;
1218: PetscScalar sum;
1219: #endif
1222: if (!n) return(0);
1224: VecGetArrayRead(bb,&b);
1225: VecGetArray(xx,&x);
1227: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1228: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1229: #else
1230: /* forward solve the lower triangular */
1231: x[0] = b[0];
1232: for (i=1; i<n; i++) {
1233: ai_i = ai[i];
1234: v = aa + ai_i;
1235: vi = aj + ai_i;
1236: nz = adiag[i] - ai_i;
1237: sum = b[i];
1238: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1239: x[i] = sum;
1240: }
1242: /* backward solve the upper triangular */
1243: for (i=n-1; i>=0; i--){
1244: adiag_i = adiag[i];
1245: v = aa + adiag_i + 1;
1246: vi = aj + adiag_i + 1;
1247: nz = ai[i+1] - adiag_i - 1;
1248: sum = x[i];
1249: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1250: x[i] = sum*aa[adiag_i];
1251: }
1252: #endif
1253: PetscLogFlops(2.0*a->nz - A->cmap->n);
1254: VecRestoreArrayRead(bb,&b);
1255: VecRestoreArray(xx,&x);
1256: return(0);
1257: }
1261: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1262: {
1263: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1264: IS iscol = a->col,isrow = a->row;
1265: PetscErrorCode ierr;
1266: PetscInt i, n = A->rmap->n,j;
1267: PetscInt nz;
1268: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1269: PetscScalar *x,*tmp,sum;
1270: const PetscScalar *b;
1271: const MatScalar *aa = a->a,*v;
1274: if (yy != xx) {VecCopy(yy,xx);}
1276: VecGetArrayRead(bb,&b);
1277: VecGetArray(xx,&x);
1278: tmp = a->solve_work;
1280: ISGetIndices(isrow,&rout); r = rout;
1281: ISGetIndices(iscol,&cout); c = cout + (n-1);
1283: /* forward solve the lower triangular */
1284: tmp[0] = b[*r++];
1285: for (i=1; i<n; i++) {
1286: v = aa + ai[i] ;
1287: vi = aj + ai[i] ;
1288: nz = a->diag[i] - ai[i];
1289: sum = b[*r++];
1290: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1291: tmp[i] = sum;
1292: }
1294: /* backward solve the upper triangular */
1295: for (i=n-1; i>=0; i--){
1296: v = aa + a->diag[i] + 1;
1297: vi = aj + a->diag[i] + 1;
1298: nz = ai[i+1] - a->diag[i] - 1;
1299: sum = tmp[i];
1300: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1301: tmp[i] = sum*aa[a->diag[i]];
1302: x[*c--] += tmp[i];
1303: }
1305: ISRestoreIndices(isrow,&rout);
1306: ISRestoreIndices(iscol,&cout);
1307: VecRestoreArrayRead(bb,&b);
1308: VecRestoreArray(xx,&x);
1309: PetscLogFlops(2.0*a->nz);
1311: return(0);
1312: }
1316: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1317: {
1318: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1319: IS iscol = a->col,isrow = a->row;
1320: PetscErrorCode ierr;
1321: PetscInt i, n = A->rmap->n,j;
1322: PetscInt nz;
1323: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1324: PetscScalar *x,*tmp,sum;
1325: const PetscScalar *b;
1326: const MatScalar *aa = a->a,*v;
1329: if (yy != xx) {VecCopy(yy,xx);}
1331: VecGetArrayRead(bb,&b);
1332: VecGetArray(xx,&x);
1333: tmp = a->solve_work;
1335: ISGetIndices(isrow,&rout); r = rout;
1336: ISGetIndices(iscol,&cout); c = cout;
1338: /* forward solve the lower triangular */
1339: tmp[0] = b[r[0]];
1340: v = aa;
1341: vi = aj;
1342: for (i=1; i<n; i++) {
1343: nz = ai[i+1] - ai[i];
1344: sum = b[r[i]];
1345: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1346: tmp[i] = sum;
1347: v += nz; vi += nz;
1348: }
1350: /* backward solve the upper triangular */
1351: v = aa + adiag[n-1];
1352: vi = aj + adiag[n-1];
1353: for (i=n-1; i>=0; i--){
1354: nz = adiag[i] - adiag[i+1] - 1;
1355: sum = tmp[i];
1356: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1357: tmp[i] = sum*v[nz];
1358: x[c[i]] += tmp[i];
1359: v += nz+1; vi += nz+1;
1360: }
1362: ISRestoreIndices(isrow,&rout);
1363: ISRestoreIndices(iscol,&cout);
1364: VecRestoreArrayRead(bb,&b);
1365: VecRestoreArray(xx,&x);
1366: PetscLogFlops(2.0*a->nz);
1368: return(0);
1369: }
1373: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1374: {
1375: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1376: IS iscol = a->col,isrow = a->row;
1377: PetscErrorCode ierr;
1378: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1379: PetscInt i,n = A->rmap->n,j;
1380: PetscInt nz;
1381: PetscScalar *x,*tmp,s1;
1382: const MatScalar *aa = a->a,*v;
1383: const PetscScalar *b;
1386: VecGetArrayRead(bb,&b);
1387: VecGetArray(xx,&x);
1388: tmp = a->solve_work;
1390: ISGetIndices(isrow,&rout); r = rout;
1391: ISGetIndices(iscol,&cout); c = cout;
1393: /* copy the b into temp work space according to permutation */
1394: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1396: /* forward solve the U^T */
1397: for (i=0; i<n; i++) {
1398: v = aa + diag[i] ;
1399: vi = aj + diag[i] + 1;
1400: nz = ai[i+1] - diag[i] - 1;
1401: s1 = tmp[i];
1402: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1403: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1404: tmp[i] = s1;
1405: }
1407: /* backward solve the L^T */
1408: for (i=n-1; i>=0; i--){
1409: v = aa + diag[i] - 1 ;
1410: vi = aj + diag[i] - 1 ;
1411: nz = diag[i] - ai[i];
1412: s1 = tmp[i];
1413: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1414: }
1416: /* copy tmp into x according to permutation */
1417: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1419: ISRestoreIndices(isrow,&rout);
1420: ISRestoreIndices(iscol,&cout);
1421: VecRestoreArrayRead(bb,&b);
1422: VecRestoreArray(xx,&x);
1424: PetscLogFlops(2.0*a->nz-A->cmap->n);
1425: return(0);
1426: }
1430: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1431: {
1432: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1433: IS iscol = a->col,isrow = a->row;
1434: PetscErrorCode ierr;
1435: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1436: PetscInt i,n = A->rmap->n,j;
1437: PetscInt nz;
1438: PetscScalar *x,*tmp,s1;
1439: const MatScalar *aa = a->a,*v;
1440: const PetscScalar *b;
1443: VecGetArrayRead(bb,&b);
1444: VecGetArray(xx,&x);
1445: tmp = a->solve_work;
1447: ISGetIndices(isrow,&rout); r = rout;
1448: ISGetIndices(iscol,&cout); c = cout;
1450: /* copy the b into temp work space according to permutation */
1451: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1453: /* forward solve the U^T */
1454: for (i=0; i<n; i++) {
1455: v = aa + adiag[i+1] + 1;
1456: vi = aj + adiag[i+1] + 1;
1457: nz = adiag[i] - adiag[i+1] - 1;
1458: s1 = tmp[i];
1459: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1460: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1461: tmp[i] = s1;
1462: }
1464: /* backward solve the L^T */
1465: for (i=n-1; i>=0; i--){
1466: v = aa + ai[i];
1467: vi = aj + ai[i];
1468: nz = ai[i+1] - ai[i];
1469: s1 = tmp[i];
1470: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1471: }
1473: /* copy tmp into x according to permutation */
1474: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1476: ISRestoreIndices(isrow,&rout);
1477: ISRestoreIndices(iscol,&cout);
1478: VecRestoreArrayRead(bb,&b);
1479: VecRestoreArray(xx,&x);
1481: PetscLogFlops(2.0*a->nz-A->cmap->n);
1482: return(0);
1483: }
1487: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1488: {
1489: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1490: IS iscol = a->col,isrow = a->row;
1491: PetscErrorCode ierr;
1492: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1493: PetscInt i,n = A->rmap->n,j;
1494: PetscInt nz;
1495: PetscScalar *x,*tmp,s1;
1496: const MatScalar *aa = a->a,*v;
1497: const PetscScalar *b;
1500: if (zz != xx) {VecCopy(zz,xx);}
1501: VecGetArrayRead(bb,&b);
1502: VecGetArray(xx,&x);
1503: tmp = a->solve_work;
1505: ISGetIndices(isrow,&rout); r = rout;
1506: ISGetIndices(iscol,&cout); c = cout;
1508: /* copy the b into temp work space according to permutation */
1509: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1511: /* forward solve the U^T */
1512: for (i=0; i<n; i++) {
1513: v = aa + diag[i] ;
1514: vi = aj + diag[i] + 1;
1515: nz = ai[i+1] - diag[i] - 1;
1516: s1 = tmp[i];
1517: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1518: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1519: tmp[i] = s1;
1520: }
1522: /* backward solve the L^T */
1523: for (i=n-1; i>=0; i--){
1524: v = aa + diag[i] - 1 ;
1525: vi = aj + diag[i] - 1 ;
1526: nz = diag[i] - ai[i];
1527: s1 = tmp[i];
1528: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1529: }
1531: /* copy tmp into x according to permutation */
1532: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1534: ISRestoreIndices(isrow,&rout);
1535: ISRestoreIndices(iscol,&cout);
1536: VecRestoreArrayRead(bb,&b);
1537: VecRestoreArray(xx,&x);
1539: PetscLogFlops(2.0*a->nz-A->cmap->n);
1540: return(0);
1541: }
1545: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1546: {
1547: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1548: IS iscol = a->col,isrow = a->row;
1549: PetscErrorCode ierr;
1550: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1551: PetscInt i,n = A->rmap->n,j;
1552: PetscInt nz;
1553: PetscScalar *x,*tmp,s1;
1554: const MatScalar *aa = a->a,*v;
1555: const PetscScalar *b;
1558: if (zz != xx) {VecCopy(zz,xx);}
1559: VecGetArrayRead(bb,&b);
1560: VecGetArray(xx,&x);
1561: tmp = a->solve_work;
1563: ISGetIndices(isrow,&rout); r = rout;
1564: ISGetIndices(iscol,&cout); c = cout;
1566: /* copy the b into temp work space according to permutation */
1567: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1569: /* forward solve the U^T */
1570: for (i=0; i<n; i++) {
1571: v = aa + adiag[i+1] + 1;
1572: vi = aj + adiag[i+1] + 1;
1573: nz = adiag[i] - adiag[i+1] - 1;
1574: s1 = tmp[i];
1575: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1576: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1577: tmp[i] = s1;
1578: }
1581: /* backward solve the L^T */
1582: for (i=n-1; i>=0; i--){
1583: v = aa + ai[i] ;
1584: vi = aj + ai[i];
1585: nz = ai[i+1] - ai[i];
1586: s1 = tmp[i];
1587: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1588: }
1590: /* copy tmp into x according to permutation */
1591: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1593: ISRestoreIndices(isrow,&rout);
1594: ISRestoreIndices(iscol,&cout);
1595: VecRestoreArrayRead(bb,&b);
1596: VecRestoreArray(xx,&x);
1598: PetscLogFlops(2.0*a->nz-A->cmap->n);
1599: return(0);
1600: }
1602: /* ----------------------------------------------------------------*/
1604: extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool );
1606: /*
1607: ilu() under revised new data structure.
1608: Factored arrays bj and ba are stored as
1609: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1611: bi=fact->i is an array of size n+1, in which
1612: bi+
1613: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1614: bi[n]: points to L(n-1,n-1)+1
1615:
1616: bdiag=fact->diag is an array of size n+1,in which
1617: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1618: bdiag[n]: points to entry of U(n-1,0)-1
1620: U(i,:) contains bdiag[i] as its last entry, i.e.,
1621: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1622: */
1625: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1626: {
1627:
1628: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1629: PetscErrorCode ierr;
1630: const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1631: PetscInt i,j,k=0,nz,*bi,*bj,*bdiag;
1632: PetscBool missing;
1633: IS isicol;
1636: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1637: MatMissingDiagonal(A,&missing,&i);
1638: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1639: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1641: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);
1642: b = (Mat_SeqAIJ*)(fact)->data;
1644: /* allocate matrix arrays for new data structure */
1645: PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);
1646: PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));
1647: b->singlemalloc = PETSC_TRUE;
1648: if (!b->diag){
1649: PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);
1650: PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));
1651: }
1652: bdiag = b->diag;
1653:
1654: if (n > 0) {
1655: PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));
1656: }
1657:
1658: /* set bi and bj with new data structure */
1659: bi = b->i;
1660: bj = b->j;
1662: /* L part */
1663: bi[0] = 0;
1664: for (i=0; i<n; i++){
1665: nz = adiag[i] - ai[i];
1666: bi[i+1] = bi[i] + nz;
1667: aj = a->j + ai[i];
1668: for (j=0; j<nz; j++){
1669: /* *bj = aj[j]; bj++; */
1670: bj[k++] = aj[j];
1671: }
1672: }
1673:
1674: /* U part */
1675: bdiag[n] = bi[n]-1;
1676: for (i=n-1; i>=0; i--){
1677: nz = ai[i+1] - adiag[i] - 1;
1678: aj = a->j + adiag[i] + 1;
1679: for (j=0; j<nz; j++){
1680: /* *bj = aj[j]; bj++; */
1681: bj[k++] = aj[j];
1682: }
1683: /* diag[i] */
1684: /* *bj = i; bj++; */
1685: bj[k++] = i;
1686: bdiag[i] = bdiag[i+1] + nz + 1;
1687: }
1689: fact->factortype = MAT_FACTOR_ILU;
1690: fact->info.factor_mallocs = 0;
1691: fact->info.fill_ratio_given = info->fill;
1692: fact->info.fill_ratio_needed = 1.0;
1693: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1695: b = (Mat_SeqAIJ*)(fact)->data;
1696: b->row = isrow;
1697: b->col = iscol;
1698: b->icol = isicol;
1699: PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);
1700: PetscObjectReference((PetscObject)isrow);
1701: PetscObjectReference((PetscObject)iscol);
1702: return(0);
1703: }
1707: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1708: {
1709: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1710: IS isicol;
1711: PetscErrorCode ierr;
1712: const PetscInt *r,*ic;
1713: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1714: PetscInt *bi,*cols,nnz,*cols_lvl;
1715: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1716: PetscInt i,levels,diagonal_fill;
1717: PetscBool col_identity,row_identity;
1718: PetscReal f;
1719: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1720: PetscBT lnkbt;
1721: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1722: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1723: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1724:
1726: /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1727: /*
1728: PetscBool olddatastruct=PETSC_FALSE;
1729: PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);
1730: if(olddatastruct){
1731: MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);
1732: return(0);
1733: }
1734: */
1735:
1736: levels = (PetscInt)info->levels;
1737: ISIdentity(isrow,&row_identity);
1738: ISIdentity(iscol,&col_identity);
1740: if (!levels && row_identity && col_identity) {
1741: /* special case: ilu(0) with natural ordering */
1742: MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);
1743: if (a->inode.size) {
1744: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1745: }
1746: return(0);
1747: }
1749: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1750: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1751: ISGetIndices(isrow,&r);
1752: ISGetIndices(isicol,&ic);
1754: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1755: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1756: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1757: bi[0] = bdiag[0] = 0;
1759: PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);
1761: /* create a linked list for storing column indices of the active row */
1762: nlnk = n + 1;
1763: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1765: /* initial FreeSpace size is f*(ai[n]+1) */
1766: f = info->fill;
1767: diagonal_fill = (PetscInt)info->diagonal_fill;
1768: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1769: current_space = free_space;
1770: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1771: current_space_lvl = free_space_lvl;
1772:
1773: for (i=0; i<n; i++) {
1774: nzi = 0;
1775: /* copy current row into linked list */
1776: nnz = ai[r[i]+1] - ai[r[i]];
1777: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1778: cols = aj + ai[r[i]];
1779: lnk[i] = -1; /* marker to indicate if diagonal exists */
1780: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1781: nzi += nlnk;
1783: /* make sure diagonal entry is included */
1784: if (diagonal_fill && lnk[i] == -1) {
1785: fm = n;
1786: while (lnk[fm] < i) fm = lnk[fm];
1787: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1788: lnk[fm] = i;
1789: lnk_lvl[i] = 0;
1790: nzi++; dcount++;
1791: }
1793: /* add pivot rows into the active row */
1794: nzbd = 0;
1795: prow = lnk[n];
1796: while (prow < i) {
1797: nnz = bdiag[prow];
1798: cols = bj_ptr[prow] + nnz + 1;
1799: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1800: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1801: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1802: nzi += nlnk;
1803: prow = lnk[prow];
1804: nzbd++;
1805: }
1806: bdiag[i] = nzbd;
1807: bi[i+1] = bi[i] + nzi;
1809: /* if free space is not available, make more free space */
1810: if (current_space->local_remaining<nzi) {
1811: nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1812: PetscFreeSpaceGet(nnz,¤t_space);
1813: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1814: reallocs++;
1815: }
1817: /* copy data into free_space and free_space_lvl, then initialize lnk */
1818: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1819: bj_ptr[i] = current_space->array;
1820: bjlvl_ptr[i] = current_space_lvl->array;
1822: /* make sure the active row i has diagonal entry */
1823: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1825: current_space->array += nzi;
1826: current_space->local_used += nzi;
1827: current_space->local_remaining -= nzi;
1828: current_space_lvl->array += nzi;
1829: current_space_lvl->local_used += nzi;
1830: current_space_lvl->local_remaining -= nzi;
1831: }
1833: ISRestoreIndices(isrow,&r);
1834: ISRestoreIndices(isicol,&ic);
1836: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1837: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
1838: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
1839:
1840: PetscIncompleteLLDestroy(lnk,lnkbt);
1841: PetscFreeSpaceDestroy(free_space_lvl);
1842: PetscFree2(bj_ptr,bjlvl_ptr);
1844: #if defined(PETSC_USE_INFO)
1845: {
1846: PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1847: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
1848: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
1849: PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
1850: PetscInfo(A,"for best performance.\n");
1851: if (diagonal_fill) {
1852: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
1853: }
1854: }
1855: #endif
1857: /* put together the new matrix */
1858: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
1859: PetscLogObjectParent(fact,isicol);
1860: b = (Mat_SeqAIJ*)(fact)->data;
1861: b->free_a = PETSC_TRUE;
1862: b->free_ij = PETSC_TRUE;
1863: b->singlemalloc = PETSC_FALSE;
1864: PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);
1865: b->j = bj;
1866: b->i = bi;
1867: b->diag = bdiag;
1868: b->ilen = 0;
1869: b->imax = 0;
1870: b->row = isrow;
1871: b->col = iscol;
1872: PetscObjectReference((PetscObject)isrow);
1873: PetscObjectReference((PetscObject)iscol);
1874: b->icol = isicol;
1875: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1876: /* In b structure: Free imax, ilen, old a, old j.
1877: Allocate bdiag, solve_work, new a, new j */
1878: PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
1879: b->maxnz = b->nz = bdiag[0]+1;
1880: (fact)->info.factor_mallocs = reallocs;
1881: (fact)->info.fill_ratio_given = f;
1882: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1883: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1884: if (a->inode.size) {
1885: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1886: }
1887: return(0);
1888: }
1892: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1893: {
1894: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1895: IS isicol;
1896: PetscErrorCode ierr;
1897: const PetscInt *r,*ic;
1898: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1899: PetscInt *bi,*cols,nnz,*cols_lvl;
1900: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1901: PetscInt i,levels,diagonal_fill;
1902: PetscBool col_identity,row_identity;
1903: PetscReal f;
1904: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1905: PetscBT lnkbt;
1906: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1907: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1908: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1909: PetscBool missing;
1910:
1912: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1913: f = info->fill;
1914: levels = (PetscInt)info->levels;
1915: diagonal_fill = (PetscInt)info->diagonal_fill;
1916: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1918: ISIdentity(isrow,&row_identity);
1919: ISIdentity(iscol,&col_identity);
1920: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1921: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);
1922: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1923: if (a->inode.size) {
1924: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1925: }
1926: fact->factortype = MAT_FACTOR_ILU;
1927: (fact)->info.factor_mallocs = 0;
1928: (fact)->info.fill_ratio_given = info->fill;
1929: (fact)->info.fill_ratio_needed = 1.0;
1930: b = (Mat_SeqAIJ*)(fact)->data;
1931: MatMissingDiagonal(A,&missing,&d);
1932: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1933: b->row = isrow;
1934: b->col = iscol;
1935: b->icol = isicol;
1936: PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);
1937: PetscObjectReference((PetscObject)isrow);
1938: PetscObjectReference((PetscObject)iscol);
1939: return(0);
1940: }
1942: ISGetIndices(isrow,&r);
1943: ISGetIndices(isicol,&ic);
1945: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1946: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1947: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1948: bi[0] = bdiag[0] = 0;
1950: PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);
1952: /* create a linked list for storing column indices of the active row */
1953: nlnk = n + 1;
1954: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1956: /* initial FreeSpace size is f*(ai[n]+1) */
1957: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1958: current_space = free_space;
1959: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1960: current_space_lvl = free_space_lvl;
1961:
1962: for (i=0; i<n; i++) {
1963: nzi = 0;
1964: /* copy current row into linked list */
1965: nnz = ai[r[i]+1] - ai[r[i]];
1966: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1967: cols = aj + ai[r[i]];
1968: lnk[i] = -1; /* marker to indicate if diagonal exists */
1969: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1970: nzi += nlnk;
1972: /* make sure diagonal entry is included */
1973: if (diagonal_fill && lnk[i] == -1) {
1974: fm = n;
1975: while (lnk[fm] < i) fm = lnk[fm];
1976: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1977: lnk[fm] = i;
1978: lnk_lvl[i] = 0;
1979: nzi++; dcount++;
1980: }
1982: /* add pivot rows into the active row */
1983: nzbd = 0;
1984: prow = lnk[n];
1985: while (prow < i) {
1986: nnz = bdiag[prow];
1987: cols = bj_ptr[prow] + nnz + 1;
1988: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1989: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1990: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1991: nzi += nlnk;
1992: prow = lnk[prow];
1993: nzbd++;
1994: }
1995: bdiag[i] = nzbd;
1996: bi[i+1] = bi[i] + nzi;
1998: /* if free space is not available, make more free space */
1999: if (current_space->local_remaining<nzi) {
2000: nnz = nzi*(n - i); /* estimated and max additional space needed */
2001: PetscFreeSpaceGet(nnz,¤t_space);
2002: PetscFreeSpaceGet(nnz,¤t_space_lvl);
2003: reallocs++;
2004: }
2006: /* copy data into free_space and free_space_lvl, then initialize lnk */
2007: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2008: bj_ptr[i] = current_space->array;
2009: bjlvl_ptr[i] = current_space_lvl->array;
2011: /* make sure the active row i has diagonal entry */
2012: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2014: current_space->array += nzi;
2015: current_space->local_used += nzi;
2016: current_space->local_remaining -= nzi;
2017: current_space_lvl->array += nzi;
2018: current_space_lvl->local_used += nzi;
2019: current_space_lvl->local_remaining -= nzi;
2020: }
2022: ISRestoreIndices(isrow,&r);
2023: ISRestoreIndices(isicol,&ic);
2025: /* destroy list of free space and other temporary arrays */
2026: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
2027: PetscFreeSpaceContiguous(&free_space,bj); /* copy free_space -> bj */
2028: PetscIncompleteLLDestroy(lnk,lnkbt);
2029: PetscFreeSpaceDestroy(free_space_lvl);
2030: PetscFree2(bj_ptr,bjlvl_ptr);
2032: #if defined(PETSC_USE_INFO)
2033: {
2034: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2035: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
2036: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
2037: PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
2038: PetscInfo(A,"for best performance.\n");
2039: if (diagonal_fill) {
2040: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
2041: }
2042: }
2043: #endif
2045: /* put together the new matrix */
2046: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
2047: PetscLogObjectParent(fact,isicol);
2048: b = (Mat_SeqAIJ*)(fact)->data;
2049: b->free_a = PETSC_TRUE;
2050: b->free_ij = PETSC_TRUE;
2051: b->singlemalloc = PETSC_FALSE;
2052: PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);
2053: b->j = bj;
2054: b->i = bi;
2055: for (i=0; i<n; i++) bdiag[i] += bi[i];
2056: b->diag = bdiag;
2057: b->ilen = 0;
2058: b->imax = 0;
2059: b->row = isrow;
2060: b->col = iscol;
2061: PetscObjectReference((PetscObject)isrow);
2062: PetscObjectReference((PetscObject)iscol);
2063: b->icol = isicol;
2064: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
2065: /* In b structure: Free imax, ilen, old a, old j.
2066: Allocate bdiag, solve_work, new a, new j */
2067: PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
2068: b->maxnz = b->nz = bi[n] ;
2069: (fact)->info.factor_mallocs = reallocs;
2070: (fact)->info.fill_ratio_given = f;
2071: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2072: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2073: if (a->inode.size) {
2074: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2075: }
2076: return(0);
2077: }
2081: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2082: {
2083: Mat C = B;
2084: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2085: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2086: IS ip=b->row,iip = b->icol;
2088: const PetscInt *rip,*riip;
2089: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2090: PetscInt *ai=a->i,*aj=a->j;
2091: PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2092: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2093: PetscBool perm_identity;
2094: FactorShiftCtx sctx;
2095: PetscReal rs;
2096: MatScalar d,*v;
2099: /* MatPivotSetUp(): initialize shift context sctx */
2100: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2102: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2103: sctx.shift_top = info->zeropivot;
2104: for (i=0; i<mbs; i++) {
2105: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2106: d = (aa)[a->diag[i]];
2107: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2108: v = aa+ai[i];
2109: nz = ai[i+1] - ai[i];
2110: for (j=0; j<nz; j++)
2111: rs += PetscAbsScalar(v[j]);
2112: if (rs>sctx.shift_top) sctx.shift_top = rs;
2113: }
2114: sctx.shift_top *= 1.1;
2115: sctx.nshift_max = 5;
2116: sctx.shift_lo = 0.;
2117: sctx.shift_hi = 1.;
2118: }
2120: ISGetIndices(ip,&rip);
2121: ISGetIndices(iip,&riip);
2122:
2123: /* allocate working arrays
2124: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2125: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2126: */
2127: PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);
2128:
2129: do {
2130: sctx.newshift = PETSC_FALSE;
2132: for (i=0; i<mbs; i++) c2r[i] = mbs;
2133: il[0] = 0;
2134:
2135: for (k = 0; k<mbs; k++){
2136: /* zero rtmp */
2137: nz = bi[k+1] - bi[k];
2138: bjtmp = bj + bi[k];
2139: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2140:
2141: /* load in initial unfactored row */
2142: bval = ba + bi[k];
2143: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2144: for (j = jmin; j < jmax; j++){
2145: col = riip[aj[j]];
2146: if (col >= k){ /* only take upper triangular entry */
2147: rtmp[col] = aa[j];
2148: *bval++ = 0.0; /* for in-place factorization */
2149: }
2150: }
2151: /* shift the diagonal of the matrix: ZeropivotApply() */
2152: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2153:
2154: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2155: dk = rtmp[k];
2156: i = c2r[k]; /* first row to be added to k_th row */
2158: while (i < k){
2159: nexti = c2r[i]; /* next row to be added to k_th row */
2160:
2161: /* compute multiplier, update diag(k) and U(i,k) */
2162: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2163: uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2164: dk += uikdi*ba[ili]; /* update diag[k] */
2165: ba[ili] = uikdi; /* -U(i,k) */
2167: /* add multiple of row i to k-th row */
2168: jmin = ili + 1; jmax = bi[i+1];
2169: if (jmin < jmax){
2170: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2171: /* update il and c2r for row i */
2172: il[i] = jmin;
2173: j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2174: }
2175: i = nexti;
2176: }
2178: /* copy data into U(k,:) */
2179: rs = 0.0;
2180: jmin = bi[k]; jmax = bi[k+1]-1;
2181: if (jmin < jmax) {
2182: for (j=jmin; j<jmax; j++){
2183: col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2184: }
2185: /* add the k-th row into il and c2r */
2186: il[k] = jmin;
2187: i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2188: }
2190: /* MatPivotCheck() */
2191: sctx.rs = rs;
2192: sctx.pv = dk;
2193: MatPivotCheck(A,info,&sctx,i);
2194: if(sctx.newshift) break;
2195: dk = sctx.pv;
2196:
2197: ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2198: }
2199: } while (sctx.newshift);
2200:
2201: PetscFree3(rtmp,il,c2r);
2202: ISRestoreIndices(ip,&rip);
2203: ISRestoreIndices(iip,&riip);
2205: ISIdentity(ip,&perm_identity);
2206: if (perm_identity){
2207: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2208: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2209: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2210: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2211: } else {
2212: B->ops->solve = MatSolve_SeqSBAIJ_1;
2213: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2214: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2215: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2216: }
2218: C->assembled = PETSC_TRUE;
2219: C->preallocated = PETSC_TRUE;
2220: PetscLogFlops(C->rmap->n);
2222: /* MatPivotView() */
2223: if (sctx.nshift){
2224: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2225: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
2226: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2227: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
2228: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
2229: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);
2230: }
2231: }
2232: return(0);
2233: }
2237: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2238: {
2239: Mat C = B;
2240: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2241: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2242: IS ip=b->row,iip = b->icol;
2244: const PetscInt *rip,*riip;
2245: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2246: PetscInt *ai=a->i,*aj=a->j;
2247: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2248: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2249: PetscBool perm_identity;
2250: FactorShiftCtx sctx;
2251: PetscReal rs;
2252: MatScalar d,*v;
2255: /* MatPivotSetUp(): initialize shift context sctx */
2256: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2258: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2259: sctx.shift_top = info->zeropivot;
2260: for (i=0; i<mbs; i++) {
2261: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2262: d = (aa)[a->diag[i]];
2263: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2264: v = aa+ai[i];
2265: nz = ai[i+1] - ai[i];
2266: for (j=0; j<nz; j++)
2267: rs += PetscAbsScalar(v[j]);
2268: if (rs>sctx.shift_top) sctx.shift_top = rs;
2269: }
2270: sctx.shift_top *= 1.1;
2271: sctx.nshift_max = 5;
2272: sctx.shift_lo = 0.;
2273: sctx.shift_hi = 1.;
2274: }
2276: ISGetIndices(ip,&rip);
2277: ISGetIndices(iip,&riip);
2278:
2279: /* initialization */
2280: PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);
2281:
2282: do {
2283: sctx.newshift = PETSC_FALSE;
2285: for (i=0; i<mbs; i++) jl[i] = mbs;
2286: il[0] = 0;
2287:
2288: for (k = 0; k<mbs; k++){
2289: /* zero rtmp */
2290: nz = bi[k+1] - bi[k];
2291: bjtmp = bj + bi[k];
2292: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2294: bval = ba + bi[k];
2295: /* initialize k-th row by the perm[k]-th row of A */
2296: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2297: for (j = jmin; j < jmax; j++){
2298: col = riip[aj[j]];
2299: if (col >= k){ /* only take upper triangular entry */
2300: rtmp[col] = aa[j];
2301: *bval++ = 0.0; /* for in-place factorization */
2302: }
2303: }
2304: /* shift the diagonal of the matrix */
2305: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2307: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2308: dk = rtmp[k];
2309: i = jl[k]; /* first row to be added to k_th row */
2311: while (i < k){
2312: nexti = jl[i]; /* next row to be added to k_th row */
2313:
2314: /* compute multiplier, update diag(k) and U(i,k) */
2315: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2316: uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */
2317: dk += uikdi*ba[ili];
2318: ba[ili] = uikdi; /* -U(i,k) */
2320: /* add multiple of row i to k-th row */
2321: jmin = ili + 1; jmax = bi[i+1];
2322: if (jmin < jmax){
2323: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2324: /* update il and jl for row i */
2325: il[i] = jmin;
2326: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2327: }
2328: i = nexti;
2329: }
2331: /* shift the diagonals when zero pivot is detected */
2332: /* compute rs=sum of abs(off-diagonal) */
2333: rs = 0.0;
2334: jmin = bi[k]+1;
2335: nz = bi[k+1] - jmin;
2336: bcol = bj + jmin;
2337: for (j=0; j<nz; j++) {
2338: rs += PetscAbsScalar(rtmp[bcol[j]]);
2339: }
2341: sctx.rs = rs;
2342: sctx.pv = dk;
2343: MatPivotCheck(A,info,&sctx,k);
2344: if (sctx.newshift) break;
2345: dk = sctx.pv;
2346:
2347: /* copy data into U(k,:) */
2348: ba[bi[k]] = 1.0/dk; /* U(k,k) */
2349: jmin = bi[k]+1; jmax = bi[k+1];
2350: if (jmin < jmax) {
2351: for (j=jmin; j<jmax; j++){
2352: col = bj[j]; ba[j] = rtmp[col];
2353: }
2354: /* add the k-th row into il and jl */
2355: il[k] = jmin;
2356: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2357: }
2358: }
2359: } while (sctx.newshift);
2361: PetscFree3(rtmp,il,jl);
2362: ISRestoreIndices(ip,&rip);
2363: ISRestoreIndices(iip,&riip);
2365: ISIdentity(ip,&perm_identity);
2366: if (perm_identity){
2367: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2368: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2369: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2370: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2371: } else {
2372: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2373: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2374: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2375: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2376: }
2378: C->assembled = PETSC_TRUE;
2379: C->preallocated = PETSC_TRUE;
2380: PetscLogFlops(C->rmap->n);
2381: if (sctx.nshift){
2382: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2383: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
2384: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2385: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
2386: }
2387: }
2388: return(0);
2389: }
2391: /*
2392: icc() under revised new data structure.
2393: Factored arrays bj and ba are stored as
2394: U(0,:),...,U(i,:),U(n-1,:)
2396: ui=fact->i is an array of size n+1, in which
2397: ui+
2398: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2399: ui[n]: points to U(n-1,n-1)+1
2400:
2401: udiag=fact->diag is an array of size n,in which
2402: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2404: U(i,:) contains udiag[i] as its last entry, i.e.,
2405: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2406: */
2410: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2411: {
2412: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2413: Mat_SeqSBAIJ *b;
2414: PetscErrorCode ierr;
2415: PetscBool perm_identity,missing;
2416: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2417: const PetscInt *rip,*riip;
2418: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2419: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2420: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2421: PetscReal fill=info->fill,levels=info->levels;
2422: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2423: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2424: PetscBT lnkbt;
2425: IS iperm;
2426:
2428: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2429: MatMissingDiagonal(A,&missing,&d);
2430: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2431: ISIdentity(perm,&perm_identity);
2432: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2434: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2435: PetscMalloc((am+1)*sizeof(PetscInt),&udiag);
2436: ui[0] = 0;
2438: /* ICC(0) without matrix ordering: simply rearrange column indices */
2439: if (!levels && perm_identity) {
2440: for (i=0; i<am; i++) {
2441: ncols = ai[i+1] - a->diag[i];
2442: ui[i+1] = ui[i] + ncols;
2443: udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2444: }
2445: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2446: cols = uj;
2447: for (i=0; i<am; i++) {
2448: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2449: ncols = ai[i+1] - a->diag[i] -1;
2450: for (j=0; j<ncols; j++) *cols++ = aj[j];
2451: *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2452: }
2453: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2454: ISGetIndices(iperm,&riip);
2455: ISGetIndices(perm,&rip);
2457: /* initialization */
2458: PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);
2460: /* jl: linked list for storing indices of the pivot rows
2461: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2462: PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);
2463: for (i=0; i<am; i++){
2464: jl[i] = am; il[i] = 0;
2465: }
2467: /* create and initialize a linked list for storing column indices of the active row k */
2468: nlnk = am + 1;
2469: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2471: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2472: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);
2473: current_space = free_space;
2474: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);
2475: current_space_lvl = free_space_lvl;
2477: for (k=0; k<am; k++){ /* for each active row k */
2478: /* initialize lnk by the column indices of row rip[k] of A */
2479: nzk = 0;
2480: ncols = ai[rip[k]+1] - ai[rip[k]];
2481: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2482: ncols_upper = 0;
2483: for (j=0; j<ncols; j++){
2484: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2485: if (riip[i] >= k){ /* only take upper triangular entry */
2486: ajtmp[ncols_upper] = i;
2487: ncols_upper++;
2488: }
2489: }
2490: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2491: nzk += nlnk;
2493: /* update lnk by computing fill-in for each pivot row to be merged in */
2494: prow = jl[k]; /* 1st pivot row */
2495:
2496: while (prow < k){
2497: nextprow = jl[prow];
2498:
2499: /* merge prow into k-th row */
2500: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2501: jmax = ui[prow+1];
2502: ncols = jmax-jmin;
2503: i = jmin - ui[prow];
2504: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2505: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2506: j = *(uj - 1);
2507: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2508: nzk += nlnk;
2510: /* update il and jl for prow */
2511: if (jmin < jmax){
2512: il[prow] = jmin;
2513: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2514: }
2515: prow = nextprow;
2516: }
2518: /* if free space is not available, make more free space */
2519: if (current_space->local_remaining<nzk) {
2520: i = am - k + 1; /* num of unfactored rows */
2521: i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2522: PetscFreeSpaceGet(i,¤t_space);
2523: PetscFreeSpaceGet(i,¤t_space_lvl);
2524: reallocs++;
2525: }
2527: /* copy data into free_space and free_space_lvl, then initialize lnk */
2528: if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2529: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2531: /* add the k-th row into il and jl */
2532: if (nzk > 1){
2533: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2534: jl[k] = jl[i]; jl[i] = k;
2535: il[k] = ui[k] + 1;
2536: }
2537: uj_ptr[k] = current_space->array;
2538: uj_lvl_ptr[k] = current_space_lvl->array;
2540: current_space->array += nzk;
2541: current_space->local_used += nzk;
2542: current_space->local_remaining -= nzk;
2544: current_space_lvl->array += nzk;
2545: current_space_lvl->local_used += nzk;
2546: current_space_lvl->local_remaining -= nzk;
2548: ui[k+1] = ui[k] + nzk;
2549: }
2551: ISRestoreIndices(perm,&rip);
2552: ISRestoreIndices(iperm,&riip);
2553: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2554: PetscFree(ajtmp);
2556: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2557: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2558: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2559: PetscIncompleteLLDestroy(lnk,lnkbt);
2560: PetscFreeSpaceDestroy(free_space_lvl);
2562: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2564: /* put together the new matrix in MATSEQSBAIJ format */
2565: b = (Mat_SeqSBAIJ*)(fact)->data;
2566: b->singlemalloc = PETSC_FALSE;
2567: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
2568: b->j = uj;
2569: b->i = ui;
2570: b->diag = udiag;
2571: b->free_diag = PETSC_TRUE;
2572: b->ilen = 0;
2573: b->imax = 0;
2574: b->row = perm;
2575: b->col = perm;
2576: PetscObjectReference((PetscObject)perm);
2577: PetscObjectReference((PetscObject)perm);
2578: b->icol = iperm;
2579: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2580: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2581: PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2582: b->maxnz = b->nz = ui[am];
2583: b->free_a = PETSC_TRUE;
2584: b->free_ij = PETSC_TRUE;
2585:
2586: fact->info.factor_mallocs = reallocs;
2587: fact->info.fill_ratio_given = fill;
2588: if (ai[am] != 0) {
2589: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2590: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2591: } else {
2592: fact->info.fill_ratio_needed = 0.0;
2593: }
2594: #if defined(PETSC_USE_INFO)
2595: if (ai[am] != 0) {
2596: PetscReal af = fact->info.fill_ratio_needed;
2597: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2598: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2599: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2600: } else {
2601: PetscInfo(A,"Empty matrix.\n");
2602: }
2603: #endif
2604: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2605: return(0);
2606: }
2610: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2611: {
2612: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2613: Mat_SeqSBAIJ *b;
2614: PetscErrorCode ierr;
2615: PetscBool perm_identity,missing;
2616: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2617: const PetscInt *rip,*riip;
2618: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2619: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2620: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2621: PetscReal fill=info->fill,levels=info->levels;
2622: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2623: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2624: PetscBT lnkbt;
2625: IS iperm;
2626:
2628: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2629: MatMissingDiagonal(A,&missing,&d);
2630: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2631: ISIdentity(perm,&perm_identity);
2632: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2634: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2635: PetscMalloc((am+1)*sizeof(PetscInt),&udiag);
2636: ui[0] = 0;
2638: /* ICC(0) without matrix ordering: simply copies fill pattern */
2639: if (!levels && perm_identity) {
2641: for (i=0; i<am; i++) {
2642: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
2643: udiag[i] = ui[i];
2644: }
2645: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2646: cols = uj;
2647: for (i=0; i<am; i++) {
2648: aj = a->j + a->diag[i];
2649: ncols = ui[i+1] - ui[i];
2650: for (j=0; j<ncols; j++) *cols++ = *aj++;
2651: }
2652: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2653: ISGetIndices(iperm,&riip);
2654: ISGetIndices(perm,&rip);
2656: /* initialization */
2657: PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);
2659: /* jl: linked list for storing indices of the pivot rows
2660: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2661: PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);
2662: for (i=0; i<am; i++){
2663: jl[i] = am; il[i] = 0;
2664: }
2666: /* create and initialize a linked list for storing column indices of the active row k */
2667: nlnk = am + 1;
2668: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2670: /* initial FreeSpace size is fill*(ai[am]+1) */
2671: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
2672: current_space = free_space;
2673: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
2674: current_space_lvl = free_space_lvl;
2676: for (k=0; k<am; k++){ /* for each active row k */
2677: /* initialize lnk by the column indices of row rip[k] of A */
2678: nzk = 0;
2679: ncols = ai[rip[k]+1] - ai[rip[k]];
2680: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2681: ncols_upper = 0;
2682: for (j=0; j<ncols; j++){
2683: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2684: if (riip[i] >= k){ /* only take upper triangular entry */
2685: ajtmp[ncols_upper] = i;
2686: ncols_upper++;
2687: }
2688: }
2689: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2690: nzk += nlnk;
2692: /* update lnk by computing fill-in for each pivot row to be merged in */
2693: prow = jl[k]; /* 1st pivot row */
2694:
2695: while (prow < k){
2696: nextprow = jl[prow];
2697:
2698: /* merge prow into k-th row */
2699: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2700: jmax = ui[prow+1];
2701: ncols = jmax-jmin;
2702: i = jmin - ui[prow];
2703: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2704: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2705: j = *(uj - 1);
2706: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2707: nzk += nlnk;
2709: /* update il and jl for prow */
2710: if (jmin < jmax){
2711: il[prow] = jmin;
2712: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2713: }
2714: prow = nextprow;
2715: }
2717: /* if free space is not available, make more free space */
2718: if (current_space->local_remaining<nzk) {
2719: i = am - k + 1; /* num of unfactored rows */
2720: i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2721: PetscFreeSpaceGet(i,¤t_space);
2722: PetscFreeSpaceGet(i,¤t_space_lvl);
2723: reallocs++;
2724: }
2726: /* copy data into free_space and free_space_lvl, then initialize lnk */
2727: if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2728: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2730: /* add the k-th row into il and jl */
2731: if (nzk > 1){
2732: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2733: jl[k] = jl[i]; jl[i] = k;
2734: il[k] = ui[k] + 1;
2735: }
2736: uj_ptr[k] = current_space->array;
2737: uj_lvl_ptr[k] = current_space_lvl->array;
2739: current_space->array += nzk;
2740: current_space->local_used += nzk;
2741: current_space->local_remaining -= nzk;
2743: current_space_lvl->array += nzk;
2744: current_space_lvl->local_used += nzk;
2745: current_space_lvl->local_remaining -= nzk;
2747: ui[k+1] = ui[k] + nzk;
2748: }
2750: #if defined(PETSC_USE_INFO)
2751: if (ai[am] != 0) {
2752: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2753: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2754: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2755: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2756: } else {
2757: PetscInfo(A,"Empty matrix.\n");
2758: }
2759: #endif
2761: ISRestoreIndices(perm,&rip);
2762: ISRestoreIndices(iperm,&riip);
2763: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2764: PetscFree(ajtmp);
2766: /* destroy list of free space and other temporary array(s) */
2767: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2768: PetscFreeSpaceContiguous(&free_space,uj);
2769: PetscIncompleteLLDestroy(lnk,lnkbt);
2770: PetscFreeSpaceDestroy(free_space_lvl);
2772: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2774: /* put together the new matrix in MATSEQSBAIJ format */
2776: b = (Mat_SeqSBAIJ*)fact->data;
2777: b->singlemalloc = PETSC_FALSE;
2778: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
2779: b->j = uj;
2780: b->i = ui;
2781: b->diag = udiag;
2782: b->free_diag = PETSC_TRUE;
2783: b->ilen = 0;
2784: b->imax = 0;
2785: b->row = perm;
2786: b->col = perm;
2787: PetscObjectReference((PetscObject)perm);
2788: PetscObjectReference((PetscObject)perm);
2789: b->icol = iperm;
2790: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2791: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2792: PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
2793: b->maxnz = b->nz = ui[am];
2794: b->free_a = PETSC_TRUE;
2795: b->free_ij = PETSC_TRUE;
2796:
2797: fact->info.factor_mallocs = reallocs;
2798: fact->info.fill_ratio_given = fill;
2799: if (ai[am] != 0) {
2800: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2801: } else {
2802: fact->info.fill_ratio_needed = 0.0;
2803: }
2804: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2805: return(0);
2806: }
2810: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2811: {
2812: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2813: Mat_SeqSBAIJ *b;
2814: PetscErrorCode ierr;
2815: PetscBool perm_identity;
2816: PetscReal fill = info->fill;
2817: const PetscInt *rip,*riip;
2818: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2819: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2820: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2821: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2822: PetscBT lnkbt;
2823: IS iperm;
2826: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2827: /* check whether perm is the identity mapping */
2828: ISIdentity(perm,&perm_identity);
2829: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2830: ISGetIndices(iperm,&riip);
2831: ISGetIndices(perm,&rip);
2833: /* initialization */
2834: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2835: PetscMalloc((am+1)*sizeof(PetscInt),&udiag);
2836: ui[0] = 0;
2838: /* jl: linked list for storing indices of the pivot rows
2839: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2840: PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);
2841: for (i=0; i<am; i++){
2842: jl[i] = am; il[i] = 0;
2843: }
2845: /* create and initialize a linked list for storing column indices of the active row k */
2846: nlnk = am + 1;
2847: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2849: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2850: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);
2851: current_space = free_space;
2853: for (k=0; k<am; k++){ /* for each active row k */
2854: /* initialize lnk by the column indices of row rip[k] of A */
2855: nzk = 0;
2856: ncols = ai[rip[k]+1] - ai[rip[k]];
2857: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2858: ncols_upper = 0;
2859: for (j=0; j<ncols; j++){
2860: i = riip[*(aj + ai[rip[k]] + j)];
2861: if (i >= k){ /* only take upper triangular entry */
2862: cols[ncols_upper] = i;
2863: ncols_upper++;
2864: }
2865: }
2866: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2867: nzk += nlnk;
2869: /* update lnk by computing fill-in for each pivot row to be merged in */
2870: prow = jl[k]; /* 1st pivot row */
2871:
2872: while (prow < k){
2873: nextprow = jl[prow];
2874: /* merge prow into k-th row */
2875: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2876: jmax = ui[prow+1];
2877: ncols = jmax-jmin;
2878: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2879: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
2880: nzk += nlnk;
2882: /* update il and jl for prow */
2883: if (jmin < jmax){
2884: il[prow] = jmin;
2885: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2886: }
2887: prow = nextprow;
2888: }
2890: /* if free space is not available, make more free space */
2891: if (current_space->local_remaining<nzk) {
2892: i = am - k + 1; /* num of unfactored rows */
2893: i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2894: PetscFreeSpaceGet(i,¤t_space);
2895: reallocs++;
2896: }
2898: /* copy data into free space, then initialize lnk */
2899: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
2901: /* add the k-th row into il and jl */
2902: if (nzk > 1){
2903: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2904: jl[k] = jl[i]; jl[i] = k;
2905: il[k] = ui[k] + 1;
2906: }
2907: ui_ptr[k] = current_space->array;
2908: current_space->array += nzk;
2909: current_space->local_used += nzk;
2910: current_space->local_remaining -= nzk;
2912: ui[k+1] = ui[k] + nzk;
2913: }
2915: ISRestoreIndices(perm,&rip);
2916: ISRestoreIndices(iperm,&riip);
2917: PetscFree4(ui_ptr,jl,il,cols);
2919: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2920: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2921: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2922: PetscLLDestroy(lnk,lnkbt);
2924: /* put together the new matrix in MATSEQSBAIJ format */
2926: b = (Mat_SeqSBAIJ*)fact->data;
2927: b->singlemalloc = PETSC_FALSE;
2928: b->free_a = PETSC_TRUE;
2929: b->free_ij = PETSC_TRUE;
2930: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
2931: b->j = uj;
2932: b->i = ui;
2933: b->diag = udiag;
2934: b->free_diag = PETSC_TRUE;
2935: b->ilen = 0;
2936: b->imax = 0;
2937: b->row = perm;
2938: b->col = perm;
2939: PetscObjectReference((PetscObject)perm);
2940: PetscObjectReference((PetscObject)perm);
2941: b->icol = iperm;
2942: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2943: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2944: PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2945: b->maxnz = b->nz = ui[am];
2946:
2947: fact->info.factor_mallocs = reallocs;
2948: fact->info.fill_ratio_given = fill;
2949: if (ai[am] != 0) {
2950: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2951: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2952: } else {
2953: fact->info.fill_ratio_needed = 0.0;
2954: }
2955: #if defined(PETSC_USE_INFO)
2956: if (ai[am] != 0) {
2957: PetscReal af = fact->info.fill_ratio_needed;
2958: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2959: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2960: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2961: } else {
2962: PetscInfo(A,"Empty matrix.\n");
2963: }
2964: #endif
2965: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2966: return(0);
2967: }
2971: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2972: {
2973: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2974: Mat_SeqSBAIJ *b;
2975: PetscErrorCode ierr;
2976: PetscBool perm_identity;
2977: PetscReal fill = info->fill;
2978: const PetscInt *rip,*riip;
2979: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2980: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2981: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2982: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2983: PetscBT lnkbt;
2984: IS iperm;
2987: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2988: /* check whether perm is the identity mapping */
2989: ISIdentity(perm,&perm_identity);
2990: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2991: ISGetIndices(iperm,&riip);
2992: ISGetIndices(perm,&rip);
2994: /* initialization */
2995: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2996: ui[0] = 0;
2998: /* jl: linked list for storing indices of the pivot rows
2999: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3000: PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);
3001: for (i=0; i<am; i++){
3002: jl[i] = am; il[i] = 0;
3003: }
3005: /* create and initialize a linked list for storing column indices of the active row k */
3006: nlnk = am + 1;
3007: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
3009: /* initial FreeSpace size is fill*(ai[am]+1) */
3010: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
3011: current_space = free_space;
3013: for (k=0; k<am; k++){ /* for each active row k */
3014: /* initialize lnk by the column indices of row rip[k] of A */
3015: nzk = 0;
3016: ncols = ai[rip[k]+1] - ai[rip[k]];
3017: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3018: ncols_upper = 0;
3019: for (j=0; j<ncols; j++){
3020: i = riip[*(aj + ai[rip[k]] + j)];
3021: if (i >= k){ /* only take upper triangular entry */
3022: cols[ncols_upper] = i;
3023: ncols_upper++;
3024: }
3025: }
3026: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
3027: nzk += nlnk;
3029: /* update lnk by computing fill-in for each pivot row to be merged in */
3030: prow = jl[k]; /* 1st pivot row */
3031:
3032: while (prow < k){
3033: nextprow = jl[prow];
3034: /* merge prow into k-th row */
3035: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3036: jmax = ui[prow+1];
3037: ncols = jmax-jmin;
3038: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3039: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
3040: nzk += nlnk;
3042: /* update il and jl for prow */
3043: if (jmin < jmax){
3044: il[prow] = jmin;
3045: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3046: }
3047: prow = nextprow;
3048: }
3050: /* if free space is not available, make more free space */
3051: if (current_space->local_remaining<nzk) {
3052: i = am - k + 1; /* num of unfactored rows */
3053: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3054: PetscFreeSpaceGet(i,¤t_space);
3055: reallocs++;
3056: }
3058: /* copy data into free space, then initialize lnk */
3059: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
3061: /* add the k-th row into il and jl */
3062: if (nzk-1 > 0){
3063: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3064: jl[k] = jl[i]; jl[i] = k;
3065: il[k] = ui[k] + 1;
3066: }
3067: ui_ptr[k] = current_space->array;
3068: current_space->array += nzk;
3069: current_space->local_used += nzk;
3070: current_space->local_remaining -= nzk;
3072: ui[k+1] = ui[k] + nzk;
3073: }
3075: #if defined(PETSC_USE_INFO)
3076: if (ai[am] != 0) {
3077: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3078: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
3079: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
3080: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
3081: } else {
3082: PetscInfo(A,"Empty matrix.\n");
3083: }
3084: #endif
3086: ISRestoreIndices(perm,&rip);
3087: ISRestoreIndices(iperm,&riip);
3088: PetscFree4(ui_ptr,jl,il,cols);
3090: /* destroy list of free space and other temporary array(s) */
3091: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
3092: PetscFreeSpaceContiguous(&free_space,uj);
3093: PetscLLDestroy(lnk,lnkbt);
3095: /* put together the new matrix in MATSEQSBAIJ format */
3097: b = (Mat_SeqSBAIJ*)fact->data;
3098: b->singlemalloc = PETSC_FALSE;
3099: b->free_a = PETSC_TRUE;
3100: b->free_ij = PETSC_TRUE;
3101: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
3102: b->j = uj;
3103: b->i = ui;
3104: b->diag = 0;
3105: b->ilen = 0;
3106: b->imax = 0;
3107: b->row = perm;
3108: b->col = perm;
3109: PetscObjectReference((PetscObject)perm);
3110: PetscObjectReference((PetscObject)perm);
3111: b->icol = iperm;
3112: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3113: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
3114: PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
3115: b->maxnz = b->nz = ui[am];
3116:
3117: fact->info.factor_mallocs = reallocs;
3118: fact->info.fill_ratio_given = fill;
3119: if (ai[am] != 0) {
3120: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3121: } else {
3122: fact->info.fill_ratio_needed = 0.0;
3123: }
3124: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3125: return(0);
3126: }
3130: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3131: {
3132: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3133: PetscErrorCode ierr;
3134: PetscInt n = A->rmap->n;
3135: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3136: PetscScalar *x,sum;
3137: const PetscScalar *b;
3138: const MatScalar *aa = a->a,*v;
3139: PetscInt i,nz;
3142: if (!n) return(0);
3144: VecGetArrayRead(bb,&b);
3145: VecGetArray(xx,&x);
3147: /* forward solve the lower triangular */
3148: x[0] = b[0];
3149: v = aa;
3150: vi = aj;
3151: for (i=1; i<n; i++) {
3152: nz = ai[i+1] - ai[i];
3153: sum = b[i];
3154: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3155: v += nz;
3156: vi += nz;
3157: x[i] = sum;
3158: }
3159:
3160: /* backward solve the upper triangular */
3161: for (i=n-1; i>=0; i--){
3162: v = aa + adiag[i+1] + 1;
3163: vi = aj + adiag[i+1] + 1;
3164: nz = adiag[i] - adiag[i+1]-1;
3165: sum = x[i];
3166: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3167: x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3168: }
3169:
3170: PetscLogFlops(2.0*a->nz - A->cmap->n);
3171: VecRestoreArrayRead(bb,&b);
3172: VecRestoreArray(xx,&x);
3173: return(0);
3174: }
3178: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3179: {
3180: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3181: IS iscol = a->col,isrow = a->row;
3182: PetscErrorCode ierr;
3183: PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3184: const PetscInt *rout,*cout,*r,*c;
3185: PetscScalar *x,*tmp,sum;
3186: const PetscScalar *b;
3187: const MatScalar *aa = a->a,*v;
3190: if (!n) return(0);
3192: VecGetArrayRead(bb,&b);
3193: VecGetArray(xx,&x);
3194: tmp = a->solve_work;
3196: ISGetIndices(isrow,&rout); r = rout;
3197: ISGetIndices(iscol,&cout); c = cout;
3199: /* forward solve the lower triangular */
3200: tmp[0] = b[r[0]];
3201: v = aa;
3202: vi = aj;
3203: for (i=1; i<n; i++) {
3204: nz = ai[i+1] - ai[i];
3205: sum = b[r[i]];
3206: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3207: tmp[i] = sum;
3208: v += nz; vi += nz;
3209: }
3211: /* backward solve the upper triangular */
3212: for (i=n-1; i>=0; i--){
3213: v = aa + adiag[i+1]+1;
3214: vi = aj + adiag[i+1]+1;
3215: nz = adiag[i]-adiag[i+1]-1;
3216: sum = tmp[i];
3217: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3218: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3219: }
3221: ISRestoreIndices(isrow,&rout);
3222: ISRestoreIndices(iscol,&cout);
3223: VecRestoreArrayRead(bb,&b);
3224: VecRestoreArray(xx,&x);
3225: PetscLogFlops(2*a->nz - A->cmap->n);
3226: return(0);
3227: }
3231: /*
3232: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3233: */
3234: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3235: {
3236: Mat B = *fact;
3237: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b;
3238: IS isicol;
3239: PetscErrorCode ierr;
3240: const PetscInt *r,*ic;
3241: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3242: PetscInt *bi,*bj,*bdiag,*bdiag_rev;
3243: PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3244: PetscInt nlnk,*lnk;
3245: PetscBT lnkbt;
3246: PetscBool row_identity,icol_identity;
3247: MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3248: const PetscInt *ics;
3249: PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3250: PetscReal dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount;
3251: PetscInt dtcount=(PetscInt)info->dtcount,nnz_max;
3252: PetscBool missing;
3256: if (dt == PETSC_DEFAULT) dt = 0.005;
3257: if (dtcol == PETSC_DEFAULT) dtcol = 0.01; /* XXX unused! */
3258: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3260: /* ------- symbolic factorization, can be reused ---------*/
3261: MatMissingDiagonal(A,&missing,&i);
3262: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3263: adiag=a->diag;
3265: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
3267: /* bdiag is location of diagonal in factor */
3268: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag); /* becomes b->diag */
3269: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev); /* temporary */
3271: /* allocate row pointers bi */
3272: PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);
3274: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3275: if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3276: nnz_max = ai[n]+2*n*dtcount+2;
3278: PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);
3279: PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);
3281: /* put together the new matrix */
3282: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);
3283: PetscLogObjectParent(B,isicol);
3284: b = (Mat_SeqAIJ*)B->data;
3285: b->free_a = PETSC_TRUE;
3286: b->free_ij = PETSC_TRUE;
3287: b->singlemalloc = PETSC_FALSE;
3288: b->a = ba;
3289: b->j = bj;
3290: b->i = bi;
3291: b->diag = bdiag;
3292: b->ilen = 0;
3293: b->imax = 0;
3294: b->row = isrow;
3295: b->col = iscol;
3296: PetscObjectReference((PetscObject)isrow);
3297: PetscObjectReference((PetscObject)iscol);
3298: b->icol = isicol;
3299: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
3301: PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
3302: b->maxnz = nnz_max;
3304: B->factortype = MAT_FACTOR_ILUDT;
3305: B->info.factor_mallocs = 0;
3306: B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3307: CHKMEMQ;
3308: /* ------- end of symbolic factorization ---------*/
3310: ISGetIndices(isrow,&r);
3311: ISGetIndices(isicol,&ic);
3312: ics = ic;
3314: /* linked list for storing column indices of the active row */
3315: nlnk = n + 1;
3316: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
3318: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3319: PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);
3320: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3321: PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);
3322: PetscMemzero(rtmp,n*sizeof(MatScalar));
3324: bi[0] = 0;
3325: bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3326: bdiag_rev[n] = bdiag[0];
3327: bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3328: for (i=0; i<n; i++) {
3329: /* copy initial fill into linked list */
3330: nzi = 0; /* nonzeros for active row i */
3331: nzi = ai[r[i]+1] - ai[r[i]];
3332: if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3333: nzi_al = adiag[r[i]] - ai[r[i]];
3334: nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3335: ajtmp = aj + ai[r[i]];
3336: PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);
3337:
3338: /* load in initial (unfactored row) */
3339: aatmp = a->a + ai[r[i]];
3340: for (j=0; j<nzi; j++) {
3341: rtmp[ics[*ajtmp++]] = *aatmp++;
3342: }
3343:
3344: /* add pivot rows into linked list */
3345: row = lnk[n];
3346: while (row < i ) {
3347: nzi_bl = bi[row+1] - bi[row] + 1;
3348: bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3349: PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);
3350: nzi += nlnk;
3351: row = lnk[row];
3352: }
3353:
3354: /* copy data from lnk into jtmp, then initialize lnk */
3355: PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);
3357: /* numerical factorization */
3358: bjtmp = jtmp;
3359: row = *bjtmp++; /* 1st pivot row */
3360: while ( row < i ) {
3361: pc = rtmp + row;
3362: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3363: multiplier = (*pc) * (*pv);
3364: *pc = multiplier;
3365: if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */
3366: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3367: pv = ba + bdiag[row+1] + 1;
3368: /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3369: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3370: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3371: PetscLogFlops(1+2*nz);
3372: }
3373: row = *bjtmp++;
3374: }
3376: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3377: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3378: nzi_bl = 0; j = 0;
3379: while (jtmp[j] < i){ /* Note: jtmp is sorted */
3380: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3381: nzi_bl++; j++;
3382: }
3383: nzi_bu = nzi - nzi_bl -1;
3384: while (j < nzi){
3385: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3386: j++;
3387: }
3388:
3389: bjtmp = bj + bi[i];
3390: batmp = ba + bi[i];
3391: /* apply level dropping rule to L part */
3392: ncut = nzi_al + dtcount;
3393: if (ncut < nzi_bl){
3394: PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);
3395: PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
3396: } else {
3397: ncut = nzi_bl;
3398: }
3399: for (j=0; j<ncut; j++){
3400: bjtmp[j] = jtmp[j];
3401: batmp[j] = vtmp[j];
3402: /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3403: }
3404: bi[i+1] = bi[i] + ncut;
3405: nzi = ncut + 1;
3406:
3407: /* apply level dropping rule to U part */
3408: ncut = nzi_au + dtcount;
3409: if (ncut < nzi_bu){
3410: PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);
3411: PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
3412: } else {
3413: ncut = nzi_bu;
3414: }
3415: nzi += ncut;
3417: /* mark bdiagonal */
3418: bdiag[i+1] = bdiag[i] - (ncut + 1);
3419: bdiag_rev[n-i-1] = bdiag[i+1];
3420: bi[2*n - i] = bi[2*n - i +1] - (ncut + 1);
3421: bjtmp = bj + bdiag[i];
3422: batmp = ba + bdiag[i];
3423: *bjtmp = i;
3424: *batmp = diag_tmp; /* rtmp[i]; */
3425: if (*batmp == 0.0) {
3426: *batmp = dt+shift;
3427: /* printf(" row %d add shift %g\n",i,shift); */
3428: }
3429: *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3430: /* printf(" (%d,%g),",*bjtmp,*batmp); */
3431:
3432: bjtmp = bj + bdiag[i+1]+1;
3433: batmp = ba + bdiag[i+1]+1;
3434: for (k=0; k<ncut; k++){
3435: bjtmp[k] = jtmp[nzi_bl+1+k];
3436: batmp[k] = vtmp[nzi_bl+1+k];
3437: /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3438: }
3439: /* printf("\n"); */
3440:
3441: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3442: /*
3443: printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3444: printf(" ----------------------------\n");
3445: */
3446: } /* for (i=0; i<n; i++) */
3447: /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3448: if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3450: ISRestoreIndices(isrow,&r);
3451: ISRestoreIndices(isicol,&ic);
3453: PetscLLDestroy(lnk,lnkbt);
3454: PetscFree2(im,jtmp);
3455: PetscFree2(rtmp,vtmp);
3456: PetscFree(bdiag_rev);
3458: PetscLogFlops(B->cmap->n);
3459: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3461: ISIdentity(isrow,&row_identity);
3462: ISIdentity(isicol,&icol_identity);
3463: if (row_identity && icol_identity) {
3464: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3465: } else {
3466: B->ops->solve = MatSolve_SeqAIJ;
3467: }
3468:
3469: B->ops->solveadd = 0;
3470: B->ops->solvetranspose = 0;
3471: B->ops->solvetransposeadd = 0;
3472: B->ops->matsolve = 0;
3473: B->assembled = PETSC_TRUE;
3474: B->preallocated = PETSC_TRUE;
3475: return(0);
3476: }
3478: /* a wraper of MatILUDTFactor_SeqAIJ() */
3481: /*
3482: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3483: */
3485: PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3486: {
3487: PetscErrorCode ierr;
3490: MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);
3491: return(0);
3492: }
3494: /*
3495: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3496: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3497: */
3500: /*
3501: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors
3502: */
3504: PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3505: {
3506: Mat C=fact;
3507: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
3508: IS isrow = b->row,isicol = b->icol;
3510: const PetscInt *r,*ic,*ics;
3511: PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3512: PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3513: MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3514: PetscReal dt=info->dt,shift=info->shiftamount;
3515: PetscBool row_identity, col_identity;
3518: ISGetIndices(isrow,&r);
3519: ISGetIndices(isicol,&ic);
3520: PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
3521: ics = ic;
3523: for (i=0; i<n; i++){
3524: /* initialize rtmp array */
3525: nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */
3526: bjtmp = bj + bi[i];
3527: for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3528: rtmp[i] = 0.0;
3529: nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3530: bjtmp = bj + bdiag[i+1] + 1;
3531: for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3533: /* load in initial unfactored row of A */
3534: /* printf("row %d\n",i); */
3535: nz = ai[r[i]+1] - ai[r[i]];
3536: ajtmp = aj + ai[r[i]];
3537: v = aa + ai[r[i]];
3538: for (j=0; j<nz; j++) {
3539: rtmp[ics[*ajtmp++]] = v[j];
3540: /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3541: }
3542: /* printf("\n"); */
3544: /* numerical factorization */
3545: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3546: nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3547: k = 0;
3548: while (k < nzl){
3549: row = *bjtmp++;
3550: /* printf(" prow %d\n",row); */
3551: pc = rtmp + row;
3552: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3553: multiplier = (*pc) * (*pv);
3554: *pc = multiplier;
3555: if (PetscAbsScalar(multiplier) > dt){
3556: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3557: pv = b->a + bdiag[row+1] + 1;
3558: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3559: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3560: PetscLogFlops(1+2*nz);
3561: }
3562: k++;
3563: }
3564:
3565: /* finished row so stick it into b->a */
3566: /* L-part */
3567: pv = b->a + bi[i] ;
3568: pj = bj + bi[i] ;
3569: nzl = bi[i+1] - bi[i];
3570: for (j=0; j<nzl; j++) {
3571: pv[j] = rtmp[pj[j]];
3572: /* printf(" (%d,%g),",pj[j],pv[j]); */
3573: }
3575: /* diagonal: invert diagonal entries for simplier triangular solves */
3576: if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3577: b->a[bdiag[i]] = 1.0/rtmp[i];
3578: /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3580: /* U-part */
3581: pv = b->a + bdiag[i+1] + 1;
3582: pj = bj + bdiag[i+1] + 1;
3583: nzu = bdiag[i] - bdiag[i+1] - 1;
3584: for (j=0; j<nzu; j++) {
3585: pv[j] = rtmp[pj[j]];
3586: /* printf(" (%d,%g),",pj[j],pv[j]); */
3587: }
3588: /* printf("\n"); */
3589: }
3591: PetscFree(rtmp);
3592: ISRestoreIndices(isicol,&ic);
3593: ISRestoreIndices(isrow,&r);
3594:
3595: ISIdentity(isrow,&row_identity);
3596: ISIdentity(isicol,&col_identity);
3597: if (row_identity && col_identity) {
3598: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3599: } else {
3600: C->ops->solve = MatSolve_SeqAIJ;
3601: }
3602: C->ops->solveadd = 0;
3603: C->ops->solvetranspose = 0;
3604: C->ops->solvetransposeadd = 0;
3605: C->ops->matsolve = 0;
3606: C->assembled = PETSC_TRUE;
3607: C->preallocated = PETSC_TRUE;
3608: PetscLogFlops(C->cmap->n);
3609: return(0);
3610: }