Actual source code: baijfact3.c
petsc-3.7.3 2016-08-01
2: /*
3: Factorization code for BAIJ format.
4: */
5: #include <../src/mat/impls/baij/seq/baij.h>
6: #include <petsc/private/kernels/blockinvert.h>
10: /*
11: This is used to set the numeric factorization for both LU and ILU symbolic factorization
12: */
13: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool natural)
14: {
16: if (natural) {
17: switch (fact->rmap->bs) {
18: case 1:
19: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
20: break;
21: case 2:
22: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
23: break;
24: case 3:
25: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
26: break;
27: case 4:
28: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
29: break;
30: case 5:
31: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
32: break;
33: case 6:
34: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
35: break;
36: case 7:
37: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
38: break;
39: case 15:
40: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
41: break;
42: default:
43: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
44: break;
45: }
46: } else {
47: switch (fact->rmap->bs) {
48: case 1:
49: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
50: break;
51: case 2:
52: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
53: break;
54: case 3:
55: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
56: break;
57: case 4:
58: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
59: break;
60: case 5:
61: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
62: break;
63: case 6:
64: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
65: break;
66: case 7:
67: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
68: break;
69: default:
70: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
71: break;
72: }
73: }
74: return(0);
75: }
79: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool natural)
80: {
82: if (natural) {
83: switch (inA->rmap->bs) {
84: case 1:
85: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
86: break;
87: case 2:
88: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
89: break;
90: case 3:
91: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
92: break;
93: case 4:
94: #if defined(PETSC_USE_REAL_MAT_SINGLE)
95: {
96: PetscBool sse_enabled_local;
98: PetscSSEIsEnabled(inA->comm,&sse_enabled_local,NULL);
99: if (sse_enabled_local) {
100: # if defined(PETSC_HAVE_SSE)
101: int i,*AJ=a->j,nz=a->nz,n=a->mbs;
102: if (n==(unsigned short)n) {
103: unsigned short *aj=(unsigned short*)AJ;
104: for (i=0; i<nz; i++) aj[i] = (unsigned short)AJ[i];
106: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
107: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
109: PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");
110: } else {
111: /* Scale the column indices for easier indexing in MatSolve. */
112: /* for (i=0;i<nz;i++) { */
113: /* AJ[i] = AJ[i]*4; */
114: /* } */
115: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
116: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
118: PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");
119: }
120: # else
121: /* This should never be reached. If so, problem in PetscSSEIsEnabled. */
122: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable");
123: # endif
124: } else {
125: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
126: }
127: }
128: #else
129: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
130: #endif
131: break;
132: case 5:
133: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
134: break;
135: case 6:
136: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
137: break;
138: case 7:
139: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
140: break;
141: default:
142: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
143: break;
144: }
145: } else {
146: switch (inA->rmap->bs) {
147: case 1:
148: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
149: break;
150: case 2:
151: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
152: break;
153: case 3:
154: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
155: break;
156: case 4:
157: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
158: break;
159: case 5:
160: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
161: break;
162: case 6:
163: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
164: break;
165: case 7:
166: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
167: break;
168: default:
169: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
170: break;
171: }
172: }
173: return(0);
174: }
176: /*
177: The symbolic factorization code is identical to that for AIJ format,
178: except for very small changes since this is now a SeqBAIJ datastructure.
179: NOT good code reuse.
180: */
181: #include <petscbt.h>
182: #include <../src/mat/utils/freespace.h>
186: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
187: {
188: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
189: PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
190: PetscBool row_identity,col_identity,both_identity;
191: IS isicol;
192: PetscErrorCode ierr;
193: const PetscInt *r,*ic;
194: PetscInt i,*ai=a->i,*aj=a->j;
195: PetscInt *bi,*bj,*ajtmp;
196: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
197: PetscReal f;
198: PetscInt nlnk,*lnk,k,**bi_ptr;
199: PetscFreeSpaceList free_space=NULL,current_space=NULL;
200: PetscBT lnkbt;
201: PetscBool missing;
204: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
205: MatMissingDiagonal(A,&missing,&i);
206: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
208: if (bs>1) { /* check shifttype */
209: if (info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
210: }
212: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
213: ISGetIndices(isrow,&r);
214: ISGetIndices(isicol,&ic);
216: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
217: PetscMalloc1(n+1,&bi);
218: PetscMalloc1(n+1,&bdiag);
219: bi[0] = bdiag[0] = 0;
221: /* linked list for storing column indices of the active row */
222: nlnk = n + 1;
223: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
225: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
227: /* initial FreeSpace size is f*(ai[n]+1) */
228: f = info->fill;
229: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
231: current_space = free_space;
233: for (i=0; i<n; i++) {
234: /* copy previous fill into linked list */
235: nzi = 0;
236: nnz = ai[r[i]+1] - ai[r[i]];
237: ajtmp = aj + ai[r[i]];
238: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
239: nzi += nlnk;
241: /* add pivot rows into linked list */
242: row = lnk[n];
243: while (row < i) {
244: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
245: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
246: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
247: nzi += nlnk;
248: row = lnk[row];
249: }
250: bi[i+1] = bi[i] + nzi;
251: im[i] = nzi;
253: /* mark bdiag */
254: nzbd = 0;
255: nnz = nzi;
256: k = lnk[n];
257: while (nnz-- && k < i) {
258: nzbd++;
259: k = lnk[k];
260: }
261: bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
263: /* if free space is not available, make more free space */
264: if (current_space->local_remaining<nzi) {
265: nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n - i,nzi)); /* estimated and max additional space needed */
266: PetscFreeSpaceGet(nnz,¤t_space);
267: reallocs++;
268: }
270: /* copy data into free space, then initialize lnk */
271: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
273: bi_ptr[i] = current_space->array;
274: current_space->array += nzi;
275: current_space->local_used += nzi;
276: current_space->local_remaining -= nzi;
277: }
279: ISRestoreIndices(isrow,&r);
280: ISRestoreIndices(isicol,&ic);
282: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
283: PetscMalloc1(bi[n]+1,&bj);
284: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
285: PetscLLDestroy(lnk,lnkbt);
286: PetscFree2(bi_ptr,im);
288: /* put together the new matrix */
289: MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,NULL);
290: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
291: b = (Mat_SeqBAIJ*)(B)->data;
293: b->free_a = PETSC_TRUE;
294: b->free_ij = PETSC_TRUE;
295: b->singlemalloc = PETSC_FALSE;
297: PetscMalloc1((bdiag[0]+1)*bs2,&b->a);
298: b->j = bj;
299: b->i = bi;
300: b->diag = bdiag;
301: b->free_diag = PETSC_TRUE;
302: b->ilen = 0;
303: b->imax = 0;
304: b->row = isrow;
305: b->col = iscol;
306: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
308: PetscObjectReference((PetscObject)isrow);
309: PetscObjectReference((PetscObject)iscol);
310: b->icol = isicol;
311: PetscMalloc1(bs*n+bs,&b->solve_work);
312: PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));
314: b->maxnz = b->nz = bdiag[0]+1;
316: B->factortype = MAT_FACTOR_LU;
317: B->info.factor_mallocs = reallocs;
318: B->info.fill_ratio_given = f;
320: if (ai[n] != 0) {
321: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
322: } else {
323: B->info.fill_ratio_needed = 0.0;
324: }
325: #if defined(PETSC_USE_INFO)
326: if (ai[n] != 0) {
327: PetscReal af = B->info.fill_ratio_needed;
328: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
329: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
330: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
331: PetscInfo(A,"for best performance.\n");
332: } else {
333: PetscInfo(A,"Empty matrix\n");
334: }
335: #endif
337: ISIdentity(isrow,&row_identity);
338: ISIdentity(iscol,&col_identity);
340: both_identity = (PetscBool) (row_identity && col_identity);
342: MatSeqBAIJSetNumericFactorization(B,both_identity);
343: return(0);
344: }
348: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
349: {
350: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
351: PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
352: PetscBool row_identity,col_identity,both_identity;
353: IS isicol;
354: PetscErrorCode ierr;
355: const PetscInt *r,*ic;
356: PetscInt i,*ai=a->i,*aj=a->j;
357: PetscInt *bi,*bj,*ajtmp;
358: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
359: PetscReal f;
360: PetscInt nlnk,*lnk,k,**bi_ptr;
361: PetscFreeSpaceList free_space=NULL,current_space=NULL;
362: PetscBT lnkbt;
363: PetscBool missing;
366: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
367: MatMissingDiagonal(A,&missing,&i);
368: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
370: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
371: ISGetIndices(isrow,&r);
372: ISGetIndices(isicol,&ic);
374: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
375: PetscMalloc1(n+1,&bi);
376: PetscMalloc1(n+1,&bdiag);
378: bi[0] = bdiag[0] = 0;
380: /* linked list for storing column indices of the active row */
381: nlnk = n + 1;
382: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
384: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
386: /* initial FreeSpace size is f*(ai[n]+1) */
387: f = info->fill;
388: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
389: current_space = free_space;
391: for (i=0; i<n; i++) {
392: /* copy previous fill into linked list */
393: nzi = 0;
394: nnz = ai[r[i]+1] - ai[r[i]];
395: ajtmp = aj + ai[r[i]];
396: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
397: nzi += nlnk;
399: /* add pivot rows into linked list */
400: row = lnk[n];
401: while (row < i) {
402: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
403: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
404: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
405: nzi += nlnk;
406: row = lnk[row];
407: }
408: bi[i+1] = bi[i] + nzi;
409: im[i] = nzi;
411: /* mark bdiag */
412: nzbd = 0;
413: nnz = nzi;
414: k = lnk[n];
415: while (nnz-- && k < i) {
416: nzbd++;
417: k = lnk[k];
418: }
419: bdiag[i] = bi[i] + nzbd;
421: /* if free space is not available, make more free space */
422: if (current_space->local_remaining<nzi) {
423: nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
424: PetscFreeSpaceGet(nnz,¤t_space);
425: reallocs++;
426: }
428: /* copy data into free space, then initialize lnk */
429: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
431: bi_ptr[i] = current_space->array;
432: current_space->array += nzi;
433: current_space->local_used += nzi;
434: current_space->local_remaining -= nzi;
435: }
436: #if defined(PETSC_USE_INFO)
437: if (ai[n] != 0) {
438: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
439: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
440: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
441: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
442: PetscInfo(A,"for best performance.\n");
443: } else {
444: PetscInfo(A,"Empty matrix\n");
445: }
446: #endif
448: ISRestoreIndices(isrow,&r);
449: ISRestoreIndices(isicol,&ic);
451: /* destroy list of free space and other temporary array(s) */
452: PetscMalloc1(bi[n]+1,&bj);
453: PetscFreeSpaceContiguous(&free_space,bj);
454: PetscLLDestroy(lnk,lnkbt);
455: PetscFree2(bi_ptr,im);
457: /* put together the new matrix */
458: MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,NULL);
459: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
460: b = (Mat_SeqBAIJ*)(B)->data;
462: b->free_a = PETSC_TRUE;
463: b->free_ij = PETSC_TRUE;
464: b->singlemalloc = PETSC_FALSE;
466: PetscMalloc1((bi[n]+1)*bs2,&b->a);
467: b->j = bj;
468: b->i = bi;
469: b->diag = bdiag;
470: b->free_diag = PETSC_TRUE;
471: b->ilen = 0;
472: b->imax = 0;
473: b->row = isrow;
474: b->col = iscol;
475: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
477: PetscObjectReference((PetscObject)isrow);
478: PetscObjectReference((PetscObject)iscol);
479: b->icol = isicol;
481: PetscMalloc1(bs*n+bs,&b->solve_work);
482: PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));
484: b->maxnz = b->nz = bi[n];
486: (B)->factortype = MAT_FACTOR_LU;
487: (B)->info.factor_mallocs = reallocs;
488: (B)->info.fill_ratio_given = f;
490: if (ai[n] != 0) {
491: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
492: } else {
493: (B)->info.fill_ratio_needed = 0.0;
494: }
496: ISIdentity(isrow,&row_identity);
497: ISIdentity(iscol,&col_identity);
499: both_identity = (PetscBool) (row_identity && col_identity);
501: MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);
502: return(0);
503: }