Actual source code: baijfact3.c
petsc-3.8.4 2018-03-24
2: /*
3: Factorization code for BAIJ format.
4: */
5: #include <../src/mat/impls/baij/seq/baij.h>
6: #include <petsc/private/kernels/blockinvert.h>
8: /*
9: This is used to set the numeric factorization for both LU and ILU symbolic factorization
10: */
11: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool natural)
12: {
14: if (natural) {
15: switch (fact->rmap->bs) {
16: case 1:
17: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
18: break;
19: case 2:
20: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
21: break;
22: case 3:
23: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
24: break;
25: case 4:
26: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
27: break;
28: case 5:
29: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
30: break;
31: case 6:
32: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
33: break;
34: case 7:
35: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
36: break;
37: case 15:
38: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
39: break;
40: default:
41: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
42: break;
43: }
44: } else {
45: switch (fact->rmap->bs) {
46: case 1:
47: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
48: break;
49: case 2:
50: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
51: break;
52: case 3:
53: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
54: break;
55: case 4:
56: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
57: break;
58: case 5:
59: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
60: break;
61: case 6:
62: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
63: break;
64: case 7:
65: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
66: break;
67: default:
68: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
69: break;
70: }
71: }
72: return(0);
73: }
75: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool natural)
76: {
78: if (natural) {
79: switch (inA->rmap->bs) {
80: case 1:
81: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
82: break;
83: case 2:
84: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
85: break;
86: case 3:
87: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
88: break;
89: case 4:
90: #if defined(PETSC_USE_REAL_MAT_SINGLE)
91: {
92: PetscBool sse_enabled_local;
94: PetscSSEIsEnabled(inA->comm,&sse_enabled_local,NULL);
95: if (sse_enabled_local) {
96: # if defined(PETSC_HAVE_SSE)
97: int i,*AJ=a->j,nz=a->nz,n=a->mbs;
98: if (n==(unsigned short)n) {
99: unsigned short *aj=(unsigned short*)AJ;
100: for (i=0; i<nz; i++) aj[i] = (unsigned short)AJ[i];
102: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
103: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
105: PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");
106: } else {
107: /* Scale the column indices for easier indexing in MatSolve. */
108: /* for (i=0;i<nz;i++) { */
109: /* AJ[i] = AJ[i]*4; */
110: /* } */
111: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
112: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
114: PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");
115: }
116: # else
117: /* This should never be reached. If so, problem in PetscSSEIsEnabled. */
118: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable");
119: # endif
120: } else {
121: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
122: }
123: }
124: #else
125: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
126: #endif
127: break;
128: case 5:
129: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
130: break;
131: case 6:
132: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
133: break;
134: case 7:
135: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
136: break;
137: default:
138: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
139: break;
140: }
141: } else {
142: switch (inA->rmap->bs) {
143: case 1:
144: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
145: break;
146: case 2:
147: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
148: break;
149: case 3:
150: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
151: break;
152: case 4:
153: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
154: break;
155: case 5:
156: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
157: break;
158: case 6:
159: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
160: break;
161: case 7:
162: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
163: break;
164: default:
165: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
166: break;
167: }
168: }
169: return(0);
170: }
172: /*
173: The symbolic factorization code is identical to that for AIJ format,
174: except for very small changes since this is now a SeqBAIJ datastructure.
175: NOT good code reuse.
176: */
177: #include <petscbt.h>
178: #include <../src/mat/utils/freespace.h>
180: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
181: {
182: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
183: PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
184: PetscBool row_identity,col_identity,both_identity;
185: IS isicol;
186: PetscErrorCode ierr;
187: const PetscInt *r,*ic;
188: PetscInt i,*ai=a->i,*aj=a->j;
189: PetscInt *bi,*bj,*ajtmp;
190: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
191: PetscReal f;
192: PetscInt nlnk,*lnk,k,**bi_ptr;
193: PetscFreeSpaceList free_space=NULL,current_space=NULL;
194: PetscBT lnkbt;
195: PetscBool missing;
198: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
199: MatMissingDiagonal(A,&missing,&i);
200: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
202: if (bs>1) { /* check shifttype */
203: if (info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
204: }
206: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
207: ISGetIndices(isrow,&r);
208: ISGetIndices(isicol,&ic);
210: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
211: PetscMalloc1(n+1,&bi);
212: PetscMalloc1(n+1,&bdiag);
213: bi[0] = bdiag[0] = 0;
215: /* linked list for storing column indices of the active row */
216: nlnk = n + 1;
217: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
219: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
221: /* initial FreeSpace size is f*(ai[n]+1) */
222: f = info->fill;
223: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
225: current_space = free_space;
227: for (i=0; i<n; i++) {
228: /* copy previous fill into linked list */
229: nzi = 0;
230: nnz = ai[r[i]+1] - ai[r[i]];
231: ajtmp = aj + ai[r[i]];
232: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
233: nzi += nlnk;
235: /* add pivot rows into linked list */
236: row = lnk[n];
237: while (row < i) {
238: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
239: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
240: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
241: nzi += nlnk;
242: row = lnk[row];
243: }
244: bi[i+1] = bi[i] + nzi;
245: im[i] = nzi;
247: /* mark bdiag */
248: nzbd = 0;
249: nnz = nzi;
250: k = lnk[n];
251: while (nnz-- && k < i) {
252: nzbd++;
253: k = lnk[k];
254: }
255: bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
257: /* if free space is not available, make more free space */
258: if (current_space->local_remaining<nzi) {
259: nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n - i,nzi)); /* estimated and max additional space needed */
260: PetscFreeSpaceGet(nnz,¤t_space);
261: reallocs++;
262: }
264: /* copy data into free space, then initialize lnk */
265: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
267: bi_ptr[i] = current_space->array;
268: current_space->array += nzi;
269: current_space->local_used += nzi;
270: current_space->local_remaining -= nzi;
271: }
273: ISRestoreIndices(isrow,&r);
274: ISRestoreIndices(isicol,&ic);
276: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
277: PetscMalloc1(bi[n]+1,&bj);
278: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
279: PetscLLDestroy(lnk,lnkbt);
280: PetscFree2(bi_ptr,im);
282: /* put together the new matrix */
283: MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);
284: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
285: b = (Mat_SeqBAIJ*)(B)->data;
287: b->free_a = PETSC_TRUE;
288: b->free_ij = PETSC_TRUE;
289: b->singlemalloc = PETSC_FALSE;
291: PetscMalloc1((bdiag[0]+1)*bs2,&b->a);
292: b->j = bj;
293: b->i = bi;
294: b->diag = bdiag;
295: b->free_diag = PETSC_TRUE;
296: b->ilen = 0;
297: b->imax = 0;
298: b->row = isrow;
299: b->col = iscol;
300: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
302: PetscObjectReference((PetscObject)isrow);
303: PetscObjectReference((PetscObject)iscol);
304: b->icol = isicol;
305: PetscMalloc1(bs*n+bs,&b->solve_work);
306: PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));
308: b->maxnz = b->nz = bdiag[0]+1;
310: B->factortype = MAT_FACTOR_LU;
311: B->info.factor_mallocs = reallocs;
312: B->info.fill_ratio_given = f;
314: if (ai[n] != 0) {
315: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
316: } else {
317: B->info.fill_ratio_needed = 0.0;
318: }
319: #if defined(PETSC_USE_INFO)
320: if (ai[n] != 0) {
321: PetscReal af = B->info.fill_ratio_needed;
322: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
323: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
324: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
325: PetscInfo(A,"for best performance.\n");
326: } else {
327: PetscInfo(A,"Empty matrix\n");
328: }
329: #endif
331: ISIdentity(isrow,&row_identity);
332: ISIdentity(iscol,&col_identity);
334: both_identity = (PetscBool) (row_identity && col_identity);
336: MatSeqBAIJSetNumericFactorization(B,both_identity);
337: return(0);
338: }
340: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
341: {
342: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
343: PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
344: PetscBool row_identity,col_identity,both_identity;
345: IS isicol;
346: PetscErrorCode ierr;
347: const PetscInt *r,*ic;
348: PetscInt i,*ai=a->i,*aj=a->j;
349: PetscInt *bi,*bj,*ajtmp;
350: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
351: PetscReal f;
352: PetscInt nlnk,*lnk,k,**bi_ptr;
353: PetscFreeSpaceList free_space=NULL,current_space=NULL;
354: PetscBT lnkbt;
355: PetscBool missing;
358: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
359: MatMissingDiagonal(A,&missing,&i);
360: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
362: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
363: ISGetIndices(isrow,&r);
364: ISGetIndices(isicol,&ic);
366: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
367: PetscMalloc1(n+1,&bi);
368: PetscMalloc1(n+1,&bdiag);
370: bi[0] = bdiag[0] = 0;
372: /* linked list for storing column indices of the active row */
373: nlnk = n + 1;
374: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
376: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
378: /* initial FreeSpace size is f*(ai[n]+1) */
379: f = info->fill;
380: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
381: current_space = free_space;
383: for (i=0; i<n; i++) {
384: /* copy previous fill into linked list */
385: nzi = 0;
386: nnz = ai[r[i]+1] - ai[r[i]];
387: ajtmp = aj + ai[r[i]];
388: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
389: nzi += nlnk;
391: /* add pivot rows into linked list */
392: row = lnk[n];
393: while (row < i) {
394: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
395: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
396: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
397: nzi += nlnk;
398: row = lnk[row];
399: }
400: bi[i+1] = bi[i] + nzi;
401: im[i] = nzi;
403: /* mark bdiag */
404: nzbd = 0;
405: nnz = nzi;
406: k = lnk[n];
407: while (nnz-- && k < i) {
408: nzbd++;
409: k = lnk[k];
410: }
411: bdiag[i] = bi[i] + nzbd;
413: /* if free space is not available, make more free space */
414: if (current_space->local_remaining<nzi) {
415: nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
416: PetscFreeSpaceGet(nnz,¤t_space);
417: reallocs++;
418: }
420: /* copy data into free space, then initialize lnk */
421: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
423: bi_ptr[i] = current_space->array;
424: current_space->array += nzi;
425: current_space->local_used += nzi;
426: current_space->local_remaining -= nzi;
427: }
428: #if defined(PETSC_USE_INFO)
429: if (ai[n] != 0) {
430: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
431: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
432: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
433: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
434: PetscInfo(A,"for best performance.\n");
435: } else {
436: PetscInfo(A,"Empty matrix\n");
437: }
438: #endif
440: ISRestoreIndices(isrow,&r);
441: ISRestoreIndices(isicol,&ic);
443: /* destroy list of free space and other temporary array(s) */
444: PetscMalloc1(bi[n]+1,&bj);
445: PetscFreeSpaceContiguous(&free_space,bj);
446: PetscLLDestroy(lnk,lnkbt);
447: PetscFree2(bi_ptr,im);
449: /* put together the new matrix */
450: MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);
451: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
452: b = (Mat_SeqBAIJ*)(B)->data;
454: b->free_a = PETSC_TRUE;
455: b->free_ij = PETSC_TRUE;
456: b->singlemalloc = PETSC_FALSE;
458: PetscMalloc1((bi[n]+1)*bs2,&b->a);
459: b->j = bj;
460: b->i = bi;
461: b->diag = bdiag;
462: b->free_diag = PETSC_TRUE;
463: b->ilen = 0;
464: b->imax = 0;
465: b->row = isrow;
466: b->col = iscol;
467: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
469: PetscObjectReference((PetscObject)isrow);
470: PetscObjectReference((PetscObject)iscol);
471: b->icol = isicol;
473: PetscMalloc1(bs*n+bs,&b->solve_work);
474: PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));
476: b->maxnz = b->nz = bi[n];
478: (B)->factortype = MAT_FACTOR_LU;
479: (B)->info.factor_mallocs = reallocs;
480: (B)->info.fill_ratio_given = f;
482: if (ai[n] != 0) {
483: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
484: } else {
485: (B)->info.fill_ratio_needed = 0.0;
486: }
488: ISIdentity(isrow,&row_identity);
489: ISIdentity(iscol,&col_identity);
491: both_identity = (PetscBool) (row_identity && col_identity);
493: MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);
494: return(0);
495: }