Actual source code: baijfact3.c
petsc-3.11.4 2019-09-28
2: /*
3: Factorization code for BAIJ format.
4: */
5: #include <../src/mat/impls/baij/seq/baij.h>
6: #include <petsc/private/kernels/blockinvert.h>
8: /*
9: This is used to set the numeric factorization for both LU and ILU symbolic factorization
10: */
11: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool natural)
12: {
14: if (natural) {
15: switch (fact->rmap->bs) {
16: case 1:
17: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
18: break;
19: case 2:
20: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
21: break;
22: case 3:
23: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
24: break;
25: case 4:
26: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
27: break;
28: case 5:
29: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
30: break;
31: case 6:
32: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
33: break;
34: case 7:
35: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
36: break;
37: case 9:
38: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
39: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
40: #else
41: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
42: #endif
43: break;
44: case 15:
45: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
46: break;
47: default:
48: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
49: break;
50: }
51: } else {
52: switch (fact->rmap->bs) {
53: case 1:
54: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
55: break;
56: case 2:
57: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
58: break;
59: case 3:
60: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
61: break;
62: case 4:
63: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
64: break;
65: case 5:
66: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
67: break;
68: case 6:
69: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
70: break;
71: case 7:
72: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
73: break;
74: default:
75: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
76: break;
77: }
78: }
79: return(0);
80: }
82: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool natural)
83: {
85: if (natural) {
86: switch (inA->rmap->bs) {
87: case 1:
88: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
89: break;
90: case 2:
91: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
92: break;
93: case 3:
94: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
95: break;
96: case 4:
97: #if defined(PETSC_USE_REAL_MAT_SINGLE)
98: {
99: PetscBool sse_enabled_local;
101: PetscSSEIsEnabled(inA->comm,&sse_enabled_local,NULL);
102: if (sse_enabled_local) {
103: # if defined(PETSC_HAVE_SSE)
104: int i,*AJ=a->j,nz=a->nz,n=a->mbs;
105: if (n==(unsigned short)n) {
106: unsigned short *aj=(unsigned short*)AJ;
107: for (i=0; i<nz; i++) aj[i] = (unsigned short)AJ[i];
109: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
110: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
112: PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");
113: } else {
114: /* Scale the column indices for easier indexing in MatSolve. */
115: /* for (i=0;i<nz;i++) { */
116: /* AJ[i] = AJ[i]*4; */
117: /* } */
118: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
119: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
121: PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");
122: }
123: # else
124: /* This should never be reached. If so, problem in PetscSSEIsEnabled. */
125: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable");
126: # endif
127: } else {
128: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
129: }
130: }
131: #else
132: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
133: #endif
134: break;
135: case 5:
136: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
137: break;
138: case 6:
139: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
140: break;
141: case 7:
142: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
143: break;
144: default:
145: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
146: break;
147: }
148: } else {
149: switch (inA->rmap->bs) {
150: case 1:
151: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
152: break;
153: case 2:
154: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
155: break;
156: case 3:
157: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
158: break;
159: case 4:
160: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
161: break;
162: case 5:
163: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
164: break;
165: case 6:
166: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
167: break;
168: case 7:
169: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
170: break;
171: default:
172: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
173: break;
174: }
175: }
176: return(0);
177: }
179: /*
180: The symbolic factorization code is identical to that for AIJ format,
181: except for very small changes since this is now a SeqBAIJ datastructure.
182: NOT good code reuse.
183: */
184: #include <petscbt.h>
185: #include <../src/mat/utils/freespace.h>
187: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
188: {
189: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
190: PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
191: PetscBool row_identity,col_identity,both_identity;
192: IS isicol;
193: PetscErrorCode ierr;
194: const PetscInt *r,*ic;
195: PetscInt i,*ai=a->i,*aj=a->j;
196: PetscInt *bi,*bj,*ajtmp;
197: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
198: PetscReal f;
199: PetscInt nlnk,*lnk,k,**bi_ptr;
200: PetscFreeSpaceList free_space=NULL,current_space=NULL;
201: PetscBT lnkbt;
202: PetscBool missing;
205: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
206: MatMissingDiagonal(A,&missing,&i);
207: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
209: if (bs>1) { /* check shifttype */
210: if (info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
211: }
213: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
214: ISGetIndices(isrow,&r);
215: ISGetIndices(isicol,&ic);
217: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
218: PetscMalloc1(n+1,&bi);
219: PetscMalloc1(n+1,&bdiag);
220: bi[0] = bdiag[0] = 0;
222: /* linked list for storing column indices of the active row */
223: nlnk = n + 1;
224: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
226: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
228: /* initial FreeSpace size is f*(ai[n]+1) */
229: f = info->fill;
230: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
232: current_space = free_space;
234: for (i=0; i<n; i++) {
235: /* copy previous fill into linked list */
236: nzi = 0;
237: nnz = ai[r[i]+1] - ai[r[i]];
238: ajtmp = aj + ai[r[i]];
239: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
240: nzi += nlnk;
242: /* add pivot rows into linked list */
243: row = lnk[n];
244: while (row < i) {
245: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
246: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
247: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
248: nzi += nlnk;
249: row = lnk[row];
250: }
251: bi[i+1] = bi[i] + nzi;
252: im[i] = nzi;
254: /* mark bdiag */
255: nzbd = 0;
256: nnz = nzi;
257: k = lnk[n];
258: while (nnz-- && k < i) {
259: nzbd++;
260: k = lnk[k];
261: }
262: bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
264: /* if free space is not available, make more free space */
265: if (current_space->local_remaining<nzi) {
266: nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n - i,nzi)); /* estimated and max additional space needed */
267: PetscFreeSpaceGet(nnz,¤t_space);
268: reallocs++;
269: }
271: /* copy data into free space, then initialize lnk */
272: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
274: bi_ptr[i] = current_space->array;
275: current_space->array += nzi;
276: current_space->local_used += nzi;
277: current_space->local_remaining -= nzi;
278: }
280: ISRestoreIndices(isrow,&r);
281: ISRestoreIndices(isicol,&ic);
283: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
284: PetscMalloc1(bi[n]+1,&bj);
285: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
286: PetscLLDestroy(lnk,lnkbt);
287: PetscFree2(bi_ptr,im);
289: /* put together the new matrix */
290: MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);
291: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
292: b = (Mat_SeqBAIJ*)(B)->data;
294: b->free_a = PETSC_TRUE;
295: b->free_ij = PETSC_TRUE;
296: b->singlemalloc = PETSC_FALSE;
298: PetscMalloc1((bdiag[0]+1)*bs2,&b->a);
299: b->j = bj;
300: b->i = bi;
301: b->diag = bdiag;
302: b->free_diag = PETSC_TRUE;
303: b->ilen = 0;
304: b->imax = 0;
305: b->row = isrow;
306: b->col = iscol;
307: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
309: PetscObjectReference((PetscObject)isrow);
310: PetscObjectReference((PetscObject)iscol);
311: b->icol = isicol;
312: PetscMalloc1(bs*n+bs,&b->solve_work);
313: PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));
315: b->maxnz = b->nz = bdiag[0]+1;
317: B->factortype = MAT_FACTOR_LU;
318: B->info.factor_mallocs = reallocs;
319: B->info.fill_ratio_given = f;
321: if (ai[n] != 0) {
322: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
323: } else {
324: B->info.fill_ratio_needed = 0.0;
325: }
326: #if defined(PETSC_USE_INFO)
327: if (ai[n] != 0) {
328: PetscReal af = B->info.fill_ratio_needed;
329: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
330: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
331: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
332: PetscInfo(A,"for best performance.\n");
333: } else {
334: PetscInfo(A,"Empty matrix\n");
335: }
336: #endif
338: ISIdentity(isrow,&row_identity);
339: ISIdentity(iscol,&col_identity);
341: both_identity = (PetscBool) (row_identity && col_identity);
343: MatSeqBAIJSetNumericFactorization(B,both_identity);
344: return(0);
345: }
347: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
348: {
349: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
350: PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
351: PetscBool row_identity,col_identity,both_identity;
352: IS isicol;
353: PetscErrorCode ierr;
354: const PetscInt *r,*ic;
355: PetscInt i,*ai=a->i,*aj=a->j;
356: PetscInt *bi,*bj,*ajtmp;
357: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
358: PetscReal f;
359: PetscInt nlnk,*lnk,k,**bi_ptr;
360: PetscFreeSpaceList free_space=NULL,current_space=NULL;
361: PetscBT lnkbt;
362: PetscBool missing;
365: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
366: MatMissingDiagonal(A,&missing,&i);
367: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
369: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
370: ISGetIndices(isrow,&r);
371: ISGetIndices(isicol,&ic);
373: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
374: PetscMalloc1(n+1,&bi);
375: PetscMalloc1(n+1,&bdiag);
377: bi[0] = bdiag[0] = 0;
379: /* linked list for storing column indices of the active row */
380: nlnk = n + 1;
381: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
383: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
385: /* initial FreeSpace size is f*(ai[n]+1) */
386: f = info->fill;
387: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
388: current_space = free_space;
390: for (i=0; i<n; i++) {
391: /* copy previous fill into linked list */
392: nzi = 0;
393: nnz = ai[r[i]+1] - ai[r[i]];
394: ajtmp = aj + ai[r[i]];
395: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
396: nzi += nlnk;
398: /* add pivot rows into linked list */
399: row = lnk[n];
400: while (row < i) {
401: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
402: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
403: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
404: nzi += nlnk;
405: row = lnk[row];
406: }
407: bi[i+1] = bi[i] + nzi;
408: im[i] = nzi;
410: /* mark bdiag */
411: nzbd = 0;
412: nnz = nzi;
413: k = lnk[n];
414: while (nnz-- && k < i) {
415: nzbd++;
416: k = lnk[k];
417: }
418: bdiag[i] = bi[i] + nzbd;
420: /* if free space is not available, make more free space */
421: if (current_space->local_remaining<nzi) {
422: nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
423: PetscFreeSpaceGet(nnz,¤t_space);
424: reallocs++;
425: }
427: /* copy data into free space, then initialize lnk */
428: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
430: bi_ptr[i] = current_space->array;
431: current_space->array += nzi;
432: current_space->local_used += nzi;
433: current_space->local_remaining -= nzi;
434: }
435: #if defined(PETSC_USE_INFO)
436: if (ai[n] != 0) {
437: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
438: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
439: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
440: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
441: PetscInfo(A,"for best performance.\n");
442: } else {
443: PetscInfo(A,"Empty matrix\n");
444: }
445: #endif
447: ISRestoreIndices(isrow,&r);
448: ISRestoreIndices(isicol,&ic);
450: /* destroy list of free space and other temporary array(s) */
451: PetscMalloc1(bi[n]+1,&bj);
452: PetscFreeSpaceContiguous(&free_space,bj);
453: PetscLLDestroy(lnk,lnkbt);
454: PetscFree2(bi_ptr,im);
456: /* put together the new matrix */
457: MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);
458: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
459: b = (Mat_SeqBAIJ*)(B)->data;
461: b->free_a = PETSC_TRUE;
462: b->free_ij = PETSC_TRUE;
463: b->singlemalloc = PETSC_FALSE;
465: PetscMalloc1((bi[n]+1)*bs2,&b->a);
466: b->j = bj;
467: b->i = bi;
468: b->diag = bdiag;
469: b->free_diag = PETSC_TRUE;
470: b->ilen = 0;
471: b->imax = 0;
472: b->row = isrow;
473: b->col = iscol;
474: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
476: PetscObjectReference((PetscObject)isrow);
477: PetscObjectReference((PetscObject)iscol);
478: b->icol = isicol;
480: PetscMalloc1(bs*n+bs,&b->solve_work);
481: PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));
483: b->maxnz = b->nz = bi[n];
485: (B)->factortype = MAT_FACTOR_LU;
486: (B)->info.factor_mallocs = reallocs;
487: (B)->info.fill_ratio_given = f;
489: if (ai[n] != 0) {
490: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
491: } else {
492: (B)->info.fill_ratio_needed = 0.0;
493: }
495: ISIdentity(isrow,&row_identity);
496: ISIdentity(iscol,&col_identity);
498: both_identity = (PetscBool) (row_identity && col_identity);
500: MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);
501: return(0);
502: }