Actual source code: baijfact3.c
petsc-3.4.5 2014-06-29
2: /*
3: Factorization code for BAIJ format.
4: */
5: #include <../src/mat/impls/baij/seq/baij.h>
6: #include <petsc-private/kernels/blockinvert.h>
10: /*
11: This is used to set the numeric factorization for both LU and ILU symbolic factorization
12: */
13: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool natural)
14: {
16: if (natural) {
17: switch (fact->rmap->bs) {
18: case 1:
19: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
20: break;
21: case 2:
22: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
23: break;
24: case 3:
25: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
26: break;
27: case 4:
28: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
29: break;
30: case 5:
31: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
32: break;
33: case 6:
34: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
35: break;
36: case 7:
37: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
38: break;
39: case 15:
40: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
41: break;
42: default:
43: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
44: break;
45: }
46: } else {
47: switch (fact->rmap->bs) {
48: case 1:
49: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
50: break;
51: case 2:
52: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
53: break;
54: case 3:
55: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
56: break;
57: case 4:
58: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
59: break;
60: case 5:
61: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
62: break;
63: case 6:
64: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
65: break;
66: case 7:
67: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
68: break;
69: default:
70: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
71: break;
72: }
73: }
74: return(0);
75: }
79: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool natural)
80: {
82: if (natural) {
83: switch (inA->rmap->bs) {
84: case 1:
85: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
86: break;
87: case 2:
88: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
89: break;
90: case 3:
91: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
92: break;
93: case 4:
94: #if defined(PETSC_USE_REAL_MAT_SINGLE)
95: {
96: PetscBool sse_enabled_local;
98: PetscSSEIsEnabled(inA->comm,&sse_enabled_local,NULL);
99: if (sse_enabled_local) {
100: # if defined(PETSC_HAVE_SSE)
101: int i,*AJ=a->j,nz=a->nz,n=a->mbs;
102: if (n==(unsigned short)n) {
103: unsigned short *aj=(unsigned short*)AJ;
104: for (i=0; i<nz; i++) aj[i] = (unsigned short)AJ[i];
106: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
107: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
109: PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");
110: } else {
111: /* Scale the column indices for easier indexing in MatSolve. */
112: /* for (i=0;i<nz;i++) { */
113: /* AJ[i] = AJ[i]*4; */
114: /* } */
115: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
116: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
118: PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");
119: }
120: # else
121: /* This should never be reached. If so, problem in PetscSSEIsEnabled. */
122: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable");
123: # endif
124: } else {
125: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
126: }
127: }
128: #else
129: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
130: #endif
131: break;
132: case 5:
133: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
134: break;
135: case 6:
136: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
137: break;
138: case 7:
139: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
140: break;
141: default:
142: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
143: break;
144: }
145: } else {
146: switch (inA->rmap->bs) {
147: case 1:
148: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
149: break;
150: case 2:
151: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
152: break;
153: case 3:
154: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
155: break;
156: case 4:
157: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
158: break;
159: case 5:
160: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
161: break;
162: case 6:
163: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
164: break;
165: case 7:
166: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
167: break;
168: default:
169: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
170: break;
171: }
172: }
173: return(0);
174: }
176: /*
177: The symbolic factorization code is identical to that for AIJ format,
178: except for very small changes since this is now a SeqBAIJ datastructure.
179: NOT good code reuse.
180: */
181: #include <petscbt.h>
182: #include <../src/mat/utils/freespace.h>
186: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
187: {
188: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
189: PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
190: PetscBool row_identity,col_identity,both_identity;
191: IS isicol;
192: PetscErrorCode ierr;
193: const PetscInt *r,*ic;
194: PetscInt i,*ai=a->i,*aj=a->j;
195: PetscInt *bi,*bj,*ajtmp;
196: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
197: PetscReal f;
198: PetscInt nlnk,*lnk,k,**bi_ptr;
199: PetscFreeSpaceList free_space=NULL,current_space=NULL;
200: PetscBT lnkbt;
203: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
204: if (bs>1) { /* check shifttype */
205: if (info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
206: }
208: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
209: ISGetIndices(isrow,&r);
210: ISGetIndices(isicol,&ic);
212: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
213: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
214: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
215: bi[0] = bdiag[0] = 0;
217: /* linked list for storing column indices of the active row */
218: nlnk = n + 1;
219: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
221: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
223: /* initial FreeSpace size is f*(ai[n]+1) */
224: f = info->fill;
225: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
227: current_space = free_space;
229: for (i=0; i<n; i++) {
230: /* copy previous fill into linked list */
231: nzi = 0;
232: nnz = ai[r[i]+1] - ai[r[i]];
233: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
234: ajtmp = aj + ai[r[i]];
235: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
236: nzi += nlnk;
238: /* add pivot rows into linked list */
239: row = lnk[n];
240: while (row < i) {
241: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
242: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
243: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
244: nzi += nlnk;
245: row = lnk[row];
246: }
247: bi[i+1] = bi[i] + nzi;
248: im[i] = nzi;
250: /* mark bdiag */
251: nzbd = 0;
252: nnz = nzi;
253: k = lnk[n];
254: while (nnz-- && k < i) {
255: nzbd++;
256: k = lnk[k];
257: }
258: bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
260: /* if free space is not available, make more free space */
261: if (current_space->local_remaining<nzi) {
262: nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
263: PetscFreeSpaceGet(nnz,¤t_space);
264: reallocs++;
265: }
267: /* copy data into free space, then initialize lnk */
268: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
270: bi_ptr[i] = current_space->array;
271: current_space->array += nzi;
272: current_space->local_used += nzi;
273: current_space->local_remaining -= nzi;
274: }
276: ISRestoreIndices(isrow,&r);
277: ISRestoreIndices(isicol,&ic);
279: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
280: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
281: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
282: PetscLLDestroy(lnk,lnkbt);
283: PetscFree2(bi_ptr,im);
285: /* put together the new matrix */
286: MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,NULL);
287: PetscLogObjectParent(B,isicol);
288: b = (Mat_SeqBAIJ*)(B)->data;
290: b->free_a = PETSC_TRUE;
291: b->free_ij = PETSC_TRUE;
292: b->singlemalloc = PETSC_FALSE;
294: PetscMalloc((bdiag[0]+1)*sizeof(MatScalar)*bs2,&b->a);
295: b->j = bj;
296: b->i = bi;
297: b->diag = bdiag;
298: b->free_diag = PETSC_TRUE;
299: b->ilen = 0;
300: b->imax = 0;
301: b->row = isrow;
302: b->col = iscol;
303: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
305: PetscObjectReference((PetscObject)isrow);
306: PetscObjectReference((PetscObject)iscol);
307: b->icol = isicol;
308: PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);
309: PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));
311: b->maxnz = b->nz = bdiag[0]+1;
313: B->factortype = MAT_FACTOR_LU;
314: B->info.factor_mallocs = reallocs;
315: B->info.fill_ratio_given = f;
317: if (ai[n] != 0) {
318: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
319: } else {
320: B->info.fill_ratio_needed = 0.0;
321: }
322: #if defined(PETSC_USE_INFO)
323: if (ai[n] != 0) {
324: PetscReal af = B->info.fill_ratio_needed;
325: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
326: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
327: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
328: PetscInfo(A,"for best performance.\n");
329: } else {
330: PetscInfo(A,"Empty matrix\n");
331: }
332: #endif
334: ISIdentity(isrow,&row_identity);
335: ISIdentity(iscol,&col_identity);
337: both_identity = (PetscBool) (row_identity && col_identity);
339: MatSeqBAIJSetNumericFactorization(B,both_identity);
340: return(0);
341: }
345: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
346: {
347: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
348: PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
349: PetscBool row_identity,col_identity,both_identity;
350: IS isicol;
351: PetscErrorCode ierr;
352: const PetscInt *r,*ic;
353: PetscInt i,*ai=a->i,*aj=a->j;
354: PetscInt *bi,*bj,*ajtmp;
355: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
356: PetscReal f;
357: PetscInt nlnk,*lnk,k,**bi_ptr;
358: PetscFreeSpaceList free_space=NULL,current_space=NULL;
359: PetscBT lnkbt;
362: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
363: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
364: ISGetIndices(isrow,&r);
365: ISGetIndices(isicol,&ic);
367: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
368: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
369: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
371: bi[0] = bdiag[0] = 0;
373: /* linked list for storing column indices of the active row */
374: nlnk = n + 1;
375: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
377: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
379: /* initial FreeSpace size is f*(ai[n]+1) */
380: f = info->fill;
381: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
382: current_space = free_space;
384: for (i=0; i<n; i++) {
385: /* copy previous fill into linked list */
386: nzi = 0;
387: nnz = ai[r[i]+1] - ai[r[i]];
388: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
389: ajtmp = aj + ai[r[i]];
390: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
391: nzi += nlnk;
393: /* add pivot rows into linked list */
394: row = lnk[n];
395: while (row < i) {
396: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
397: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
398: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
399: nzi += nlnk;
400: row = lnk[row];
401: }
402: bi[i+1] = bi[i] + nzi;
403: im[i] = nzi;
405: /* mark bdiag */
406: nzbd = 0;
407: nnz = nzi;
408: k = lnk[n];
409: while (nnz-- && k < i) {
410: nzbd++;
411: k = lnk[k];
412: }
413: bdiag[i] = bi[i] + nzbd;
415: /* if free space is not available, make more free space */
416: if (current_space->local_remaining<nzi) {
417: nnz = (n - i)*nzi; /* estimated and max additional space needed */
418: PetscFreeSpaceGet(nnz,¤t_space);
419: reallocs++;
420: }
422: /* copy data into free space, then initialize lnk */
423: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
425: bi_ptr[i] = current_space->array;
426: current_space->array += nzi;
427: current_space->local_used += nzi;
428: current_space->local_remaining -= nzi;
429: }
430: #if defined(PETSC_USE_INFO)
431: if (ai[n] != 0) {
432: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
433: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
434: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
435: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
436: PetscInfo(A,"for best performance.\n");
437: } else {
438: PetscInfo(A,"Empty matrix\n");
439: }
440: #endif
442: ISRestoreIndices(isrow,&r);
443: ISRestoreIndices(isicol,&ic);
445: /* destroy list of free space and other temporary array(s) */
446: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
447: PetscFreeSpaceContiguous(&free_space,bj);
448: PetscLLDestroy(lnk,lnkbt);
449: PetscFree2(bi_ptr,im);
451: /* put together the new matrix */
452: MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,NULL);
453: PetscLogObjectParent(B,isicol);
454: b = (Mat_SeqBAIJ*)(B)->data;
456: b->free_a = PETSC_TRUE;
457: b->free_ij = PETSC_TRUE;
458: b->singlemalloc = PETSC_FALSE;
460: PetscMalloc((bi[n]+1)*sizeof(MatScalar)*bs2,&b->a);
461: b->j = bj;
462: b->i = bi;
463: b->diag = bdiag;
464: b->free_diag = PETSC_TRUE;
465: b->ilen = 0;
466: b->imax = 0;
467: b->row = isrow;
468: b->col = iscol;
469: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
471: PetscObjectReference((PetscObject)isrow);
472: PetscObjectReference((PetscObject)iscol);
473: b->icol = isicol;
475: PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);
476: PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));
478: b->maxnz = b->nz = bi[n];
480: (B)->factortype = MAT_FACTOR_LU;
481: (B)->info.factor_mallocs = reallocs;
482: (B)->info.fill_ratio_given = f;
484: if (ai[n] != 0) {
485: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
486: } else {
487: (B)->info.fill_ratio_needed = 0.0;
488: }
490: ISIdentity(isrow,&row_identity);
491: ISIdentity(iscol,&col_identity);
493: both_identity = (PetscBool) (row_identity && col_identity);
495: MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);
496: return(0);
497: }