Actual source code: baijfact3.c
petsc-3.3-p7 2013-05-11
2: /*
3: Factorization code for BAIJ format.
4: */
5: #include <../src/mat/impls/baij/seq/baij.h>
6: #include <../src/mat/blockinvert.h>
10: /*
11: This is used to set the numeric factorization for both LU and ILU symbolic factorization
12: */
13: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool natural)
14: {
16: if(natural){
17: switch (fact->rmap->bs){
18: case 1:
19: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
20: break;
21: case 2:
22: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
23: break;
24: case 3:
25: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
26: break;
27: case 4:
28: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
29: break;
30: case 5:
31: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
32: break;
33: case 6:
34: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
35: break;
36: case 7:
37: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
38: break;
39: case 15:
40: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
41: break;
42: default:
43: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
44: break;
45: }
46: } else {
47: switch (fact->rmap->bs){
48: case 1:
49: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
50: break;
51: case 2:
52: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
53: break;
54: case 3:
55: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
56: break;
57: case 4:
58: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
59: break;
60: case 5:
61: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
62: break;
63: case 6:
64: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
65: break;
66: case 7:
67: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
68: break;
69: default:
70: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
71: break;
72: }
73: }
74: return(0);
75: }
79: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool natural)
80: {
82: if (natural) {
83: switch (inA->rmap->bs) {
84: case 1:
85: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
86: break;
87: case 2:
88: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
89: break;
90: case 3:
91: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
92: break;
93: case 4:
94: #if defined(PETSC_USE_REAL_MAT_SINGLE)
95: {
96: PetscBool sse_enabled_local;
98: PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);
99: if (sse_enabled_local) {
100: # if defined(PETSC_HAVE_SSE)
101: int i,*AJ=a->j,nz=a->nz,n=a->mbs;
102: if (n==(unsigned short)n) {
103: unsigned short *aj=(unsigned short *)AJ;
104: for (i=0;i<nz;i++) {
105: aj[i] = (unsigned short)AJ[i];
106: }
107: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
108: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
109: PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");
110: } else {
111: /* Scale the column indices for easier indexing in MatSolve. */
112: /* for (i=0;i<nz;i++) { */
113: /* AJ[i] = AJ[i]*4; */
114: /* } */
115: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
116: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
117: PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");
118: }
119: # else
120: /* This should never be reached. If so, problem in PetscSSEIsEnabled. */
121: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable");
122: # endif
123: } else {
124: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
125: }
126: }
127: #else
128: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
129: #endif
130: break;
131: case 5:
132: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
133: break;
134: case 6:
135: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
136: break;
137: case 7:
138: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
139: break;
140: default:
141: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
142: break;
143: }
144: } else {
145: switch (inA->rmap->bs) {
146: case 1:
147: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
148: break;
149: case 2:
150: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
151: break;
152: case 3:
153: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
154: break;
155: case 4:
156: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
157: break;
158: case 5:
159: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
160: break;
161: case 6:
162: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
163: break;
164: case 7:
165: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
166: break;
167: default:
168: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
169: break;
170: }
171: }
172: return(0);
173: }
175: /*
176: The symbolic factorization code is identical to that for AIJ format,
177: except for very small changes since this is now a SeqBAIJ datastructure.
178: NOT good code reuse.
179: */
180: #include <petscbt.h>
181: #include <../src/mat/utils/freespace.h>
185: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
186: {
187: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
188: PetscInt n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
189: PetscBool row_identity,col_identity,both_identity;
190: IS isicol;
191: PetscErrorCode ierr;
192: const PetscInt *r,*ic;
193: PetscInt i,*ai=a->i,*aj=a->j;
194: PetscInt *bi,*bj,*ajtmp;
195: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
196: PetscReal f;
197: PetscInt nlnk,*lnk,k,**bi_ptr;
198: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
199: PetscBT lnkbt;
202: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
203: if (bs>1){ /* check shifttype */
204: if (info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE)
205: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
206: }
208: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
209: ISGetIndices(isrow,&r);
210: ISGetIndices(isicol,&ic);
212: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
213: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
214: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
215: bi[0] = bdiag[0] = 0;
217: /* linked list for storing column indices of the active row */
218: nlnk = n + 1;
219: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
221: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
223: /* initial FreeSpace size is f*(ai[n]+1) */
224: f = info->fill;
225: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
226: current_space = free_space;
228: for (i=0; i<n; i++) {
229: /* copy previous fill into linked list */
230: nzi = 0;
231: nnz = ai[r[i]+1] - ai[r[i]];
232: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
233: ajtmp = aj + ai[r[i]];
234: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
235: nzi += nlnk;
237: /* add pivot rows into linked list */
238: row = lnk[n];
239: while (row < i) {
240: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
241: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
242: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
243: nzi += nlnk;
244: row = lnk[row];
245: }
246: bi[i+1] = bi[i] + nzi;
247: im[i] = nzi;
249: /* mark bdiag */
250: nzbd = 0;
251: nnz = nzi;
252: k = lnk[n];
253: while (nnz-- && k < i){
254: nzbd++;
255: k = lnk[k];
256: }
257: bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
259: /* if free space is not available, make more free space */
260: if (current_space->local_remaining<nzi) {
261: nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
262: PetscFreeSpaceGet(nnz,¤t_space);
263: reallocs++;
264: }
266: /* copy data into free space, then initialize lnk */
267: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
268: bi_ptr[i] = current_space->array;
269: current_space->array += nzi;
270: current_space->local_used += nzi;
271: current_space->local_remaining -= nzi;
272: }
274: ISRestoreIndices(isrow,&r);
275: ISRestoreIndices(isicol,&ic);
277: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
278: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
279: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
280: PetscLLDestroy(lnk,lnkbt);
281: PetscFree2(bi_ptr,im);
283: /* put together the new matrix */
284: MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);
285: PetscLogObjectParent(B,isicol);
286: b = (Mat_SeqBAIJ*)(B)->data;
287: b->free_a = PETSC_TRUE;
288: b->free_ij = PETSC_TRUE;
289: b->singlemalloc = PETSC_FALSE;
290: PetscMalloc((bdiag[0]+1)*sizeof(MatScalar)*bs2,&b->a);
291: b->j = bj;
292: b->i = bi;
293: b->diag = bdiag;
294: b->free_diag = PETSC_TRUE;
295: b->ilen = 0;
296: b->imax = 0;
297: b->row = isrow;
298: b->col = iscol;
299: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
300: PetscObjectReference((PetscObject)isrow);
301: PetscObjectReference((PetscObject)iscol);
302: b->icol = isicol;
303: PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);
304: PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));
305:
306: b->maxnz = b->nz = bdiag[0]+1;
307: B->factortype = MAT_FACTOR_LU;
308: B->info.factor_mallocs = reallocs;
309: B->info.fill_ratio_given = f;
311: if (ai[n] != 0) {
312: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
313: } else {
314: B->info.fill_ratio_needed = 0.0;
315: }
316: #if defined(PETSC_USE_INFO)
317: if (ai[n] != 0) {
318: PetscReal af = B->info.fill_ratio_needed;
319: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
320: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
321: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
322: PetscInfo(A,"for best performance.\n");
323: } else {
324: PetscInfo(A,"Empty matrix\n");
325: }
326: #endif
327:
328: ISIdentity(isrow,&row_identity);
329: ISIdentity(iscol,&col_identity);
330: both_identity = (PetscBool) (row_identity && col_identity);
331: MatSeqBAIJSetNumericFactorization(B,both_identity);
332: return(0);
333: }
337: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
338: {
339: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
340: PetscInt n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
341: PetscBool row_identity,col_identity,both_identity;
342: IS isicol;
343: PetscErrorCode ierr;
344: const PetscInt *r,*ic;
345: PetscInt i,*ai=a->i,*aj=a->j;
346: PetscInt *bi,*bj,*ajtmp;
347: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
348: PetscReal f;
349: PetscInt nlnk,*lnk,k,**bi_ptr;
350: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
351: PetscBT lnkbt;
354: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
355: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
356: ISGetIndices(isrow,&r);
357: ISGetIndices(isicol,&ic);
359: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
360: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
361: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
363: bi[0] = bdiag[0] = 0;
365: /* linked list for storing column indices of the active row */
366: nlnk = n + 1;
367: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
369: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
371: /* initial FreeSpace size is f*(ai[n]+1) */
372: f = info->fill;
373: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
374: current_space = free_space;
376: for (i=0; i<n; i++) {
377: /* copy previous fill into linked list */
378: nzi = 0;
379: nnz = ai[r[i]+1] - ai[r[i]];
380: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
381: ajtmp = aj + ai[r[i]];
382: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
383: nzi += nlnk;
385: /* add pivot rows into linked list */
386: row = lnk[n];
387: while (row < i) {
388: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
389: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
390: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
391: nzi += nlnk;
392: row = lnk[row];
393: }
394: bi[i+1] = bi[i] + nzi;
395: im[i] = nzi;
397: /* mark bdiag */
398: nzbd = 0;
399: nnz = nzi;
400: k = lnk[n];
401: while (nnz-- && k < i){
402: nzbd++;
403: k = lnk[k];
404: }
405: bdiag[i] = bi[i] + nzbd;
407: /* if free space is not available, make more free space */
408: if (current_space->local_remaining<nzi) {
409: nnz = (n - i)*nzi; /* estimated and max additional space needed */
410: PetscFreeSpaceGet(nnz,¤t_space);
411: reallocs++;
412: }
414: /* copy data into free space, then initialize lnk */
415: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
416: bi_ptr[i] = current_space->array;
417: current_space->array += nzi;
418: current_space->local_used += nzi;
419: current_space->local_remaining -= nzi;
420: }
421: #if defined(PETSC_USE_INFO)
422: if (ai[n] != 0) {
423: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
424: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
425: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
426: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
427: PetscInfo(A,"for best performance.\n");
428: } else {
429: PetscInfo(A,"Empty matrix\n");
430: }
431: #endif
433: ISRestoreIndices(isrow,&r);
434: ISRestoreIndices(isicol,&ic);
436: /* destroy list of free space and other temporary array(s) */
437: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
438: PetscFreeSpaceContiguous(&free_space,bj);
439: PetscLLDestroy(lnk,lnkbt);
440: PetscFree2(bi_ptr,im);
442: /* put together the new matrix */
443: MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);
444: PetscLogObjectParent(B,isicol);
445: b = (Mat_SeqBAIJ*)(B)->data;
446: b->free_a = PETSC_TRUE;
447: b->free_ij = PETSC_TRUE;
448: b->singlemalloc = PETSC_FALSE;
449: PetscMalloc((bi[n]+1)*sizeof(MatScalar)*bs2,&b->a);
450: b->j = bj;
451: b->i = bi;
452: b->diag = bdiag;
453: b->free_diag = PETSC_TRUE;
454: b->ilen = 0;
455: b->imax = 0;
456: b->row = isrow;
457: b->col = iscol;
458: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
459: PetscObjectReference((PetscObject)isrow);
460: PetscObjectReference((PetscObject)iscol);
461: b->icol = isicol;
462: PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);
463: PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));
464:
465: b->maxnz = b->nz = bi[n] ;
466: (B)->factortype = MAT_FACTOR_LU;
467: (B)->info.factor_mallocs = reallocs;
468: (B)->info.fill_ratio_given = f;
470: if (ai[n] != 0) {
471: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
472: } else {
473: (B)->info.fill_ratio_needed = 0.0;
474: }
476: ISIdentity(isrow,&row_identity);
477: ISIdentity(iscol,&col_identity);
478: both_identity = (PetscBool) (row_identity && col_identity);
479: MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);
480: return(0);
481: }