Actual source code: baijfact3.c

petsc-3.3-p7 2013-05-11
  2: /*
  3:     Factorization code for BAIJ format. 
  4: */
  5: #include <../src/mat/impls/baij/seq/baij.h>
  6: #include <../src/mat/blockinvert.h>

 10: /*
 11:    This is used to set the numeric factorization for both LU and ILU symbolic factorization
 12: */
 13: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool  natural)
 14: {
 16:   if(natural){
 17:     switch (fact->rmap->bs){
 18:     case 1:
 19:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
 20:       break;
 21:     case 2:
 22:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
 23:       break;
 24:     case 3:
 25:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
 26:       break;
 27:     case 4:
 28:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
 29:       break;
 30:     case 5:
 31:        fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
 32:        break;
 33:     case 6:
 34:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
 35:       break;
 36:     case 7:
 37:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
 38:       break;
 39:     case 15:
 40:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
 41:       break;
 42:     default:
 43:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 44:       break;
 45:     }
 46:   } else {
 47:     switch (fact->rmap->bs){
 48:     case 1:
 49:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
 50:       break;
 51:     case 2:
 52:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
 53:       break;
 54:     case 3:
 55:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
 56:       break;
 57:     case 4:
 58:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
 59:       break;
 60:     case 5:
 61:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
 62:       break;
 63:     case 6:
 64:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
 65:       break;
 66:     case 7:
 67:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
 68:       break;
 69:     default:
 70:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 71:       break;
 72:     }
 73:   }
 74:   return(0);
 75: }

 79: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool  natural)
 80: {
 82:   if (natural) {
 83:     switch (inA->rmap->bs) {
 84:     case 1:
 85:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
 86:       break;
 87:     case 2:
 88:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
 89:       break;
 90:     case 3:
 91:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
 92:       break;
 93:     case 4:
 94: #if defined(PETSC_USE_REAL_MAT_SINGLE)
 95:       {
 96:         PetscBool   sse_enabled_local;
 98:         PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);
 99:         if (sse_enabled_local) {
100: #  if defined(PETSC_HAVE_SSE)
101:           int i,*AJ=a->j,nz=a->nz,n=a->mbs;
102:           if (n==(unsigned short)n) {
103:             unsigned short *aj=(unsigned short *)AJ;
104:             for (i=0;i<nz;i++) {
105:               aj[i] = (unsigned short)AJ[i];
106:             }
107:             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
108:             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
109:             PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");
110:           } else {
111:         /* Scale the column indices for easier indexing in MatSolve. */
112: /*            for (i=0;i<nz;i++) { */
113: /*              AJ[i] = AJ[i]*4; */
114: /*            } */
115:             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
116:             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
117:             PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");
118:           }
119: #  else
120:         /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
121:           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable");
122: #  endif
123:         } else {
124:           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
125:         }
126:       }
127: #else
128:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
129: #endif
130:       break;
131:     case 5:
132:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
133:       break;
134:     case 6:
135:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
136:       break;
137:     case 7:
138:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
139:       break;
140:     default:
141:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
142:       break;
143:     }
144:   } else {
145:     switch (inA->rmap->bs) {
146:     case 1:
147:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
148:       break;
149:     case 2:
150:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
151:       break;
152:     case 3:
153:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
154:       break;
155:     case 4:
156:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
157:       break;
158:     case 5:
159:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
160:       break;
161:     case 6:
162:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
163:       break;
164:     case 7:
165:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
166:       break;
167:     default:
168:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
169:       break;
170:     }
171:   }
172:   return(0);
173: }

175: /*
176:     The symbolic factorization code is identical to that for AIJ format,
177:   except for very small changes since this is now a SeqBAIJ datastructure.
178:   NOT good code reuse.
179: */
180: #include <petscbt.h>
181: #include <../src/mat/utils/freespace.h>

185: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
186: {
187:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
188:   PetscInt           n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
189:   PetscBool          row_identity,col_identity,both_identity;
190:   IS                 isicol;
191:   PetscErrorCode     ierr;
192:   const PetscInt     *r,*ic;
193:   PetscInt           i,*ai=a->i,*aj=a->j;
194:   PetscInt           *bi,*bj,*ajtmp;
195:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
196:   PetscReal          f;
197:   PetscInt           nlnk,*lnk,k,**bi_ptr;
198:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
199:   PetscBT            lnkbt;

202:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
203:   if (bs>1){  /* check shifttype */
204:     if (info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE)
205:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
206:   }

208:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
209:   ISGetIndices(isrow,&r);
210:   ISGetIndices(isicol,&ic);

212:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
213:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
214:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
215:   bi[0] = bdiag[0] = 0;

217:   /* linked list for storing column indices of the active row */
218:   nlnk = n + 1;
219:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

221:   PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);

223:   /* initial FreeSpace size is f*(ai[n]+1) */
224:   f = info->fill;
225:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
226:   current_space = free_space;

228:   for (i=0; i<n; i++) {
229:     /* copy previous fill into linked list */
230:     nzi = 0;
231:     nnz = ai[r[i]+1] - ai[r[i]];
232:     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
233:     ajtmp = aj + ai[r[i]];
234:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
235:     nzi += nlnk;

237:     /* add pivot rows into linked list */
238:     row = lnk[n];
239:     while (row < i) {
240:       nzbd    = bdiag[row] + 1; /* num of entries in the row with column index <= row */
241:       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
242:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
243:       nzi += nlnk;
244:       row  = lnk[row];
245:     }
246:     bi[i+1] = bi[i] + nzi;
247:     im[i]   = nzi;

249:     /* mark bdiag */
250:     nzbd = 0;
251:     nnz  = nzi;
252:     k    = lnk[n];
253:     while (nnz-- && k < i){
254:       nzbd++;
255:       k = lnk[k];
256:     }
257:     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

259:     /* if free space is not available, make more free space */
260:     if (current_space->local_remaining<nzi) {
261:       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
262:       PetscFreeSpaceGet(nnz,&current_space);
263:       reallocs++;
264:     }

266:     /* copy data into free space, then initialize lnk */
267:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
268:     bi_ptr[i] = current_space->array;
269:     current_space->array           += nzi;
270:     current_space->local_used      += nzi;
271:     current_space->local_remaining -= nzi;
272:   }

274:   ISRestoreIndices(isrow,&r);
275:   ISRestoreIndices(isicol,&ic);

277:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
278:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
279:   PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
280:   PetscLLDestroy(lnk,lnkbt);
281:   PetscFree2(bi_ptr,im);

283:   /* put together the new matrix */
284:   MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);
285:   PetscLogObjectParent(B,isicol);
286:   b    = (Mat_SeqBAIJ*)(B)->data;
287:   b->free_a       = PETSC_TRUE;
288:   b->free_ij      = PETSC_TRUE;
289:   b->singlemalloc = PETSC_FALSE;
290:   PetscMalloc((bdiag[0]+1)*sizeof(MatScalar)*bs2,&b->a);
291:   b->j          = bj;
292:   b->i          = bi;
293:   b->diag       = bdiag;
294:   b->free_diag  = PETSC_TRUE;
295:   b->ilen       = 0;
296:   b->imax       = 0;
297:   b->row        = isrow;
298:   b->col        = iscol;
299:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
300:   PetscObjectReference((PetscObject)isrow);
301:   PetscObjectReference((PetscObject)iscol);
302:   b->icol       = isicol;
303:   PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);
304:   PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));
305: 
306:   b->maxnz = b->nz = bdiag[0]+1;
307:   B->factortype            =  MAT_FACTOR_LU;
308:   B->info.factor_mallocs   = reallocs;
309:   B->info.fill_ratio_given = f;

311:   if (ai[n] != 0) {
312:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
313:   } else {
314:     B->info.fill_ratio_needed = 0.0;
315:   }
316: #if defined(PETSC_USE_INFO)
317:   if (ai[n] != 0) {
318:     PetscReal af = B->info.fill_ratio_needed;
319:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
320:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
321:     PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
322:     PetscInfo(A,"for best performance.\n");
323:   } else {
324:     PetscInfo(A,"Empty matrix\n");
325:   }
326: #endif
327: 
328:   ISIdentity(isrow,&row_identity);
329:   ISIdentity(iscol,&col_identity);
330:   both_identity = (PetscBool) (row_identity && col_identity);
331:   MatSeqBAIJSetNumericFactorization(B,both_identity);
332:   return(0);
333:  }

337: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
338: {
339:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
340:   PetscInt           n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
341:   PetscBool          row_identity,col_identity,both_identity;
342:   IS                 isicol;
343:   PetscErrorCode     ierr;
344:   const PetscInt     *r,*ic;
345:   PetscInt           i,*ai=a->i,*aj=a->j;
346:   PetscInt           *bi,*bj,*ajtmp;
347:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
348:   PetscReal          f;
349:   PetscInt           nlnk,*lnk,k,**bi_ptr;
350:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
351:   PetscBT            lnkbt;

354:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
355:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
356:   ISGetIndices(isrow,&r);
357:   ISGetIndices(isicol,&ic);

359:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
360:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
361:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);

363:   bi[0] = bdiag[0] = 0;

365:   /* linked list for storing column indices of the active row */
366:   nlnk = n + 1;
367:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

369:   PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);

371:   /* initial FreeSpace size is f*(ai[n]+1) */
372:   f = info->fill;
373:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
374:   current_space = free_space;

376:   for (i=0; i<n; i++) {
377:     /* copy previous fill into linked list */
378:     nzi = 0;
379:     nnz = ai[r[i]+1] - ai[r[i]];
380:     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
381:     ajtmp = aj + ai[r[i]];
382:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
383:     nzi += nlnk;

385:     /* add pivot rows into linked list */
386:     row = lnk[n];
387:     while (row < i) {
388:       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
389:       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
390:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
391:       nzi += nlnk;
392:       row  = lnk[row];
393:     }
394:     bi[i+1] = bi[i] + nzi;
395:     im[i]   = nzi;

397:     /* mark bdiag */
398:     nzbd = 0;
399:     nnz  = nzi;
400:     k    = lnk[n];
401:     while (nnz-- && k < i){
402:       nzbd++;
403:       k = lnk[k];
404:     }
405:     bdiag[i] = bi[i] + nzbd;

407:     /* if free space is not available, make more free space */
408:     if (current_space->local_remaining<nzi) {
409:       nnz = (n - i)*nzi; /* estimated and max additional space needed */
410:       PetscFreeSpaceGet(nnz,&current_space);
411:       reallocs++;
412:     }

414:     /* copy data into free space, then initialize lnk */
415:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
416:     bi_ptr[i] = current_space->array;
417:     current_space->array           += nzi;
418:     current_space->local_used      += nzi;
419:     current_space->local_remaining -= nzi;
420:   }
421: #if defined(PETSC_USE_INFO)
422:   if (ai[n] != 0) {
423:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
424:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
425:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
426:     PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
427:     PetscInfo(A,"for best performance.\n");
428:   } else {
429:     PetscInfo(A,"Empty matrix\n");
430:   }
431: #endif

433:   ISRestoreIndices(isrow,&r);
434:   ISRestoreIndices(isicol,&ic);

436:   /* destroy list of free space and other temporary array(s) */
437:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
438:   PetscFreeSpaceContiguous(&free_space,bj);
439:   PetscLLDestroy(lnk,lnkbt);
440:   PetscFree2(bi_ptr,im);

442:   /* put together the new matrix */
443:   MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);
444:   PetscLogObjectParent(B,isicol);
445:   b    = (Mat_SeqBAIJ*)(B)->data;
446:   b->free_a       = PETSC_TRUE;
447:   b->free_ij      = PETSC_TRUE;
448:   b->singlemalloc = PETSC_FALSE;
449:   PetscMalloc((bi[n]+1)*sizeof(MatScalar)*bs2,&b->a);
450:   b->j          = bj;
451:   b->i          = bi;
452:   b->diag       = bdiag;
453:   b->free_diag  = PETSC_TRUE;
454:   b->ilen       = 0;
455:   b->imax       = 0;
456:   b->row        = isrow;
457:   b->col        = iscol;
458:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
459:   PetscObjectReference((PetscObject)isrow);
460:   PetscObjectReference((PetscObject)iscol);
461:   b->icol       = isicol;
462:   PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);
463:   PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));
464: 
465:   b->maxnz = b->nz = bi[n] ;
466:   (B)->factortype            =  MAT_FACTOR_LU;
467:   (B)->info.factor_mallocs   = reallocs;
468:   (B)->info.fill_ratio_given = f;

470:   if (ai[n] != 0) {
471:     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
472:   } else {
473:     (B)->info.fill_ratio_needed = 0.0;
474:   }

476:   ISIdentity(isrow,&row_identity);
477:   ISIdentity(iscol,&col_identity);
478:   both_identity = (PetscBool) (row_identity && col_identity);
479:   MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);
480:   return(0);
481:  }