Actual source code: baijfact3.c

petsc-3.4.5 2014-06-29
  2: /*
  3:     Factorization code for BAIJ format.
  4: */
  5: #include <../src/mat/impls/baij/seq/baij.h>
  6: #include <petsc-private/kernels/blockinvert.h>

 10: /*
 11:    This is used to set the numeric factorization for both LU and ILU symbolic factorization
 12: */
 13: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool natural)
 14: {
 16:   if (natural) {
 17:     switch (fact->rmap->bs) {
 18:     case 1:
 19:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
 20:       break;
 21:     case 2:
 22:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
 23:       break;
 24:     case 3:
 25:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
 26:       break;
 27:     case 4:
 28:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
 29:       break;
 30:     case 5:
 31:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
 32:       break;
 33:     case 6:
 34:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
 35:       break;
 36:     case 7:
 37:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
 38:       break;
 39:     case 15:
 40:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
 41:       break;
 42:     default:
 43:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 44:       break;
 45:     }
 46:   } else {
 47:     switch (fact->rmap->bs) {
 48:     case 1:
 49:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
 50:       break;
 51:     case 2:
 52:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
 53:       break;
 54:     case 3:
 55:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
 56:       break;
 57:     case 4:
 58:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
 59:       break;
 60:     case 5:
 61:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
 62:       break;
 63:     case 6:
 64:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
 65:       break;
 66:     case 7:
 67:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
 68:       break;
 69:     default:
 70:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 71:       break;
 72:     }
 73:   }
 74:   return(0);
 75: }

 79: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool natural)
 80: {
 82:   if (natural) {
 83:     switch (inA->rmap->bs) {
 84:     case 1:
 85:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
 86:       break;
 87:     case 2:
 88:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
 89:       break;
 90:     case 3:
 91:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
 92:       break;
 93:     case 4:
 94: #if defined(PETSC_USE_REAL_MAT_SINGLE)
 95:       {
 96:         PetscBool      sse_enabled_local;
 98:         PetscSSEIsEnabled(inA->comm,&sse_enabled_local,NULL);
 99:         if (sse_enabled_local) {
100: #  if defined(PETSC_HAVE_SSE)
101:           int i,*AJ=a->j,nz=a->nz,n=a->mbs;
102:           if (n==(unsigned short)n) {
103:             unsigned short *aj=(unsigned short*)AJ;
104:             for (i=0; i<nz; i++) aj[i] = (unsigned short)AJ[i];

106:             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
107:             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;

109:             PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");
110:           } else {
111:             /* Scale the column indices for easier indexing in MatSolve. */
112: /*            for (i=0;i<nz;i++) { */
113: /*              AJ[i] = AJ[i]*4; */
114: /*            } */
115:             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
116:             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;

118:             PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");
119:           }
120: #  else
121:           /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
122:           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable");
123: #  endif
124:         } else {
125:           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
126:         }
127:       }
128: #else
129:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
130: #endif
131:       break;
132:     case 5:
133:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
134:       break;
135:     case 6:
136:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
137:       break;
138:     case 7:
139:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
140:       break;
141:     default:
142:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
143:       break;
144:     }
145:   } else {
146:     switch (inA->rmap->bs) {
147:     case 1:
148:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
149:       break;
150:     case 2:
151:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
152:       break;
153:     case 3:
154:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
155:       break;
156:     case 4:
157:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
158:       break;
159:     case 5:
160:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
161:       break;
162:     case 6:
163:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
164:       break;
165:     case 7:
166:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
167:       break;
168:     default:
169:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
170:       break;
171:     }
172:   }
173:   return(0);
174: }

176: /*
177:     The symbolic factorization code is identical to that for AIJ format,
178:   except for very small changes since this is now a SeqBAIJ datastructure.
179:   NOT good code reuse.
180: */
181: #include <petscbt.h>
182: #include <../src/mat/utils/freespace.h>

186: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
187: {
188:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
189:   PetscInt           n  =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
190:   PetscBool          row_identity,col_identity,both_identity;
191:   IS                 isicol;
192:   PetscErrorCode     ierr;
193:   const PetscInt     *r,*ic;
194:   PetscInt           i,*ai=a->i,*aj=a->j;
195:   PetscInt           *bi,*bj,*ajtmp;
196:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
197:   PetscReal          f;
198:   PetscInt           nlnk,*lnk,k,**bi_ptr;
199:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
200:   PetscBT            lnkbt;

203:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
204:   if (bs>1) {  /* check shifttype */
205:     if (info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
206:   }

208:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
209:   ISGetIndices(isrow,&r);
210:   ISGetIndices(isicol,&ic);

212:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
213:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
214:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
215:   bi[0] = bdiag[0] = 0;

217:   /* linked list for storing column indices of the active row */
218:   nlnk = n + 1;
219:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

221:   PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);

223:   /* initial FreeSpace size is f*(ai[n]+1) */
224:   f    = info->fill;
225:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);

227:   current_space = free_space;

229:   for (i=0; i<n; i++) {
230:     /* copy previous fill into linked list */
231:     nzi = 0;
232:     nnz = ai[r[i]+1] - ai[r[i]];
233:     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
234:     ajtmp = aj + ai[r[i]];
235:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
236:     nzi  += nlnk;

238:     /* add pivot rows into linked list */
239:     row = lnk[n];
240:     while (row < i) {
241:       nzbd  = bdiag[row] + 1;   /* num of entries in the row with column index <= row */
242:       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
243:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
244:       nzi  += nlnk;
245:       row   = lnk[row];
246:     }
247:     bi[i+1] = bi[i] + nzi;
248:     im[i]   = nzi;

250:     /* mark bdiag */
251:     nzbd = 0;
252:     nnz  = nzi;
253:     k    = lnk[n];
254:     while (nnz-- && k < i) {
255:       nzbd++;
256:       k = lnk[k];
257:     }
258:     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

260:     /* if free space is not available, make more free space */
261:     if (current_space->local_remaining<nzi) {
262:       nnz  = 2*(n - i)*nzi; /* estimated and max additional space needed */
263:       PetscFreeSpaceGet(nnz,&current_space);
264:       reallocs++;
265:     }

267:     /* copy data into free space, then initialize lnk */
268:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);

270:     bi_ptr[i]                       = current_space->array;
271:     current_space->array           += nzi;
272:     current_space->local_used      += nzi;
273:     current_space->local_remaining -= nzi;
274:   }

276:   ISRestoreIndices(isrow,&r);
277:   ISRestoreIndices(isicol,&ic);

279:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
280:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
281:   PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
282:   PetscLLDestroy(lnk,lnkbt);
283:   PetscFree2(bi_ptr,im);

285:   /* put together the new matrix */
286:   MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,NULL);
287:   PetscLogObjectParent(B,isicol);
288:   b    = (Mat_SeqBAIJ*)(B)->data;

290:   b->free_a       = PETSC_TRUE;
291:   b->free_ij      = PETSC_TRUE;
292:   b->singlemalloc = PETSC_FALSE;

294:   PetscMalloc((bdiag[0]+1)*sizeof(MatScalar)*bs2,&b->a);
295:   b->j             = bj;
296:   b->i             = bi;
297:   b->diag          = bdiag;
298:   b->free_diag     = PETSC_TRUE;
299:   b->ilen          = 0;
300:   b->imax          = 0;
301:   b->row           = isrow;
302:   b->col           = iscol;
303:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;

305:   PetscObjectReference((PetscObject)isrow);
306:   PetscObjectReference((PetscObject)iscol);
307:   b->icol = isicol;
308:   PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);
309:   PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));

311:   b->maxnz = b->nz = bdiag[0]+1;

313:   B->factortype            =  MAT_FACTOR_LU;
314:   B->info.factor_mallocs   = reallocs;
315:   B->info.fill_ratio_given = f;

317:   if (ai[n] != 0) {
318:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
319:   } else {
320:     B->info.fill_ratio_needed = 0.0;
321:   }
322: #if defined(PETSC_USE_INFO)
323:   if (ai[n] != 0) {
324:     PetscReal af = B->info.fill_ratio_needed;
325:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
326:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
327:     PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
328:     PetscInfo(A,"for best performance.\n");
329:   } else {
330:     PetscInfo(A,"Empty matrix\n");
331:   }
332: #endif

334:   ISIdentity(isrow,&row_identity);
335:   ISIdentity(iscol,&col_identity);

337:   both_identity = (PetscBool) (row_identity && col_identity);

339:   MatSeqBAIJSetNumericFactorization(B,both_identity);
340:   return(0);
341: }

345: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
346: {
347:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
348:   PetscInt           n  =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
349:   PetscBool          row_identity,col_identity,both_identity;
350:   IS                 isicol;
351:   PetscErrorCode     ierr;
352:   const PetscInt     *r,*ic;
353:   PetscInt           i,*ai=a->i,*aj=a->j;
354:   PetscInt           *bi,*bj,*ajtmp;
355:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
356:   PetscReal          f;
357:   PetscInt           nlnk,*lnk,k,**bi_ptr;
358:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
359:   PetscBT            lnkbt;

362:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
363:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
364:   ISGetIndices(isrow,&r);
365:   ISGetIndices(isicol,&ic);

367:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
368:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
369:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);

371:   bi[0] = bdiag[0] = 0;

373:   /* linked list for storing column indices of the active row */
374:   nlnk = n + 1;
375:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

377:   PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);

379:   /* initial FreeSpace size is f*(ai[n]+1) */
380:   f             = info->fill;
381:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
382:   current_space = free_space;

384:   for (i=0; i<n; i++) {
385:     /* copy previous fill into linked list */
386:     nzi = 0;
387:     nnz = ai[r[i]+1] - ai[r[i]];
388:     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
389:     ajtmp = aj + ai[r[i]];
390:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
391:     nzi  += nlnk;

393:     /* add pivot rows into linked list */
394:     row = lnk[n];
395:     while (row < i) {
396:       nzbd  = bdiag[row] - bi[row] + 1;   /* num of entries in the row with column index <= row */
397:       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
398:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
399:       nzi  += nlnk;
400:       row   = lnk[row];
401:     }
402:     bi[i+1] = bi[i] + nzi;
403:     im[i]   = nzi;

405:     /* mark bdiag */
406:     nzbd = 0;
407:     nnz  = nzi;
408:     k    = lnk[n];
409:     while (nnz-- && k < i) {
410:       nzbd++;
411:       k = lnk[k];
412:     }
413:     bdiag[i] = bi[i] + nzbd;

415:     /* if free space is not available, make more free space */
416:     if (current_space->local_remaining<nzi) {
417:       nnz  = (n - i)*nzi; /* estimated and max additional space needed */
418:       PetscFreeSpaceGet(nnz,&current_space);
419:       reallocs++;
420:     }

422:     /* copy data into free space, then initialize lnk */
423:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);

425:     bi_ptr[i]                       = current_space->array;
426:     current_space->array           += nzi;
427:     current_space->local_used      += nzi;
428:     current_space->local_remaining -= nzi;
429:   }
430: #if defined(PETSC_USE_INFO)
431:   if (ai[n] != 0) {
432:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
433:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
434:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
435:     PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
436:     PetscInfo(A,"for best performance.\n");
437:   } else {
438:     PetscInfo(A,"Empty matrix\n");
439:   }
440: #endif

442:   ISRestoreIndices(isrow,&r);
443:   ISRestoreIndices(isicol,&ic);

445:   /* destroy list of free space and other temporary array(s) */
446:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
447:   PetscFreeSpaceContiguous(&free_space,bj);
448:   PetscLLDestroy(lnk,lnkbt);
449:   PetscFree2(bi_ptr,im);

451:   /* put together the new matrix */
452:   MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,NULL);
453:   PetscLogObjectParent(B,isicol);
454:   b    = (Mat_SeqBAIJ*)(B)->data;

456:   b->free_a       = PETSC_TRUE;
457:   b->free_ij      = PETSC_TRUE;
458:   b->singlemalloc = PETSC_FALSE;

460:   PetscMalloc((bi[n]+1)*sizeof(MatScalar)*bs2,&b->a);
461:   b->j             = bj;
462:   b->i             = bi;
463:   b->diag          = bdiag;
464:   b->free_diag     = PETSC_TRUE;
465:   b->ilen          = 0;
466:   b->imax          = 0;
467:   b->row           = isrow;
468:   b->col           = iscol;
469:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;

471:   PetscObjectReference((PetscObject)isrow);
472:   PetscObjectReference((PetscObject)iscol);
473:   b->icol = isicol;

475:   PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);
476:   PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));

478:   b->maxnz = b->nz = bi[n];

480:   (B)->factortype            =  MAT_FACTOR_LU;
481:   (B)->info.factor_mallocs   = reallocs;
482:   (B)->info.fill_ratio_given = f;

484:   if (ai[n] != 0) {
485:     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
486:   } else {
487:     (B)->info.fill_ratio_needed = 0.0;
488:   }

490:   ISIdentity(isrow,&row_identity);
491:   ISIdentity(iscol,&col_identity);

493:   both_identity = (PetscBool) (row_identity && col_identity);

495:   MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);
496:   return(0);
497: }