Actual source code: baijfact3.c

petsc-3.7.3 2016-08-01
Report Typos and Errors
  2: /*
  3:     Factorization code for BAIJ format.
  4: */
  5: #include <../src/mat/impls/baij/seq/baij.h>
  6: #include <petsc/private/kernels/blockinvert.h>

 10: /*
 11:    This is used to set the numeric factorization for both LU and ILU symbolic factorization
 12: */
 13: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool natural)
 14: {
 16:   if (natural) {
 17:     switch (fact->rmap->bs) {
 18:     case 1:
 19:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
 20:       break;
 21:     case 2:
 22:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
 23:       break;
 24:     case 3:
 25:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
 26:       break;
 27:     case 4:
 28:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
 29:       break;
 30:     case 5:
 31:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
 32:       break;
 33:     case 6:
 34:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
 35:       break;
 36:     case 7:
 37:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
 38:       break;
 39:     case 15:
 40:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
 41:       break;
 42:     default:
 43:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 44:       break;
 45:     }
 46:   } else {
 47:     switch (fact->rmap->bs) {
 48:     case 1:
 49:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
 50:       break;
 51:     case 2:
 52:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
 53:       break;
 54:     case 3:
 55:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
 56:       break;
 57:     case 4:
 58:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
 59:       break;
 60:     case 5:
 61:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
 62:       break;
 63:     case 6:
 64:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
 65:       break;
 66:     case 7:
 67:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
 68:       break;
 69:     default:
 70:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 71:       break;
 72:     }
 73:   }
 74:   return(0);
 75: }

 79: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool natural)
 80: {
 82:   if (natural) {
 83:     switch (inA->rmap->bs) {
 84:     case 1:
 85:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
 86:       break;
 87:     case 2:
 88:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
 89:       break;
 90:     case 3:
 91:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
 92:       break;
 93:     case 4:
 94: #if defined(PETSC_USE_REAL_MAT_SINGLE)
 95:       {
 96:         PetscBool      sse_enabled_local;
 98:         PetscSSEIsEnabled(inA->comm,&sse_enabled_local,NULL);
 99:         if (sse_enabled_local) {
100: #  if defined(PETSC_HAVE_SSE)
101:           int i,*AJ=a->j,nz=a->nz,n=a->mbs;
102:           if (n==(unsigned short)n) {
103:             unsigned short *aj=(unsigned short*)AJ;
104:             for (i=0; i<nz; i++) aj[i] = (unsigned short)AJ[i];

106:             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
107:             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;

109:             PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");
110:           } else {
111:             /* Scale the column indices for easier indexing in MatSolve. */
112: /*            for (i=0;i<nz;i++) { */
113: /*              AJ[i] = AJ[i]*4; */
114: /*            } */
115:             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
116:             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;

118:             PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");
119:           }
120: #  else
121:           /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
122:           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable");
123: #  endif
124:         } else {
125:           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
126:         }
127:       }
128: #else
129:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
130: #endif
131:       break;
132:     case 5:
133:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
134:       break;
135:     case 6:
136:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
137:       break;
138:     case 7:
139:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
140:       break;
141:     default:
142:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
143:       break;
144:     }
145:   } else {
146:     switch (inA->rmap->bs) {
147:     case 1:
148:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
149:       break;
150:     case 2:
151:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
152:       break;
153:     case 3:
154:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
155:       break;
156:     case 4:
157:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
158:       break;
159:     case 5:
160:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
161:       break;
162:     case 6:
163:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
164:       break;
165:     case 7:
166:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
167:       break;
168:     default:
169:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
170:       break;
171:     }
172:   }
173:   return(0);
174: }

176: /*
177:     The symbolic factorization code is identical to that for AIJ format,
178:   except for very small changes since this is now a SeqBAIJ datastructure.
179:   NOT good code reuse.
180: */
181: #include <petscbt.h>
182: #include <../src/mat/utils/freespace.h>

186: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
187: {
188:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
189:   PetscInt           n  =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
190:   PetscBool          row_identity,col_identity,both_identity;
191:   IS                 isicol;
192:   PetscErrorCode     ierr;
193:   const PetscInt     *r,*ic;
194:   PetscInt           i,*ai=a->i,*aj=a->j;
195:   PetscInt           *bi,*bj,*ajtmp;
196:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
197:   PetscReal          f;
198:   PetscInt           nlnk,*lnk,k,**bi_ptr;
199:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
200:   PetscBT            lnkbt;
201:   PetscBool          missing;

204:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
205:   MatMissingDiagonal(A,&missing,&i);
206:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);

208:   if (bs>1) {  /* check shifttype */
209:     if (info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
210:   }

212:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
213:   ISGetIndices(isrow,&r);
214:   ISGetIndices(isicol,&ic);

216:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
217:   PetscMalloc1(n+1,&bi);
218:   PetscMalloc1(n+1,&bdiag);
219:   bi[0] = bdiag[0] = 0;

221:   /* linked list for storing column indices of the active row */
222:   nlnk = n + 1;
223:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

225:   PetscMalloc2(n+1,&bi_ptr,n+1,&im);

227:   /* initial FreeSpace size is f*(ai[n]+1) */
228:   f    = info->fill;
229:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);

231:   current_space = free_space;

233:   for (i=0; i<n; i++) {
234:     /* copy previous fill into linked list */
235:     nzi = 0;
236:     nnz = ai[r[i]+1] - ai[r[i]];
237:     ajtmp = aj + ai[r[i]];
238:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
239:     nzi  += nlnk;

241:     /* add pivot rows into linked list */
242:     row = lnk[n];
243:     while (row < i) {
244:       nzbd  = bdiag[row] + 1;   /* num of entries in the row with column index <= row */
245:       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
246:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
247:       nzi  += nlnk;
248:       row   = lnk[row];
249:     }
250:     bi[i+1] = bi[i] + nzi;
251:     im[i]   = nzi;

253:     /* mark bdiag */
254:     nzbd = 0;
255:     nnz  = nzi;
256:     k    = lnk[n];
257:     while (nnz-- && k < i) {
258:       nzbd++;
259:       k = lnk[k];
260:     }
261:     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

263:     /* if free space is not available, make more free space */
264:     if (current_space->local_remaining<nzi) {
265:       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(n - i,nzi)); /* estimated and max additional space needed */
266:       PetscFreeSpaceGet(nnz,&current_space);
267:       reallocs++;
268:     }

270:     /* copy data into free space, then initialize lnk */
271:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);

273:     bi_ptr[i]                       = current_space->array;
274:     current_space->array           += nzi;
275:     current_space->local_used      += nzi;
276:     current_space->local_remaining -= nzi;
277:   }

279:   ISRestoreIndices(isrow,&r);
280:   ISRestoreIndices(isicol,&ic);

282:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
283:   PetscMalloc1(bi[n]+1,&bj);
284:   PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
285:   PetscLLDestroy(lnk,lnkbt);
286:   PetscFree2(bi_ptr,im);

288:   /* put together the new matrix */
289:   MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,NULL);
290:   PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
291:   b    = (Mat_SeqBAIJ*)(B)->data;

293:   b->free_a       = PETSC_TRUE;
294:   b->free_ij      = PETSC_TRUE;
295:   b->singlemalloc = PETSC_FALSE;

297:   PetscMalloc1((bdiag[0]+1)*bs2,&b->a);
298:   b->j             = bj;
299:   b->i             = bi;
300:   b->diag          = bdiag;
301:   b->free_diag     = PETSC_TRUE;
302:   b->ilen          = 0;
303:   b->imax          = 0;
304:   b->row           = isrow;
305:   b->col           = iscol;
306:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;

308:   PetscObjectReference((PetscObject)isrow);
309:   PetscObjectReference((PetscObject)iscol);
310:   b->icol = isicol;
311:   PetscMalloc1(bs*n+bs,&b->solve_work);
312:   PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));

314:   b->maxnz = b->nz = bdiag[0]+1;

316:   B->factortype            =  MAT_FACTOR_LU;
317:   B->info.factor_mallocs   = reallocs;
318:   B->info.fill_ratio_given = f;

320:   if (ai[n] != 0) {
321:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
322:   } else {
323:     B->info.fill_ratio_needed = 0.0;
324:   }
325: #if defined(PETSC_USE_INFO)
326:   if (ai[n] != 0) {
327:     PetscReal af = B->info.fill_ratio_needed;
328:     PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
329:     PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
330:     PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
331:     PetscInfo(A,"for best performance.\n");
332:   } else {
333:     PetscInfo(A,"Empty matrix\n");
334:   }
335: #endif

337:   ISIdentity(isrow,&row_identity);
338:   ISIdentity(iscol,&col_identity);

340:   both_identity = (PetscBool) (row_identity && col_identity);

342:   MatSeqBAIJSetNumericFactorization(B,both_identity);
343:   return(0);
344: }

348: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
349: {
350:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
351:   PetscInt           n  =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
352:   PetscBool          row_identity,col_identity,both_identity;
353:   IS                 isicol;
354:   PetscErrorCode     ierr;
355:   const PetscInt     *r,*ic;
356:   PetscInt           i,*ai=a->i,*aj=a->j;
357:   PetscInt           *bi,*bj,*ajtmp;
358:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
359:   PetscReal          f;
360:   PetscInt           nlnk,*lnk,k,**bi_ptr;
361:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
362:   PetscBT            lnkbt;
363:   PetscBool          missing;

366:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
367:   MatMissingDiagonal(A,&missing,&i);
368:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);

370:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
371:   ISGetIndices(isrow,&r);
372:   ISGetIndices(isicol,&ic);

374:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
375:   PetscMalloc1(n+1,&bi);
376:   PetscMalloc1(n+1,&bdiag);

378:   bi[0] = bdiag[0] = 0;

380:   /* linked list for storing column indices of the active row */
381:   nlnk = n + 1;
382:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

384:   PetscMalloc2(n+1,&bi_ptr,n+1,&im);

386:   /* initial FreeSpace size is f*(ai[n]+1) */
387:   f             = info->fill;
388:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
389:   current_space = free_space;

391:   for (i=0; i<n; i++) {
392:     /* copy previous fill into linked list */
393:     nzi = 0;
394:     nnz = ai[r[i]+1] - ai[r[i]];
395:     ajtmp = aj + ai[r[i]];
396:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
397:     nzi  += nlnk;

399:     /* add pivot rows into linked list */
400:     row = lnk[n];
401:     while (row < i) {
402:       nzbd  = bdiag[row] - bi[row] + 1;   /* num of entries in the row with column index <= row */
403:       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
404:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
405:       nzi  += nlnk;
406:       row   = lnk[row];
407:     }
408:     bi[i+1] = bi[i] + nzi;
409:     im[i]   = nzi;

411:     /* mark bdiag */
412:     nzbd = 0;
413:     nnz  = nzi;
414:     k    = lnk[n];
415:     while (nnz-- && k < i) {
416:       nzbd++;
417:       k = lnk[k];
418:     }
419:     bdiag[i] = bi[i] + nzbd;

421:     /* if free space is not available, make more free space */
422:     if (current_space->local_remaining<nzi) {
423:       nnz  = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
424:       PetscFreeSpaceGet(nnz,&current_space);
425:       reallocs++;
426:     }

428:     /* copy data into free space, then initialize lnk */
429:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);

431:     bi_ptr[i]                       = current_space->array;
432:     current_space->array           += nzi;
433:     current_space->local_used      += nzi;
434:     current_space->local_remaining -= nzi;
435:   }
436: #if defined(PETSC_USE_INFO)
437:   if (ai[n] != 0) {
438:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
439:     PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
440:     PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
441:     PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
442:     PetscInfo(A,"for best performance.\n");
443:   } else {
444:     PetscInfo(A,"Empty matrix\n");
445:   }
446: #endif

448:   ISRestoreIndices(isrow,&r);
449:   ISRestoreIndices(isicol,&ic);

451:   /* destroy list of free space and other temporary array(s) */
452:   PetscMalloc1(bi[n]+1,&bj);
453:   PetscFreeSpaceContiguous(&free_space,bj);
454:   PetscLLDestroy(lnk,lnkbt);
455:   PetscFree2(bi_ptr,im);

457:   /* put together the new matrix */
458:   MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,NULL);
459:   PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
460:   b    = (Mat_SeqBAIJ*)(B)->data;

462:   b->free_a       = PETSC_TRUE;
463:   b->free_ij      = PETSC_TRUE;
464:   b->singlemalloc = PETSC_FALSE;

466:   PetscMalloc1((bi[n]+1)*bs2,&b->a);
467:   b->j             = bj;
468:   b->i             = bi;
469:   b->diag          = bdiag;
470:   b->free_diag     = PETSC_TRUE;
471:   b->ilen          = 0;
472:   b->imax          = 0;
473:   b->row           = isrow;
474:   b->col           = iscol;
475:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;

477:   PetscObjectReference((PetscObject)isrow);
478:   PetscObjectReference((PetscObject)iscol);
479:   b->icol = isicol;

481:   PetscMalloc1(bs*n+bs,&b->solve_work);
482:   PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));

484:   b->maxnz = b->nz = bi[n];

486:   (B)->factortype            =  MAT_FACTOR_LU;
487:   (B)->info.factor_mallocs   = reallocs;
488:   (B)->info.fill_ratio_given = f;

490:   if (ai[n] != 0) {
491:     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
492:   } else {
493:     (B)->info.fill_ratio_needed = 0.0;
494:   }

496:   ISIdentity(isrow,&row_identity);
497:   ISIdentity(iscol,&col_identity);

499:   both_identity = (PetscBool) (row_identity && col_identity);

501:   MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);
502:   return(0);
503: }