Actual source code: baijfact3.c

petsc-3.8.4 2018-03-24
Report Typos and Errors

  2: /*
  3:     Factorization code for BAIJ format.
  4: */
  5:  #include <../src/mat/impls/baij/seq/baij.h>
  6:  #include <petsc/private/kernels/blockinvert.h>

  8: /*
  9:    This is used to set the numeric factorization for both LU and ILU symbolic factorization
 10: */
 11: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool natural)
 12: {
 14:   if (natural) {
 15:     switch (fact->rmap->bs) {
 16:     case 1:
 17:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
 18:       break;
 19:     case 2:
 20:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
 21:       break;
 22:     case 3:
 23:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
 24:       break;
 25:     case 4:
 26:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
 27:       break;
 28:     case 5:
 29:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
 30:       break;
 31:     case 6:
 32:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
 33:       break;
 34:     case 7:
 35:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
 36:       break;
 37:     case 15:
 38:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
 39:       break;
 40:     default:
 41:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 42:       break;
 43:     }
 44:   } else {
 45:     switch (fact->rmap->bs) {
 46:     case 1:
 47:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
 48:       break;
 49:     case 2:
 50:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
 51:       break;
 52:     case 3:
 53:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
 54:       break;
 55:     case 4:
 56:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
 57:       break;
 58:     case 5:
 59:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
 60:       break;
 61:     case 6:
 62:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
 63:       break;
 64:     case 7:
 65:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
 66:       break;
 67:     default:
 68:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 69:       break;
 70:     }
 71:   }
 72:   return(0);
 73: }

 75: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool natural)
 76: {
 78:   if (natural) {
 79:     switch (inA->rmap->bs) {
 80:     case 1:
 81:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
 82:       break;
 83:     case 2:
 84:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
 85:       break;
 86:     case 3:
 87:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
 88:       break;
 89:     case 4:
 90: #if defined(PETSC_USE_REAL_MAT_SINGLE)
 91:       {
 92:         PetscBool      sse_enabled_local;
 94:         PetscSSEIsEnabled(inA->comm,&sse_enabled_local,NULL);
 95:         if (sse_enabled_local) {
 96: #  if defined(PETSC_HAVE_SSE)
 97:           int i,*AJ=a->j,nz=a->nz,n=a->mbs;
 98:           if (n==(unsigned short)n) {
 99:             unsigned short *aj=(unsigned short*)AJ;
100:             for (i=0; i<nz; i++) aj[i] = (unsigned short)AJ[i];

102:             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
103:             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;

105:             PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");
106:           } else {
107:             /* Scale the column indices for easier indexing in MatSolve. */
108: /*            for (i=0;i<nz;i++) { */
109: /*              AJ[i] = AJ[i]*4; */
110: /*            } */
111:             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
112:             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;

114:             PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");
115:           }
116: #  else
117:           /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
118:           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable");
119: #  endif
120:         } else {
121:           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
122:         }
123:       }
124: #else
125:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
126: #endif
127:       break;
128:     case 5:
129:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
130:       break;
131:     case 6:
132:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
133:       break;
134:     case 7:
135:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
136:       break;
137:     default:
138:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
139:       break;
140:     }
141:   } else {
142:     switch (inA->rmap->bs) {
143:     case 1:
144:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
145:       break;
146:     case 2:
147:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
148:       break;
149:     case 3:
150:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
151:       break;
152:     case 4:
153:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
154:       break;
155:     case 5:
156:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
157:       break;
158:     case 6:
159:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
160:       break;
161:     case 7:
162:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
163:       break;
164:     default:
165:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
166:       break;
167:     }
168:   }
169:   return(0);
170: }

172: /*
173:     The symbolic factorization code is identical to that for AIJ format,
174:   except for very small changes since this is now a SeqBAIJ datastructure.
175:   NOT good code reuse.
176: */
177:  #include <petscbt.h>
178:  #include <../src/mat/utils/freespace.h>

180: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
181: {
182:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
183:   PetscInt           n  =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
184:   PetscBool          row_identity,col_identity,both_identity;
185:   IS                 isicol;
186:   PetscErrorCode     ierr;
187:   const PetscInt     *r,*ic;
188:   PetscInt           i,*ai=a->i,*aj=a->j;
189:   PetscInt           *bi,*bj,*ajtmp;
190:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
191:   PetscReal          f;
192:   PetscInt           nlnk,*lnk,k,**bi_ptr;
193:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
194:   PetscBT            lnkbt;
195:   PetscBool          missing;

198:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
199:   MatMissingDiagonal(A,&missing,&i);
200:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);

202:   if (bs>1) {  /* check shifttype */
203:     if (info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
204:   }

206:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
207:   ISGetIndices(isrow,&r);
208:   ISGetIndices(isicol,&ic);

210:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
211:   PetscMalloc1(n+1,&bi);
212:   PetscMalloc1(n+1,&bdiag);
213:   bi[0] = bdiag[0] = 0;

215:   /* linked list for storing column indices of the active row */
216:   nlnk = n + 1;
217:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

219:   PetscMalloc2(n+1,&bi_ptr,n+1,&im);

221:   /* initial FreeSpace size is f*(ai[n]+1) */
222:   f    = info->fill;
223:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);

225:   current_space = free_space;

227:   for (i=0; i<n; i++) {
228:     /* copy previous fill into linked list */
229:     nzi = 0;
230:     nnz = ai[r[i]+1] - ai[r[i]];
231:     ajtmp = aj + ai[r[i]];
232:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
233:     nzi  += nlnk;

235:     /* add pivot rows into linked list */
236:     row = lnk[n];
237:     while (row < i) {
238:       nzbd  = bdiag[row] + 1;   /* num of entries in the row with column index <= row */
239:       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
240:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
241:       nzi  += nlnk;
242:       row   = lnk[row];
243:     }
244:     bi[i+1] = bi[i] + nzi;
245:     im[i]   = nzi;

247:     /* mark bdiag */
248:     nzbd = 0;
249:     nnz  = nzi;
250:     k    = lnk[n];
251:     while (nnz-- && k < i) {
252:       nzbd++;
253:       k = lnk[k];
254:     }
255:     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

257:     /* if free space is not available, make more free space */
258:     if (current_space->local_remaining<nzi) {
259:       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(n - i,nzi)); /* estimated and max additional space needed */
260:       PetscFreeSpaceGet(nnz,&current_space);
261:       reallocs++;
262:     }

264:     /* copy data into free space, then initialize lnk */
265:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);

267:     bi_ptr[i]                       = current_space->array;
268:     current_space->array           += nzi;
269:     current_space->local_used      += nzi;
270:     current_space->local_remaining -= nzi;
271:   }

273:   ISRestoreIndices(isrow,&r);
274:   ISRestoreIndices(isicol,&ic);

276:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
277:   PetscMalloc1(bi[n]+1,&bj);
278:   PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
279:   PetscLLDestroy(lnk,lnkbt);
280:   PetscFree2(bi_ptr,im);

282:   /* put together the new matrix */
283:   MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);
284:   PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
285:   b    = (Mat_SeqBAIJ*)(B)->data;

287:   b->free_a       = PETSC_TRUE;
288:   b->free_ij      = PETSC_TRUE;
289:   b->singlemalloc = PETSC_FALSE;

291:   PetscMalloc1((bdiag[0]+1)*bs2,&b->a);
292:   b->j             = bj;
293:   b->i             = bi;
294:   b->diag          = bdiag;
295:   b->free_diag     = PETSC_TRUE;
296:   b->ilen          = 0;
297:   b->imax          = 0;
298:   b->row           = isrow;
299:   b->col           = iscol;
300:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;

302:   PetscObjectReference((PetscObject)isrow);
303:   PetscObjectReference((PetscObject)iscol);
304:   b->icol = isicol;
305:   PetscMalloc1(bs*n+bs,&b->solve_work);
306:   PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));

308:   b->maxnz = b->nz = bdiag[0]+1;

310:   B->factortype            =  MAT_FACTOR_LU;
311:   B->info.factor_mallocs   = reallocs;
312:   B->info.fill_ratio_given = f;

314:   if (ai[n] != 0) {
315:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
316:   } else {
317:     B->info.fill_ratio_needed = 0.0;
318:   }
319: #if defined(PETSC_USE_INFO)
320:   if (ai[n] != 0) {
321:     PetscReal af = B->info.fill_ratio_needed;
322:     PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
323:     PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
324:     PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
325:     PetscInfo(A,"for best performance.\n");
326:   } else {
327:     PetscInfo(A,"Empty matrix\n");
328:   }
329: #endif

331:   ISIdentity(isrow,&row_identity);
332:   ISIdentity(iscol,&col_identity);

334:   both_identity = (PetscBool) (row_identity && col_identity);

336:   MatSeqBAIJSetNumericFactorization(B,both_identity);
337:   return(0);
338: }

340: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
341: {
342:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
343:   PetscInt           n  =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
344:   PetscBool          row_identity,col_identity,both_identity;
345:   IS                 isicol;
346:   PetscErrorCode     ierr;
347:   const PetscInt     *r,*ic;
348:   PetscInt           i,*ai=a->i,*aj=a->j;
349:   PetscInt           *bi,*bj,*ajtmp;
350:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
351:   PetscReal          f;
352:   PetscInt           nlnk,*lnk,k,**bi_ptr;
353:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
354:   PetscBT            lnkbt;
355:   PetscBool          missing;

358:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
359:   MatMissingDiagonal(A,&missing,&i);
360:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);

362:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
363:   ISGetIndices(isrow,&r);
364:   ISGetIndices(isicol,&ic);

366:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
367:   PetscMalloc1(n+1,&bi);
368:   PetscMalloc1(n+1,&bdiag);

370:   bi[0] = bdiag[0] = 0;

372:   /* linked list for storing column indices of the active row */
373:   nlnk = n + 1;
374:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

376:   PetscMalloc2(n+1,&bi_ptr,n+1,&im);

378:   /* initial FreeSpace size is f*(ai[n]+1) */
379:   f             = info->fill;
380:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
381:   current_space = free_space;

383:   for (i=0; i<n; i++) {
384:     /* copy previous fill into linked list */
385:     nzi = 0;
386:     nnz = ai[r[i]+1] - ai[r[i]];
387:     ajtmp = aj + ai[r[i]];
388:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
389:     nzi  += nlnk;

391:     /* add pivot rows into linked list */
392:     row = lnk[n];
393:     while (row < i) {
394:       nzbd  = bdiag[row] - bi[row] + 1;   /* num of entries in the row with column index <= row */
395:       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
396:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
397:       nzi  += nlnk;
398:       row   = lnk[row];
399:     }
400:     bi[i+1] = bi[i] + nzi;
401:     im[i]   = nzi;

403:     /* mark bdiag */
404:     nzbd = 0;
405:     nnz  = nzi;
406:     k    = lnk[n];
407:     while (nnz-- && k < i) {
408:       nzbd++;
409:       k = lnk[k];
410:     }
411:     bdiag[i] = bi[i] + nzbd;

413:     /* if free space is not available, make more free space */
414:     if (current_space->local_remaining<nzi) {
415:       nnz  = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
416:       PetscFreeSpaceGet(nnz,&current_space);
417:       reallocs++;
418:     }

420:     /* copy data into free space, then initialize lnk */
421:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);

423:     bi_ptr[i]                       = current_space->array;
424:     current_space->array           += nzi;
425:     current_space->local_used      += nzi;
426:     current_space->local_remaining -= nzi;
427:   }
428: #if defined(PETSC_USE_INFO)
429:   if (ai[n] != 0) {
430:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
431:     PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
432:     PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
433:     PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
434:     PetscInfo(A,"for best performance.\n");
435:   } else {
436:     PetscInfo(A,"Empty matrix\n");
437:   }
438: #endif

440:   ISRestoreIndices(isrow,&r);
441:   ISRestoreIndices(isicol,&ic);

443:   /* destroy list of free space and other temporary array(s) */
444:   PetscMalloc1(bi[n]+1,&bj);
445:   PetscFreeSpaceContiguous(&free_space,bj);
446:   PetscLLDestroy(lnk,lnkbt);
447:   PetscFree2(bi_ptr,im);

449:   /* put together the new matrix */
450:   MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);
451:   PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
452:   b    = (Mat_SeqBAIJ*)(B)->data;

454:   b->free_a       = PETSC_TRUE;
455:   b->free_ij      = PETSC_TRUE;
456:   b->singlemalloc = PETSC_FALSE;

458:   PetscMalloc1((bi[n]+1)*bs2,&b->a);
459:   b->j             = bj;
460:   b->i             = bi;
461:   b->diag          = bdiag;
462:   b->free_diag     = PETSC_TRUE;
463:   b->ilen          = 0;
464:   b->imax          = 0;
465:   b->row           = isrow;
466:   b->col           = iscol;
467:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;

469:   PetscObjectReference((PetscObject)isrow);
470:   PetscObjectReference((PetscObject)iscol);
471:   b->icol = isicol;

473:   PetscMalloc1(bs*n+bs,&b->solve_work);
474:   PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));

476:   b->maxnz = b->nz = bi[n];

478:   (B)->factortype            =  MAT_FACTOR_LU;
479:   (B)->info.factor_mallocs   = reallocs;
480:   (B)->info.fill_ratio_given = f;

482:   if (ai[n] != 0) {
483:     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
484:   } else {
485:     (B)->info.fill_ratio_needed = 0.0;
486:   }

488:   ISIdentity(isrow,&row_identity);
489:   ISIdentity(iscol,&col_identity);

491:   both_identity = (PetscBool) (row_identity && col_identity);

493:   MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);
494:   return(0);
495: }