Actual source code: baijfact3.c

petsc-3.11.4 2019-09-28
Report Typos and Errors

  2: /*
  3:     Factorization code for BAIJ format.
  4: */
  5:  #include <../src/mat/impls/baij/seq/baij.h>
  6:  #include <petsc/private/kernels/blockinvert.h>

  8: /*
  9:    This is used to set the numeric factorization for both LU and ILU symbolic factorization
 10: */
 11: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool natural)
 12: {
 14:   if (natural) {
 15:     switch (fact->rmap->bs) {
 16:     case 1:
 17:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
 18:       break;
 19:     case 2:
 20:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
 21:       break;
 22:     case 3:
 23:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
 24:       break;
 25:     case 4:
 26:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
 27:       break;
 28:     case 5:
 29:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
 30:       break;
 31:     case 6:
 32:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
 33:       break;
 34:     case 7:
 35:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
 36:       break;
 37:     case 9:
 38: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
 39:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
 40: #else
 41:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 42: #endif
 43:       break;
 44:     case 15:
 45:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
 46:       break;
 47:     default:
 48:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 49:       break;
 50:     }
 51:   } else {
 52:     switch (fact->rmap->bs) {
 53:     case 1:
 54:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
 55:       break;
 56:     case 2:
 57:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
 58:       break;
 59:     case 3:
 60:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
 61:       break;
 62:     case 4:
 63:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
 64:       break;
 65:     case 5:
 66:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
 67:       break;
 68:     case 6:
 69:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
 70:       break;
 71:     case 7:
 72:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
 73:       break;
 74:     default:
 75:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 76:       break;
 77:     }
 78:   }
 79:   return(0);
 80: }

 82: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool natural)
 83: {
 85:   if (natural) {
 86:     switch (inA->rmap->bs) {
 87:     case 1:
 88:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
 89:       break;
 90:     case 2:
 91:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
 92:       break;
 93:     case 3:
 94:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
 95:       break;
 96:     case 4:
 97: #if defined(PETSC_USE_REAL_MAT_SINGLE)
 98:       {
 99:         PetscBool      sse_enabled_local;
101:         PetscSSEIsEnabled(inA->comm,&sse_enabled_local,NULL);
102:         if (sse_enabled_local) {
103: #  if defined(PETSC_HAVE_SSE)
104:           int i,*AJ=a->j,nz=a->nz,n=a->mbs;
105:           if (n==(unsigned short)n) {
106:             unsigned short *aj=(unsigned short*)AJ;
107:             for (i=0; i<nz; i++) aj[i] = (unsigned short)AJ[i];

109:             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
110:             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;

112:             PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");
113:           } else {
114:             /* Scale the column indices for easier indexing in MatSolve. */
115: /*            for (i=0;i<nz;i++) { */
116: /*              AJ[i] = AJ[i]*4; */
117: /*            } */
118:             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
119:             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;

121:             PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");
122:           }
123: #  else
124:           /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
125:           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable");
126: #  endif
127:         } else {
128:           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
129:         }
130:       }
131: #else
132:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
133: #endif
134:       break;
135:     case 5:
136:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
137:       break;
138:     case 6:
139:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
140:       break;
141:     case 7:
142:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
143:       break;
144:     default:
145:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
146:       break;
147:     }
148:   } else {
149:     switch (inA->rmap->bs) {
150:     case 1:
151:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
152:       break;
153:     case 2:
154:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
155:       break;
156:     case 3:
157:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
158:       break;
159:     case 4:
160:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
161:       break;
162:     case 5:
163:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
164:       break;
165:     case 6:
166:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
167:       break;
168:     case 7:
169:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
170:       break;
171:     default:
172:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
173:       break;
174:     }
175:   }
176:   return(0);
177: }

179: /*
180:     The symbolic factorization code is identical to that for AIJ format,
181:   except for very small changes since this is now a SeqBAIJ datastructure.
182:   NOT good code reuse.
183: */
184:  #include <petscbt.h>
185:  #include <../src/mat/utils/freespace.h>

187: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
188: {
189:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
190:   PetscInt           n  =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
191:   PetscBool          row_identity,col_identity,both_identity;
192:   IS                 isicol;
193:   PetscErrorCode     ierr;
194:   const PetscInt     *r,*ic;
195:   PetscInt           i,*ai=a->i,*aj=a->j;
196:   PetscInt           *bi,*bj,*ajtmp;
197:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
198:   PetscReal          f;
199:   PetscInt           nlnk,*lnk,k,**bi_ptr;
200:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
201:   PetscBT            lnkbt;
202:   PetscBool          missing;

205:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
206:   MatMissingDiagonal(A,&missing,&i);
207:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);

209:   if (bs>1) {  /* check shifttype */
210:     if (info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
211:   }

213:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
214:   ISGetIndices(isrow,&r);
215:   ISGetIndices(isicol,&ic);

217:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
218:   PetscMalloc1(n+1,&bi);
219:   PetscMalloc1(n+1,&bdiag);
220:   bi[0] = bdiag[0] = 0;

222:   /* linked list for storing column indices of the active row */
223:   nlnk = n + 1;
224:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

226:   PetscMalloc2(n+1,&bi_ptr,n+1,&im);

228:   /* initial FreeSpace size is f*(ai[n]+1) */
229:   f    = info->fill;
230:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);

232:   current_space = free_space;

234:   for (i=0; i<n; i++) {
235:     /* copy previous fill into linked list */
236:     nzi = 0;
237:     nnz = ai[r[i]+1] - ai[r[i]];
238:     ajtmp = aj + ai[r[i]];
239:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
240:     nzi  += nlnk;

242:     /* add pivot rows into linked list */
243:     row = lnk[n];
244:     while (row < i) {
245:       nzbd  = bdiag[row] + 1;   /* num of entries in the row with column index <= row */
246:       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
247:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
248:       nzi  += nlnk;
249:       row   = lnk[row];
250:     }
251:     bi[i+1] = bi[i] + nzi;
252:     im[i]   = nzi;

254:     /* mark bdiag */
255:     nzbd = 0;
256:     nnz  = nzi;
257:     k    = lnk[n];
258:     while (nnz-- && k < i) {
259:       nzbd++;
260:       k = lnk[k];
261:     }
262:     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

264:     /* if free space is not available, make more free space */
265:     if (current_space->local_remaining<nzi) {
266:       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(n - i,nzi)); /* estimated and max additional space needed */
267:       PetscFreeSpaceGet(nnz,&current_space);
268:       reallocs++;
269:     }

271:     /* copy data into free space, then initialize lnk */
272:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);

274:     bi_ptr[i]                       = current_space->array;
275:     current_space->array           += nzi;
276:     current_space->local_used      += nzi;
277:     current_space->local_remaining -= nzi;
278:   }

280:   ISRestoreIndices(isrow,&r);
281:   ISRestoreIndices(isicol,&ic);

283:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
284:   PetscMalloc1(bi[n]+1,&bj);
285:   PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
286:   PetscLLDestroy(lnk,lnkbt);
287:   PetscFree2(bi_ptr,im);

289:   /* put together the new matrix */
290:   MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);
291:   PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
292:   b    = (Mat_SeqBAIJ*)(B)->data;

294:   b->free_a       = PETSC_TRUE;
295:   b->free_ij      = PETSC_TRUE;
296:   b->singlemalloc = PETSC_FALSE;

298:   PetscMalloc1((bdiag[0]+1)*bs2,&b->a);
299:   b->j             = bj;
300:   b->i             = bi;
301:   b->diag          = bdiag;
302:   b->free_diag     = PETSC_TRUE;
303:   b->ilen          = 0;
304:   b->imax          = 0;
305:   b->row           = isrow;
306:   b->col           = iscol;
307:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;

309:   PetscObjectReference((PetscObject)isrow);
310:   PetscObjectReference((PetscObject)iscol);
311:   b->icol = isicol;
312:   PetscMalloc1(bs*n+bs,&b->solve_work);
313:   PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));

315:   b->maxnz = b->nz = bdiag[0]+1;

317:   B->factortype            =  MAT_FACTOR_LU;
318:   B->info.factor_mallocs   = reallocs;
319:   B->info.fill_ratio_given = f;

321:   if (ai[n] != 0) {
322:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
323:   } else {
324:     B->info.fill_ratio_needed = 0.0;
325:   }
326: #if defined(PETSC_USE_INFO)
327:   if (ai[n] != 0) {
328:     PetscReal af = B->info.fill_ratio_needed;
329:     PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
330:     PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
331:     PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
332:     PetscInfo(A,"for best performance.\n");
333:   } else {
334:     PetscInfo(A,"Empty matrix\n");
335:   }
336: #endif

338:   ISIdentity(isrow,&row_identity);
339:   ISIdentity(iscol,&col_identity);

341:   both_identity = (PetscBool) (row_identity && col_identity);

343:   MatSeqBAIJSetNumericFactorization(B,both_identity);
344:   return(0);
345: }

347: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
348: {
349:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
350:   PetscInt           n  =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
351:   PetscBool          row_identity,col_identity,both_identity;
352:   IS                 isicol;
353:   PetscErrorCode     ierr;
354:   const PetscInt     *r,*ic;
355:   PetscInt           i,*ai=a->i,*aj=a->j;
356:   PetscInt           *bi,*bj,*ajtmp;
357:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
358:   PetscReal          f;
359:   PetscInt           nlnk,*lnk,k,**bi_ptr;
360:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
361:   PetscBT            lnkbt;
362:   PetscBool          missing;

365:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
366:   MatMissingDiagonal(A,&missing,&i);
367:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);

369:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
370:   ISGetIndices(isrow,&r);
371:   ISGetIndices(isicol,&ic);

373:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
374:   PetscMalloc1(n+1,&bi);
375:   PetscMalloc1(n+1,&bdiag);

377:   bi[0] = bdiag[0] = 0;

379:   /* linked list for storing column indices of the active row */
380:   nlnk = n + 1;
381:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

383:   PetscMalloc2(n+1,&bi_ptr,n+1,&im);

385:   /* initial FreeSpace size is f*(ai[n]+1) */
386:   f             = info->fill;
387:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
388:   current_space = free_space;

390:   for (i=0; i<n; i++) {
391:     /* copy previous fill into linked list */
392:     nzi = 0;
393:     nnz = ai[r[i]+1] - ai[r[i]];
394:     ajtmp = aj + ai[r[i]];
395:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
396:     nzi  += nlnk;

398:     /* add pivot rows into linked list */
399:     row = lnk[n];
400:     while (row < i) {
401:       nzbd  = bdiag[row] - bi[row] + 1;   /* num of entries in the row with column index <= row */
402:       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
403:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
404:       nzi  += nlnk;
405:       row   = lnk[row];
406:     }
407:     bi[i+1] = bi[i] + nzi;
408:     im[i]   = nzi;

410:     /* mark bdiag */
411:     nzbd = 0;
412:     nnz  = nzi;
413:     k    = lnk[n];
414:     while (nnz-- && k < i) {
415:       nzbd++;
416:       k = lnk[k];
417:     }
418:     bdiag[i] = bi[i] + nzbd;

420:     /* if free space is not available, make more free space */
421:     if (current_space->local_remaining<nzi) {
422:       nnz  = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
423:       PetscFreeSpaceGet(nnz,&current_space);
424:       reallocs++;
425:     }

427:     /* copy data into free space, then initialize lnk */
428:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);

430:     bi_ptr[i]                       = current_space->array;
431:     current_space->array           += nzi;
432:     current_space->local_used      += nzi;
433:     current_space->local_remaining -= nzi;
434:   }
435: #if defined(PETSC_USE_INFO)
436:   if (ai[n] != 0) {
437:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
438:     PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
439:     PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
440:     PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
441:     PetscInfo(A,"for best performance.\n");
442:   } else {
443:     PetscInfo(A,"Empty matrix\n");
444:   }
445: #endif

447:   ISRestoreIndices(isrow,&r);
448:   ISRestoreIndices(isicol,&ic);

450:   /* destroy list of free space and other temporary array(s) */
451:   PetscMalloc1(bi[n]+1,&bj);
452:   PetscFreeSpaceContiguous(&free_space,bj);
453:   PetscLLDestroy(lnk,lnkbt);
454:   PetscFree2(bi_ptr,im);

456:   /* put together the new matrix */
457:   MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);
458:   PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
459:   b    = (Mat_SeqBAIJ*)(B)->data;

461:   b->free_a       = PETSC_TRUE;
462:   b->free_ij      = PETSC_TRUE;
463:   b->singlemalloc = PETSC_FALSE;

465:   PetscMalloc1((bi[n]+1)*bs2,&b->a);
466:   b->j             = bj;
467:   b->i             = bi;
468:   b->diag          = bdiag;
469:   b->free_diag     = PETSC_TRUE;
470:   b->ilen          = 0;
471:   b->imax          = 0;
472:   b->row           = isrow;
473:   b->col           = iscol;
474:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;

476:   PetscObjectReference((PetscObject)isrow);
477:   PetscObjectReference((PetscObject)iscol);
478:   b->icol = isicol;

480:   PetscMalloc1(bs*n+bs,&b->solve_work);
481:   PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));

483:   b->maxnz = b->nz = bi[n];

485:   (B)->factortype            =  MAT_FACTOR_LU;
486:   (B)->info.factor_mallocs   = reallocs;
487:   (B)->info.fill_ratio_given = f;

489:   if (ai[n] != 0) {
490:     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
491:   } else {
492:     (B)->info.fill_ratio_needed = 0.0;
493:   }

495:   ISIdentity(isrow,&row_identity);
496:   ISIdentity(iscol,&col_identity);

498:   both_identity = (PetscBool) (row_identity && col_identity);

500:   MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);
501:   return(0);
502: }