Actual source code: bddcschurs.c

petsc-3.6.4 2016-04-12
Report Typos and Errors
  1: #include <../src/ksp/pc/impls/bddc/bddc.h>
  2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  3: #include <petscblaslapack.h>

  5: PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
  6: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
  7: static PetscErrorCode PCBDDCMumpsInteriorSolve(PC,Vec,Vec);
  8: static PetscErrorCode PCBDDCMumpsCorrectionSolve(PC,Vec,Vec);

 12: static PetscErrorCode PCBDDCMumpsCorrectionSolve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose)
 13: {
 14:   PCBDDCReuseMumps ctx;
 15: #if defined(PETSC_HAVE_MUMPS)
 16:   PetscInt         ival;
 17: #endif
 18:   PetscErrorCode   ierr;

 21:   PCShellGetContext(pc,(void **)&ctx);
 22: #if defined(PETSC_HAVE_MUMPS)
 23:   MatMumpsGetIcntl(ctx->F,26,&ival);
 24:   MatMumpsSetIcntl(ctx->F,26,-1);
 25: #endif
 26:   if (transpose) {
 27:     MatSolveTranspose(ctx->F,rhs,sol);
 28:   } else {
 29:     MatSolve(ctx->F,rhs,sol);
 30:   }
 31: #if defined(PETSC_HAVE_MUMPS)
 32:   MatMumpsSetIcntl(ctx->F,26,ival);
 33: #endif
 34:   return(0);
 35: }

 39: static PetscErrorCode PCBDDCMumpsCorrectionSolve(PC pc, Vec rhs, Vec sol)
 40: {
 41:   PetscErrorCode   ierr;

 44:   PCBDDCMumpsCorrectionSolve_Private(pc,rhs,sol,PETSC_FALSE);
 45:   return(0);
 46: }

 50: static PetscErrorCode PCBDDCMumpsCorrectionSolveTranspose(PC pc, Vec rhs, Vec sol)
 51: {
 52:   PetscErrorCode   ierr;

 55:   PCBDDCMumpsCorrectionSolve_Private(pc,rhs,sol,PETSC_TRUE);
 56:   return(0);
 57: }

 61: static PetscErrorCode PCBDDCReuseMumpsReset(PCBDDCReuseMumps reuse)
 62: {

 66:   MatDestroy(&reuse->F);
 67:   VecDestroy(&reuse->sol);
 68:   VecDestroy(&reuse->rhs);
 69:   PCDestroy(&reuse->interior_solver);
 70:   PCDestroy(&reuse->correction_solver);
 71:   ISDestroy(&reuse->is_R);
 72:   ISDestroy(&reuse->is_B);
 73:   VecScatterDestroy(&reuse->correction_scatter_B);
 74:   VecDestroy(&reuse->sol_B);
 75:   VecDestroy(&reuse->rhs_B);
 76:   return(0);
 77: }

 81: static PetscErrorCode PCBDDCMumpsInteriorSolve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose)
 82: {
 83:   PCBDDCReuseMumps ctx;
 84:   PetscScalar      *array,*array_mumps;
 85: #if defined(PETSC_HAVE_MUMPS)
 86:   PetscInt         ival;
 87: #endif
 88:   PetscErrorCode   ierr;

 91:   PCShellGetContext(pc,(void **)&ctx);
 92: #if defined(PETSC_HAVE_MUMPS)
 93:   MatMumpsGetIcntl(ctx->F,26,&ival);
 94:   MatMumpsSetIcntl(ctx->F,26,0);
 95: #endif
 96:   /* copy rhs into factored matrix workspace (can it be avoided?, MatSolve_MUMPS has another copy b->x internally) */
 97:   VecGetArrayRead(rhs,(const PetscScalar**)&array);
 98:   VecGetArray(ctx->rhs,&array_mumps);
 99:   PetscMemcpy(array_mumps,array,ctx->n_I*sizeof(PetscScalar));
100:   VecRestoreArray(ctx->rhs,&array_mumps);
101:   VecRestoreArrayRead(rhs,(const PetscScalar**)&array);

103:   if (transpose) {
104:     MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol);
105:   } else {
106:     MatSolve(ctx->F,ctx->rhs,ctx->sol);
107:   }

109:   /* get back data to caller worskpace */
110:   VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_mumps);
111:   VecGetArray(sol,&array);
112:   PetscMemcpy(array,array_mumps,ctx->n_I*sizeof(PetscScalar));
113:   VecRestoreArray(sol,&array);
114:   VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_mumps);
115: #if defined(PETSC_HAVE_MUMPS)
116:   MatMumpsSetIcntl(ctx->F,26,ival);
117: #endif
118:   return(0);
119: }

123: static PetscErrorCode PCBDDCMumpsInteriorSolve(PC pc, Vec rhs, Vec sol)
124: {
125:   PetscErrorCode   ierr;

128:   PCBDDCMumpsInteriorSolve_Private(pc,rhs,sol,PETSC_FALSE);
129:   return(0);
130: }

134: static PetscErrorCode PCBDDCMumpsInteriorSolveTranspose(PC pc, Vec rhs, Vec sol)
135: {
136:   PetscErrorCode   ierr;

139:   PCBDDCMumpsInteriorSolve_Private(pc,rhs,sol,PETSC_TRUE);
140:   return(0);
141: }

145: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
146: {
147:   Mat            B, C, D, Bd, Cd, AinvBd;
148:   KSP            ksp;
149:   PC             pc;
150:   PetscBool      isLU, isILU, isCHOL, Bdense, Cdense;
151:   PetscReal      fill = 2.0;
152:   PetscInt       n_I;
153:   PetscMPIInt    size;

157:   MPI_Comm_size(PetscObjectComm((PetscObject)M),&size);
158:   if (size != 1) {
159:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices");
160:   }
161:   if (reuse == MAT_REUSE_MATRIX) {
162:     PetscBool Sdense;

164:     PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense);
165:     if (!Sdense) {
166:       SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should dense");
167:     }
168:   }
169:   MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D);
170:   MatSchurComplementGetKSP(M, &ksp);
171:   KSPGetPC(ksp, &pc);
172:   PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU);
173:   PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU);
174:   PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL);
175:   PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense);
176:   PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense);
177:   MatGetSize(B,&n_I,NULL);
178:   if (n_I) {
179:     if (!Bdense) {
180:       MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd);
181:     } else {
182:       Bd = B;
183:     }

185:     if (isLU || isILU || isCHOL) {
186:       Mat fact;
187:       KSPSetUp(ksp);
188:       PCFactorGetMatrix(pc, &fact);
189:       MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
190:       MatMatSolve(fact, Bd, AinvBd);
191:     } else {
192:       PetscBool ex = PETSC_TRUE;

194:       if (ex) {
195:         Mat Ainvd;

197:         PCComputeExplicitOperator(pc, &Ainvd);
198:         MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd);
199:         MatDestroy(&Ainvd);
200:       } else {
201:         Vec         sol,rhs;
202:         PetscScalar *arrayrhs,*arraysol;
203:         PetscInt    i,nrhs,n;

205:         MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
206:         MatGetSize(Bd,&n,&nrhs);
207:         MatDenseGetArray(Bd,&arrayrhs);
208:         MatDenseGetArray(AinvBd,&arraysol);
209:         KSPGetSolution(ksp,&sol);
210:         KSPGetRhs(ksp,&rhs);
211:         for (i=0;i<nrhs;i++) {
212:           VecPlaceArray(rhs,arrayrhs+i*n);
213:           VecPlaceArray(sol,arraysol+i*n);
214:           KSPSolve(ksp,rhs,sol);
215:           VecResetArray(rhs);
216:           VecResetArray(sol);
217:         }
218:         MatDenseRestoreArray(Bd,&arrayrhs);
219:         MatDenseRestoreArray(AinvBd,&arrayrhs);
220:       }
221:     }
222:     if (!Bdense & !issym) {
223:       MatDestroy(&Bd);
224:     }

226:     if (!issym) {
227:       if (!Cdense) {
228:         MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd);
229:       } else {
230:         Cd = C;
231:       }
232:       MatMatMult(Cd, AinvBd, reuse, fill, S);
233:       if (!Cdense) {
234:         MatDestroy(&Cd);
235:       }
236:     } else {
237:       MatTransposeMatMult(Bd, AinvBd, reuse, fill, S);
238:       if (!Bdense) {
239:         MatDestroy(&Bd);
240:       }
241:     }
242:     MatDestroy(&AinvBd);
243:   }

245:   if (D) {
246:     Mat       Dd;
247:     PetscBool Ddense;

249:     PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense);
250:     if (!Ddense) {
251:       MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd);
252:     } else {
253:       Dd = D;
254:     }
255:     if (n_I) {
256:       MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN);
257:     } else {
258:       if (reuse == MAT_INITIAL_MATRIX) {
259:         MatDuplicate(Dd,MAT_COPY_VALUES,S);
260:       } else {
261:         MatCopy(Dd,*S,SAME_NONZERO_PATTERN);
262:       }
263:     }
264:     if (!Ddense) {
265:       MatDestroy(&Dd);
266:     }
267:   } else {
268:     MatScale(*S,-1.0);
269:   }
270:   return(0);
271: }

275: PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, PetscBool faster_deluxe, PetscBool compute_Stilda, PetscBool reuse_solvers)
276: {
277:   Mat                    F,A_II,A_IB,A_BI,A_BB,AE_II;
278:   Mat                    S_all;
279:   Mat                    global_schur_subsets,work_mat,*submats;
280:   ISLocalToGlobalMapping l2gmap_subsets;
281:   IS                     is_I,is_I_layer;
282:   IS                     all_subsets,all_subsets_mult,all_subsets_n;
283:   PetscInt               *nnz,*all_local_idx_N;
284:   PetscInt               *auxnum1,*auxnum2;
285:   PetscInt               i,subset_size,max_subset_size;
286:   PetscInt               extra,local_size,global_size;
287:   PetscBLASInt           B_N,B_ierr,B_lwork,*pivots;
288:   PetscScalar            *Bwork;
289:   PetscSubcomm           subcomm;
290:   PetscMPIInt            color,rank;
291:   MPI_Comm               comm_n;
292:   PetscErrorCode         ierr;

295:   /* update info in sub_schurs */
296:   MatDestroy(&sub_schurs->A);
297:   MatDestroy(&sub_schurs->S);
298:   sub_schurs->is_hermitian = PETSC_FALSE;
299:   sub_schurs->is_posdef = PETSC_FALSE;
300:   if (Ain) {
301:     PetscBool isseqaij;
302:     /* determine if we are dealing with hermitian positive definite problems */
303: #if !defined(PETSC_USE_COMPLEX)
304:     if (Ain->symmetric_set) {
305:       sub_schurs->is_hermitian = Ain->symmetric;
306:     }
307: #else
308:     if (Ain->hermitian_set) {
309:       sub_schurs->is_hermitian = Ain->hermitian;
310:     }
311: #endif
312:     if (Ain->spd_set) {
313:       sub_schurs->is_posdef = Ain->spd;
314:     }

316:     /* check */
317:     PetscObjectTypeCompare((PetscObject)Ain,MATSEQAIJ,&isseqaij);
318:     if (compute_Stilda && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
319:       PetscInt lsize;

321:       MatGetSize(Ain,&lsize,NULL);
322:       if (lsize) {
323:         PetscScalar val;
324:         PetscReal   norm;
325:         Vec         vec1,vec2,vec3;

327:         MatCreateVecs(Ain,&vec1,&vec2);
328:         VecDuplicate(vec1,&vec3);
329:         VecSetRandom(vec1,NULL);
330:         MatMult(Ain,vec1,vec2);
331: #if !defined(PETSC_USE_COMPLEX)
332:         MatMultTranspose(Ain,vec1,vec3);
333: #else
334:         MatMultHermitianTranspose(Ain,vec1,vec3);
335: #endif
336:         VecAXPY(vec3,-1.0,vec2);
337:         VecNorm(vec3,NORM_INFINITY,&norm);
338:         if (norm > PetscSqrtReal(PETSC_SMALL)) {
339:           sub_schurs->is_hermitian = PETSC_FALSE;
340:         } else {
341:           sub_schurs->is_hermitian = PETSC_TRUE;
342:         }
343:         VecDot(vec1,vec2,&val);
344:         if (PetscRealPart(val) > 0. && PetscAbsReal(PetscImaginaryPart(val)) < PETSC_SMALL) sub_schurs->is_posdef = PETSC_TRUE;
345:         VecDestroy(&vec1);
346:         VecDestroy(&vec2);
347:         VecDestroy(&vec3);
348:       } else {
349:         sub_schurs->is_hermitian = PETSC_TRUE;
350:         sub_schurs->is_posdef = PETSC_TRUE;
351:       }
352:     }
353:     if (isseqaij) {
354:       PetscObjectReference((PetscObject)Ain);
355:       sub_schurs->A = Ain;
356:     } else {
357:       MatConvert(Ain,MATSEQAIJ,MAT_INITIAL_MATRIX,&sub_schurs->A);
358:     }
359:   }
360:   if (compute_Stilda && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
361:     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"General matrix pencils are not currently supported (%d,%d)",sub_schurs->is_hermitian,sub_schurs->is_posdef);
362:   }

364:   PetscObjectReference((PetscObject)Sin);
365:   sub_schurs->S = Sin;
366:   if (sub_schurs->use_mumps) {
367:     sub_schurs->use_mumps = (PetscBool)(!!sub_schurs->A);
368:   }

370:   /* preliminary checks */
371:   if (!sub_schurs->use_mumps && compute_Stilda) {
372:     SETERRQ(PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
373:   }

375:   /* restrict work on active processes */
376:   color = 0;
377:   if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
378:   MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank);
379:   PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm);
380:   PetscSubcommSetNumber(subcomm,2);
381:   PetscSubcommSetTypeGeneral(subcomm,color,rank);
382:   PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL);
383:   PetscSubcommDestroy(&subcomm);
384:   if (!sub_schurs->n_subs) {
385:     PetscCommDestroy(&comm_n);
386:     return(0);
387:   }
388:   /* PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL); */

390:   /* get Schur complement matrices */
391:   if (!sub_schurs->use_mumps) {
392:     Mat       tA_IB,tA_BI,tA_BB;
393:     PetscBool isseqsbaij;
394:     MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB);
395:     PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij);
396:     if (isseqsbaij) {
397:       MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB);
398:       MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB);
399:       MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI);
400:     } else {
401:       PetscObjectReference((PetscObject)tA_BB);
402:       A_BB = tA_BB;
403:       PetscObjectReference((PetscObject)tA_IB);
404:       A_IB = tA_IB;
405:       PetscObjectReference((PetscObject)tA_BI);
406:       A_BI = tA_BI;
407:     }
408:   } else {
409:     A_II = NULL;
410:     A_IB = NULL;
411:     A_BI = NULL;
412:     A_BB = NULL;
413:   }
414:   S_all = NULL;

416:   /* determine interior problems */
417:   ISGetLocalSize(sub_schurs->is_I,&i);
418:   if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
419:     PetscBT                touched;
420:     const PetscInt*        idx_B;
421:     PetscInt               n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;

423:     if (xadj == NULL || adjncy == NULL) {
424:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency");
425:     }
426:     /* get sizes */
427:     ISGetLocalSize(sub_schurs->is_I,&n_I);
428:     ISGetLocalSize(sub_schurs->is_B,&n_B);

430:     PetscMalloc1(n_I+n_B,&local_numbering);
431:     PetscBTCreate(n_I+n_B,&touched);
432:     PetscBTMemzero(n_I+n_B,touched);

434:     /* all boundary dofs must be skipped when adding layers */
435:     ISGetIndices(sub_schurs->is_B,&idx_B);
436:     for (j=0;j<n_B;j++) {
437:       PetscBTSet(touched,idx_B[j]);
438:     }
439:     PetscMemcpy(local_numbering,idx_B,n_B*sizeof(PetscInt));
440:     ISRestoreIndices(sub_schurs->is_B,&idx_B);

442:     /* add prescribed number of layers of dofs */
443:     n_local_dofs = n_B;
444:     n_prev_added = n_B;
445:     for (layer=0;layer<nlayers;layer++) {
446:       PetscInt n_added;
447:       if (n_local_dofs == n_I+n_B) break;
448:       if (n_local_dofs > n_I+n_B) {
449:         SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %d. Out of bound access (%d > %d)",layer,n_local_dofs,n_I+n_B);
450:       }
451:       PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);
452:       n_prev_added = n_added;
453:       n_local_dofs += n_added;
454:       if (!n_added) break;
455:     }
456:     PetscBTDestroy(&touched);

458:     /* IS for I layer dofs in original numbering */
459:     ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer);
460:     PetscFree(local_numbering);
461:     ISSort(is_I_layer);
462:     /* IS for I layer dofs in I numbering */
463:     if (!sub_schurs->use_mumps) {
464:       ISLocalToGlobalMapping ItoNmap;
465:       ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap);
466:       ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I);
467:       ISLocalToGlobalMappingDestroy(&ItoNmap);

469:       /* II block */
470:       MatGetSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);
471:     }
472:   } else {
473:     PetscInt n_I;

475:     /* IS for I dofs in original numbering */
476:     PetscObjectReference((PetscObject)sub_schurs->is_I);
477:     is_I_layer = sub_schurs->is_I;

479:     /* IS for I dofs in I numbering (strided 1) */
480:     if (!sub_schurs->use_mumps) {
481:       ISGetSize(sub_schurs->is_I,&n_I);
482:       ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I);

484:       /* II block is the same */
485:       PetscObjectReference((PetscObject)A_II);
486:       AE_II = A_II;
487:     }
488:   }

490:   /* Get info on subset sizes and sum of all subsets sizes */
491:   max_subset_size = 0;
492:   local_size = 0;
493:   for (i=0;i<sub_schurs->n_subs;i++) {
494:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
495:     max_subset_size = PetscMax(subset_size,max_subset_size);
496:     local_size += subset_size;
497:   }

499:   /* Work arrays for local indices */
500:   extra = 0;
501:   if (sub_schurs->use_mumps && is_I_layer) {
502:     ISGetLocalSize(is_I_layer,&extra);
503:   }
504:   PetscMalloc1(local_size+extra,&all_local_idx_N);
505:   if (extra) {
506:     const PetscInt *idxs;
507:     ISGetIndices(is_I_layer,&idxs);
508:     PetscMemcpy(all_local_idx_N,idxs,extra*sizeof(PetscInt));
509:     ISRestoreIndices(is_I_layer,&idxs);
510:   }
511:   PetscMalloc1(local_size,&nnz);
512:   PetscMalloc1(sub_schurs->n_subs,&auxnum1);
513:   PetscMalloc1(sub_schurs->n_subs,&auxnum2);

515:   /* Get local indices in local numbering */
516:   local_size = 0;
517:   for (i=0;i<sub_schurs->n_subs;i++) {
518:     PetscInt j;
519:     const    PetscInt *idxs;

521:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
522:     ISGetIndices(sub_schurs->is_subs[i],&idxs);
523:     /* start (smallest in global ordering) and multiplicity */
524:     auxnum1[i] = idxs[0];
525:     auxnum2[i] = subset_size;
526:     /* subset indices in local numbering */
527:     PetscMemcpy(all_local_idx_N+local_size+extra,idxs,subset_size*sizeof(PetscInt));
528:     ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
529:     for (j=0;j<subset_size;j++) nnz[local_size+j] = subset_size;
530:     local_size += subset_size;
531:   }

533:   /* allocate extra workspace needed only for GETRI */
534:   Bwork = NULL;
535:   pivots = NULL;
536:   if (local_size && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
537:     PetscScalar lwork;

539:     B_lwork = -1;
540:     PetscBLASIntCast(local_size,&B_N);
541:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
542:     PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,Bwork,&B_N,pivots,&lwork,&B_lwork,&B_ierr));
543:     PetscFPTrapPop();
544:     if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr);
545:     PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
546:     PetscMalloc2(B_lwork,&Bwork,B_N,&pivots);
547:   }

549:   /* prepare parallel matrices for summing up properly schurs on subsets */
550:   ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n);
551:   ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets);
552:   ISDestroy(&all_subsets_n);
553:   ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult);
554:   PCBDDCSubsetNumbering(all_subsets,all_subsets_mult,&global_size,&all_subsets_n);
555:   ISDestroy(&all_subsets);
556:   ISDestroy(&all_subsets_mult);
557:   ISGetLocalSize(all_subsets_n,&i);
558:   if (i != local_size) {
559:     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %d != %d",i,local_size);
560:   }
561:   ISLocalToGlobalMappingCreateIS(all_subsets_n,&l2gmap_subsets);
562:   MatCreateIS(comm_n,1,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size,l2gmap_subsets,&work_mat);
563:   ISLocalToGlobalMappingDestroy(&l2gmap_subsets);
564:   MatCreate(PetscObjectComm((PetscObject)work_mat),&global_schur_subsets);
565:   MatSetSizes(global_schur_subsets,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size);
566:   MatSetType(global_schur_subsets,MATMPIAIJ);

568:   /* subset indices in local boundary numbering */
569:   if (!sub_schurs->is_Ej_all) {
570:     PetscInt *all_local_idx_B;

572:     PetscMalloc1(local_size,&all_local_idx_B);
573:     ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B);
574:     if (subset_size != local_size) {
575:       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %d != %d\n",subset_size,local_size);
576:     }
577:     ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all);
578:   }

580:   /* Local matrix of all local Schur on subsets (transposed) */
581:   if (!sub_schurs->S_Ej_all) {
582:     MatCreate(PETSC_COMM_SELF,&sub_schurs->S_Ej_all);
583:     MatSetSizes(sub_schurs->S_Ej_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);
584:     MatSetType(sub_schurs->S_Ej_all,MATAIJ);
585:     MatSeqAIJSetPreallocation(sub_schurs->S_Ej_all,0,nnz);
586:   }

588:   /* Compute Schur complements explicitly */
589:   F = NULL;
590:   if (!sub_schurs->use_mumps) {
591:     Mat         S_Ej_expl;
592:     PetscScalar *work;
593:     PetscInt    j,*dummy_idx;
594:     PetscBool   Sdense;

596:     PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);
597:     local_size = 0;
598:     for (i=0;i<sub_schurs->n_subs;i++) {
599:       IS  is_subset_B;
600:       Mat AE_EE,AE_IE,AE_EI,S_Ej;

602:       /* subsets in original and boundary numbering */
603:       ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B);
604:       /* EE block */
605:       MatGetSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE);
606:       /* IE block */
607:       MatGetSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE);
608:       /* EI block */
609:       if (sub_schurs->is_hermitian) {
610:         MatCreateTranspose(AE_IE,&AE_EI);
611:       } else {
612:         MatGetSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI);
613:       }
614:       ISDestroy(&is_subset_B);
615:       MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej);
616:       MatDestroy(&AE_EE);
617:       MatDestroy(&AE_IE);
618:       MatDestroy(&AE_EI);
619:       if (AE_II == A_II) { /* we can reuse the same ksp */
620:         KSP ksp;
621:         MatSchurComplementGetKSP(sub_schurs->S,&ksp);
622:         MatSchurComplementSetKSP(S_Ej,ksp);
623:       } else { /* build new ksp object which inherits ksp and pc types from the original one */
624:         KSP       origksp,schurksp;
625:         PC        origpc,schurpc;
626:         KSPType   ksp_type;
627:         PetscInt  n_internal;
628:         PetscBool ispcnone;

630:         MatSchurComplementGetKSP(sub_schurs->S,&origksp);
631:         MatSchurComplementGetKSP(S_Ej,&schurksp);
632:         KSPGetType(origksp,&ksp_type);
633:         KSPSetType(schurksp,ksp_type);
634:         KSPGetPC(schurksp,&schurpc);
635:         KSPGetPC(origksp,&origpc);
636:         PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone);
637:         if (!ispcnone) {
638:           PCType pc_type;
639:           PCGetType(origpc,&pc_type);
640:           PCSetType(schurpc,pc_type);
641:         } else {
642:           PCSetType(schurpc,PCLU);
643:         }
644:         ISGetSize(is_I,&n_internal);
645:         if (n_internal) { /* UMFPACK gives error with 0 sized problems */
646:           MatSolverPackage solver=NULL;
647:           PCFactorGetMatSolverPackage(origpc,(const MatSolverPackage*)&solver);
648:           if (solver) {
649:             PCFactorSetMatSolverPackage(schurpc,solver);
650:           }
651:         }
652:         KSPSetUp(schurksp);
653:       }
654:       ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
655:       MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl);
656:       PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_hermitian,MAT_REUSE_MATRIX,&S_Ej_expl);
657:       PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense);
658:       if (Sdense) {
659:         for (j=0;j<subset_size;j++) {
660:           dummy_idx[j]=local_size+j;
661:         }
662:         MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
663:       } else {
664:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
665:       }
666:       MatDestroy(&S_Ej);
667:       MatDestroy(&S_Ej_expl);
668:       local_size += subset_size;
669:     }
670:     PetscFree2(dummy_idx,work);
671:     /* free */
672:     ISDestroy(&is_I);
673:     MatDestroy(&AE_II);
674:     PetscFree(all_local_idx_N);
675:   } else {
676:     Mat         A;
677:     IS          is_A_all;
678:     PetscScalar *work,*S_data;
679:     PetscInt    *idxs_schur,n_I,n_I_all,*dummy_idx,size_schur,size_active_schur,cum,cum2;
680:     PetscBool   mumps_S;

682:     /* get working mat */
683:     n_I = 0;
684:     if (is_I_layer) {
685:       ISGetLocalSize(is_I_layer,&n_I);
686:     }
687:     if (!sub_schurs->is_dir) {
688:       ISCreateGeneral(PETSC_COMM_SELF,local_size+n_I,all_local_idx_N,PETSC_COPY_VALUES,&is_A_all);
689:       size_schur = local_size;
690:     } else {
691:       IS list[2];

693:       ISCreateGeneral(PETSC_COMM_SELF,local_size+n_I,all_local_idx_N,PETSC_COPY_VALUES,&list[0]);
694:       list[1] = sub_schurs->is_dir;
695:       ISConcatenate(PETSC_COMM_SELF,2,list,&is_A_all);
696:       ISDestroy(&list[0]);
697:       ISGetLocalSize(sub_schurs->is_dir,&size_schur);
698:       size_schur += local_size;
699:     }
700:     PetscFree(all_local_idx_N);
701:     size_active_schur = local_size; /* size active schurs does not count any dirichlet dof on the interface */
702:     MatGetSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A);
703:     MatSetOptionsPrefix(A,"sub_schurs_");
704:     MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_hermitian);
705:     MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian);
706:     MatSetOption(A,MAT_SPD,sub_schurs->is_posdef);

708:     if (n_I) {
709:       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
710:         MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_CHOLESKY,&F);
711:       } else {
712:         MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
713:       }
714:       /* subsets ordered last */
715:       PetscMalloc1(size_schur,&idxs_schur);
716:       for (i=0;i<size_schur;i++) {
717:         idxs_schur[i] = n_I+i+1;
718:       }
719: #if defined(PETSC_HAVE_MUMPS)
720:       MatMumpsSetSchurIndices(F,size_schur,idxs_schur);
721: #endif
722:       PetscFree(idxs_schur);

724:       /* factorization step */
725:       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
726:         MatCholeskyFactorSymbolic(F,A,NULL,NULL);
727: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
728:         MatMumpsSetIcntl(F,19,2);
729: #endif
730:         MatCholeskyFactorNumeric(F,A,NULL);
731:       } else {
732:         MatLUFactorSymbolic(F,A,NULL,NULL,NULL);
733: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
734:         MatMumpsSetIcntl(F,19,3);
735: #endif
736:         MatLUFactorNumeric(F,A,NULL);
737:       }

739:       /* get explicit Schur Complement computed during numeric factorization */
740: #if defined(PETSC_HAVE_MUMPS)
741:       MatMumpsGetSchurComplement(F,&S_all);
742: #endif

744:       /* we can reuse the solvers if we are not using the economic version */
745:       ISGetLocalSize(sub_schurs->is_I,&n_I_all);
746:       reuse_solvers = (PetscBool)(reuse_solvers && (n_I == n_I_all));
747:       mumps_S = PETSC_TRUE;
748:     } else { /* we can't use MUMPS when size_schur == size_of_the_problem */
749:       MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all);
750:       reuse_solvers = PETSC_FALSE;
751:       mumps_S = PETSC_FALSE;
752:     }

754:     if (reuse_solvers) {
755:       Mat              A_II;
756:       Vec              vec1_B;
757:       PCBDDCReuseMumps msolv_ctx;

759:       if (sub_schurs->reuse_mumps) {
760:         PCBDDCReuseMumpsReset(sub_schurs->reuse_mumps);
761:       } else {
762:         PetscNew(&sub_schurs->reuse_mumps);
763:       }
764:       msolv_ctx = sub_schurs->reuse_mumps;
765:       MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,NULL,NULL,NULL);
766:       MatGetSize(A_II,&msolv_ctx->n_I,NULL);
767:       PetscObjectReference((PetscObject)F);
768:       msolv_ctx->F = F;
769:       MatCreateVecs(F,&msolv_ctx->sol,&msolv_ctx->rhs);

771:       /* interior solver */
772:       PCCreate(PETSC_COMM_SELF,&msolv_ctx->interior_solver);
773:       PCSetOperators(msolv_ctx->interior_solver,A_II,A_II);
774:       PCSetType(msolv_ctx->interior_solver,PCSHELL);
775:       PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx);
776:       PCShellSetApply(msolv_ctx->interior_solver,PCBDDCMumpsInteriorSolve);
777:       PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCMumpsInteriorSolveTranspose);

779:       /* correction solver */
780:       PCCreate(PETSC_COMM_SELF,&msolv_ctx->correction_solver);
781:       PCSetOperators(msolv_ctx->correction_solver,A,A);
782:       PCSetType(msolv_ctx->correction_solver,PCSHELL);
783:       PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx);
784:       PCShellSetApply(msolv_ctx->correction_solver,PCBDDCMumpsCorrectionSolve);
785:       PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCMumpsCorrectionSolveTranspose);

787:       /* scatter and vecs for Schur complement solver */
788:       MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B);
789:       MatCreateVecs(sub_schurs->S,&vec1_B,NULL);
790:       ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B);
791:       VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B);
792:       VecDestroy(&vec1_B);
793:       PetscObjectReference((PetscObject)is_A_all);
794:       msolv_ctx->is_R = is_A_all;
795:     }
796:     MatDestroy(&A);
797:     ISDestroy(&is_A_all);

799:     /* Work arrays */
800:     PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);

802:     /* matrices for adaptive selection */
803:     if (compute_Stilda) {
804:       if (!sub_schurs->sum_S_Ej_tilda_all) {
805:         MatCreate(PETSC_COMM_SELF,&sub_schurs->sum_S_Ej_tilda_all);
806:         MatSetSizes(sub_schurs->sum_S_Ej_tilda_all,PETSC_DECIDE,PETSC_DECIDE,size_active_schur,size_active_schur);
807:         MatSetType(sub_schurs->sum_S_Ej_tilda_all,MATAIJ);
808:         MatSeqAIJSetPreallocation(sub_schurs->sum_S_Ej_tilda_all,0,nnz);
809:       }
810:       if (!sub_schurs->sum_S_Ej_inv_all) {
811:         MatCreate(PETSC_COMM_SELF,&sub_schurs->sum_S_Ej_inv_all);
812:         MatSetSizes(sub_schurs->sum_S_Ej_inv_all,PETSC_DECIDE,PETSC_DECIDE,size_active_schur,size_active_schur);
813:         MatSetType(sub_schurs->sum_S_Ej_inv_all,MATAIJ);
814:         MatSeqAIJSetPreallocation(sub_schurs->sum_S_Ej_inv_all,0,nnz);
815:       }
816:     }

818:     /* S_Ej_all */
819:     cum = cum2 = 0;
820:     MatDenseGetArray(S_all,&S_data);
821:     for (i=0;i<sub_schurs->n_subs;i++) {
822:       PetscInt j;

824:       /* get S_E */
825:       ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
826:       if (mumps_S && sub_schurs->is_hermitian) { /* When using MUMPS data I need to expand to upper triangular (column oriented) */
827:         PetscInt k;
828:         for (k=0;k<subset_size;k++) {
829:           for (j=k;j<subset_size;j++) {
830:             work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
831:             work[j*subset_size+k] = S_data[cum2+k*size_schur+j];
832:           }
833:         }
834:       } else { /* copy to workspace */
835:         PetscInt k;
836:         for (k=0;k<subset_size;k++) {
837:           for (j=0;j<subset_size;j++) {
838:             work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
839:           }
840:         }
841:       }
842:       /* insert S_E values */
843:       for (j=0;j<subset_size;j++) dummy_idx[j] = cum+j;
844:       MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);

846:       /* if adaptivity is requested, invert S_E block */
847:       if (compute_Stilda) {
848:         PetscBLASIntCast(subset_size,&B_N);
849:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
850:         if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { /* TODO add sytrf/i for symmetric non hermitian */
851:           PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,work,&B_N,&B_ierr));
852:           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
853:           PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,work,&B_N,&B_ierr));
854:           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
855:         } else {
856:           PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,work,&B_N,pivots,&B_ierr));
857:           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
858:           PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,work,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
859:           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
860:         }
861:         PetscFPTrapPop();
862:         MatSetValues(sub_schurs->sum_S_Ej_inv_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
863:       }
864:       cum += subset_size;
865:       cum2 += subset_size*(size_schur + 1);
866:     }
867:     MatDenseRestoreArray(S_all,&S_data);

869: #if defined(PETSC_HAVE_MUMPS)
870:     if (mumps_S) {
871:       MatMumpsRestoreSchurComplement(F,&S_all);
872:     }
873: #endif

875:     if (compute_Stilda && size_active_schur) {
876:       if (sub_schurs->n_subs == 1 && size_schur == size_active_schur) { /* we already computed the inverse */
877:         PetscInt j;
878:         for (j=0;j<size_schur;j++) dummy_idx[j] = j;
879:         MatSetValues(sub_schurs->sum_S_Ej_tilda_all,size_schur,dummy_idx,size_schur,dummy_idx,work,INSERT_VALUES);
880:       } else {
881:         if (mumps_S) { /* use MatMumps calls to invert S */
882: #if defined(PETSC_HAVE_MUMPS)
883:           MatMumpsInvertSchurComplement(F);
884:           MatMumpsGetSchurComplement(F,&S_all);
885: #endif
886:         } else { /* we need to invert explicitly since we are not using MUMPS for S */
887:           MatDenseGetArray(S_all,&S_data);
888:           PetscBLASIntCast(size_schur,&B_N);
889:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
890:           if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { /* TODO add sytrf/i for symmetric non hermitian */
891:             PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr));
892:             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
893:             PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr));
894:             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
895:           } else {
896:             PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr));
897:             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
898:             PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
899:             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
900:           }
901:           PetscFPTrapPop();
902:           MatDenseRestoreArray(S_all,&S_data);
903:         }
904:         /* S_Ej_tilda_all */
905:         cum = cum2 = 0;
906:         MatDenseGetArray(S_all,&S_data);
907:         for (i=0;i<sub_schurs->n_subs;i++) {
908:           PetscInt j;

910:           ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
911:           /* get (St^-1)_E */
912:           if (sub_schurs->is_hermitian) { /* Here I don't need to expand to upper triangular (column oriented) */
913:             PetscInt k;
914:             for (k=0;k<subset_size;k++) {
915:               for (j=k;j<subset_size;j++) {
916:                 work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
917:               }
918:             }
919:           } else { /* copy to workspace */
920:             PetscInt k;
921:             for (k=0;k<subset_size;k++) {
922:               for (j=0;j<subset_size;j++) {
923:                 work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
924:               }
925:             }
926:           }
927:           for (j=0;j<subset_size;j++) dummy_idx[j] = cum+j;
928:           MatSetValues(sub_schurs->sum_S_Ej_tilda_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
929:           cum += subset_size;
930:           cum2 += subset_size*(size_schur + 1);
931:         }
932:         MatDenseRestoreArray(S_all,&S_data);
933: #if defined(PETSC_HAVE_MUMPS)
934:         if (mumps_S) {
935:           MatMumpsRestoreSchurComplement(F,&S_all);
936:         }
937: #endif
938:       }
939:     }
940:     PetscFree2(dummy_idx,work);
941:   }
942:   PetscFree(nnz);
943:   MatDestroy(&F);
944:   ISDestroy(&is_I_layer);
945:   MatDestroy(&S_all);
946:   MatDestroy(&A_BB);
947:   MatDestroy(&A_IB);
948:   MatDestroy(&A_BI);
949:   MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
950:   MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
951:   if (compute_Stilda) {
952:     MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
953:     MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
954:     MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
955:     MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
956:   }

958:   /* Global matrix of all assembled Schur on subsets */
959:   MatISSetLocalMat(work_mat,sub_schurs->S_Ej_all);
960:   MatISSetMPIXAIJPreallocation_Private(work_mat,global_schur_subsets,PETSC_TRUE);
961:   MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);

963:   /* Get local part of (\sum_j S_Ej) */
964:   if (!sub_schurs->sum_S_Ej_all) {
965:     MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_INITIAL_MATRIX,&submats);
966:     sub_schurs->sum_S_Ej_all = submats[0];
967:   } else {
968:     PetscMalloc1(1,&submats);
969:     submats[0] = sub_schurs->sum_S_Ej_all;
970:     MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
971:   }

973:   /* Compute explicitly (\sum_j S_Ej)^-1 (faster scaling during PCApply, needs extra work when doing setup) */
974:   if (faster_deluxe) {
975:     Mat         tmpmat;
976:     PetscScalar *array;
977:     PetscInt    cum;

979:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&array);
980:     cum = 0;
981:     for (i=0;i<sub_schurs->n_subs;i++) {
982:       ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
983:       PetscBLASIntCast(subset_size,&B_N);
984:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
985:       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
986:         PetscInt j,k;

988:         PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
989:         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
990:         PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
991:         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
992:         for (j=0;j<B_N;j++) {
993:           for (k=j+1;k<B_N;k++) {
994:             array[k*B_N+j+cum] = array[j*B_N+k+cum];
995:           }
996:         }
997:       } else {
998:         PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
999:         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
1000:         PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1001:         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1002:       }
1003:       PetscFPTrapPop();
1004:       cum += subset_size*subset_size;
1005:     }
1006:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&array);
1007:     MatMatMult(sub_schurs->S_Ej_all,sub_schurs->sum_S_Ej_all,MAT_INITIAL_MATRIX,1.0,&tmpmat);
1008:     MatDestroy(&sub_schurs->S_Ej_all);
1009:     MatDestroy(&sub_schurs->sum_S_Ej_all);
1010:     sub_schurs->S_Ej_all = tmpmat;
1011:   }

1013:   /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
1014:   if (compute_Stilda) {
1015:     MatISSetLocalMat(work_mat,sub_schurs->sum_S_Ej_tilda_all);
1016:     MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);
1017:     submats[0] = sub_schurs->sum_S_Ej_tilda_all;
1018:     MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
1019:     MatISSetLocalMat(work_mat,sub_schurs->sum_S_Ej_inv_all);
1020:     MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);
1021:     submats[0] = sub_schurs->sum_S_Ej_inv_all;
1022:     MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
1023:   }

1025:   /* free workspace */
1026:   PetscFree(submats);
1027:   PetscFree2(Bwork,pivots);
1028:   MatDestroy(&global_schur_subsets);
1029:   MatDestroy(&work_mat);
1030:   ISDestroy(&all_subsets_n);
1031:   PetscCommDestroy(&comm_n);
1032:   return(0);
1033: }

1037: PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap)
1038: {
1039:   IS              *faces,*edges,*all_cc,vertices;
1040:   PetscInt        i,n_faces,n_edges,n_all_cc;
1041:   PetscBool       is_sorted;
1042:   PetscErrorCode  ierr;

1045:   ISSorted(is_I,&is_sorted);
1046:   if (!is_sorted) {
1047:     SETERRQ(PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted");
1048:   }
1049:   ISSorted(is_B,&is_sorted);
1050:   if (!is_sorted) {
1051:     SETERRQ(PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted");
1052:   }

1054:   /* reset any previous data */
1055:   PCBDDCSubSchursReset(sub_schurs);

1057:   /* get index sets for faces and edges (already sorted by global ordering) */
1058:   PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices);
1059:   n_all_cc = n_faces+n_edges;
1060:   PetscBTCreate(n_all_cc,&sub_schurs->is_edge);
1061:   PetscMalloc1(n_all_cc,&all_cc);
1062:   for (i=0;i<n_faces;i++) {
1063:     all_cc[i] = faces[i];
1064:   }
1065:   for (i=0;i<n_edges;i++) {
1066:     all_cc[n_faces+i] = edges[i];
1067:     PetscBTSet(sub_schurs->is_edge,n_faces+i);
1068:   }
1069:   PetscFree(faces);
1070:   PetscFree(edges);
1071:   sub_schurs->is_dir = NULL;
1072:   PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir);

1074:   /* Determine if MUMPS can be used */
1075:   sub_schurs->use_mumps = PETSC_FALSE;
1076: #if defined(PETSC_HAVE_MUMPS)
1077:   sub_schurs->use_mumps = PETSC_TRUE;
1078: #endif

1080:   PetscObjectReference((PetscObject)is_I);
1081:   sub_schurs->is_I = is_I;
1082:   PetscObjectReference((PetscObject)is_B);
1083:   sub_schurs->is_B = is_B;
1084:   PetscObjectReference((PetscObject)graph->l2gmap);
1085:   sub_schurs->l2gmap = graph->l2gmap;
1086:   PetscObjectReference((PetscObject)BtoNmap);
1087:   sub_schurs->BtoNmap = BtoNmap;
1088:   sub_schurs->n_subs = n_all_cc;
1089:   sub_schurs->is_subs = all_cc;
1090:   if (!sub_schurs->use_mumps) { /* sort by local ordering mumps is not present */
1091:     for (i=0;i<sub_schurs->n_subs;i++) {
1092:       ISSort(sub_schurs->is_subs[i]);
1093:     }
1094:   }
1095:   sub_schurs->is_vertices = vertices;
1096:   sub_schurs->S_Ej_all = NULL;
1097:   sub_schurs->sum_S_Ej_all = NULL;
1098:   sub_schurs->sum_S_Ej_inv_all = NULL;
1099:   sub_schurs->sum_S_Ej_tilda_all = NULL;
1100:   sub_schurs->is_Ej_all = NULL;
1101:   return(0);
1102: }

1106: PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
1107: {
1108:   PCBDDCSubSchurs schurs_ctx;
1109:   PetscErrorCode  ierr;

1112:   PetscNew(&schurs_ctx);
1113:   schurs_ctx->n_subs = 0;
1114:   *sub_schurs = schurs_ctx;
1115:   return(0);
1116: }

1120: PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs)
1121: {

1125:   PCBDDCSubSchursReset(*sub_schurs);
1126:   PetscFree(*sub_schurs);
1127:   return(0);
1128: }

1132: PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
1133: {
1134:   PetscInt       i;

1138:   MatDestroy(&sub_schurs->A);
1139:   MatDestroy(&sub_schurs->S);
1140:   ISDestroy(&sub_schurs->is_I);
1141:   ISDestroy(&sub_schurs->is_B);
1142:   ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);
1143:   ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);
1144:   MatDestroy(&sub_schurs->S_Ej_all);
1145:   MatDestroy(&sub_schurs->sum_S_Ej_all);
1146:   MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
1147:   MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
1148:   ISDestroy(&sub_schurs->is_Ej_all);
1149:   ISDestroy(&sub_schurs->is_vertices);
1150:   ISDestroy(&sub_schurs->is_dir);
1151:   PetscBTDestroy(&sub_schurs->is_edge);
1152:   for (i=0;i<sub_schurs->n_subs;i++) {
1153:     ISDestroy(&sub_schurs->is_subs[i]);
1154:   }
1155:   if (sub_schurs->n_subs) {
1156:     PetscFree(sub_schurs->is_subs);
1157:   }
1158:   if (sub_schurs->reuse_mumps) {
1159:     PCBDDCReuseMumpsReset(sub_schurs->reuse_mumps);
1160:   }
1161:   PetscFree(sub_schurs->reuse_mumps);
1162:   sub_schurs->n_subs = 0;
1163:   return(0);
1164: }

1168: PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
1169: {
1170:   PetscInt       i,j,n;

1174:   n = 0;
1175:   for (i=-n_prev;i<0;i++) {
1176:     PetscInt start_dof = queue_tip[i];
1177:     for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
1178:       PetscInt dof = adjncy[j];
1179:       if (!PetscBTLookup(touched,dof)) {
1180:         PetscBTSet(touched,dof);
1181:         queue_tip[n] = dof;
1182:         n++;
1183:       }
1184:     }
1185:   }
1186:   *n_added = n;
1187:   return(0);
1188: }