Actual source code: bddcschurs.c

petsc-3.7.3 2016-08-01
Report Typos and Errors
  1: #include <../src/ksp/pc/impls/bddc/bddc.h>
  2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  3: #include <petscblaslapack.h>

  5: PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
  6: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
  7: static PetscErrorCode PCBDDCMumpsInteriorSolve(PC,Vec,Vec);
  8: static PetscErrorCode PCBDDCMumpsCorrectionSolve(PC,Vec,Vec);

 12: static PetscErrorCode PCBDDCMumpsCorrectionSolve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose)
 13: {
 14:   PCBDDCReuseMumps ctx;
 15: #if defined(PETSC_HAVE_MUMPS)
 16:   PetscInt         ival;
 17: #endif
 18:   PetscErrorCode   ierr;

 21:   PCShellGetContext(pc,(void **)&ctx);
 22: #if defined(PETSC_HAVE_MUMPS)
 23:   MatMumpsGetIcntl(ctx->F,26,&ival);
 24:   MatMumpsSetIcntl(ctx->F,26,-1);
 25: #endif
 26:   if (transpose) {
 27:     MatSolveTranspose(ctx->F,rhs,sol);
 28:   } else {
 29:     MatSolve(ctx->F,rhs,sol);
 30:   }
 31: #if defined(PETSC_HAVE_MUMPS)
 32:   MatMumpsSetIcntl(ctx->F,26,ival);
 33: #endif
 34:   return(0);
 35: }

 39: static PetscErrorCode PCBDDCMumpsCorrectionSolve(PC pc, Vec rhs, Vec sol)
 40: {
 41:   PetscErrorCode   ierr;

 44:   PCBDDCMumpsCorrectionSolve_Private(pc,rhs,sol,PETSC_FALSE);
 45:   return(0);
 46: }

 50: static PetscErrorCode PCBDDCMumpsCorrectionSolveTranspose(PC pc, Vec rhs, Vec sol)
 51: {
 52:   PetscErrorCode   ierr;

 55:   PCBDDCMumpsCorrectionSolve_Private(pc,rhs,sol,PETSC_TRUE);
 56:   return(0);
 57: }

 61: static PetscErrorCode PCBDDCReuseMumpsReset(PCBDDCReuseMumps reuse)
 62: {

 66:   MatDestroy(&reuse->F);
 67:   VecDestroy(&reuse->sol);
 68:   VecDestroy(&reuse->rhs);
 69:   PCDestroy(&reuse->interior_solver);
 70:   PCDestroy(&reuse->correction_solver);
 71:   ISDestroy(&reuse->is_R);
 72:   ISDestroy(&reuse->is_B);
 73:   VecScatterDestroy(&reuse->correction_scatter_B);
 74:   VecDestroy(&reuse->sol_B);
 75:   VecDestroy(&reuse->rhs_B);
 76:   return(0);
 77: }

 81: static PetscErrorCode PCBDDCMumpsInteriorSolve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose)
 82: {
 83:   PCBDDCReuseMumps ctx;
 84:   PetscScalar      *array,*array_mumps;
 85: #if defined(PETSC_HAVE_MUMPS)
 86:   PetscInt         ival;
 87: #endif
 88:   PetscErrorCode   ierr;

 91:   PCShellGetContext(pc,(void **)&ctx);
 92: #if defined(PETSC_HAVE_MUMPS)
 93:   MatMumpsGetIcntl(ctx->F,26,&ival);
 94:   MatMumpsSetIcntl(ctx->F,26,0);
 95: #endif
 96:   /* copy rhs into factored matrix workspace (can it be avoided?, MatSolve_MUMPS has another copy b->x internally) */
 97:   VecGetArrayRead(rhs,(const PetscScalar**)&array);
 98:   VecGetArray(ctx->rhs,&array_mumps);
 99:   PetscMemcpy(array_mumps,array,ctx->n_I*sizeof(PetscScalar));
100:   VecRestoreArray(ctx->rhs,&array_mumps);
101:   VecRestoreArrayRead(rhs,(const PetscScalar**)&array);

103:   if (transpose) {
104:     MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol);
105:   } else {
106:     MatSolve(ctx->F,ctx->rhs,ctx->sol);
107:   }

109:   /* get back data to caller worskpace */
110:   VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_mumps);
111:   VecGetArray(sol,&array);
112:   PetscMemcpy(array,array_mumps,ctx->n_I*sizeof(PetscScalar));
113:   VecRestoreArray(sol,&array);
114:   VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_mumps);
115: #if defined(PETSC_HAVE_MUMPS)
116:   MatMumpsSetIcntl(ctx->F,26,ival);
117: #endif
118:   return(0);
119: }

123: static PetscErrorCode PCBDDCMumpsInteriorSolve(PC pc, Vec rhs, Vec sol)
124: {
125:   PetscErrorCode   ierr;

128:   PCBDDCMumpsInteriorSolve_Private(pc,rhs,sol,PETSC_FALSE);
129:   return(0);
130: }

134: static PetscErrorCode PCBDDCMumpsInteriorSolveTranspose(PC pc, Vec rhs, Vec sol)
135: {
136:   PetscErrorCode   ierr;

139:   PCBDDCMumpsInteriorSolve_Private(pc,rhs,sol,PETSC_TRUE);
140:   return(0);
141: }

145: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
146: {
147:   Mat            B, C, D, Bd, Cd, AinvBd;
148:   KSP            ksp;
149:   PC             pc;
150:   PetscBool      isLU, isILU, isCHOL, Bdense, Cdense;
151:   PetscReal      fill = 2.0;
152:   PetscInt       n_I;
153:   PetscMPIInt    size;

157:   MPI_Comm_size(PetscObjectComm((PetscObject)M),&size);
158:   if (size != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices");
159:   if (reuse == MAT_REUSE_MATRIX) {
160:     PetscBool Sdense;

162:     PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense);
163:     if (!Sdense) SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should dense");
164:   }
165:   MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D);
166:   MatSchurComplementGetKSP(M, &ksp);
167:   KSPGetPC(ksp, &pc);
168:   PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU);
169:   PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU);
170:   PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL);
171:   PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense);
172:   PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense);
173:   MatGetSize(B,&n_I,NULL);
174:   if (n_I) {
175:     if (!Bdense) {
176:       MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd);
177:     } else {
178:       Bd = B;
179:     }

181:     if (isLU || isILU || isCHOL) {
182:       Mat fact;
183:       KSPSetUp(ksp);
184:       PCFactorGetMatrix(pc, &fact);
185:       MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
186:       MatMatSolve(fact, Bd, AinvBd);
187:     } else {
188:       PetscBool ex = PETSC_TRUE;

190:       if (ex) {
191:         Mat Ainvd;

193:         PCComputeExplicitOperator(pc, &Ainvd);
194:         MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd);
195:         MatDestroy(&Ainvd);
196:       } else {
197:         Vec         sol,rhs;
198:         PetscScalar *arrayrhs,*arraysol;
199:         PetscInt    i,nrhs,n;

201:         MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
202:         MatGetSize(Bd,&n,&nrhs);
203:         MatDenseGetArray(Bd,&arrayrhs);
204:         MatDenseGetArray(AinvBd,&arraysol);
205:         KSPGetSolution(ksp,&sol);
206:         KSPGetRhs(ksp,&rhs);
207:         for (i=0;i<nrhs;i++) {
208:           VecPlaceArray(rhs,arrayrhs+i*n);
209:           VecPlaceArray(sol,arraysol+i*n);
210:           KSPSolve(ksp,rhs,sol);
211:           VecResetArray(rhs);
212:           VecResetArray(sol);
213:         }
214:         MatDenseRestoreArray(Bd,&arrayrhs);
215:         MatDenseRestoreArray(AinvBd,&arrayrhs);
216:       }
217:     }
218:     if (!Bdense & !issym) {
219:       MatDestroy(&Bd);
220:     }

222:     if (!issym) {
223:       if (!Cdense) {
224:         MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd);
225:       } else {
226:         Cd = C;
227:       }
228:       MatMatMult(Cd, AinvBd, reuse, fill, S);
229:       if (!Cdense) {
230:         MatDestroy(&Cd);
231:       }
232:     } else {
233:       MatTransposeMatMult(Bd, AinvBd, reuse, fill, S);
234:       if (!Bdense) {
235:         MatDestroy(&Bd);
236:       }
237:     }
238:     MatDestroy(&AinvBd);
239:   }

241:   if (D) {
242:     Mat       Dd;
243:     PetscBool Ddense;

245:     PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense);
246:     if (!Ddense) {
247:       MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd);
248:     } else {
249:       Dd = D;
250:     }
251:     if (n_I) {
252:       MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN);
253:     } else {
254:       if (reuse == MAT_INITIAL_MATRIX) {
255:         MatDuplicate(Dd,MAT_COPY_VALUES,S);
256:       } else {
257:         MatCopy(Dd,*S,SAME_NONZERO_PATTERN);
258:       }
259:     }
260:     if (!Ddense) {
261:       MatDestroy(&Dd);
262:     }
263:   } else {
264:     MatScale(*S,-1.0);
265:   }
266:   return(0);
267: }

271: PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, PetscBool faster_deluxe, PetscBool compute_Stilda, PetscBool reuse_solvers)
272: {
273:   Mat                    F,A_II,A_IB,A_BI,A_BB,AE_II;
274:   Mat                    S_all;
275:   Mat                    global_schur_subsets,work_mat,*submats;
276:   ISLocalToGlobalMapping l2gmap_subsets;
277:   IS                     is_I,is_I_layer;
278:   IS                     all_subsets,all_subsets_mult,all_subsets_n;
279:   PetscInt               *nnz,*all_local_idx_N;
280:   PetscInt               *auxnum1,*auxnum2;
281:   PetscInt               i,subset_size,max_subset_size;
282:   PetscInt               extra,local_size,global_size;
283:   PetscBLASInt           B_N,B_ierr,B_lwork,*pivots;
284:   PetscScalar            *Bwork;
285:   PetscSubcomm           subcomm;
286:   PetscMPIInt            color,rank;
287:   MPI_Comm               comm_n;
288:   PetscErrorCode         ierr;

291:   /* update info in sub_schurs */
292:   MatDestroy(&sub_schurs->A);
293:   MatDestroy(&sub_schurs->S);
294:   sub_schurs->is_hermitian = PETSC_FALSE;
295:   sub_schurs->is_posdef = PETSC_FALSE;
296:   if (Ain) {
297:     PetscBool isseqaij;
298:     /* determine if we are dealing with hermitian positive definite problems */
299: #if !defined(PETSC_USE_COMPLEX)
300:     if (Ain->symmetric_set) {
301:       sub_schurs->is_hermitian = Ain->symmetric;
302:     }
303: #else
304:     if (Ain->hermitian_set) {
305:       sub_schurs->is_hermitian = Ain->hermitian;
306:     }
307: #endif
308:     if (Ain->spd_set) {
309:       sub_schurs->is_posdef = Ain->spd;
310:     }

312:     /* check */
313:     PetscObjectTypeCompare((PetscObject)Ain,MATSEQAIJ,&isseqaij);
314:     if (compute_Stilda && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
315:       PetscInt lsize;

317:       MatGetSize(Ain,&lsize,NULL);
318:       if (lsize) {
319:         PetscScalar val;
320:         PetscReal   norm;
321:         Vec         vec1,vec2,vec3;

323:         MatCreateVecs(Ain,&vec1,&vec2);
324:         VecDuplicate(vec1,&vec3);
325:         VecSetRandom(vec1,NULL);
326:         MatMult(Ain,vec1,vec2);
327: #if !defined(PETSC_USE_COMPLEX)
328:         MatMultTranspose(Ain,vec1,vec3);
329: #else
330:         MatMultHermitianTranspose(Ain,vec1,vec3);
331: #endif
332:         VecAXPY(vec3,-1.0,vec2);
333:         VecNorm(vec3,NORM_INFINITY,&norm);
334:         if (norm > PetscSqrtReal(PETSC_SMALL)) {
335:           sub_schurs->is_hermitian = PETSC_FALSE;
336:         } else {
337:           sub_schurs->is_hermitian = PETSC_TRUE;
338:         }
339:         VecDot(vec1,vec2,&val);
340:         if (PetscRealPart(val) > 0. && PetscAbsReal(PetscImaginaryPart(val)) < PETSC_SMALL) sub_schurs->is_posdef = PETSC_TRUE;
341:         VecDestroy(&vec1);
342:         VecDestroy(&vec2);
343:         VecDestroy(&vec3);
344:       } else {
345:         sub_schurs->is_hermitian = PETSC_TRUE;
346:         sub_schurs->is_posdef = PETSC_TRUE;
347:       }
348:     }
349:     if (isseqaij) {
350:       PetscObjectReference((PetscObject)Ain);
351:       sub_schurs->A = Ain;
352:     } else {
353:       MatConvert(Ain,MATSEQAIJ,MAT_INITIAL_MATRIX,&sub_schurs->A);
354:     }
355:   }
356:   if (compute_Stilda && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"General matrix pencils are not currently supported (%D,%D)",sub_schurs->is_hermitian,sub_schurs->is_posdef);

358:   PetscObjectReference((PetscObject)Sin);
359:   sub_schurs->S = Sin;
360:   if (sub_schurs->use_mumps) {
361:     sub_schurs->use_mumps = (PetscBool)(!!sub_schurs->A);
362:   }

364:   /* preliminary checks */
365:   if (!sub_schurs->use_mumps && compute_Stilda) SETERRQ(PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");

367:   /* restrict work on active processes */
368:   color = 0;
369:   if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
370:   MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank);
371:   PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm);
372:   PetscSubcommSetNumber(subcomm,2);
373:   PetscSubcommSetTypeGeneral(subcomm,color,rank);
374:   PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL);
375:   PetscSubcommDestroy(&subcomm);
376:   if (!sub_schurs->n_subs) {
377:     PetscCommDestroy(&comm_n);
378:     return(0);
379:   }
380:   /* PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL); */

382:   /* get Schur complement matrices */
383:   if (!sub_schurs->use_mumps) {
384:     Mat       tA_IB,tA_BI,tA_BB;
385:     PetscBool isseqsbaij;
386:     MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB);
387:     PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij);
388:     if (isseqsbaij) {
389:       MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB);
390:       MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB);
391:       MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI);
392:     } else {
393:       PetscObjectReference((PetscObject)tA_BB);
394:       A_BB = tA_BB;
395:       PetscObjectReference((PetscObject)tA_IB);
396:       A_IB = tA_IB;
397:       PetscObjectReference((PetscObject)tA_BI);
398:       A_BI = tA_BI;
399:     }
400:   } else {
401:     A_II = NULL;
402:     A_IB = NULL;
403:     A_BI = NULL;
404:     A_BB = NULL;
405:   }
406:   S_all = NULL;

408:   /* determine interior problems */
409:   ISGetLocalSize(sub_schurs->is_I,&i);
410:   if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
411:     PetscBT                touched;
412:     const PetscInt*        idx_B;
413:     PetscInt               n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;

415:     if (!xadj || !adjncy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency");
416:     /* get sizes */
417:     ISGetLocalSize(sub_schurs->is_I,&n_I);
418:     ISGetLocalSize(sub_schurs->is_B,&n_B);

420:     PetscMalloc1(n_I+n_B,&local_numbering);
421:     PetscBTCreate(n_I+n_B,&touched);
422:     PetscBTMemzero(n_I+n_B,touched);

424:     /* all boundary dofs must be skipped when adding layers */
425:     ISGetIndices(sub_schurs->is_B,&idx_B);
426:     for (j=0;j<n_B;j++) {
427:       PetscBTSet(touched,idx_B[j]);
428:     }
429:     PetscMemcpy(local_numbering,idx_B,n_B*sizeof(PetscInt));
430:     ISRestoreIndices(sub_schurs->is_B,&idx_B);

432:     /* add prescribed number of layers of dofs */
433:     n_local_dofs = n_B;
434:     n_prev_added = n_B;
435:     for (layer=0;layer<nlayers;layer++) {
436:       PetscInt n_added;
437:       if (n_local_dofs == n_I+n_B) break;
438:       if (n_local_dofs > n_I+n_B) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %D. Out of bound access (%D > %D)",layer,n_local_dofs,n_I+n_B);
439:       PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);
440:       n_prev_added = n_added;
441:       n_local_dofs += n_added;
442:       if (!n_added) break;
443:     }
444:     PetscBTDestroy(&touched);

446:     /* IS for I layer dofs in original numbering */
447:     ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer);
448:     PetscFree(local_numbering);
449:     ISSort(is_I_layer);
450:     /* IS for I layer dofs in I numbering */
451:     if (!sub_schurs->use_mumps) {
452:       ISLocalToGlobalMapping ItoNmap;
453:       ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap);
454:       ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I);
455:       ISLocalToGlobalMappingDestroy(&ItoNmap);

457:       /* II block */
458:       MatGetSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);
459:     }
460:   } else {
461:     PetscInt n_I;

463:     /* IS for I dofs in original numbering */
464:     PetscObjectReference((PetscObject)sub_schurs->is_I);
465:     is_I_layer = sub_schurs->is_I;

467:     /* IS for I dofs in I numbering (strided 1) */
468:     if (!sub_schurs->use_mumps) {
469:       ISGetSize(sub_schurs->is_I,&n_I);
470:       ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I);

472:       /* II block is the same */
473:       PetscObjectReference((PetscObject)A_II);
474:       AE_II = A_II;
475:     }
476:   }

478:   /* Get info on subset sizes and sum of all subsets sizes */
479:   max_subset_size = 0;
480:   local_size = 0;
481:   for (i=0;i<sub_schurs->n_subs;i++) {
482:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
483:     max_subset_size = PetscMax(subset_size,max_subset_size);
484:     local_size += subset_size;
485:   }

487:   /* Work arrays for local indices */
488:   extra = 0;
489:   if (sub_schurs->use_mumps && is_I_layer) {
490:     ISGetLocalSize(is_I_layer,&extra);
491:   }
492:   PetscMalloc1(local_size+extra,&all_local_idx_N);
493:   if (extra) {
494:     const PetscInt *idxs;
495:     ISGetIndices(is_I_layer,&idxs);
496:     PetscMemcpy(all_local_idx_N,idxs,extra*sizeof(PetscInt));
497:     ISRestoreIndices(is_I_layer,&idxs);
498:   }
499:   PetscMalloc1(local_size,&nnz);
500:   PetscMalloc1(sub_schurs->n_subs,&auxnum1);
501:   PetscMalloc1(sub_schurs->n_subs,&auxnum2);

503:   /* Get local indices in local numbering */
504:   local_size = 0;
505:   for (i=0;i<sub_schurs->n_subs;i++) {
506:     PetscInt j;
507:     const    PetscInt *idxs;

509:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
510:     ISGetIndices(sub_schurs->is_subs[i],&idxs);
511:     /* start (smallest in global ordering) and multiplicity */
512:     auxnum1[i] = idxs[0];
513:     auxnum2[i] = subset_size;
514:     /* subset indices in local numbering */
515:     PetscMemcpy(all_local_idx_N+local_size+extra,idxs,subset_size*sizeof(PetscInt));
516:     ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
517:     for (j=0;j<subset_size;j++) nnz[local_size+j] = subset_size;
518:     local_size += subset_size;
519:   }

521:   /* allocate extra workspace needed only for GETRI */
522:   if (local_size && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
523:     PetscScalar  lwork,dummyscalar = 0.;
524:     PetscBLASInt dummyint = 0;

526:     B_lwork = -1;
527:     PetscBLASIntCast(local_size,&B_N);
528:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
529:     PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
530:     PetscFPTrapPop();
531:     if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr);
532:     PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
533:     PetscMalloc2(B_lwork,&Bwork,B_N,&pivots);
534:   } else {
535:     Bwork = NULL;
536:     pivots = NULL;
537:   }

539:   /* prepare parallel matrices for summing up properly schurs on subsets */
540:   ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n);
541:   ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets);
542:   ISDestroy(&all_subsets_n);
543:   ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult);
544:   PCBDDCSubsetNumbering(all_subsets,all_subsets_mult,&global_size,&all_subsets_n);
545:   ISDestroy(&all_subsets);
546:   ISDestroy(&all_subsets_mult);
547:   ISGetLocalSize(all_subsets_n,&i);
548:   if (i != local_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %D != %D",i,local_size);
549:   ISLocalToGlobalMappingCreateIS(all_subsets_n,&l2gmap_subsets);
550:   MatCreateIS(comm_n,1,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size,l2gmap_subsets,NULL,&work_mat);
551:   ISLocalToGlobalMappingDestroy(&l2gmap_subsets);
552:   MatCreate(PetscObjectComm((PetscObject)work_mat),&global_schur_subsets);
553:   MatSetSizes(global_schur_subsets,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size);
554:   MatSetType(global_schur_subsets,MATMPIAIJ);

556:   /* subset indices in local boundary numbering */
557:   if (!sub_schurs->is_Ej_all) {
558:     PetscInt *all_local_idx_B;

560:     PetscMalloc1(local_size,&all_local_idx_B);
561:     ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B);
562:     if (subset_size != local_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %D != %D\n",subset_size,local_size);
563:     ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all);
564:   }

566:   /* Local matrix of all local Schur on subsets (transposed) */
567:   if (!sub_schurs->S_Ej_all) {
568:     MatCreate(PETSC_COMM_SELF,&sub_schurs->S_Ej_all);
569:     MatSetSizes(sub_schurs->S_Ej_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);
570:     MatSetType(sub_schurs->S_Ej_all,MATAIJ);
571:     MatSeqAIJSetPreallocation(sub_schurs->S_Ej_all,0,nnz);
572:   }

574:   /* Compute Schur complements explicitly */
575:   F = NULL;
576:   if (!sub_schurs->use_mumps) {
577:     Mat         S_Ej_expl;
578:     PetscScalar *work;
579:     PetscInt    j,*dummy_idx;
580:     PetscBool   Sdense;

582:     PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);
583:     local_size = 0;
584:     for (i=0;i<sub_schurs->n_subs;i++) {
585:       IS  is_subset_B;
586:       Mat AE_EE,AE_IE,AE_EI,S_Ej;

588:       /* subsets in original and boundary numbering */
589:       ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B);
590:       /* EE block */
591:       MatGetSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE);
592:       /* IE block */
593:       MatGetSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE);
594:       /* EI block */
595:       if (sub_schurs->is_hermitian) {
596:         MatCreateTranspose(AE_IE,&AE_EI);
597:       } else {
598:         MatGetSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI);
599:       }
600:       ISDestroy(&is_subset_B);
601:       MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej);
602:       MatDestroy(&AE_EE);
603:       MatDestroy(&AE_IE);
604:       MatDestroy(&AE_EI);
605:       if (AE_II == A_II) { /* we can reuse the same ksp */
606:         KSP ksp;
607:         MatSchurComplementGetKSP(sub_schurs->S,&ksp);
608:         MatSchurComplementSetKSP(S_Ej,ksp);
609:       } else { /* build new ksp object which inherits ksp and pc types from the original one */
610:         KSP       origksp,schurksp;
611:         PC        origpc,schurpc;
612:         KSPType   ksp_type;
613:         PetscInt  n_internal;
614:         PetscBool ispcnone;

616:         MatSchurComplementGetKSP(sub_schurs->S,&origksp);
617:         MatSchurComplementGetKSP(S_Ej,&schurksp);
618:         KSPGetType(origksp,&ksp_type);
619:         KSPSetType(schurksp,ksp_type);
620:         KSPGetPC(schurksp,&schurpc);
621:         KSPGetPC(origksp,&origpc);
622:         PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone);
623:         if (!ispcnone) {
624:           PCType pc_type;
625:           PCGetType(origpc,&pc_type);
626:           PCSetType(schurpc,pc_type);
627:         } else {
628:           PCSetType(schurpc,PCLU);
629:         }
630:         ISGetSize(is_I,&n_internal);
631:         if (n_internal) { /* UMFPACK gives error with 0 sized problems */
632:           MatSolverPackage solver=NULL;
633:           PCFactorGetMatSolverPackage(origpc,(const MatSolverPackage*)&solver);
634:           if (solver) {
635:             PCFactorSetMatSolverPackage(schurpc,solver);
636:           }
637:         }
638:         KSPSetUp(schurksp);
639:       }
640:       ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
641:       MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl);
642:       PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_hermitian,MAT_REUSE_MATRIX,&S_Ej_expl);
643:       PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense);
644:       if (Sdense) {
645:         for (j=0;j<subset_size;j++) {
646:           dummy_idx[j]=local_size+j;
647:         }
648:         MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
649:       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
650:       MatDestroy(&S_Ej);
651:       MatDestroy(&S_Ej_expl);
652:       local_size += subset_size;
653:     }
654:     PetscFree2(dummy_idx,work);
655:     /* free */
656:     ISDestroy(&is_I);
657:     MatDestroy(&AE_II);
658:     PetscFree(all_local_idx_N);
659:   } else {
660:     Mat         A;
661:     IS          is_A_all;
662:     PetscScalar *work,*S_data;
663:     PetscInt    n_I,n_I_all,*dummy_idx,size_schur,size_active_schur,cum,cum2;
664:     PetscBool   mumps_S;

666:     /* get working mat */
667:     n_I = 0;
668:     if (is_I_layer) {
669:       ISGetLocalSize(is_I_layer,&n_I);
670:     }
671:     if (!sub_schurs->is_dir) {
672:       ISCreateGeneral(PETSC_COMM_SELF,local_size+n_I,all_local_idx_N,PETSC_COPY_VALUES,&is_A_all);
673:       size_schur = local_size;
674:     } else {
675:       IS list[2];

677:       ISCreateGeneral(PETSC_COMM_SELF,local_size+n_I,all_local_idx_N,PETSC_COPY_VALUES,&list[0]);
678:       list[1] = sub_schurs->is_dir;
679:       ISConcatenate(PETSC_COMM_SELF,2,list,&is_A_all);
680:       ISDestroy(&list[0]);
681:       ISGetLocalSize(sub_schurs->is_dir,&size_schur);
682:       size_schur += local_size;
683:     }
684:     PetscFree(all_local_idx_N);
685:     size_active_schur = local_size; /* size active schurs does not count any dirichlet dof on the interface */
686:     MatGetSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A);
687:     MatSetOptionsPrefix(A,"sub_schurs_");
688:     MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_hermitian);
689:     MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian);
690:     MatSetOption(A,MAT_SPD,sub_schurs->is_posdef);

692:     if (n_I) {
693:       IS is_schur;

695:       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
696:         MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_CHOLESKY,&F);
697:       } else {
698:         MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
699:       }
700:       /* subsets ordered last */
701:       ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is_schur);
702:       MatFactorSetSchurIS(F,is_schur);
703:       ISDestroy(&is_schur);

705:       /* factorization step */
706:       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
707:         MatCholeskyFactorSymbolic(F,A,NULL,NULL);
708: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
709:         MatMumpsSetIcntl(F,19,2);
710: #endif
711:         MatCholeskyFactorNumeric(F,A,NULL);
712:       } else {
713:         MatLUFactorSymbolic(F,A,NULL,NULL,NULL);
714: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
715:         MatMumpsSetIcntl(F,19,3);
716: #endif
717:         MatLUFactorNumeric(F,A,NULL);
718:       }

720:       /* get explicit Schur Complement computed during numeric factorization */
721:       MatFactorGetSchurComplement(F,&S_all);

723:       /* we can reuse the solvers if we are not using the economic version */
724:       ISGetLocalSize(sub_schurs->is_I,&n_I_all);
725:       reuse_solvers = (PetscBool)(reuse_solvers && (n_I == n_I_all));
726:       mumps_S = PETSC_TRUE;
727:     } else { /* we can't use MUMPS when size_schur == size_of_the_problem */
728:       MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all);
729:       reuse_solvers = PETSC_FALSE;
730:       mumps_S = PETSC_FALSE;
731:     }

733:     if (reuse_solvers) {
734:       Mat              A_II;
735:       Vec              vec1_B;
736:       PCBDDCReuseMumps msolv_ctx;

738:       if (sub_schurs->reuse_mumps) {
739:         PCBDDCReuseMumpsReset(sub_schurs->reuse_mumps);
740:       } else {
741:         PetscNew(&sub_schurs->reuse_mumps);
742:       }
743:       msolv_ctx = sub_schurs->reuse_mumps;
744:       MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,NULL,NULL,NULL);
745:       MatGetSize(A_II,&msolv_ctx->n_I,NULL);
746:       PetscObjectReference((PetscObject)F);
747:       msolv_ctx->F = F;
748:       MatCreateVecs(F,&msolv_ctx->sol,&msolv_ctx->rhs);

750:       /* interior solver */
751:       PCCreate(PETSC_COMM_SELF,&msolv_ctx->interior_solver);
752:       PCSetOperators(msolv_ctx->interior_solver,A_II,A_II);
753:       PCSetType(msolv_ctx->interior_solver,PCSHELL);
754:       PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx);
755:       PCShellSetApply(msolv_ctx->interior_solver,PCBDDCMumpsInteriorSolve);
756:       PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCMumpsInteriorSolveTranspose);

758:       /* correction solver */
759:       PCCreate(PETSC_COMM_SELF,&msolv_ctx->correction_solver);
760:       PCSetOperators(msolv_ctx->correction_solver,A,A);
761:       PCSetType(msolv_ctx->correction_solver,PCSHELL);
762:       PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx);
763:       PCShellSetApply(msolv_ctx->correction_solver,PCBDDCMumpsCorrectionSolve);
764:       PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCMumpsCorrectionSolveTranspose);

766:       /* scatter and vecs for Schur complement solver */
767:       MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B);
768:       MatCreateVecs(sub_schurs->S,&vec1_B,NULL);
769:       ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B);
770:       VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B);
771:       VecDestroy(&vec1_B);
772:       PetscObjectReference((PetscObject)is_A_all);
773:       msolv_ctx->is_R = is_A_all;
774:     }
775:     MatDestroy(&A);
776:     ISDestroy(&is_A_all);

778:     /* Work arrays */
779:     PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);

781:     /* matrices for adaptive selection */
782:     if (compute_Stilda) {
783:       if (!sub_schurs->sum_S_Ej_tilda_all) {
784:         MatCreate(PETSC_COMM_SELF,&sub_schurs->sum_S_Ej_tilda_all);
785:         MatSetSizes(sub_schurs->sum_S_Ej_tilda_all,PETSC_DECIDE,PETSC_DECIDE,size_active_schur,size_active_schur);
786:         MatSetType(sub_schurs->sum_S_Ej_tilda_all,MATAIJ);
787:         MatSeqAIJSetPreallocation(sub_schurs->sum_S_Ej_tilda_all,0,nnz);
788:       }
789:       if (!sub_schurs->sum_S_Ej_inv_all) {
790:         MatCreate(PETSC_COMM_SELF,&sub_schurs->sum_S_Ej_inv_all);
791:         MatSetSizes(sub_schurs->sum_S_Ej_inv_all,PETSC_DECIDE,PETSC_DECIDE,size_active_schur,size_active_schur);
792:         MatSetType(sub_schurs->sum_S_Ej_inv_all,MATAIJ);
793:         MatSeqAIJSetPreallocation(sub_schurs->sum_S_Ej_inv_all,0,nnz);
794:       }
795:     }

797:     /* S_Ej_all */
798:     cum = cum2 = 0;
799:     MatDenseGetArray(S_all,&S_data);
800:     for (i=0;i<sub_schurs->n_subs;i++) {
801:       PetscInt j;

803:       /* get S_E */
804:       ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
805:       if (mumps_S && sub_schurs->is_hermitian) { /* When using MUMPS data I need to expand to upper triangular (column oriented) */
806:         PetscInt k;
807:         for (k=0;k<subset_size;k++) {
808:           for (j=k;j<subset_size;j++) {
809:             work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
810:             work[j*subset_size+k] = S_data[cum2+k*size_schur+j];
811:           }
812:         }
813:       } else { /* copy to workspace */
814:         PetscInt k;
815:         for (k=0;k<subset_size;k++) {
816:           for (j=0;j<subset_size;j++) {
817:             work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
818:           }
819:         }
820:       }
821:       /* insert S_E values */
822:       for (j=0;j<subset_size;j++) dummy_idx[j] = cum+j;
823:       MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);

825:       /* if adaptivity is requested, invert S_E block */
826:       if (compute_Stilda) {
827:         PetscBLASIntCast(subset_size,&B_N);
828:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
829:         if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { /* TODO add sytrf/i for symmetric non hermitian */
830:           PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,work,&B_N,&B_ierr));
831:           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
832:           PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,work,&B_N,&B_ierr));
833:           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
834:         } else {
835:           PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,work,&B_N,pivots,&B_ierr));
836:           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
837:           PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,work,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
838:           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
839:         }
840:         PetscFPTrapPop();
841:         MatSetValues(sub_schurs->sum_S_Ej_inv_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
842:       }
843:       cum += subset_size;
844:       cum2 += subset_size*(size_schur + 1);
845:     }
846:     MatDenseRestoreArray(S_all,&S_data);

848:     if (mumps_S) {
849:       MatFactorRestoreSchurComplement(F,&S_all);
850:     }

852:     if (compute_Stilda && size_active_schur) {
853:       if (sub_schurs->n_subs == 1 && size_schur == size_active_schur) { /* we already computed the inverse */
854:         PetscInt j;
855:         for (j=0;j<size_schur;j++) dummy_idx[j] = j;
856:         MatSetValues(sub_schurs->sum_S_Ej_tilda_all,size_schur,dummy_idx,size_schur,dummy_idx,work,INSERT_VALUES);
857:       } else {
858:         if (mumps_S) { /* use MatFactor calls to invert S */
859:           MatFactorInvertSchurComplement(F);
860:           MatFactorGetSchurComplement(F,&S_all);
861:         } else { /* we need to invert explicitly since we are not using MUMPS for S */
862:           MatDenseGetArray(S_all,&S_data);
863:           PetscBLASIntCast(size_schur,&B_N);
864:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
865:           if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { /* TODO add sytrf/i for symmetric non hermitian */
866:             PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr));
867:             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
868:             PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr));
869:             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
870:           } else {
871:             PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr));
872:             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
873:             PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
874:             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
875:           }
876:           PetscFPTrapPop();
877:           MatDenseRestoreArray(S_all,&S_data);
878:         }
879:         /* S_Ej_tilda_all */
880:         cum = cum2 = 0;
881:         MatDenseGetArray(S_all,&S_data);
882:         for (i=0;i<sub_schurs->n_subs;i++) {
883:           PetscInt j;

885:           ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
886:           /* get (St^-1)_E */
887:           if (sub_schurs->is_hermitian) { /* Here I don't need to expand to upper triangular (column oriented) */
888:             PetscInt k;
889:             for (k=0;k<subset_size;k++) {
890:               for (j=k;j<subset_size;j++) {
891:                 work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
892:               }
893:             }
894:           } else { /* copy to workspace */
895:             PetscInt k;
896:             for (k=0;k<subset_size;k++) {
897:               for (j=0;j<subset_size;j++) {
898:                 work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
899:               }
900:             }
901:           }
902:           for (j=0;j<subset_size;j++) dummy_idx[j] = cum+j;
903:           MatSetValues(sub_schurs->sum_S_Ej_tilda_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
904:           cum += subset_size;
905:           cum2 += subset_size*(size_schur + 1);
906:         }
907:         MatDenseRestoreArray(S_all,&S_data);
908:         if (mumps_S) {
909:           MatFactorRestoreSchurComplement(F,&S_all);
910:         }
911:       }
912:     }
913:     PetscFree2(dummy_idx,work);
914:   }
915:   PetscFree(nnz);
916:   MatDestroy(&F);
917:   ISDestroy(&is_I_layer);
918:   MatDestroy(&S_all);
919:   MatDestroy(&A_BB);
920:   MatDestroy(&A_IB);
921:   MatDestroy(&A_BI);
922:   MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
923:   MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
924:   if (compute_Stilda) {
925:     MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
926:     MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
927:     MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
928:     MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
929:   }

931:   /* Global matrix of all assembled Schur on subsets */
932:   MatISSetLocalMat(work_mat,sub_schurs->S_Ej_all);
933:   MatISSetMPIXAIJPreallocation_Private(work_mat,global_schur_subsets,PETSC_TRUE);
934:   MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);

936:   /* Get local part of (\sum_j S_Ej) */
937:   if (!sub_schurs->sum_S_Ej_all) {
938:     MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_INITIAL_MATRIX,&submats);
939:     sub_schurs->sum_S_Ej_all = submats[0];
940:   } else {
941:     PetscMalloc1(1,&submats);
942:     submats[0] = sub_schurs->sum_S_Ej_all;
943:     MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
944:   }

946:   /* Compute explicitly (\sum_j S_Ej)^-1 (faster scaling during PCApply, needs extra work when doing setup) */
947:   if (faster_deluxe) {
948:     Mat         tmpmat;
949:     PetscScalar *array;
950:     PetscInt    cum;

952:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&array);
953:     cum = 0;
954:     for (i=0;i<sub_schurs->n_subs;i++) {
955:       ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
956:       PetscBLASIntCast(subset_size,&B_N);
957:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
958:       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
959:         PetscInt j,k;

961:         PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
962:         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
963:         PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
964:         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
965:         for (j=0;j<B_N;j++) {
966:           for (k=j+1;k<B_N;k++) {
967:             array[k*B_N+j+cum] = array[j*B_N+k+cum];
968:           }
969:         }
970:       } else {
971:         PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
972:         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
973:         PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
974:         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
975:       }
976:       PetscFPTrapPop();
977:       cum += subset_size*subset_size;
978:     }
979:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&array);
980:     MatMatMult(sub_schurs->S_Ej_all,sub_schurs->sum_S_Ej_all,MAT_INITIAL_MATRIX,1.0,&tmpmat);
981:     MatDestroy(&sub_schurs->S_Ej_all);
982:     MatDestroy(&sub_schurs->sum_S_Ej_all);
983:     sub_schurs->S_Ej_all = tmpmat;
984:   }

986:   /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
987:   if (compute_Stilda) {
988:     MatISSetLocalMat(work_mat,sub_schurs->sum_S_Ej_tilda_all);
989:     MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);
990:     submats[0] = sub_schurs->sum_S_Ej_tilda_all;
991:     MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
992:     MatISSetLocalMat(work_mat,sub_schurs->sum_S_Ej_inv_all);
993:     MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);
994:     submats[0] = sub_schurs->sum_S_Ej_inv_all;
995:     MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
996:   }

998:   /* free workspace */
999:   PetscFree(submats);
1000:   PetscFree2(Bwork,pivots);
1001:   MatDestroy(&global_schur_subsets);
1002:   MatDestroy(&work_mat);
1003:   ISDestroy(&all_subsets_n);
1004:   PetscCommDestroy(&comm_n);
1005:   return(0);
1006: }

1010: PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap)
1011: {
1012:   IS              *faces,*edges,*all_cc,vertices;
1013:   PetscInt        i,n_faces,n_edges,n_all_cc;
1014:   PetscBool       is_sorted;
1015:   PetscErrorCode  ierr;

1018:   ISSorted(is_I,&is_sorted);
1019:   if (!is_sorted) SETERRQ(PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted");
1020:   ISSorted(is_B,&is_sorted);
1021:   if (!is_sorted) SETERRQ(PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted");

1023:   /* reset any previous data */
1024:   PCBDDCSubSchursReset(sub_schurs);

1026:   /* get index sets for faces and edges (already sorted by global ordering) */
1027:   PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices);
1028:   n_all_cc = n_faces+n_edges;
1029:   PetscBTCreate(n_all_cc,&sub_schurs->is_edge);
1030:   PetscMalloc1(n_all_cc,&all_cc);
1031:   for (i=0;i<n_faces;i++) {
1032:     all_cc[i] = faces[i];
1033:   }
1034:   for (i=0;i<n_edges;i++) {
1035:     all_cc[n_faces+i] = edges[i];
1036:     PetscBTSet(sub_schurs->is_edge,n_faces+i);
1037:   }
1038:   PetscFree(faces);
1039:   PetscFree(edges);
1040:   sub_schurs->is_dir = NULL;
1041:   PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir);

1043:   /* Determine if MUMPS can be used */
1044:   sub_schurs->use_mumps = PETSC_FALSE;
1045: #if defined(PETSC_HAVE_MUMPS)
1046:   sub_schurs->use_mumps = PETSC_TRUE;
1047: #endif

1049:   PetscObjectReference((PetscObject)is_I);
1050:   sub_schurs->is_I = is_I;
1051:   PetscObjectReference((PetscObject)is_B);
1052:   sub_schurs->is_B = is_B;
1053:   PetscObjectReference((PetscObject)graph->l2gmap);
1054:   sub_schurs->l2gmap = graph->l2gmap;
1055:   PetscObjectReference((PetscObject)BtoNmap);
1056:   sub_schurs->BtoNmap = BtoNmap;
1057:   sub_schurs->n_subs = n_all_cc;
1058:   sub_schurs->is_subs = all_cc;
1059:   if (!sub_schurs->use_mumps) { /* sort by local ordering mumps is not present */
1060:     for (i=0;i<sub_schurs->n_subs;i++) {
1061:       ISSort(sub_schurs->is_subs[i]);
1062:     }
1063:   }
1064:   sub_schurs->is_vertices = vertices;
1065:   sub_schurs->S_Ej_all = NULL;
1066:   sub_schurs->sum_S_Ej_all = NULL;
1067:   sub_schurs->sum_S_Ej_inv_all = NULL;
1068:   sub_schurs->sum_S_Ej_tilda_all = NULL;
1069:   sub_schurs->is_Ej_all = NULL;
1070:   return(0);
1071: }

1075: PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
1076: {
1077:   PCBDDCSubSchurs schurs_ctx;
1078:   PetscErrorCode  ierr;

1081:   PetscNew(&schurs_ctx);
1082:   schurs_ctx->n_subs = 0;
1083:   *sub_schurs = schurs_ctx;
1084:   return(0);
1085: }

1089: PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs)
1090: {

1094:   PCBDDCSubSchursReset(*sub_schurs);
1095:   PetscFree(*sub_schurs);
1096:   return(0);
1097: }

1101: PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
1102: {
1103:   PetscInt       i;

1107:   MatDestroy(&sub_schurs->A);
1108:   MatDestroy(&sub_schurs->S);
1109:   ISDestroy(&sub_schurs->is_I);
1110:   ISDestroy(&sub_schurs->is_B);
1111:   ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);
1112:   ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);
1113:   MatDestroy(&sub_schurs->S_Ej_all);
1114:   MatDestroy(&sub_schurs->sum_S_Ej_all);
1115:   MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
1116:   MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
1117:   ISDestroy(&sub_schurs->is_Ej_all);
1118:   ISDestroy(&sub_schurs->is_vertices);
1119:   ISDestroy(&sub_schurs->is_dir);
1120:   PetscBTDestroy(&sub_schurs->is_edge);
1121:   for (i=0;i<sub_schurs->n_subs;i++) {
1122:     ISDestroy(&sub_schurs->is_subs[i]);
1123:   }
1124:   if (sub_schurs->n_subs) {
1125:     PetscFree(sub_schurs->is_subs);
1126:   }
1127:   if (sub_schurs->reuse_mumps) {
1128:     PCBDDCReuseMumpsReset(sub_schurs->reuse_mumps);
1129:   }
1130:   PetscFree(sub_schurs->reuse_mumps);
1131:   sub_schurs->n_subs = 0;
1132:   return(0);
1133: }

1137: PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
1138: {
1139:   PetscInt       i,j,n;

1143:   n = 0;
1144:   for (i=-n_prev;i<0;i++) {
1145:     PetscInt start_dof = queue_tip[i];
1146:     for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
1147:       PetscInt dof = adjncy[j];
1148:       if (!PetscBTLookup(touched,dof)) {
1149:         PetscBTSet(touched,dof);
1150:         queue_tip[n] = dof;
1151:         n++;
1152:       }
1153:     }
1154:   }
1155:   *n_added = n;
1156:   return(0);
1157: }