Actual source code: bddcschurs.c
petsc-3.7.3 2016-08-01
1: #include <../src/ksp/pc/impls/bddc/bddc.h>
2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3: #include <petscblaslapack.h>
5: PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
6: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
7: static PetscErrorCode PCBDDCMumpsInteriorSolve(PC,Vec,Vec);
8: static PetscErrorCode PCBDDCMumpsCorrectionSolve(PC,Vec,Vec);
12: static PetscErrorCode PCBDDCMumpsCorrectionSolve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose)
13: {
14: PCBDDCReuseMumps ctx;
15: #if defined(PETSC_HAVE_MUMPS)
16: PetscInt ival;
17: #endif
18: PetscErrorCode ierr;
21: PCShellGetContext(pc,(void **)&ctx);
22: #if defined(PETSC_HAVE_MUMPS)
23: MatMumpsGetIcntl(ctx->F,26,&ival);
24: MatMumpsSetIcntl(ctx->F,26,-1);
25: #endif
26: if (transpose) {
27: MatSolveTranspose(ctx->F,rhs,sol);
28: } else {
29: MatSolve(ctx->F,rhs,sol);
30: }
31: #if defined(PETSC_HAVE_MUMPS)
32: MatMumpsSetIcntl(ctx->F,26,ival);
33: #endif
34: return(0);
35: }
39: static PetscErrorCode PCBDDCMumpsCorrectionSolve(PC pc, Vec rhs, Vec sol)
40: {
41: PetscErrorCode ierr;
44: PCBDDCMumpsCorrectionSolve_Private(pc,rhs,sol,PETSC_FALSE);
45: return(0);
46: }
50: static PetscErrorCode PCBDDCMumpsCorrectionSolveTranspose(PC pc, Vec rhs, Vec sol)
51: {
52: PetscErrorCode ierr;
55: PCBDDCMumpsCorrectionSolve_Private(pc,rhs,sol,PETSC_TRUE);
56: return(0);
57: }
61: static PetscErrorCode PCBDDCReuseMumpsReset(PCBDDCReuseMumps reuse)
62: {
66: MatDestroy(&reuse->F);
67: VecDestroy(&reuse->sol);
68: VecDestroy(&reuse->rhs);
69: PCDestroy(&reuse->interior_solver);
70: PCDestroy(&reuse->correction_solver);
71: ISDestroy(&reuse->is_R);
72: ISDestroy(&reuse->is_B);
73: VecScatterDestroy(&reuse->correction_scatter_B);
74: VecDestroy(&reuse->sol_B);
75: VecDestroy(&reuse->rhs_B);
76: return(0);
77: }
81: static PetscErrorCode PCBDDCMumpsInteriorSolve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose)
82: {
83: PCBDDCReuseMumps ctx;
84: PetscScalar *array,*array_mumps;
85: #if defined(PETSC_HAVE_MUMPS)
86: PetscInt ival;
87: #endif
88: PetscErrorCode ierr;
91: PCShellGetContext(pc,(void **)&ctx);
92: #if defined(PETSC_HAVE_MUMPS)
93: MatMumpsGetIcntl(ctx->F,26,&ival);
94: MatMumpsSetIcntl(ctx->F,26,0);
95: #endif
96: /* copy rhs into the factored matrix workspace (can it be avoided? MatSolve_MUMPS performs another copy b->x internally) */
97: VecGetArrayRead(rhs,(const PetscScalar**)&array);
98: VecGetArray(ctx->rhs,&array_mumps);
99: PetscMemcpy(array_mumps,array,ctx->n_I*sizeof(PetscScalar));
100: VecRestoreArray(ctx->rhs,&array_mumps);
101: VecRestoreArrayRead(rhs,(const PetscScalar**)&array);
103: if (transpose) {
104: MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol);
105: } else {
106: MatSolve(ctx->F,ctx->rhs,ctx->sol);
107: }
109: /* copy the data back to the caller's workspace */
110: VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_mumps);
111: VecGetArray(sol,&array);
112: PetscMemcpy(array,array_mumps,ctx->n_I*sizeof(PetscScalar));
113: VecRestoreArray(sol,&array);
114: VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_mumps);
115: #if defined(PETSC_HAVE_MUMPS)
116: MatMumpsSetIcntl(ctx->F,26,ival);
117: #endif
118: return(0);
119: }
123: static PetscErrorCode PCBDDCMumpsInteriorSolve(PC pc, Vec rhs, Vec sol)
124: {
125: PetscErrorCode ierr;
128: PCBDDCMumpsInteriorSolve_Private(pc,rhs,sol,PETSC_FALSE);
129: return(0);
130: }
134: static PetscErrorCode PCBDDCMumpsInteriorSolveTranspose(PC pc, Vec rhs, Vec sol)
135: {
136: PetscErrorCode ierr;
139: PCBDDCMumpsInteriorSolve_Private(pc,rhs,sol,PETSC_TRUE);
140: return(0);
141: }
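
/*
   A minimal illustrative sketch (not part of the original file): how the interior-solve
   callbacks above are meant to be attached to a PCSHELL. It mirrors what
   PCBDDCSubSchursSetUp does further below; the function name is hypothetical, A_II is the
   interior block, and msolv_ctx is a PCBDDCReuseMumps context assumed to be already
   filled in by the caller (F, sol, rhs, n_I).
*/
static PetscErrorCode ExampleCreateInteriorShellPC(Mat A_II, PCBDDCReuseMumps msolv_ctx, PC *pc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PCCreate(PETSC_COMM_SELF,pc);CHKERRQ(ierr);
  ierr = PCSetOperators(*pc,A_II,A_II);CHKERRQ(ierr);
  ierr = PCSetType(*pc,PCSHELL);CHKERRQ(ierr);
  ierr = PCShellSetContext(*pc,msolv_ctx);CHKERRQ(ierr);
  ierr = PCShellSetApply(*pc,PCBDDCMumpsInteriorSolve);CHKERRQ(ierr);
  ierr = PCShellSetApplyTranspose(*pc,PCBDDCMumpsInteriorSolveTranspose);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}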
145: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
146: {
147: Mat B, C, D, Bd, Cd, AinvBd;
148: KSP ksp;
149: PC pc;
150: PetscBool isLU, isILU, isCHOL, Bdense, Cdense;
151: PetscReal fill = 2.0;
152: PetscInt n_I;
153: PetscMPIInt size;
157: MPI_Comm_size(PetscObjectComm((PetscObject)M),&size);
158: if (size != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices");
159: if (reuse == MAT_REUSE_MATRIX) {
160: PetscBool Sdense;
162: PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense);
163: if (!Sdense) SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should be dense");
164: }
165: MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D);
166: MatSchurComplementGetKSP(M, &ksp);
167: KSPGetPC(ksp, &pc);
168: PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU);
169: PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU);
170: PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL);
171: PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense);
172: PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense);
173: MatGetSize(B,&n_I,NULL);
174: if (n_I) {
175: if (!Bdense) {
176: MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd);
177: } else {
178: Bd = B;
179: }
181: if (isLU || isILU || isCHOL) {
182: Mat fact;
183: KSPSetUp(ksp);
184: PCFactorGetMatrix(pc, &fact);
185: MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
186: MatMatSolve(fact, Bd, AinvBd);
187: } else {
188: PetscBool ex = PETSC_TRUE;
190: if (ex) {
191: Mat Ainvd;
193: PCComputeExplicitOperator(pc, &Ainvd);
194: MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd);
195: MatDestroy(&Ainvd);
196: } else {
197: Vec sol,rhs;
198: PetscScalar *arrayrhs,*arraysol;
199: PetscInt i,nrhs,n;
201: MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
202: MatGetSize(Bd,&n,&nrhs);
203: MatDenseGetArray(Bd,&arrayrhs);
204: MatDenseGetArray(AinvBd,&arraysol);
205: KSPGetSolution(ksp,&sol);
206: KSPGetRhs(ksp,&rhs);
207: for (i=0;i<nrhs;i++) {
208: VecPlaceArray(rhs,arrayrhs+i*n);
209: VecPlaceArray(sol,arraysol+i*n);
210: KSPSolve(ksp,rhs,sol);
211: VecResetArray(rhs);
212: VecResetArray(sol);
213: }
214: MatDenseRestoreArray(Bd,&arrayrhs);
215: MatDenseRestoreArray(AinvBd,&arraysol);
216: }
217: }
218: if (!Bdense && !issym) {
219: MatDestroy(&Bd);
220: }
222: if (!issym) {
223: if (!Cdense) {
224: MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd);
225: } else {
226: Cd = C;
227: }
228: MatMatMult(Cd, AinvBd, reuse, fill, S);
229: if (!Cdense) {
230: MatDestroy(&Cd);
231: }
232: } else {
233: MatTransposeMatMult(Bd, AinvBd, reuse, fill, S);
234: if (!Bdense) {
235: MatDestroy(&Bd);
236: }
237: }
238: MatDestroy(&AinvBd);
239: }
241: if (D) {
242: Mat Dd;
243: PetscBool Ddense;
245: PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense);
246: if (!Ddense) {
247: MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd);
248: } else {
249: Dd = D;
250: }
251: if (n_I) {
252: MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN);
253: } else {
254: if (reuse == MAT_INITIAL_MATRIX) {
255: MatDuplicate(Dd,MAT_COPY_VALUES,S);
256: } else {
257: MatCopy(Dd,*S,SAME_NONZERO_PATTERN);
258: }
259: }
260: if (!Ddense) {
261: MatDestroy(&Dd);
262: }
263: } else {
264: MatScale(*S,-1.0);
265: }
266: return(0);
267: }
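
/*
   A minimal illustrative sketch (not part of the original file): using the helper above to
   form the dense Schur complement S = A_BB - A_BI * A_II^{-1} * A_IB of a 2x2 block
   splitting. The function name is hypothetical and the four blocks are assumed to be
   already assembled sequential matrices supplied by the caller.
*/
static PetscErrorCode ExampleExplicitSchur(Mat A_II, Mat A_IB, Mat A_BI, Mat A_BB, Mat *S_expl)
{
  Mat            S;   /* implicit Schur complement (MATSCHURCOMPLEMENT) */
  KSP            ksp; /* inner solver for A_II, owned by S */
  PC             pc;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateSchurComplement(A_II,A_II,A_IB,A_BI,A_BB,&S);CHKERRQ(ierr);
  ierr = MatSchurComplementGetKSP(S,&ksp);CHKERRQ(ierr);
  ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
  ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
  ierr = PCSetType(pc,PCLU);CHKERRQ(ierr); /* a factorization PC lets the helper use MatMatSolve */
  /* issym = PETSC_FALSE: the product A_BI * (A_II^{-1} A_IB) is formed explicitly */
  ierr = PCBDDCComputeExplicitSchur(S,PETSC_FALSE,MAT_INITIAL_MATRIX,S_expl);CHKERRQ(ierr);
  ierr = MatDestroy(&S);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}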
271: PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, PetscBool faster_deluxe, PetscBool compute_Stilda, PetscBool reuse_solvers)
272: {
273: Mat F,A_II,A_IB,A_BI,A_BB,AE_II;
274: Mat S_all;
275: Mat global_schur_subsets,work_mat,*submats;
276: ISLocalToGlobalMapping l2gmap_subsets;
277: IS is_I,is_I_layer;
278: IS all_subsets,all_subsets_mult,all_subsets_n;
279: PetscInt *nnz,*all_local_idx_N;
280: PetscInt *auxnum1,*auxnum2;
281: PetscInt i,subset_size,max_subset_size;
282: PetscInt extra,local_size,global_size;
283: PetscBLASInt B_N,B_ierr,B_lwork,*pivots;
284: PetscScalar *Bwork;
285: PetscSubcomm subcomm;
286: PetscMPIInt color,rank;
287: MPI_Comm comm_n;
288: PetscErrorCode ierr;
291: /* update info in sub_schurs */
292: MatDestroy(&sub_schurs->A);
293: MatDestroy(&sub_schurs->S);
294: sub_schurs->is_hermitian = PETSC_FALSE;
295: sub_schurs->is_posdef = PETSC_FALSE;
296: if (Ain) {
297: PetscBool isseqaij;
298: /* determine if we are dealing with hermitian positive definite problems */
299: #if !defined(PETSC_USE_COMPLEX)
300: if (Ain->symmetric_set) {
301: sub_schurs->is_hermitian = Ain->symmetric;
302: }
303: #else
304: if (Ain->hermitian_set) {
305: sub_schurs->is_hermitian = Ain->hermitian;
306: }
307: #endif
308: if (Ain->spd_set) {
309: sub_schurs->is_posdef = Ain->spd;
310: }
312: /* if adaptive selection is requested, check hermitianity and positive definiteness numerically when they are not already known */
313: PetscObjectTypeCompare((PetscObject)Ain,MATSEQAIJ,&isseqaij);
314: if (compute_Stilda && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
315: PetscInt lsize;
317: MatGetSize(Ain,&lsize,NULL);
318: if (lsize) {
319: PetscScalar val;
320: PetscReal norm;
321: Vec vec1,vec2,vec3;
323: MatCreateVecs(Ain,&vec1,&vec2);
324: VecDuplicate(vec1,&vec3);
325: VecSetRandom(vec1,NULL);
326: MatMult(Ain,vec1,vec2);
327: #if !defined(PETSC_USE_COMPLEX)
328: MatMultTranspose(Ain,vec1,vec3);
329: #else
330: MatMultHermitianTranspose(Ain,vec1,vec3);
331: #endif
332: VecAXPY(vec3,-1.0,vec2);
333: VecNorm(vec3,NORM_INFINITY,&norm);
334: if (norm > PetscSqrtReal(PETSC_SMALL)) {
335: sub_schurs->is_hermitian = PETSC_FALSE;
336: } else {
337: sub_schurs->is_hermitian = PETSC_TRUE;
338: }
339: VecDot(vec1,vec2,&val);
340: if (PetscRealPart(val) > 0. && PetscAbsReal(PetscImaginaryPart(val)) < PETSC_SMALL) sub_schurs->is_posdef = PETSC_TRUE;
341: VecDestroy(&vec1);
342: VecDestroy(&vec2);
343: VecDestroy(&vec3);
344: } else {
345: sub_schurs->is_hermitian = PETSC_TRUE;
346: sub_schurs->is_posdef = PETSC_TRUE;
347: }
348: }
349: if (isseqaij) {
350: PetscObjectReference((PetscObject)Ain);
351: sub_schurs->A = Ain;
352: } else {
353: MatConvert(Ain,MATSEQAIJ,MAT_INITIAL_MATRIX,&sub_schurs->A);
354: }
355: }
356: if (compute_Stilda && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"General matrix pencils are not currently supported (%D,%D)",sub_schurs->is_hermitian,sub_schurs->is_posdef);
358: PetscObjectReference((PetscObject)Sin);
359: sub_schurs->S = Sin;
360: if (sub_schurs->use_mumps) {
361: sub_schurs->use_mumps = (PetscBool)(!!sub_schurs->A);
362: }
364: /* preliminary checks */
365: if (!sub_schurs->use_mumps && compute_Stilda) SETERRQ(PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
367: /* restrict work on active processes */
368: color = 0;
369: if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
370: MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank);
371: PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm);
372: PetscSubcommSetNumber(subcomm,2);
373: PetscSubcommSetTypeGeneral(subcomm,color,rank);
374: PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL);
375: PetscSubcommDestroy(&subcomm);
376: if (!sub_schurs->n_subs) {
377: PetscCommDestroy(&comm_n);
378: return(0);
379: }
380: /* PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL); */
382: /* get Schur complement matrices */
383: if (!sub_schurs->use_mumps) {
384: Mat tA_IB,tA_BI,tA_BB;
385: PetscBool isseqsbaij;
386: MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB);
387: PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij);
388: if (isseqsbaij) {
389: MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB);
390: MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB);
391: MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI);
392: } else {
393: PetscObjectReference((PetscObject)tA_BB);
394: A_BB = tA_BB;
395: PetscObjectReference((PetscObject)tA_IB);
396: A_IB = tA_IB;
397: PetscObjectReference((PetscObject)tA_BI);
398: A_BI = tA_BI;
399: }
400: } else {
401: A_II = NULL;
402: A_IB = NULL;
403: A_BI = NULL;
404: A_BB = NULL;
405: }
406: S_all = NULL;
408: /* determine interior problems */
409: ISGetLocalSize(sub_schurs->is_I,&i);
410: if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
411: PetscBT touched;
412: const PetscInt* idx_B;
413: PetscInt n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;
415: if (!xadj || !adjncy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency");
416: /* get sizes */
417: ISGetLocalSize(sub_schurs->is_I,&n_I);
418: ISGetLocalSize(sub_schurs->is_B,&n_B);
420: PetscMalloc1(n_I+n_B,&local_numbering);
421: PetscBTCreate(n_I+n_B,&touched);
422: PetscBTMemzero(n_I+n_B,touched);
424: /* all boundary dofs must be skipped when adding layers */
425: ISGetIndices(sub_schurs->is_B,&idx_B);
426: for (j=0;j<n_B;j++) {
427: PetscBTSet(touched,idx_B[j]);
428: }
429: PetscMemcpy(local_numbering,idx_B,n_B*sizeof(PetscInt));
430: ISRestoreIndices(sub_schurs->is_B,&idx_B);
432: /* add prescribed number of layers of dofs */
433: n_local_dofs = n_B;
434: n_prev_added = n_B;
435: for (layer=0;layer<nlayers;layer++) {
436: PetscInt n_added;
437: if (n_local_dofs == n_I+n_B) break;
438: if (n_local_dofs > n_I+n_B) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %D. Out of bound access (%D > %D)",layer,n_local_dofs,n_I+n_B);
439: PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);
440: n_prev_added = n_added;
441: n_local_dofs += n_added;
442: if (!n_added) break;
443: }
444: PetscBTDestroy(&touched);
446: /* IS for I layer dofs in original numbering */
447: ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer);
448: PetscFree(local_numbering);
449: ISSort(is_I_layer);
450: /* IS for I layer dofs in I numbering */
451: if (!sub_schurs->use_mumps) {
452: ISLocalToGlobalMapping ItoNmap;
453: ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap);
454: ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I);
455: ISLocalToGlobalMappingDestroy(&ItoNmap);
457: /* II block */
458: MatGetSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);
459: }
460: } else {
461: PetscInt n_I;
463: /* IS for I dofs in original numbering */
464: PetscObjectReference((PetscObject)sub_schurs->is_I);
465: is_I_layer = sub_schurs->is_I;
467: /* IS for I dofs in I numbering (strided 1) */
468: if (!sub_schurs->use_mumps) {
469: ISGetSize(sub_schurs->is_I,&n_I);
470: ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I);
472: /* II block is the same */
473: PetscObjectReference((PetscObject)A_II);
474: AE_II = A_II;
475: }
476: }
478: /* Get info on subset sizes and sum of all subsets sizes */
479: max_subset_size = 0;
480: local_size = 0;
481: for (i=0;i<sub_schurs->n_subs;i++) {
482: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
483: max_subset_size = PetscMax(subset_size,max_subset_size);
484: local_size += subset_size;
485: }
487: /* Work arrays for local indices */
488: extra = 0;
489: if (sub_schurs->use_mumps && is_I_layer) {
490: ISGetLocalSize(is_I_layer,&extra);
491: }
492: PetscMalloc1(local_size+extra,&all_local_idx_N);
493: if (extra) {
494: const PetscInt *idxs;
495: ISGetIndices(is_I_layer,&idxs);
496: PetscMemcpy(all_local_idx_N,idxs,extra*sizeof(PetscInt));
497: ISRestoreIndices(is_I_layer,&idxs);
498: }
499: PetscMalloc1(local_size,&nnz);
500: PetscMalloc1(sub_schurs->n_subs,&auxnum1);
501: PetscMalloc1(sub_schurs->n_subs,&auxnum2);
503: /* Get local indices in local numbering */
504: local_size = 0;
505: for (i=0;i<sub_schurs->n_subs;i++) {
506: PetscInt j;
507: const PetscInt *idxs;
509: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
510: ISGetIndices(sub_schurs->is_subs[i],&idxs);
511: /* start (smallest in global ordering) and multiplicity */
512: auxnum1[i] = idxs[0];
513: auxnum2[i] = subset_size;
514: /* subset indices in local numbering */
515: PetscMemcpy(all_local_idx_N+local_size+extra,idxs,subset_size*sizeof(PetscInt));
516: ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
517: for (j=0;j<subset_size;j++) nnz[local_size+j] = subset_size;
518: local_size += subset_size;
519: }
521: /* allocate extra workspace needed only for GETRI */
522: if (local_size && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
523: PetscScalar lwork,dummyscalar = 0.;
524: PetscBLASInt dummyint = 0;
526: B_lwork = -1;
527: PetscBLASIntCast(local_size,&B_N);
528: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
529: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
530: PetscFPTrapPop();
531: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr);
532: PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
533: PetscMalloc2(B_lwork,&Bwork,B_N,&pivots);
534: } else {
535: Bwork = NULL;
536: pivots = NULL;
537: }
539: /* prepare parallel matrices for properly summing up the Schur complements on the subsets */
540: ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n);
541: ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets);
542: ISDestroy(&all_subsets_n);
543: ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult);
544: PCBDDCSubsetNumbering(all_subsets,all_subsets_mult,&global_size,&all_subsets_n);
545: ISDestroy(&all_subsets);
546: ISDestroy(&all_subsets_mult);
547: ISGetLocalSize(all_subsets_n,&i);
548: if (i != local_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %D != %D",i,local_size);
549: ISLocalToGlobalMappingCreateIS(all_subsets_n,&l2gmap_subsets);
550: MatCreateIS(comm_n,1,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size,l2gmap_subsets,NULL,&work_mat);
551: ISLocalToGlobalMappingDestroy(&l2gmap_subsets);
552: MatCreate(PetscObjectComm((PetscObject)work_mat),&global_schur_subsets);
553: MatSetSizes(global_schur_subsets,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size);
554: MatSetType(global_schur_subsets,MATMPIAIJ);
556: /* subset indices in local boundary numbering */
557: if (!sub_schurs->is_Ej_all) {
558: PetscInt *all_local_idx_B;
560: PetscMalloc1(local_size,&all_local_idx_B);
561: ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B);
562: if (subset_size != local_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %D != %D\n",subset_size,local_size);
563: ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all);
564: }
566: /* Local matrix holding all the local Schur complements on the subsets (transposed) */
567: if (!sub_schurs->S_Ej_all) {
568: MatCreate(PETSC_COMM_SELF,&sub_schurs->S_Ej_all);
569: MatSetSizes(sub_schurs->S_Ej_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);
570: MatSetType(sub_schurs->S_Ej_all,MATAIJ);
571: MatSeqAIJSetPreallocation(sub_schurs->S_Ej_all,0,nnz);
572: }
574: /* Compute Schur complements explicitly */
575: F = NULL;
576: if (!sub_schurs->use_mumps) {
577: Mat S_Ej_expl;
578: PetscScalar *work;
579: PetscInt j,*dummy_idx;
580: PetscBool Sdense;
582: PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);
583: local_size = 0;
584: for (i=0;i<sub_schurs->n_subs;i++) {
585: IS is_subset_B;
586: Mat AE_EE,AE_IE,AE_EI,S_Ej;
588: /* subsets in original and boundary numbering */
589: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B);
590: /* EE block */
591: MatGetSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE);
592: /* IE block */
593: MatGetSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE);
594: /* EI block */
595: if (sub_schurs->is_hermitian) {
596: MatCreateTranspose(AE_IE,&AE_EI);
597: } else {
598: MatGetSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI);
599: }
600: ISDestroy(&is_subset_B);
601: MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej);
602: MatDestroy(&AE_EE);
603: MatDestroy(&AE_IE);
604: MatDestroy(&AE_EI);
605: if (AE_II == A_II) { /* we can reuse the same ksp */
606: KSP ksp;
607: MatSchurComplementGetKSP(sub_schurs->S,&ksp);
608: MatSchurComplementSetKSP(S_Ej,ksp);
609: } else { /* build new ksp object which inherits ksp and pc types from the original one */
610: KSP origksp,schurksp;
611: PC origpc,schurpc;
612: KSPType ksp_type;
613: PetscInt n_internal;
614: PetscBool ispcnone;
616: MatSchurComplementGetKSP(sub_schurs->S,&origksp);
617: MatSchurComplementGetKSP(S_Ej,&schurksp);
618: KSPGetType(origksp,&ksp_type);
619: KSPSetType(schurksp,ksp_type);
620: KSPGetPC(schurksp,&schurpc);
621: KSPGetPC(origksp,&origpc);
622: PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone);
623: if (!ispcnone) {
624: PCType pc_type;
625: PCGetType(origpc,&pc_type);
626: PCSetType(schurpc,pc_type);
627: } else {
628: PCSetType(schurpc,PCLU);
629: }
630: ISGetSize(is_I,&n_internal);
631: if (n_internal) { /* UMFPACK gives error with 0 sized problems */
632: MatSolverPackage solver=NULL;
633: PCFactorGetMatSolverPackage(origpc,(const MatSolverPackage*)&solver);
634: if (solver) {
635: PCFactorSetMatSolverPackage(schurpc,solver);
636: }
637: }
638: KSPSetUp(schurksp);
639: }
640: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
641: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl);
642: PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_hermitian,MAT_REUSE_MATRIX,&S_Ej_expl);
643: PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense);
644: if (Sdense) {
645: for (j=0;j<subset_size;j++) {
646: dummy_idx[j]=local_size+j;
647: }
648: MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
649: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
650: MatDestroy(&S_Ej);
651: MatDestroy(&S_Ej_expl);
652: local_size += subset_size;
653: }
654: PetscFree2(dummy_idx,work);
655: /* free */
656: ISDestroy(&is_I);
657: MatDestroy(&AE_II);
658: PetscFree(all_local_idx_N);
659: } else {
660: Mat A;
661: IS is_A_all;
662: PetscScalar *work,*S_data;
663: PetscInt n_I,n_I_all,*dummy_idx,size_schur,size_active_schur,cum,cum2;
664: PetscBool mumps_S;
666: /* get working mat */
667: n_I = 0;
668: if (is_I_layer) {
669: ISGetLocalSize(is_I_layer,&n_I);
670: }
671: if (!sub_schurs->is_dir) {
672: ISCreateGeneral(PETSC_COMM_SELF,local_size+n_I,all_local_idx_N,PETSC_COPY_VALUES,&is_A_all);
673: size_schur = local_size;
674: } else {
675: IS list[2];
677: ISCreateGeneral(PETSC_COMM_SELF,local_size+n_I,all_local_idx_N,PETSC_COPY_VALUES,&list[0]);
678: list[1] = sub_schurs->is_dir;
679: ISConcatenate(PETSC_COMM_SELF,2,list,&is_A_all);
680: ISDestroy(&list[0]);
681: ISGetLocalSize(sub_schurs->is_dir,&size_schur);
682: size_schur += local_size;
683: }
684: PetscFree(all_local_idx_N);
685: size_active_schur = local_size; /* the size of the active Schur complements does not count any Dirichlet dof on the interface */
686: MatGetSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A);
687: MatSetOptionsPrefix(A,"sub_schurs_");
688: MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_hermitian);
689: MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian);
690: MatSetOption(A,MAT_SPD,sub_schurs->is_posdef);
692: if (n_I) {
693: IS is_schur;
695: if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
696: MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_CHOLESKY,&F);
697: } else {
698: MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
699: }
700: /* subsets ordered last */
701: ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is_schur);
702: MatFactorSetSchurIS(F,is_schur);
703: ISDestroy(&is_schur);
705: /* factorization step */
706: if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
707: MatCholeskyFactorSymbolic(F,A,NULL,NULL);
708: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
709: MatMumpsSetIcntl(F,19,2);
710: #endif
711: MatCholeskyFactorNumeric(F,A,NULL);
712: } else {
713: MatLUFactorSymbolic(F,A,NULL,NULL,NULL);
714: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
715: MatMumpsSetIcntl(F,19,3);
716: #endif
717: MatLUFactorNumeric(F,A,NULL);
718: }
720: /* get explicit Schur Complement computed during numeric factorization */
721: MatFactorGetSchurComplement(F,&S_all);
723: /* we can reuse the solvers if we are not using the economic version */
724: ISGetLocalSize(sub_schurs->is_I,&n_I_all);
725: reuse_solvers = (PetscBool)(reuse_solvers && (n_I == n_I_all));
726: mumps_S = PETSC_TRUE;
727: } else { /* we can't use MUMPS when size_schur == size_of_the_problem */
728: MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all);
729: reuse_solvers = PETSC_FALSE;
730: mumps_S = PETSC_FALSE;
731: }
733: if (reuse_solvers) {
734: Mat A_II;
735: Vec vec1_B;
736: PCBDDCReuseMumps msolv_ctx;
738: if (sub_schurs->reuse_mumps) {
739: PCBDDCReuseMumpsReset(sub_schurs->reuse_mumps);
740: } else {
741: PetscNew(&sub_schurs->reuse_mumps);
742: }
743: msolv_ctx = sub_schurs->reuse_mumps;
744: MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,NULL,NULL,NULL);
745: MatGetSize(A_II,&msolv_ctx->n_I,NULL);
746: PetscObjectReference((PetscObject)F);
747: msolv_ctx->F = F;
748: MatCreateVecs(F,&msolv_ctx->sol,&msolv_ctx->rhs);
750: /* interior solver */
751: PCCreate(PETSC_COMM_SELF,&msolv_ctx->interior_solver);
752: PCSetOperators(msolv_ctx->interior_solver,A_II,A_II);
753: PCSetType(msolv_ctx->interior_solver,PCSHELL);
754: PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx);
755: PCShellSetApply(msolv_ctx->interior_solver,PCBDDCMumpsInteriorSolve);
756: PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCMumpsInteriorSolveTranspose);
758: /* correction solver */
759: PCCreate(PETSC_COMM_SELF,&msolv_ctx->correction_solver);
760: PCSetOperators(msolv_ctx->correction_solver,A,A);
761: PCSetType(msolv_ctx->correction_solver,PCSHELL);
762: PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx);
763: PCShellSetApply(msolv_ctx->correction_solver,PCBDDCMumpsCorrectionSolve);
764: PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCMumpsCorrectionSolveTranspose);
766: /* scatter and vecs for Schur complement solver */
767: MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B);
768: MatCreateVecs(sub_schurs->S,&vec1_B,NULL);
769: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B);
770: VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B);
771: VecDestroy(&vec1_B);
772: PetscObjectReference((PetscObject)is_A_all);
773: msolv_ctx->is_R = is_A_all;
774: }
775: MatDestroy(&A);
776: ISDestroy(&is_A_all);
778: /* Work arrays */
779: PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);
781: /* matrices for adaptive selection */
782: if (compute_Stilda) {
783: if (!sub_schurs->sum_S_Ej_tilda_all) {
784: MatCreate(PETSC_COMM_SELF,&sub_schurs->sum_S_Ej_tilda_all);
785: MatSetSizes(sub_schurs->sum_S_Ej_tilda_all,PETSC_DECIDE,PETSC_DECIDE,size_active_schur,size_active_schur);
786: MatSetType(sub_schurs->sum_S_Ej_tilda_all,MATAIJ);
787: MatSeqAIJSetPreallocation(sub_schurs->sum_S_Ej_tilda_all,0,nnz);
788: }
789: if (!sub_schurs->sum_S_Ej_inv_all) {
790: MatCreate(PETSC_COMM_SELF,&sub_schurs->sum_S_Ej_inv_all);
791: MatSetSizes(sub_schurs->sum_S_Ej_inv_all,PETSC_DECIDE,PETSC_DECIDE,size_active_schur,size_active_schur);
792: MatSetType(sub_schurs->sum_S_Ej_inv_all,MATAIJ);
793: MatSeqAIJSetPreallocation(sub_schurs->sum_S_Ej_inv_all,0,nnz);
794: }
795: }
797: /* S_Ej_all */
798: cum = cum2 = 0;
799: MatDenseGetArray(S_all,&S_data);
800: for (i=0;i<sub_schurs->n_subs;i++) {
801: PetscInt j;
803: /* get S_E */
804: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
805: if (mumps_S && sub_schurs->is_hermitian) { /* MUMPS fills only the lower triangle (column oriented), so expand it to the full matrix */
806: PetscInt k;
807: for (k=0;k<subset_size;k++) {
808: for (j=k;j<subset_size;j++) {
809: work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
810: work[j*subset_size+k] = S_data[cum2+k*size_schur+j];
811: }
812: }
813: } else { /* copy to workspace */
814: PetscInt k;
815: for (k=0;k<subset_size;k++) {
816: for (j=0;j<subset_size;j++) {
817: work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
818: }
819: }
820: }
821: /* insert S_E values */
822: for (j=0;j<subset_size;j++) dummy_idx[j] = cum+j;
823: MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
825: /* if adaptivity is requested, invert S_E block */
826: if (compute_Stilda) {
827: PetscBLASIntCast(subset_size,&B_N);
828: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
829: if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { /* TODO add sytrf/i for symmetric non hermitian */
830: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,work,&B_N,&B_ierr));
831: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
832: PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,work,&B_N,&B_ierr));
833: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
834: } else {
835: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,work,&B_N,pivots,&B_ierr));
836: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
837: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,work,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
838: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
839: }
840: PetscFPTrapPop();
841: MatSetValues(sub_schurs->sum_S_Ej_inv_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
842: }
843: cum += subset_size;
844: cum2 += subset_size*(size_schur + 1);
845: }
846: MatDenseRestoreArray(S_all,&S_data);
848: if (mumps_S) {
849: MatFactorRestoreSchurComplement(F,&S_all);
850: }
852: if (compute_Stilda && size_active_schur) {
853: if (sub_schurs->n_subs == 1 && size_schur == size_active_schur) { /* we already computed the inverse */
854: PetscInt j;
855: for (j=0;j<size_schur;j++) dummy_idx[j] = j;
856: MatSetValues(sub_schurs->sum_S_Ej_tilda_all,size_schur,dummy_idx,size_schur,dummy_idx,work,INSERT_VALUES);
857: } else {
858: if (mumps_S) { /* use MatFactor calls to invert S */
859: MatFactorInvertSchurComplement(F);
860: MatFactorGetSchurComplement(F,&S_all);
861: } else { /* we need to invert explicitly since we are not using MUMPS for S */
862: MatDenseGetArray(S_all,&S_data);
863: PetscBLASIntCast(size_schur,&B_N);
864: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
865: if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { /* TODO add sytrf/i for symmetric non hermitian */
866: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr));
867: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
868: PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr));
869: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
870: } else {
871: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr));
872: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
873: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
874: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
875: }
876: PetscFPTrapPop();
877: MatDenseRestoreArray(S_all,&S_data);
878: }
879: /* S_Ej_tilda_all */
880: cum = cum2 = 0;
881: MatDenseGetArray(S_all,&S_data);
882: for (i=0;i<sub_schurs->n_subs;i++) {
883: PetscInt j;
885: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
886: /* get (St^-1)_E */
887: if (sub_schurs->is_hermitian) { /* here only the lower triangle (column oriented) is needed, so no expansion to the full matrix is performed */
888: PetscInt k;
889: for (k=0;k<subset_size;k++) {
890: for (j=k;j<subset_size;j++) {
891: work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
892: }
893: }
894: } else { /* copy to workspace */
895: PetscInt k;
896: for (k=0;k<subset_size;k++) {
897: for (j=0;j<subset_size;j++) {
898: work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
899: }
900: }
901: }
902: for (j=0;j<subset_size;j++) dummy_idx[j] = cum+j;
903: MatSetValues(sub_schurs->sum_S_Ej_tilda_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
904: cum += subset_size;
905: cum2 += subset_size*(size_schur + 1);
906: }
907: MatDenseRestoreArray(S_all,&S_data);
908: if (mumps_S) {
909: MatFactorRestoreSchurComplement(F,&S_all);
910: }
911: }
912: }
913: PetscFree2(dummy_idx,work);
914: }
915: PetscFree(nnz);
916: MatDestroy(&F);
917: ISDestroy(&is_I_layer);
918: MatDestroy(&S_all);
919: MatDestroy(&A_BB);
920: MatDestroy(&A_IB);
921: MatDestroy(&A_BI);
922: MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
923: MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
924: if (compute_Stilda) {
925: MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
926: MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
927: MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
928: MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
929: }
931: /* Global matrix of all the assembled Schur complements on the subsets */
932: MatISSetLocalMat(work_mat,sub_schurs->S_Ej_all);
933: MatISSetMPIXAIJPreallocation_Private(work_mat,global_schur_subsets,PETSC_TRUE);
934: MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);
936: /* Get local part of (\sum_j S_Ej) */
937: if (!sub_schurs->sum_S_Ej_all) {
938: MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_INITIAL_MATRIX,&submats);
939: sub_schurs->sum_S_Ej_all = submats[0];
940: } else {
941: PetscMalloc1(1,&submats);
942: submats[0] = sub_schurs->sum_S_Ej_all;
943: MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
944: }
946: /* Explicitly compute (\sum_j S_Ej)^-1 (faster scaling during PCApply, at the cost of extra work during setup) */
947: if (faster_deluxe) {
948: Mat tmpmat;
949: PetscScalar *array;
950: PetscInt cum;
952: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&array);
953: cum = 0;
954: for (i=0;i<sub_schurs->n_subs;i++) {
955: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
956: PetscBLASIntCast(subset_size,&B_N);
957: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
958: if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
959: PetscInt j,k;
961: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
962: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
963: PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
964: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
965: for (j=0;j<B_N;j++) {
966: for (k=j+1;k<B_N;k++) {
967: array[k*B_N+j+cum] = array[j*B_N+k+cum];
968: }
969: }
970: } else {
971: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
972: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
973: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
974: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
975: }
976: PetscFPTrapPop();
977: cum += subset_size*subset_size;
978: }
979: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&array);
980: MatMatMult(sub_schurs->S_Ej_all,sub_schurs->sum_S_Ej_all,MAT_INITIAL_MATRIX,1.0,&tmpmat);
981: MatDestroy(&sub_schurs->S_Ej_all);
982: MatDestroy(&sub_schurs->sum_S_Ej_all);
983: sub_schurs->S_Ej_all = tmpmat;
984: }
986: /* Get local parts of (\sum_j S^-1_Ej) and (\sum_j St^-1_Ej) */
987: if (compute_Stilda) {
988: MatISSetLocalMat(work_mat,sub_schurs->sum_S_Ej_tilda_all);
989: MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);
990: submats[0] = sub_schurs->sum_S_Ej_tilda_all;
991: MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
992: MatISSetLocalMat(work_mat,sub_schurs->sum_S_Ej_inv_all);
993: MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);
994: submats[0] = sub_schurs->sum_S_Ej_inv_all;
995: MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
996: }
998: /* free workspace */
999: PetscFree(submats);
1000: PetscFree2(Bwork,pivots);
1001: MatDestroy(&global_schur_subsets);
1002: MatDestroy(&work_mat);
1003: ISDestroy(&all_subsets_n);
1004: PetscCommDestroy(&comm_n);
1005: return(0);
1006: }
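
/*
   A minimal illustrative sketch (not part of the original file): the MUMPS Schur-complement
   pattern used in PCBDDCSubSchursSetUp above, in isolation. A sequential matrix A is
   factored with the dofs listed in is_schur (here, as above, assumed to be ordered last in A),
   and the dense Schur complement on those dofs is retrieved from the factor. The function
   name is hypothetical; error handling for singular factors and the Cholesky variant are omitted.
*/
static PetscErrorCode ExampleMumpsSchurComplement(Mat A, IS is_schur)
{
  Mat            F,S;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
  ierr = MatFactorSetSchurIS(F,is_schur);CHKERRQ(ierr);          /* dofs whose Schur complement is wanted */
  ierr = MatLUFactorSymbolic(F,A,NULL,NULL,NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MUMPS) /* as above: make sure ICNTL(19) has not been overridden from the command line */
  ierr = MatMumpsSetIcntl(F,19,3);CHKERRQ(ierr);
#endif
  ierr = MatLUFactorNumeric(F,A,NULL);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S);CHKERRQ(ierr);        /* dense matrix owned by the factor */
  /* ... use S (e.g. via MatDenseGetArray) while the factor is alive ... */
  ierr = MatFactorRestoreSchurComplement(F,&S);CHKERRQ(ierr);
  ierr = MatDestroy(&F);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}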
1010: PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap)
1011: {
1012: IS *faces,*edges,*all_cc,vertices;
1013: PetscInt i,n_faces,n_edges,n_all_cc;
1014: PetscBool is_sorted;
1015: PetscErrorCode ierr;
1018: ISSorted(is_I,&is_sorted);
1019: if (!is_sorted) SETERRQ(PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be sorted");
1020: ISSorted(is_B,&is_sorted);
1021: if (!is_sorted) SETERRQ(PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be sorted");
1023: /* reset any previous data */
1024: PCBDDCSubSchursReset(sub_schurs);
1026: /* get index sets for faces and edges (already sorted by global ordering) */
1027: PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices);
1028: n_all_cc = n_faces+n_edges;
1029: PetscBTCreate(n_all_cc,&sub_schurs->is_edge);
1030: PetscMalloc1(n_all_cc,&all_cc);
1031: for (i=0;i<n_faces;i++) {
1032: all_cc[i] = faces[i];
1033: }
1034: for (i=0;i<n_edges;i++) {
1035: all_cc[n_faces+i] = edges[i];
1036: PetscBTSet(sub_schurs->is_edge,n_faces+i);
1037: }
1038: PetscFree(faces);
1039: PetscFree(edges);
1040: sub_schurs->is_dir = NULL;
1041: PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir);
1043: /* Determine if MUMPS can be used */
1044: sub_schurs->use_mumps = PETSC_FALSE;
1045: #if defined(PETSC_HAVE_MUMPS)
1046: sub_schurs->use_mumps = PETSC_TRUE;
1047: #endif
1049: PetscObjectReference((PetscObject)is_I);
1050: sub_schurs->is_I = is_I;
1051: PetscObjectReference((PetscObject)is_B);
1052: sub_schurs->is_B = is_B;
1053: PetscObjectReference((PetscObject)graph->l2gmap);
1054: sub_schurs->l2gmap = graph->l2gmap;
1055: PetscObjectReference((PetscObject)BtoNmap);
1056: sub_schurs->BtoNmap = BtoNmap;
1057: sub_schurs->n_subs = n_all_cc;
1058: sub_schurs->is_subs = all_cc;
1059: if (!sub_schurs->use_mumps) { /* sort by local ordering if MUMPS is not present */
1060: for (i=0;i<sub_schurs->n_subs;i++) {
1061: ISSort(sub_schurs->is_subs[i]);
1062: }
1063: }
1064: sub_schurs->is_vertices = vertices;
1065: sub_schurs->S_Ej_all = NULL;
1066: sub_schurs->sum_S_Ej_all = NULL;
1067: sub_schurs->sum_S_Ej_inv_all = NULL;
1068: sub_schurs->sum_S_Ej_tilda_all = NULL;
1069: sub_schurs->is_Ej_all = NULL;
1070: return(0);
1071: }
1075: PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
1076: {
1077: PCBDDCSubSchurs schurs_ctx;
1078: PetscErrorCode ierr;
1081: PetscNew(&schurs_ctx);
1082: schurs_ctx->n_subs = 0;
1083: *sub_schurs = schurs_ctx;
1084: return(0);
1085: }
1089: PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs)
1090: {
1094: PCBDDCSubSchursReset(*sub_schurs);
1095: PetscFree(*sub_schurs);
1096: return(0);
1097: }
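
/*
   A minimal illustrative sketch (not part of the original file): the typical lifecycle of a
   PCBDDCSubSchurs object as driven by the BDDC setup code. The function name is hypothetical;
   A is the local subdomain matrix, S the implicit interface Schur complement (a
   MATSCHURCOMPLEMENT), and is_I/is_B/graph/BtoNmap are assumed to have been built elsewhere.
   A negative nlayers keeps the full interior problem (no layering).
*/
static PetscErrorCode ExampleSubSchursLifecycle(Mat A, Mat S, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap)
{
  PCBDDCSubSchurs sub_schurs;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = PCBDDCSubSchursCreate(&sub_schurs);CHKERRQ(ierr);
  ierr = PCBDDCSubSchursInit(sub_schurs,is_I,is_B,graph,BtoNmap);CHKERRQ(ierr);
  /* no layering, no faster deluxe scaling, no adaptive selection, no solver reuse */
  ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S,NULL,NULL,-1,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
  /* ... use sub_schurs->S_Ej_all and sub_schurs->sum_S_Ej_all ... */
  ierr = PCBDDCSubSchursDestroy(&sub_schurs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}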
1101: PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
1102: {
1103: PetscInt i;
1107: MatDestroy(&sub_schurs->A);
1108: MatDestroy(&sub_schurs->S);
1109: ISDestroy(&sub_schurs->is_I);
1110: ISDestroy(&sub_schurs->is_B);
1111: ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);
1112: ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);
1113: MatDestroy(&sub_schurs->S_Ej_all);
1114: MatDestroy(&sub_schurs->sum_S_Ej_all);
1115: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
1116: MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
1117: ISDestroy(&sub_schurs->is_Ej_all);
1118: ISDestroy(&sub_schurs->is_vertices);
1119: ISDestroy(&sub_schurs->is_dir);
1120: PetscBTDestroy(&sub_schurs->is_edge);
1121: for (i=0;i<sub_schurs->n_subs;i++) {
1122: ISDestroy(&sub_schurs->is_subs[i]);
1123: }
1124: if (sub_schurs->n_subs) {
1125: PetscFree(sub_schurs->is_subs);
1126: }
1127: if (sub_schurs->reuse_mumps) {
1128: PCBDDCReuseMumpsReset(sub_schurs->reuse_mumps);
1129: }
1130: PetscFree(sub_schurs->reuse_mumps);
1131: sub_schurs->n_subs = 0;
1132: return(0);
1133: }
1137: PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
1138: {
1139: PetscInt i,j,n;
1143: n = 0;
1144: for (i=-n_prev;i<0;i++) {
1145: PetscInt start_dof = queue_tip[i];
1146: for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
1147: PetscInt dof = adjncy[j];
1148: if (!PetscBTLookup(touched,dof)) {
1149: PetscBTSet(touched,dof);
1150: queue_tip[n] = dof;
1151: n++;
1152: }
1153: }
1154: }
1155: *n_added = n;
1156: return(0);
1157: }
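
/*
   A minimal illustrative sketch (not part of the original file): expanding one layer of
   neighbours with the helper above on a 5-node path graph 0-1-2-3-4 stored in CSR form
   (xadj/adjncy), starting from the single seed dof 0. The function name is hypothetical.
   After the call, queue[1] == 1 and n_added == 1, since dof 1 is the only untouched
   neighbour of dof 0.
*/
static PetscErrorCode ExampleAdjNextLayer(void)
{
  PetscInt       xadj[6]   = {0,1,3,5,7,8};     /* CSR row pointers */
  PetscInt       adjncy[8] = {1,0,2,1,3,2,4,3}; /* CSR neighbour lists */
  PetscInt       queue[5]  = {0,0,0,0,0};       /* BFS queue, seeded with dof 0 */
  PetscInt       n_added;
  PetscBT        touched;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscBTCreate(5,&touched);CHKERRQ(ierr);
  PetscBTSet(touched,0);                        /* the seed dof is already touched */
  /* queue+1 points just past the n_prev=1 previously added dofs */
  ierr = PCBDDCAdjGetNextLayer_Private(queue+1,1,touched,xadj,adjncy,&n_added);CHKERRQ(ierr);
  ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}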