Actual source code: bddcschurs.c
petsc-3.6.1 2015-08-06
1: #include <../src/ksp/pc/impls/bddc/bddc.h>
2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3: #include <petscblaslapack.h>
5: PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
6: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
7: static PetscErrorCode PCBDDCMumpsInteriorSolve(PC,Vec,Vec);
8: static PetscErrorCode PCBDDCMumpsCorrectionSolve(PC,Vec,Vec);
12: static PetscErrorCode PCBDDCMumpsCorrectionSolve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose)
13: {
14: PCBDDCReuseMumps ctx;
15: #if defined(PETSC_HAVE_MUMPS)
16: PetscInt ival;
17: #endif
18: PetscErrorCode ierr;
21: PCShellGetContext(pc,(void **)&ctx);
22: #if defined(PETSC_HAVE_MUMPS)
23: MatMumpsGetIcntl(ctx->F,26,&ival);
24: MatMumpsSetIcntl(ctx->F,26,-1);
25: #endif
26: if (transpose) {
27: MatSolveTranspose(ctx->F,rhs,sol);
28: } else {
29: MatSolve(ctx->F,rhs,sol);
30: }
31: #if defined(PETSC_HAVE_MUMPS)
32: MatMumpsSetIcntl(ctx->F,26,ival);
33: #endif
34: return(0);
35: }
39: static PetscErrorCode PCBDDCMumpsCorrectionSolve(PC pc, Vec rhs, Vec sol)
40: {
41: PetscErrorCode ierr;
44: PCBDDCMumpsCorrectionSolve_Private(pc,rhs,sol,PETSC_FALSE);
45: return(0);
46: }
50: static PetscErrorCode PCBDDCMumpsCorrectionSolveTranspose(PC pc, Vec rhs, Vec sol)
51: {
52: PetscErrorCode ierr;
55: PCBDDCMumpsCorrectionSolve_Private(pc,rhs,sol,PETSC_TRUE);
56: return(0);
57: }
61: static PetscErrorCode PCBDDCReuseMumpsReset(PCBDDCReuseMumps reuse)
62: {
66: MatDestroy(&reuse->F);
67: VecDestroy(&reuse->sol);
68: VecDestroy(&reuse->rhs);
69: PCDestroy(&reuse->interior_solver);
70: PCDestroy(&reuse->correction_solver);
71: ISDestroy(&reuse->is_R);
72: ISDestroy(&reuse->is_B);
73: VecScatterDestroy(&reuse->correction_scatter_B);
74: VecDestroy(&reuse->sol_B);
75: VecDestroy(&reuse->rhs_B);
76: return(0);
77: }
81: static PetscErrorCode PCBDDCMumpsInteriorSolve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose)
82: {
83: PCBDDCReuseMumps ctx;
84: PetscScalar *array,*array_mumps;
85: #if defined(PETSC_HAVE_MUMPS)
86: PetscInt ival;
87: #endif
88: PetscErrorCode ierr;
91: PCShellGetContext(pc,(void **)&ctx);
92: #if defined(PETSC_HAVE_MUMPS)
93: MatMumpsGetIcntl(ctx->F,26,&ival);
94: MatMumpsSetIcntl(ctx->F,26,0);
95: #endif
96: /* copy rhs into factored matrix workspace (can it be avoided?, MatSolve_MUMPS has another copy b->x internally) */
97: VecGetArrayRead(rhs,(const PetscScalar**)&array);
98: VecGetArray(ctx->rhs,&array_mumps);
99: PetscMemcpy(array_mumps,array,ctx->n_I*sizeof(PetscScalar));
100: VecRestoreArray(ctx->rhs,&array_mumps);
101: VecRestoreArrayRead(rhs,(const PetscScalar**)&array);
103: if (transpose) {
104: MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol);
105: } else {
106: MatSolve(ctx->F,ctx->rhs,ctx->sol);
107: }
109: /* get back data to caller worskpace */
110: VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_mumps);
111: VecGetArray(sol,&array);
112: PetscMemcpy(array,array_mumps,ctx->n_I*sizeof(PetscScalar));
113: VecRestoreArray(sol,&array);
114: VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_mumps);
115: #if defined(PETSC_HAVE_MUMPS)
116: MatMumpsSetIcntl(ctx->F,26,ival);
117: #endif
118: return(0);
119: }
123: static PetscErrorCode PCBDDCMumpsInteriorSolve(PC pc, Vec rhs, Vec sol)
124: {
125: PetscErrorCode ierr;
128: PCBDDCMumpsInteriorSolve_Private(pc,rhs,sol,PETSC_FALSE);
129: return(0);
130: }
134: static PetscErrorCode PCBDDCMumpsInteriorSolveTranspose(PC pc, Vec rhs, Vec sol)
135: {
136: PetscErrorCode ierr;
139: PCBDDCMumpsInteriorSolve_Private(pc,rhs,sol,PETSC_TRUE);
140: return(0);
141: }
145: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
146: {
147: Mat B, C, D, Bd, Cd, AinvBd;
148: KSP ksp;
149: PC pc;
150: PetscBool isLU, isILU, isCHOL, Bdense, Cdense;
151: PetscReal fill = 2.0;
152: PetscInt n_I;
153: PetscMPIInt size;
157: MPI_Comm_size(PetscObjectComm((PetscObject)M),&size);
158: if (size != 1) {
159: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices");
160: }
161: if (reuse == MAT_REUSE_MATRIX) {
162: PetscBool Sdense;
164: PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense);
165: if (!Sdense) {
166: SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should dense");
167: }
168: }
169: MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D);
170: MatSchurComplementGetKSP(M, &ksp);
171: KSPGetPC(ksp, &pc);
172: PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU);
173: PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU);
174: PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL);
175: PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense);
176: PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense);
177: MatGetSize(B,&n_I,NULL);
178: if (n_I) {
179: if (!Bdense) {
180: MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd);
181: } else {
182: Bd = B;
183: }
185: if (isLU || isILU || isCHOL) {
186: Mat fact;
187: KSPSetUp(ksp);
188: PCFactorGetMatrix(pc, &fact);
189: MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
190: MatMatSolve(fact, Bd, AinvBd);
191: } else {
192: PetscBool ex = PETSC_TRUE;
194: if (ex) {
195: Mat Ainvd;
197: PCComputeExplicitOperator(pc, &Ainvd);
198: MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd);
199: MatDestroy(&Ainvd);
200: } else {
201: Vec sol,rhs;
202: PetscScalar *arrayrhs,*arraysol;
203: PetscInt i,nrhs,n;
205: MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
206: MatGetSize(Bd,&n,&nrhs);
207: MatDenseGetArray(Bd,&arrayrhs);
208: MatDenseGetArray(AinvBd,&arraysol);
209: KSPGetSolution(ksp,&sol);
210: KSPGetRhs(ksp,&rhs);
211: for (i=0;i<nrhs;i++) {
212: VecPlaceArray(rhs,arrayrhs+i*n);
213: VecPlaceArray(sol,arraysol+i*n);
214: KSPSolve(ksp,rhs,sol);
215: VecResetArray(rhs);
216: VecResetArray(sol);
217: }
218: MatDenseRestoreArray(Bd,&arrayrhs);
219: MatDenseRestoreArray(AinvBd,&arrayrhs);
220: }
221: }
222: if (!Bdense & !issym) {
223: MatDestroy(&Bd);
224: }
226: if (!issym) {
227: if (!Cdense) {
228: MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd);
229: } else {
230: Cd = C;
231: }
232: MatMatMult(Cd, AinvBd, reuse, fill, S);
233: if (!Cdense) {
234: MatDestroy(&Cd);
235: }
236: } else {
237: MatTransposeMatMult(Bd, AinvBd, reuse, fill, S);
238: if (!Bdense) {
239: MatDestroy(&Bd);
240: }
241: }
242: MatDestroy(&AinvBd);
243: }
245: if (D) {
246: Mat Dd;
247: PetscBool Ddense;
249: PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense);
250: if (!Ddense) {
251: MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd);
252: } else {
253: Dd = D;
254: }
255: if (n_I) {
256: MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN);
257: } else {
258: if (reuse == MAT_INITIAL_MATRIX) {
259: MatDuplicate(Dd,MAT_COPY_VALUES,S);
260: } else {
261: MatCopy(Dd,*S,SAME_NONZERO_PATTERN);
262: }
263: }
264: if (!Ddense) {
265: MatDestroy(&Dd);
266: }
267: } else {
268: MatScale(*S,-1.0);
269: }
270: return(0);
271: }
275: PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, PetscBool faster_deluxe, PetscBool compute_Stilda, PetscBool reuse_solvers)
276: {
277: Mat F,A_II,A_IB,A_BI,A_BB,AE_II;
278: Mat S_all;
279: Mat global_schur_subsets,work_mat,*submats;
280: ISLocalToGlobalMapping l2gmap_subsets;
281: IS is_I,is_I_layer;
282: IS all_subsets,all_subsets_mult,all_subsets_n;
283: PetscInt *nnz,*all_local_idx_N;
284: PetscInt *auxnum1,*auxnum2;
285: PetscInt i,subset_size,max_subset_size;
286: PetscInt extra,local_size,global_size;
287: PetscBLASInt B_N,B_ierr,B_lwork,*pivots;
288: PetscScalar *Bwork;
289: PetscSubcomm subcomm;
290: PetscMPIInt color,rank;
291: MPI_Comm comm_n;
292: PetscErrorCode ierr;
295: /* update info in sub_schurs */
296: MatDestroy(&sub_schurs->A);
297: MatDestroy(&sub_schurs->S);
298: sub_schurs->is_hermitian = PETSC_FALSE;
299: sub_schurs->is_posdef = PETSC_FALSE;
300: if (Ain) {
301: PetscBool isseqaij;
302: /* determine if we are dealing with hermitian positive definite problems */
303: #if !defined(PETSC_USE_COMPLEX)
304: if (Ain->symmetric_set) {
305: sub_schurs->is_hermitian = Ain->symmetric;
306: }
307: #else
308: if (Ain->hermitian_set) {
309: sub_schurs->is_hermitian = Ain->hermitian;
310: }
311: #endif
312: if (Ain->spd_set) {
313: sub_schurs->is_posdef = Ain->spd;
314: }
316: /* check */
317: PetscObjectTypeCompare((PetscObject)Ain,MATSEQAIJ,&isseqaij);
318: if (compute_Stilda && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
319: PetscInt lsize;
321: MatGetSize(Ain,&lsize,NULL);
322: if (lsize) {
323: PetscScalar val;
324: PetscReal norm;
325: Vec vec1,vec2,vec3;
327: MatCreateVecs(Ain,&vec1,&vec2);
328: VecDuplicate(vec1,&vec3);
329: VecSetRandom(vec1,NULL);
330: MatMult(Ain,vec1,vec2);
331: #if !defined(PETSC_USE_COMPLEX)
332: MatMultTranspose(Ain,vec1,vec3);
333: #else
334: MatMultHermitianTranspose(Ain,vec1,vec3);
335: #endif
336: VecAXPY(vec3,-1.0,vec2);
337: VecNorm(vec3,NORM_INFINITY,&norm);
338: if (norm > PetscSqrtReal(PETSC_SMALL)) {
339: sub_schurs->is_hermitian = PETSC_FALSE;
340: } else {
341: sub_schurs->is_hermitian = PETSC_TRUE;
342: }
343: VecDot(vec1,vec2,&val);
344: if (PetscRealPart(val) > 0. && PetscAbsReal(PetscImaginaryPart(val)) < PETSC_SMALL) sub_schurs->is_posdef = PETSC_TRUE;
345: VecDestroy(&vec1);
346: VecDestroy(&vec2);
347: VecDestroy(&vec3);
348: } else {
349: sub_schurs->is_hermitian = PETSC_TRUE;
350: sub_schurs->is_posdef = PETSC_TRUE;
351: }
352: }
353: if (isseqaij) {
354: PetscObjectReference((PetscObject)Ain);
355: sub_schurs->A = Ain;
356: } else {
357: MatConvert(Ain,MATSEQAIJ,MAT_INITIAL_MATRIX,&sub_schurs->A);
358: }
359: }
360: if (compute_Stilda && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
361: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"General matrix pencils are not currently supported (%d,%d)",sub_schurs->is_hermitian,sub_schurs->is_posdef);
362: }
364: PetscObjectReference((PetscObject)Sin);
365: sub_schurs->S = Sin;
366: if (sub_schurs->use_mumps) {
367: sub_schurs->use_mumps = (PetscBool)(!!sub_schurs->A);
368: }
370: /* preliminary checks */
371: if (!sub_schurs->use_mumps && compute_Stilda) {
372: SETERRQ(PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
373: }
375: /* restrict work on active processes */
376: color = 0;
377: if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
378: MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank);
379: PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm);
380: PetscSubcommSetNumber(subcomm,2);
381: PetscSubcommSetTypeGeneral(subcomm,color,rank);
382: PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL);
383: PetscSubcommDestroy(&subcomm);
384: if (!sub_schurs->n_subs) {
385: PetscCommDestroy(&comm_n);
386: return(0);
387: }
388: /* PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL); */
390: /* get Schur complement matrices */
391: if (!sub_schurs->use_mumps) {
392: Mat tA_IB,tA_BI,tA_BB;
393: PetscBool isseqsbaij;
394: MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB);
395: PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij);
396: if (isseqsbaij) {
397: MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB);
398: MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB);
399: MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI);
400: } else {
401: PetscObjectReference((PetscObject)tA_BB);
402: A_BB = tA_BB;
403: PetscObjectReference((PetscObject)tA_IB);
404: A_IB = tA_IB;
405: PetscObjectReference((PetscObject)tA_BI);
406: A_BI = tA_BI;
407: }
408: } else {
409: A_II = NULL;
410: A_IB = NULL;
411: A_BI = NULL;
412: A_BB = NULL;
413: }
414: S_all = NULL;
416: /* determine interior problems */
417: ISGetLocalSize(sub_schurs->is_I,&i);
418: if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
419: PetscBT touched;
420: const PetscInt* idx_B;
421: PetscInt n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;
423: if (xadj == NULL || adjncy == NULL) {
424: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency");
425: }
426: /* get sizes */
427: ISGetLocalSize(sub_schurs->is_I,&n_I);
428: ISGetLocalSize(sub_schurs->is_B,&n_B);
430: PetscMalloc1(n_I+n_B,&local_numbering);
431: PetscBTCreate(n_I+n_B,&touched);
432: PetscBTMemzero(n_I+n_B,touched);
434: /* all boundary dofs must be skipped when adding layers */
435: ISGetIndices(sub_schurs->is_B,&idx_B);
436: for (j=0;j<n_B;j++) {
437: PetscBTSet(touched,idx_B[j]);
438: }
439: PetscMemcpy(local_numbering,idx_B,n_B*sizeof(PetscInt));
440: ISRestoreIndices(sub_schurs->is_B,&idx_B);
442: /* add prescribed number of layers of dofs */
443: n_local_dofs = n_B;
444: n_prev_added = n_B;
445: for (layer=0;layer<nlayers;layer++) {
446: PetscInt n_added;
447: if (n_local_dofs == n_I+n_B) break;
448: if (n_local_dofs > n_I+n_B) {
449: SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %d. Out of bound access (%d > %d)",layer,n_local_dofs,n_I+n_B);
450: }
451: PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);
452: n_prev_added = n_added;
453: n_local_dofs += n_added;
454: if (!n_added) break;
455: }
456: PetscBTDestroy(&touched);
458: /* IS for I layer dofs in original numbering */
459: ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer);
460: PetscFree(local_numbering);
461: ISSort(is_I_layer);
462: /* IS for I layer dofs in I numbering */
463: if (!sub_schurs->use_mumps) {
464: ISLocalToGlobalMapping ItoNmap;
465: ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap);
466: ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I);
467: ISLocalToGlobalMappingDestroy(&ItoNmap);
469: /* II block */
470: MatGetSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);
471: }
472: } else {
473: PetscInt n_I;
475: /* IS for I dofs in original numbering */
476: PetscObjectReference((PetscObject)sub_schurs->is_I);
477: is_I_layer = sub_schurs->is_I;
479: /* IS for I dofs in I numbering (strided 1) */
480: if (!sub_schurs->use_mumps) {
481: ISGetSize(sub_schurs->is_I,&n_I);
482: ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I);
484: /* II block is the same */
485: PetscObjectReference((PetscObject)A_II);
486: AE_II = A_II;
487: }
488: }
490: /* Get info on subset sizes and sum of all subsets sizes */
491: max_subset_size = 0;
492: local_size = 0;
493: for (i=0;i<sub_schurs->n_subs;i++) {
494: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
495: max_subset_size = PetscMax(subset_size,max_subset_size);
496: local_size += subset_size;
497: }
499: /* Work arrays for local indices */
500: extra = 0;
501: if (sub_schurs->use_mumps && is_I_layer) {
502: ISGetLocalSize(is_I_layer,&extra);
503: }
504: PetscMalloc1(local_size+extra,&all_local_idx_N);
505: if (extra) {
506: const PetscInt *idxs;
507: ISGetIndices(is_I_layer,&idxs);
508: PetscMemcpy(all_local_idx_N,idxs,extra*sizeof(PetscInt));
509: ISRestoreIndices(is_I_layer,&idxs);
510: }
511: PetscMalloc1(local_size,&nnz);
512: PetscMalloc1(sub_schurs->n_subs,&auxnum1);
513: PetscMalloc1(sub_schurs->n_subs,&auxnum2);
515: /* Get local indices in local numbering */
516: local_size = 0;
517: for (i=0;i<sub_schurs->n_subs;i++) {
518: PetscInt j;
519: const PetscInt *idxs;
521: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
522: ISGetIndices(sub_schurs->is_subs[i],&idxs);
523: /* start (smallest in global ordering) and multiplicity */
524: auxnum1[i] = idxs[0];
525: auxnum2[i] = subset_size;
526: /* subset indices in local numbering */
527: PetscMemcpy(all_local_idx_N+local_size+extra,idxs,subset_size*sizeof(PetscInt));
528: ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
529: for (j=0;j<subset_size;j++) nnz[local_size+j] = subset_size;
530: local_size += subset_size;
531: }
533: /* allocate extra workspace needed only for GETRI */
534: Bwork = NULL;
535: pivots = NULL;
536: if (local_size && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
537: PetscScalar lwork;
539: B_lwork = -1;
540: PetscBLASIntCast(local_size,&B_N);
541: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
542: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,Bwork,&B_N,pivots,&lwork,&B_lwork,&B_ierr));
543: PetscFPTrapPop();
544: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr);
545: PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
546: PetscMalloc2(B_lwork,&Bwork,B_N,&pivots);
547: }
549: /* prepare parallel matrices for summing up properly schurs on subsets */
550: ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n);
551: ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets);
552: ISDestroy(&all_subsets_n);
553: ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult);
554: PCBDDCSubsetNumbering(all_subsets,all_subsets_mult,&global_size,&all_subsets_n);
555: ISDestroy(&all_subsets);
556: ISDestroy(&all_subsets_mult);
557: ISGetLocalSize(all_subsets_n,&i);
558: if (i != local_size) {
559: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %d != %d",i,local_size);
560: }
561: ISLocalToGlobalMappingCreateIS(all_subsets_n,&l2gmap_subsets);
562: MatCreateIS(comm_n,1,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size,l2gmap_subsets,&work_mat);
563: ISLocalToGlobalMappingDestroy(&l2gmap_subsets);
564: MatCreate(PetscObjectComm((PetscObject)work_mat),&global_schur_subsets);
565: MatSetSizes(global_schur_subsets,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size);
566: MatSetType(global_schur_subsets,MATMPIAIJ);
568: /* subset indices in local boundary numbering */
569: if (!sub_schurs->is_Ej_all) {
570: PetscInt *all_local_idx_B;
572: PetscMalloc1(local_size,&all_local_idx_B);
573: ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B);
574: if (subset_size != local_size) {
575: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %d != %d\n",subset_size,local_size);
576: }
577: ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all);
578: }
580: /* Local matrix of all local Schur on subsets (transposed) */
581: if (!sub_schurs->S_Ej_all) {
582: MatCreate(PETSC_COMM_SELF,&sub_schurs->S_Ej_all);
583: MatSetSizes(sub_schurs->S_Ej_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);
584: MatSetType(sub_schurs->S_Ej_all,MATAIJ);
585: MatSeqAIJSetPreallocation(sub_schurs->S_Ej_all,0,nnz);
586: }
588: /* Compute Schur complements explicitly */
589: F = NULL;
590: if (!sub_schurs->use_mumps) {
591: Mat S_Ej_expl;
592: PetscScalar *work;
593: PetscInt j,*dummy_idx;
594: PetscBool Sdense;
596: PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);
597: local_size = 0;
598: for (i=0;i<sub_schurs->n_subs;i++) {
599: IS is_subset_B;
600: Mat AE_EE,AE_IE,AE_EI,S_Ej;
602: /* subsets in original and boundary numbering */
603: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B);
604: /* EE block */
605: MatGetSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE);
606: /* IE block */
607: MatGetSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE);
608: /* EI block */
609: if (sub_schurs->is_hermitian) {
610: MatCreateTranspose(AE_IE,&AE_EI);
611: } else {
612: MatGetSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI);
613: }
614: ISDestroy(&is_subset_B);
615: MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej);
616: MatDestroy(&AE_EE);
617: MatDestroy(&AE_IE);
618: MatDestroy(&AE_EI);
619: if (AE_II == A_II) { /* we can reuse the same ksp */
620: KSP ksp;
621: MatSchurComplementGetKSP(sub_schurs->S,&ksp);
622: MatSchurComplementSetKSP(S_Ej,ksp);
623: } else { /* build new ksp object which inherits ksp and pc types from the original one */
624: KSP origksp,schurksp;
625: PC origpc,schurpc;
626: KSPType ksp_type;
627: PetscInt n_internal;
628: PetscBool ispcnone;
630: MatSchurComplementGetKSP(sub_schurs->S,&origksp);
631: MatSchurComplementGetKSP(S_Ej,&schurksp);
632: KSPGetType(origksp,&ksp_type);
633: KSPSetType(schurksp,ksp_type);
634: KSPGetPC(schurksp,&schurpc);
635: KSPGetPC(origksp,&origpc);
636: PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone);
637: if (!ispcnone) {
638: PCType pc_type;
639: PCGetType(origpc,&pc_type);
640: PCSetType(schurpc,pc_type);
641: } else {
642: PCSetType(schurpc,PCLU);
643: }
644: ISGetSize(is_I,&n_internal);
645: if (n_internal) { /* UMFPACK gives error with 0 sized problems */
646: MatSolverPackage solver=NULL;
647: PCFactorGetMatSolverPackage(origpc,(const MatSolverPackage*)&solver);
648: if (solver) {
649: PCFactorSetMatSolverPackage(schurpc,solver);
650: }
651: }
652: KSPSetUp(schurksp);
653: }
654: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
655: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl);
656: PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_hermitian,MAT_REUSE_MATRIX,&S_Ej_expl);
657: PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense);
658: if (Sdense) {
659: for (j=0;j<subset_size;j++) {
660: dummy_idx[j]=local_size+j;
661: }
662: MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
663: } else {
664: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
665: }
666: MatDestroy(&S_Ej);
667: MatDestroy(&S_Ej_expl);
668: local_size += subset_size;
669: }
670: PetscFree2(dummy_idx,work);
671: /* free */
672: ISDestroy(&is_I);
673: MatDestroy(&AE_II);
674: PetscFree(all_local_idx_N);
675: } else {
676: Mat A;
677: IS is_A_all;
678: PetscScalar *work,*S_data;
679: PetscInt *idxs_schur,n_I,n_I_all,*dummy_idx,size_schur,size_active_schur,cum,cum2;
680: PetscBool mumps_S;
682: /* get working mat */
683: n_I = 0;
684: if (is_I_layer) {
685: ISGetLocalSize(is_I_layer,&n_I);
686: }
687: if (!sub_schurs->is_dir) {
688: ISCreateGeneral(PETSC_COMM_SELF,local_size+n_I,all_local_idx_N,PETSC_COPY_VALUES,&is_A_all);
689: size_schur = local_size;
690: } else {
691: IS list[2];
693: ISCreateGeneral(PETSC_COMM_SELF,local_size+n_I,all_local_idx_N,PETSC_COPY_VALUES,&list[0]);
694: list[1] = sub_schurs->is_dir;
695: ISConcatenate(PETSC_COMM_SELF,2,list,&is_A_all);
696: ISDestroy(&list[0]);
697: ISGetLocalSize(sub_schurs->is_dir,&size_schur);
698: size_schur += local_size;
699: }
700: PetscFree(all_local_idx_N);
701: size_active_schur = local_size; /* size active schurs does not count any dirichlet dof on the interface */
702: MatGetSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A);
703: MatSetOptionsPrefix(A,"sub_schurs_");
704: MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_hermitian);
705: MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian);
706: MatSetOption(A,MAT_SPD,sub_schurs->is_posdef);
708: if (n_I) {
709: if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
710: MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_CHOLESKY,&F);
711: } else {
712: MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
713: }
714: /* subsets ordered last */
715: PetscMalloc1(size_schur,&idxs_schur);
716: for (i=0;i<size_schur;i++) {
717: idxs_schur[i] = n_I+i+1;
718: }
719: #if defined(PETSC_HAVE_MUMPS)
720: MatMumpsSetSchurIndices(F,size_schur,idxs_schur);
721: #endif
722: PetscFree(idxs_schur);
724: /* factorization step */
725: if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
726: MatCholeskyFactorSymbolic(F,A,NULL,NULL);
727: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
728: MatMumpsSetIcntl(F,19,2);
729: #endif
730: MatCholeskyFactorNumeric(F,A,NULL);
731: } else {
732: MatLUFactorSymbolic(F,A,NULL,NULL,NULL);
733: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
734: MatMumpsSetIcntl(F,19,3);
735: #endif
736: MatLUFactorNumeric(F,A,NULL);
737: }
739: /* get explicit Schur Complement computed during numeric factorization */
740: #if defined(PETSC_HAVE_MUMPS)
741: MatMumpsGetSchurComplement(F,&S_all);
742: #endif
744: /* we can reuse the solvers if we are not using the economic version */
745: ISGetLocalSize(sub_schurs->is_I,&n_I_all);
746: reuse_solvers = (PetscBool)(reuse_solvers && (n_I == n_I_all));
747: mumps_S = PETSC_TRUE;
748: } else { /* we can't use MUMPS when size_schur == size_of_the_problem */
749: MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all);
750: reuse_solvers = PETSC_FALSE;
751: mumps_S = PETSC_FALSE;
752: }
754: if (reuse_solvers) {
755: Mat A_II;
756: Vec vec1_B;
757: PCBDDCReuseMumps msolv_ctx;
759: if (sub_schurs->reuse_mumps) {
760: PCBDDCReuseMumpsReset(sub_schurs->reuse_mumps);
761: } else {
762: PetscNew(&sub_schurs->reuse_mumps);
763: }
764: msolv_ctx = sub_schurs->reuse_mumps;
765: MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,NULL,NULL,NULL);
766: MatGetSize(A_II,&msolv_ctx->n_I,NULL);
767: PetscObjectReference((PetscObject)F);
768: msolv_ctx->F = F;
769: MatCreateVecs(F,&msolv_ctx->sol,&msolv_ctx->rhs);
771: /* interior solver */
772: PCCreate(PETSC_COMM_SELF,&msolv_ctx->interior_solver);
773: PCSetOperators(msolv_ctx->interior_solver,A_II,A_II);
774: PCSetType(msolv_ctx->interior_solver,PCSHELL);
775: PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx);
776: PCShellSetApply(msolv_ctx->interior_solver,PCBDDCMumpsInteriorSolve);
777: PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCMumpsInteriorSolveTranspose);
779: /* correction solver */
780: PCCreate(PETSC_COMM_SELF,&msolv_ctx->correction_solver);
781: PCSetOperators(msolv_ctx->correction_solver,A,A);
782: PCSetType(msolv_ctx->correction_solver,PCSHELL);
783: PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx);
784: PCShellSetApply(msolv_ctx->correction_solver,PCBDDCMumpsCorrectionSolve);
785: PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCMumpsCorrectionSolveTranspose);
787: /* scatter and vecs for Schur complement solver */
788: MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B);
789: MatCreateVecs(sub_schurs->S,&vec1_B,NULL);
790: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B);
791: VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B);
792: VecDestroy(&vec1_B);
793: PetscObjectReference((PetscObject)is_A_all);
794: msolv_ctx->is_R = is_A_all;
795: }
796: MatDestroy(&A);
797: ISDestroy(&is_A_all);
799: /* Work arrays */
800: PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);
802: /* matrices for adaptive selection */
803: if (compute_Stilda) {
804: if (!sub_schurs->sum_S_Ej_tilda_all) {
805: MatCreate(PETSC_COMM_SELF,&sub_schurs->sum_S_Ej_tilda_all);
806: MatSetSizes(sub_schurs->sum_S_Ej_tilda_all,PETSC_DECIDE,PETSC_DECIDE,size_active_schur,size_active_schur);
807: MatSetType(sub_schurs->sum_S_Ej_tilda_all,MATAIJ);
808: MatSeqAIJSetPreallocation(sub_schurs->sum_S_Ej_tilda_all,0,nnz);
809: }
810: if (!sub_schurs->sum_S_Ej_inv_all) {
811: MatCreate(PETSC_COMM_SELF,&sub_schurs->sum_S_Ej_inv_all);
812: MatSetSizes(sub_schurs->sum_S_Ej_inv_all,PETSC_DECIDE,PETSC_DECIDE,size_active_schur,size_active_schur);
813: MatSetType(sub_schurs->sum_S_Ej_inv_all,MATAIJ);
814: MatSeqAIJSetPreallocation(sub_schurs->sum_S_Ej_inv_all,0,nnz);
815: }
816: }
818: /* S_Ej_all */
819: cum = cum2 = 0;
820: MatDenseGetArray(S_all,&S_data);
821: for (i=0;i<sub_schurs->n_subs;i++) {
822: PetscInt j;
824: /* get S_E */
825: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
826: if (mumps_S && sub_schurs->is_hermitian) { /* When using MUMPS data I need to expand to upper triangular (column oriented) */
827: PetscInt k;
828: for (k=0;k<subset_size;k++) {
829: for (j=k;j<subset_size;j++) {
830: work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
831: work[j*subset_size+k] = S_data[cum2+k*size_schur+j];
832: }
833: }
834: } else { /* copy to workspace */
835: PetscInt k;
836: for (k=0;k<subset_size;k++) {
837: for (j=0;j<subset_size;j++) {
838: work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
839: }
840: }
841: }
842: /* insert S_E values */
843: for (j=0;j<subset_size;j++) dummy_idx[j] = cum+j;
844: MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
846: /* if adaptivity is requested, invert S_E block */
847: if (compute_Stilda) {
848: PetscBLASIntCast(subset_size,&B_N);
849: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
850: if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { /* TODO add sytrf/i for symmetric non hermitian */
851: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,work,&B_N,&B_ierr));
852: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
853: PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,work,&B_N,&B_ierr));
854: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
855: } else {
856: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,work,&B_N,pivots,&B_ierr));
857: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
858: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,work,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
859: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
860: }
861: PetscFPTrapPop();
862: MatSetValues(sub_schurs->sum_S_Ej_inv_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
863: }
864: cum += subset_size;
865: cum2 += subset_size*(size_schur + 1);
866: }
867: MatDenseRestoreArray(S_all,&S_data);
869: #if defined(PETSC_HAVE_MUMPS)
870: if (mumps_S) {
871: MatMumpsRestoreSchurComplement(F,&S_all);
872: }
873: #endif
875: if (compute_Stilda && size_active_schur) {
876: if (sub_schurs->n_subs == 1 && size_schur == size_active_schur) { /* we already computed the inverse */
877: PetscInt j;
878: for (j=0;j<size_schur;j++) dummy_idx[j] = j;
879: MatSetValues(sub_schurs->sum_S_Ej_tilda_all,size_schur,dummy_idx,size_schur,dummy_idx,work,INSERT_VALUES);
880: } else {
881: if (mumps_S) { /* use MatMumps calls to invert S */
882: #if defined(PETSC_HAVE_MUMPS)
883: MatMumpsInvertSchurComplement(F);
884: MatMumpsGetSchurComplement(F,&S_all);
885: #endif
886: } else { /* we need to invert explicitly since we are not using MUMPS for S */
887: MatDenseGetArray(S_all,&S_data);
888: PetscBLASIntCast(size_schur,&B_N);
889: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
890: if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { /* TODO add sytrf/i for symmetric non hermitian */
891: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr));
892: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
893: PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr));
894: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
895: } else {
896: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr));
897: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
898: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
899: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
900: }
901: PetscFPTrapPop();
902: MatDenseRestoreArray(S_all,&S_data);
903: }
904: /* S_Ej_tilda_all */
905: cum = cum2 = 0;
906: MatDenseGetArray(S_all,&S_data);
907: for (i=0;i<sub_schurs->n_subs;i++) {
908: PetscInt j;
910: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
911: /* get (St^-1)_E */
912: if (sub_schurs->is_hermitian) { /* Here I don't need to expand to upper triangular (column oriented) */
913: PetscInt k;
914: for (k=0;k<subset_size;k++) {
915: for (j=k;j<subset_size;j++) {
916: work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
917: }
918: }
919: } else { /* copy to workspace */
920: PetscInt k;
921: for (k=0;k<subset_size;k++) {
922: for (j=0;j<subset_size;j++) {
923: work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
924: }
925: }
926: }
927: for (j=0;j<subset_size;j++) dummy_idx[j] = cum+j;
928: MatSetValues(sub_schurs->sum_S_Ej_tilda_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
929: cum += subset_size;
930: cum2 += subset_size*(size_schur + 1);
931: }
932: MatDenseRestoreArray(S_all,&S_data);
933: #if defined(PETSC_HAVE_MUMPS)
934: if (mumps_S) {
935: MatMumpsRestoreSchurComplement(F,&S_all);
936: }
937: #endif
938: }
939: }
940: PetscFree2(dummy_idx,work);
941: }
942: PetscFree(nnz);
943: MatDestroy(&F);
944: ISDestroy(&is_I_layer);
945: MatDestroy(&S_all);
946: MatDestroy(&A_BB);
947: MatDestroy(&A_IB);
948: MatDestroy(&A_BI);
949: MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
950: MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
951: if (compute_Stilda) {
952: MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
953: MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
954: MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
955: MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
956: }
958: /* Global matrix of all assembled Schur on subsets */
959: MatISSetLocalMat(work_mat,sub_schurs->S_Ej_all);
960: MatISSetMPIXAIJPreallocation_Private(work_mat,global_schur_subsets,PETSC_TRUE);
961: MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);
963: /* Get local part of (\sum_j S_Ej) */
964: if (!sub_schurs->sum_S_Ej_all) {
965: MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_INITIAL_MATRIX,&submats);
966: sub_schurs->sum_S_Ej_all = submats[0];
967: } else {
968: PetscMalloc1(1,&submats);
969: submats[0] = sub_schurs->sum_S_Ej_all;
970: MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
971: }
973: /* Compute explicitly (\sum_j S_Ej)^-1 (faster scaling during PCApply, needs extra work when doing setup) */
974: if (faster_deluxe) {
975: Mat tmpmat;
976: PetscScalar *array;
977: PetscInt cum;
979: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&array);
980: cum = 0;
981: for (i=0;i<sub_schurs->n_subs;i++) {
982: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
983: PetscBLASIntCast(subset_size,&B_N);
984: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
985: if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
986: PetscInt j,k;
988: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
989: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
990: PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
991: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
992: for (j=0;j<B_N;j++) {
993: for (k=j+1;k<B_N;k++) {
994: array[k*B_N+j+cum] = array[j*B_N+k+cum];
995: }
996: }
997: } else {
998: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
999: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
1000: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1001: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1002: }
1003: PetscFPTrapPop();
1004: cum += subset_size*subset_size;
1005: }
1006: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&array);
1007: MatMatMult(sub_schurs->S_Ej_all,sub_schurs->sum_S_Ej_all,MAT_INITIAL_MATRIX,1.0,&tmpmat);
1008: MatDestroy(&sub_schurs->S_Ej_all);
1009: MatDestroy(&sub_schurs->sum_S_Ej_all);
1010: sub_schurs->S_Ej_all = tmpmat;
1011: }
1013: /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
1014: if (compute_Stilda) {
1015: MatISSetLocalMat(work_mat,sub_schurs->sum_S_Ej_tilda_all);
1016: MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);
1017: submats[0] = sub_schurs->sum_S_Ej_tilda_all;
1018: MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
1019: MatISSetLocalMat(work_mat,sub_schurs->sum_S_Ej_inv_all);
1020: MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);
1021: submats[0] = sub_schurs->sum_S_Ej_inv_all;
1022: MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
1023: }
1025: /* free workspace */
1026: PetscFree(submats);
1027: PetscFree2(Bwork,pivots);
1028: MatDestroy(&global_schur_subsets);
1029: MatDestroy(&work_mat);
1030: ISDestroy(&all_subsets_n);
1031: PetscCommDestroy(&comm_n);
1032: return(0);
1033: }
1037: PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap)
1038: {
1039: IS *faces,*edges,*all_cc,vertices;
1040: PetscInt i,n_faces,n_edges,n_all_cc;
1041: PetscBool is_sorted;
1042: PetscErrorCode ierr;
1045: ISSorted(is_I,&is_sorted);
1046: if (!is_sorted) {
1047: SETERRQ(PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted");
1048: }
1049: ISSorted(is_B,&is_sorted);
1050: if (!is_sorted) {
1051: SETERRQ(PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted");
1052: }
1054: /* reset any previous data */
1055: PCBDDCSubSchursReset(sub_schurs);
1057: /* get index sets for faces and edges (already sorted by global ordering) */
1058: PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices);
1059: n_all_cc = n_faces+n_edges;
1060: PetscBTCreate(n_all_cc,&sub_schurs->is_edge);
1061: PetscMalloc1(n_all_cc,&all_cc);
1062: for (i=0;i<n_faces;i++) {
1063: all_cc[i] = faces[i];
1064: }
1065: for (i=0;i<n_edges;i++) {
1066: all_cc[n_faces+i] = edges[i];
1067: PetscBTSet(sub_schurs->is_edge,n_faces+i);
1068: }
1069: PetscFree(faces);
1070: PetscFree(edges);
1071: sub_schurs->is_dir = NULL;
1072: PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir);
1074: /* Determine if MUMPS can be used */
1075: sub_schurs->use_mumps = PETSC_FALSE;
1076: #if defined(PETSC_HAVE_MUMPS)
1077: sub_schurs->use_mumps = PETSC_TRUE;
1078: #endif
1080: PetscObjectReference((PetscObject)is_I);
1081: sub_schurs->is_I = is_I;
1082: PetscObjectReference((PetscObject)is_B);
1083: sub_schurs->is_B = is_B;
1084: PetscObjectReference((PetscObject)graph->l2gmap);
1085: sub_schurs->l2gmap = graph->l2gmap;
1086: PetscObjectReference((PetscObject)BtoNmap);
1087: sub_schurs->BtoNmap = BtoNmap;
1088: sub_schurs->n_subs = n_all_cc;
1089: sub_schurs->is_subs = all_cc;
1090: if (!sub_schurs->use_mumps) { /* sort by local ordering mumps is not present */
1091: for (i=0;i<sub_schurs->n_subs;i++) {
1092: ISSort(sub_schurs->is_subs[i]);
1093: }
1094: }
1095: sub_schurs->is_vertices = vertices;
1096: sub_schurs->S_Ej_all = NULL;
1097: sub_schurs->sum_S_Ej_all = NULL;
1098: sub_schurs->sum_S_Ej_inv_all = NULL;
1099: sub_schurs->sum_S_Ej_tilda_all = NULL;
1100: sub_schurs->is_Ej_all = NULL;
1101: return(0);
1102: }
1106: PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
1107: {
1108: PCBDDCSubSchurs schurs_ctx;
1109: PetscErrorCode ierr;
1112: PetscNew(&schurs_ctx);
1113: schurs_ctx->n_subs = 0;
1114: *sub_schurs = schurs_ctx;
1115: return(0);
1116: }
1120: PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs)
1121: {
1125: PCBDDCSubSchursReset(*sub_schurs);
1126: PetscFree(*sub_schurs);
1127: return(0);
1128: }
1132: PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
1133: {
1134: PetscInt i;
1138: MatDestroy(&sub_schurs->A);
1139: MatDestroy(&sub_schurs->S);
1140: ISDestroy(&sub_schurs->is_I);
1141: ISDestroy(&sub_schurs->is_B);
1142: ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);
1143: ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);
1144: MatDestroy(&sub_schurs->S_Ej_all);
1145: MatDestroy(&sub_schurs->sum_S_Ej_all);
1146: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
1147: MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
1148: ISDestroy(&sub_schurs->is_Ej_all);
1149: ISDestroy(&sub_schurs->is_vertices);
1150: ISDestroy(&sub_schurs->is_dir);
1151: PetscBTDestroy(&sub_schurs->is_edge);
1152: for (i=0;i<sub_schurs->n_subs;i++) {
1153: ISDestroy(&sub_schurs->is_subs[i]);
1154: }
1155: if (sub_schurs->n_subs) {
1156: PetscFree(sub_schurs->is_subs);
1157: }
1158: if (sub_schurs->reuse_mumps) {
1159: PCBDDCReuseMumpsReset(sub_schurs->reuse_mumps);
1160: }
1161: PetscFree(sub_schurs->reuse_mumps);
1162: sub_schurs->n_subs = 0;
1163: return(0);
1164: }
1168: PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
1169: {
1170: PetscInt i,j,n;
1174: n = 0;
1175: for (i=-n_prev;i<0;i++) {
1176: PetscInt start_dof = queue_tip[i];
1177: for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
1178: PetscInt dof = adjncy[j];
1179: if (!PetscBTLookup(touched,dof)) {
1180: PetscBTSet(touched,dof);
1181: queue_tip[n] = dof;
1182: n++;
1183: }
1184: }
1185: }
1186: *n_added = n;
1187: return(0);
1188: }