Actual source code: bddcschurs.c
petsc-3.11.4 2019-09-28
1: #include <../src/ksp/pc/impls/bddc/bddc.h>
2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3: #include <../src/mat/impls/dense/seq/dense.h>
4: #include <petscblaslapack.h>
6: PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
7: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
8: static PetscErrorCode PCBDDCReuseSolvers_Interior(PC,Vec,Vec);
9: static PetscErrorCode PCBDDCReuseSolvers_Correction(PC,Vec,Vec);
11: /* if v2 is not present, correction is done in-place */
12: PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full)
13: {
14: PetscScalar *array;
15: PetscScalar *array2;
19: if (!ctx->benign_n) return(0);
20: if (sol && full) {
21: PetscInt n_I,size_schur;
23: /* get sizes */
24: MatGetSize(ctx->benign_csAIB,&size_schur,NULL);
25: VecGetSize(v,&n_I);
26: n_I = n_I - size_schur;
27: /* get schur sol from array */
28: VecGetArray(v,&array);
29: VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I);
30: VecRestoreArray(v,&array);
31: /* apply interior sol correction */
32: MatMultTranspose(ctx->benign_csAIB,ctx->benign_dummy_schur_vec,ctx->benign_corr_work);
33: VecResetArray(ctx->benign_dummy_schur_vec);
34: MatMultAdd(ctx->benign_AIIm1ones,ctx->benign_corr_work,v,v);
35: }
36: if (v2) {
37: PetscInt nl;
39: VecGetArrayRead(v,(const PetscScalar**)&array);
40: VecGetLocalSize(v2,&nl);
41: VecGetArray(v2,&array2);
42: PetscMemcpy(array2,array,nl*sizeof(PetscScalar));
43: } else {
44: VecGetArray(v,&array);
45: array2 = array;
46: }
47: if (!sol) { /* change rhs */
48: PetscInt n;
49: for (n=0;n<ctx->benign_n;n++) {
50: PetscScalar sum = 0.;
51: const PetscInt *cols;
52: PetscInt nz,i;
54: ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz);
55: ISGetIndices(ctx->benign_zerodiag_subs[n],&cols);
56: for (i=0;i<nz-1;i++) sum += array[cols[i]];
57: #if defined(PETSC_USE_COMPLEX)
58: sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz));
59: #else
60: sum = -sum/nz;
61: #endif
62: for (i=0;i<nz-1;i++) array2[cols[i]] += sum;
63: ctx->benign_save_vals[n] = array2[cols[nz-1]];
64: array2[cols[nz-1]] = sum;
65: ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols);
66: }
67: } else {
68: PetscInt n;
69: for (n=0;n<ctx->benign_n;n++) {
70: PetscScalar sum = 0.;
71: const PetscInt *cols;
72: PetscInt nz,i;
73: ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz);
74: ISGetIndices(ctx->benign_zerodiag_subs[n],&cols);
75: for (i=0;i<nz-1;i++) sum += array[cols[i]];
76: #if defined(PETSC_USE_COMPLEX)
77: sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz));
78: #else
79: sum = -sum/nz;
80: #endif
81: for (i=0;i<nz-1;i++) array2[cols[i]] += sum;
82: array2[cols[nz-1]] = ctx->benign_save_vals[n];
83: ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols);
84: }
85: }
86: if (v2) {
87: VecRestoreArrayRead(v,(const PetscScalar**)&array);
88: VecRestoreArray(v2,&array2);
89: } else {
90: VecRestoreArray(v,&array);
91: }
92: if (!sol && full) {
93: Vec usedv;
94: PetscInt n_I,size_schur;
96: /* get sizes */
97: MatGetSize(ctx->benign_csAIB,&size_schur,NULL);
98: VecGetSize(v,&n_I);
99: n_I = n_I - size_schur;
100: /* compute schur rhs correction */
101: if (v2) {
102: usedv = v2;
103: } else {
104: usedv = v;
105: }
106: /* apply schur rhs correction */
107: MatMultTranspose(ctx->benign_AIIm1ones,usedv,ctx->benign_corr_work);
108: VecGetArrayRead(usedv,(const PetscScalar**)&array);
109: VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I);
110: VecRestoreArrayRead(usedv,(const PetscScalar**)&array);
111: MatMultAdd(ctx->benign_csAIB,ctx->benign_corr_work,ctx->benign_dummy_schur_vec,ctx->benign_dummy_schur_vec);
112: VecResetArray(ctx->benign_dummy_schur_vec);
113: }
114: return(0);
115: }
117: static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full)
118: {
119: PCBDDCReuseSolvers ctx;
120: PetscBool copy = PETSC_FALSE;
121: PetscErrorCode ierr;
124: PCShellGetContext(pc,(void **)&ctx);
125: if (full) {
126: #if defined(PETSC_HAVE_MUMPS)
127: MatMumpsSetIcntl(ctx->F,26,-1);
128: #endif
129: #if defined(PETSC_HAVE_MKL_PARDISO)
130: MatMkl_PardisoSetCntl(ctx->F,70,0);
131: #endif
132: copy = ctx->has_vertices;
133: } else { /* interior solver */
134: #if defined(PETSC_HAVE_MUMPS)
135: MatMumpsSetIcntl(ctx->F,26,0);
136: #endif
137: #if defined(PETSC_HAVE_MKL_PARDISO)
138: MatMkl_PardisoSetCntl(ctx->F,70,1);
139: #endif
140: copy = PETSC_TRUE;
141: }
142: /* copy rhs into factored matrix workspace */
143: if (copy) {
144: PetscInt n;
145: PetscScalar *array,*array_solver;
147: VecGetLocalSize(rhs,&n);
148: VecGetArrayRead(rhs,(const PetscScalar**)&array);
149: VecGetArray(ctx->rhs,&array_solver);
150: PetscMemcpy(array_solver,array,n*sizeof(PetscScalar));
151: VecRestoreArray(ctx->rhs,&array_solver);
152: VecRestoreArrayRead(rhs,(const PetscScalar**)&array);
154: PCBDDCReuseSolversBenignAdapt(ctx,ctx->rhs,NULL,PETSC_FALSE,full);
155: if (transpose) {
156: MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol);
157: } else {
158: MatSolve(ctx->F,ctx->rhs,ctx->sol);
159: }
160: PCBDDCReuseSolversBenignAdapt(ctx,ctx->sol,NULL,PETSC_TRUE,full);
162: /* get back data to caller worskpace */
163: VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_solver);
164: VecGetArray(sol,&array);
165: PetscMemcpy(array,array_solver,n*sizeof(PetscScalar));
166: VecRestoreArray(sol,&array);
167: VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_solver);
168: } else {
169: if (ctx->benign_n) {
170: PCBDDCReuseSolversBenignAdapt(ctx,rhs,ctx->rhs,PETSC_FALSE,full);
171: if (transpose) {
172: MatSolveTranspose(ctx->F,ctx->rhs,sol);
173: } else {
174: MatSolve(ctx->F,ctx->rhs,sol);
175: }
176: PCBDDCReuseSolversBenignAdapt(ctx,sol,NULL,PETSC_TRUE,full);
177: } else {
178: if (transpose) {
179: MatSolveTranspose(ctx->F,rhs,sol);
180: } else {
181: MatSolve(ctx->F,rhs,sol);
182: }
183: }
184: }
185: /* restore defaults */
186: #if defined(PETSC_HAVE_MUMPS)
187: MatMumpsSetIcntl(ctx->F,26,-1);
188: #endif
189: #if defined(PETSC_HAVE_MKL_PARDISO)
190: MatMkl_PardisoSetCntl(ctx->F,70,0);
191: #endif
192: return(0);
193: }
195: static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol)
196: {
197: PetscErrorCode ierr;
200: PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_TRUE);
201: return(0);
202: }
204: static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol)
205: {
206: PetscErrorCode ierr;
209: PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_TRUE);
210: return(0);
211: }
213: static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol)
214: {
215: PetscErrorCode ierr;
218: PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_FALSE);
219: return(0);
220: }
222: static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol)
223: {
224: PetscErrorCode ierr;
227: PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_FALSE);
228: return(0);
229: }
231: static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer)
232: {
233: PCBDDCReuseSolvers ctx;
234: PetscBool iascii;
235: PetscErrorCode ierr;
238: PCShellGetContext(pc,(void **)&ctx);
239: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
240: if (iascii) {
241: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO);
242: }
243: MatView(ctx->F,viewer);
244: if (iascii) {
245: PetscViewerPopFormat(viewer);
246: }
247: return(0);
248: }
250: static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse)
251: {
252: PetscInt i;
256: MatDestroy(&reuse->F);
257: VecDestroy(&reuse->sol);
258: VecDestroy(&reuse->rhs);
259: PCDestroy(&reuse->interior_solver);
260: PCDestroy(&reuse->correction_solver);
261: ISDestroy(&reuse->is_R);
262: ISDestroy(&reuse->is_B);
263: VecScatterDestroy(&reuse->correction_scatter_B);
264: VecDestroy(&reuse->sol_B);
265: VecDestroy(&reuse->rhs_B);
266: for (i=0;i<reuse->benign_n;i++) {
267: ISDestroy(&reuse->benign_zerodiag_subs[i]);
268: }
269: PetscFree(reuse->benign_zerodiag_subs);
270: PetscFree(reuse->benign_save_vals);
271: MatDestroy(&reuse->benign_csAIB);
272: MatDestroy(&reuse->benign_AIIm1ones);
273: VecDestroy(&reuse->benign_corr_work);
274: VecDestroy(&reuse->benign_dummy_schur_vec);
275: return(0);
276: }
278: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
279: {
280: Mat B, C, D, Bd, Cd, AinvBd;
281: KSP ksp;
282: PC pc;
283: PetscBool isLU, isILU, isCHOL, Bdense, Cdense;
284: PetscReal fill = 2.0;
285: PetscInt n_I;
286: PetscMPIInt size;
290: MPI_Comm_size(PetscObjectComm((PetscObject)M),&size);
291: if (size != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices");
292: if (reuse == MAT_REUSE_MATRIX) {
293: PetscBool Sdense;
295: PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense);
296: if (!Sdense) SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should dense");
297: }
298: MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D);
299: MatSchurComplementGetKSP(M, &ksp);
300: KSPGetPC(ksp, &pc);
301: PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU);
302: PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU);
303: PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL);
304: PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense);
305: PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense);
306: MatGetSize(B,&n_I,NULL);
307: if (n_I) {
308: if (!Bdense) {
309: MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd);
310: } else {
311: Bd = B;
312: }
314: if (isLU || isILU || isCHOL) {
315: Mat fact;
316: KSPSetUp(ksp);
317: PCFactorGetMatrix(pc, &fact);
318: MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
319: MatMatSolve(fact, Bd, AinvBd);
320: } else {
321: PetscBool ex = PETSC_TRUE;
323: if (ex) {
324: Mat Ainvd;
326: PCComputeExplicitOperator(pc, &Ainvd);
327: MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd);
328: MatDestroy(&Ainvd);
329: } else {
330: Vec sol,rhs;
331: PetscScalar *arrayrhs,*arraysol;
332: PetscInt i,nrhs,n;
334: MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
335: MatGetSize(Bd,&n,&nrhs);
336: MatDenseGetArray(Bd,&arrayrhs);
337: MatDenseGetArray(AinvBd,&arraysol);
338: KSPGetSolution(ksp,&sol);
339: KSPGetRhs(ksp,&rhs);
340: for (i=0;i<nrhs;i++) {
341: VecPlaceArray(rhs,arrayrhs+i*n);
342: VecPlaceArray(sol,arraysol+i*n);
343: KSPSolve(ksp,rhs,sol);
344: VecResetArray(rhs);
345: VecResetArray(sol);
346: }
347: MatDenseRestoreArray(Bd,&arrayrhs);
348: MatDenseRestoreArray(AinvBd,&arrayrhs);
349: }
350: }
351: if (!Bdense & !issym) {
352: MatDestroy(&Bd);
353: }
355: if (!issym) {
356: if (!Cdense) {
357: MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd);
358: } else {
359: Cd = C;
360: }
361: MatMatMult(Cd, AinvBd, reuse, fill, S);
362: if (!Cdense) {
363: MatDestroy(&Cd);
364: }
365: } else {
366: MatTransposeMatMult(Bd, AinvBd, reuse, fill, S);
367: if (!Bdense) {
368: MatDestroy(&Bd);
369: }
370: }
371: MatDestroy(&AinvBd);
372: }
374: if (D) {
375: Mat Dd;
376: PetscBool Ddense;
378: PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense);
379: if (!Ddense) {
380: MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd);
381: } else {
382: Dd = D;
383: }
384: if (n_I) {
385: MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN);
386: } else {
387: if (reuse == MAT_INITIAL_MATRIX) {
388: MatDuplicate(Dd,MAT_COPY_VALUES,S);
389: } else {
390: MatCopy(Dd,*S,SAME_NONZERO_PATTERN);
391: }
392: }
393: if (!Ddense) {
394: MatDestroy(&Dd);
395: }
396: } else {
397: MatScale(*S,-1.0);
398: }
399: return(0);
400: }
402: PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal)
403: {
404: Mat F,A_II,A_IB,A_BI,A_BB,AE_II;
405: Mat S_all;
406: Mat global_schur_subsets,work_mat,*submats;
407: ISLocalToGlobalMapping l2gmap_subsets;
408: IS is_I,is_I_layer;
409: IS all_subsets,all_subsets_mult,all_subsets_n;
410: PetscInt *nnz,*all_local_idx_N;
411: PetscInt *auxnum1,*auxnum2;
412: PetscInt i,subset_size,max_subset_size;
413: PetscInt n_B,extra,local_size,global_size;
414: PetscBLASInt B_N,B_ierr,B_lwork,*pivots;
415: PetscScalar *Bwork;
416: MPI_Comm comm_n;
417: PetscBool deluxe = PETSC_TRUE;
418: PetscBool use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE;
419: PetscViewer matl_dbg_viewer = NULL;
420: PetscErrorCode ierr;
423: MatDestroy(&sub_schurs->A);
424: MatDestroy(&sub_schurs->S);
425: /* convert matrix if needed */
426: if (Ain) {
427: PetscBool isseqaij;
428: PetscObjectTypeCompare((PetscObject)Ain,MATSEQAIJ,&isseqaij);
429: if (isseqaij) {
430: PetscObjectReference((PetscObject)Ain);
431: sub_schurs->A = Ain;
432: } else {
433: MatConvert(Ain,MATSEQAIJ,MAT_INITIAL_MATRIX,&sub_schurs->A);
434: }
435: }
437: PetscObjectReference((PetscObject)Sin);
438: sub_schurs->S = Sin;
439: if (sub_schurs->schur_explicit) {
440: sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A);
441: }
443: /* preliminary checks */
444: if (!sub_schurs->schur_explicit && compute_Stilda) SETERRQ(PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO");
446: if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE;
448: /* debug (MATLAB) */
449: if (sub_schurs->debug) {
450: PetscMPIInt size,rank;
451: PetscInt nr,*print_schurs_ranks,print_schurs;
452: PetscBool flg;
454: MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&size);
455: MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank);
456: nr = size;
457: PetscMalloc1(nr,&print_schurs_ranks);
458: PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");
459: PetscOptionsIntArray("-sub_schurs_debug_ranks","Ranks to debug (all if the option is not used)",NULL,print_schurs_ranks,&nr,&flg);
460: if (!flg) print_schurs = PETSC_TRUE;
461: else {
462: for (i=0;i<nr;i++) if (print_schurs_ranks[i] == (PetscInt)rank) { print_schurs = PETSC_TRUE; break; }
463: }
464: PetscOptionsEnd();
465: PetscFree(print_schurs_ranks);
466: if (print_schurs) {
467: char filename[256];
469: PetscSNPrintf(filename,sizeof(filename),"sub_schurs_Schur_r%d.m",PetscGlobalRank);
470: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&matl_dbg_viewer);
471: PetscViewerPushFormat(matl_dbg_viewer,PETSC_VIEWER_ASCII_MATLAB);
472: }
473: }
476: /* restrict work on active processes */
477: if (sub_schurs->restrict_comm) {
478: PetscSubcomm subcomm;
479: PetscMPIInt color,rank;
481: color = 0;
482: if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
483: MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank);
484: PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm);
485: PetscSubcommSetNumber(subcomm,2);
486: PetscSubcommSetTypeGeneral(subcomm,color,rank);
487: PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL);
488: PetscSubcommDestroy(&subcomm);
489: if (!sub_schurs->n_subs) {
490: PetscCommDestroy(&comm_n);
491: return(0);
492: }
493: } else {
494: PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL);
495: }
497: /* get Schur complement matrices */
498: if (!sub_schurs->schur_explicit) {
499: Mat tA_IB,tA_BI,tA_BB;
500: PetscBool isseqsbaij;
501: MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB);
502: PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij);
503: if (isseqsbaij) {
504: MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB);
505: MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB);
506: MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI);
507: } else {
508: PetscObjectReference((PetscObject)tA_BB);
509: A_BB = tA_BB;
510: PetscObjectReference((PetscObject)tA_IB);
511: A_IB = tA_IB;
512: PetscObjectReference((PetscObject)tA_BI);
513: A_BI = tA_BI;
514: }
515: } else {
516: A_II = NULL;
517: A_IB = NULL;
518: A_BI = NULL;
519: A_BB = NULL;
520: }
521: S_all = NULL;
523: /* determine interior problems */
524: ISGetLocalSize(sub_schurs->is_I,&i);
525: if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
526: PetscBT touched;
527: const PetscInt* idx_B;
528: PetscInt n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;
530: if (!xadj) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency");
531: /* get sizes */
532: ISGetLocalSize(sub_schurs->is_I,&n_I);
533: ISGetLocalSize(sub_schurs->is_B,&n_B);
535: PetscMalloc1(n_I+n_B,&local_numbering);
536: PetscBTCreate(n_I+n_B,&touched);
537: PetscBTMemzero(n_I+n_B,touched);
539: /* all boundary dofs must be skipped when adding layers */
540: ISGetIndices(sub_schurs->is_B,&idx_B);
541: for (j=0;j<n_B;j++) {
542: PetscBTSet(touched,idx_B[j]);
543: }
544: PetscMemcpy(local_numbering,idx_B,n_B*sizeof(PetscInt));
545: ISRestoreIndices(sub_schurs->is_B,&idx_B);
547: /* add prescribed number of layers of dofs */
548: n_local_dofs = n_B;
549: n_prev_added = n_B;
550: for (layer=0;layer<nlayers;layer++) {
551: PetscInt n_added;
552: if (n_local_dofs == n_I+n_B) break;
553: if (n_local_dofs > n_I+n_B) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %D. Out of bound access (%D > %D)",layer,n_local_dofs,n_I+n_B);
554: PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);
555: n_prev_added = n_added;
556: n_local_dofs += n_added;
557: if (!n_added) break;
558: }
559: PetscBTDestroy(&touched);
561: /* IS for I layer dofs in original numbering */
562: ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer);
563: PetscFree(local_numbering);
564: ISSort(is_I_layer);
565: /* IS for I layer dofs in I numbering */
566: if (!sub_schurs->schur_explicit) {
567: ISLocalToGlobalMapping ItoNmap;
568: ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap);
569: ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I);
570: ISLocalToGlobalMappingDestroy(&ItoNmap);
572: /* II block */
573: MatCreateSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);
574: }
575: } else {
576: PetscInt n_I;
578: /* IS for I dofs in original numbering */
579: PetscObjectReference((PetscObject)sub_schurs->is_I);
580: is_I_layer = sub_schurs->is_I;
582: /* IS for I dofs in I numbering (strided 1) */
583: if (!sub_schurs->schur_explicit) {
584: ISGetSize(sub_schurs->is_I,&n_I);
585: ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I);
587: /* II block is the same */
588: PetscObjectReference((PetscObject)A_II);
589: AE_II = A_II;
590: }
591: }
593: /* Get info on subset sizes and sum of all subsets sizes */
594: max_subset_size = 0;
595: local_size = 0;
596: for (i=0;i<sub_schurs->n_subs;i++) {
597: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
598: max_subset_size = PetscMax(subset_size,max_subset_size);
599: local_size += subset_size;
600: }
602: /* Work arrays for local indices */
603: extra = 0;
604: ISGetLocalSize(sub_schurs->is_B,&n_B);
605: if (sub_schurs->schur_explicit && is_I_layer) {
606: ISGetLocalSize(is_I_layer,&extra);
607: }
608: PetscMalloc1(n_B+extra,&all_local_idx_N);
609: if (extra) {
610: const PetscInt *idxs;
611: ISGetIndices(is_I_layer,&idxs);
612: PetscMemcpy(all_local_idx_N,idxs,extra*sizeof(PetscInt));
613: ISRestoreIndices(is_I_layer,&idxs);
614: }
615: PetscMalloc1(local_size,&nnz);
616: PetscMalloc1(sub_schurs->n_subs,&auxnum1);
617: PetscMalloc1(sub_schurs->n_subs,&auxnum2);
619: /* Get local indices in local numbering */
620: local_size = 0;
621: for (i=0;i<sub_schurs->n_subs;i++) {
622: PetscInt j;
623: const PetscInt *idxs;
625: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
626: ISGetIndices(sub_schurs->is_subs[i],&idxs);
627: /* start (smallest in global ordering) and multiplicity */
628: auxnum1[i] = idxs[0];
629: auxnum2[i] = subset_size;
630: /* subset indices in local numbering */
631: PetscMemcpy(all_local_idx_N+local_size+extra,idxs,subset_size*sizeof(PetscInt));
632: ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
633: for (j=0;j<subset_size;j++) nnz[local_size+j] = subset_size;
634: local_size += subset_size;
635: }
637: /* allocate extra workspace needed only for GETRI or SYTRF */
638: use_potr = use_sytr = PETSC_FALSE;
639: if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) {
640: use_potr = PETSC_TRUE;
641: } else if (sub_schurs->is_symmetric) {
642: use_sytr = PETSC_TRUE;
643: }
644: if (local_size && !use_potr) {
645: PetscScalar lwork,dummyscalar = 0.;
646: PetscBLASInt dummyint = 0;
648: B_lwork = -1;
649: PetscBLASIntCast(local_size,&B_N);
650: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
651: if (use_sytr) {
652: PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
653: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYTRF Lapack routine %d",(int)B_ierr);
654: } else {
655: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
656: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr);
657: }
658: PetscFPTrapPop();
659: PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
660: PetscMalloc2(B_lwork,&Bwork,B_N,&pivots);
661: } else {
662: Bwork = NULL;
663: pivots = NULL;
664: }
666: /* prepare parallel matrices for summing up properly schurs on subsets */
667: ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n);
668: ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets);
669: ISDestroy(&all_subsets_n);
670: ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult);
671: ISRenumber(all_subsets,all_subsets_mult,&global_size,&all_subsets_n);
672: ISDestroy(&all_subsets);
673: ISDestroy(&all_subsets_mult);
674: ISGetLocalSize(all_subsets_n,&i);
675: if (i != local_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %D != %D",i,local_size);
676: ISLocalToGlobalMappingCreateIS(all_subsets_n,&l2gmap_subsets);
677: MatCreateIS(comm_n,1,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size,l2gmap_subsets,NULL,&work_mat);
678: ISLocalToGlobalMappingDestroy(&l2gmap_subsets);
679: MatCreate(PetscObjectComm((PetscObject)work_mat),&global_schur_subsets);
680: MatSetSizes(global_schur_subsets,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size);
681: MatSetType(global_schur_subsets,MATMPIAIJ);
683: /* subset indices in local boundary numbering */
684: if (!sub_schurs->is_Ej_all) {
685: PetscInt *all_local_idx_B;
687: PetscMalloc1(local_size,&all_local_idx_B);
688: ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B);
689: if (subset_size != local_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %D != %D",subset_size,local_size);
690: ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all);
691: }
693: if (change) {
694: ISLocalToGlobalMapping BtoS;
695: IS change_primal_B;
696: IS change_primal_all;
698: if (sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
699: if (sub_schurs->change) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
700: PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change_primal_sub);
701: for (i=0;i<sub_schurs->n_subs;i++) {
702: ISLocalToGlobalMapping NtoS;
703: ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i],&NtoS);
704: ISGlobalToLocalMappingApplyIS(NtoS,IS_GTOLM_DROP,change_primal,&sub_schurs->change_primal_sub[i]);
705: ISLocalToGlobalMappingDestroy(&NtoS);
706: }
707: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,change_primal,&change_primal_B);
708: ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all,&BtoS);
709: ISGlobalToLocalMappingApplyIS(BtoS,IS_GTOLM_DROP,change_primal_B,&change_primal_all);
710: ISLocalToGlobalMappingDestroy(&BtoS);
711: ISDestroy(&change_primal_B);
712: PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change);
713: for (i=0;i<sub_schurs->n_subs;i++) {
714: Mat change_sub;
716: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
717: KSPCreate(PETSC_COMM_SELF,&sub_schurs->change[i]);
718: KSPSetType(sub_schurs->change[i],KSPPREONLY);
719: if (!sub_schurs->change_with_qr) {
720: MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_sub);
721: } else {
722: Mat change_subt;
723: MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_subt);
724: MatConvert(change_subt,MATSEQDENSE,MAT_INITIAL_MATRIX,&change_sub);
725: MatDestroy(&change_subt);
726: }
727: KSPSetOperators(sub_schurs->change[i],change_sub,change_sub);
728: MatDestroy(&change_sub);
729: KSPSetOptionsPrefix(sub_schurs->change[i],sub_schurs->prefix);
730: KSPAppendOptionsPrefix(sub_schurs->change[i],"sub_schurs_change_");
731: }
732: ISDestroy(&change_primal_all);
733: }
735: /* Local matrix of all local Schur on subsets (transposed) */
736: if (!sub_schurs->S_Ej_all) {
737: MatCreate(PETSC_COMM_SELF,&sub_schurs->S_Ej_all);
738: MatSetSizes(sub_schurs->S_Ej_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);
739: MatSetType(sub_schurs->S_Ej_all,MATAIJ);
740: MatSeqAIJSetPreallocation(sub_schurs->S_Ej_all,0,nnz);
741: }
743: /* Compute Schur complements explicitly */
744: F = NULL;
745: if (!sub_schurs->schur_explicit) {
746: /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested;
747: it is not efficient, unless the economic version of the scaling is used */
748: Mat S_Ej_expl;
749: PetscScalar *work;
750: PetscInt j,*dummy_idx;
751: PetscBool Sdense;
753: PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);
754: local_size = 0;
755: for (i=0;i<sub_schurs->n_subs;i++) {
756: IS is_subset_B;
757: Mat AE_EE,AE_IE,AE_EI,S_Ej;
759: /* subsets in original and boundary numbering */
760: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B);
761: /* EE block */
762: MatCreateSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE);
763: /* IE block */
764: MatCreateSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE);
765: /* EI block */
766: if (sub_schurs->is_symmetric) {
767: MatCreateTranspose(AE_IE,&AE_EI);
768: } else if (sub_schurs->is_hermitian) {
769: MatCreateHermitianTranspose(AE_IE,&AE_EI);
770: } else {
771: MatCreateSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI);
772: }
773: ISDestroy(&is_subset_B);
774: MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej);
775: MatDestroy(&AE_EE);
776: MatDestroy(&AE_IE);
777: MatDestroy(&AE_EI);
778: if (AE_II == A_II) { /* we can reuse the same ksp */
779: KSP ksp;
780: MatSchurComplementGetKSP(sub_schurs->S,&ksp);
781: MatSchurComplementSetKSP(S_Ej,ksp);
782: } else { /* build new ksp object which inherits ksp and pc types from the original one */
783: KSP origksp,schurksp;
784: PC origpc,schurpc;
785: KSPType ksp_type;
786: PetscInt n_internal;
787: PetscBool ispcnone;
789: MatSchurComplementGetKSP(sub_schurs->S,&origksp);
790: MatSchurComplementGetKSP(S_Ej,&schurksp);
791: KSPGetType(origksp,&ksp_type);
792: KSPSetType(schurksp,ksp_type);
793: KSPGetPC(schurksp,&schurpc);
794: KSPGetPC(origksp,&origpc);
795: PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone);
796: if (!ispcnone) {
797: PCType pc_type;
798: PCGetType(origpc,&pc_type);
799: PCSetType(schurpc,pc_type);
800: } else {
801: PCSetType(schurpc,PCLU);
802: }
803: ISGetSize(is_I,&n_internal);
804: if (n_internal) { /* UMFPACK gives error with 0 sized problems */
805: MatSolverType solver = NULL;
806: PCFactorGetMatSolverType(origpc,(MatSolverType*)&solver);
807: if (solver) {
808: PCFactorSetMatSolverType(schurpc,solver);
809: }
810: }
811: KSPSetUp(schurksp);
812: }
813: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
814: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl);
815: PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_symmetric,MAT_REUSE_MATRIX,&S_Ej_expl);
816: PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense);
817: if (Sdense) {
818: for (j=0;j<subset_size;j++) {
819: dummy_idx[j]=local_size+j;
820: }
821: MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
822: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
823: MatDestroy(&S_Ej);
824: MatDestroy(&S_Ej_expl);
825: local_size += subset_size;
826: }
827: PetscFree2(dummy_idx,work);
828: /* free */
829: ISDestroy(&is_I);
830: MatDestroy(&AE_II);
831: PetscFree(all_local_idx_N);
832: } else {
833: Mat A,cs_AIB_mat = NULL,benign_AIIm1_ones_mat = NULL;
834: Vec Dall = NULL;
835: IS is_A_all,*is_p_r = NULL;
836: PetscScalar *work,*S_data,*schur_factor,infty = PETSC_MAX_REAL;
837: PetscInt n,n_I,*dummy_idx,size_schur,size_active_schur,cum,cum2;
838: PetscBool economic,solver_S,S_lower_triangular = PETSC_FALSE;
839: PetscBool schur_has_vertices,factor_workaround;
840: PetscBool use_cholesky;
842: /* get sizes */
843: n_I = 0;
844: if (is_I_layer) {
845: ISGetLocalSize(is_I_layer,&n_I);
846: }
847: economic = PETSC_FALSE;
848: ISGetLocalSize(sub_schurs->is_I,&cum);
849: if (cum != n_I) economic = PETSC_TRUE;
850: MatGetLocalSize(sub_schurs->A,&n,NULL);
851: size_active_schur = local_size;
853: /* import scaling vector (wrong formulation if we have 3D edges) */
854: if (scaling && compute_Stilda) {
855: const PetscScalar *array;
856: PetscScalar *array2;
857: const PetscInt *idxs;
858: PetscInt i;
860: ISGetIndices(sub_schurs->is_Ej_all,&idxs);
861: VecCreateSeq(PETSC_COMM_SELF,size_active_schur,&Dall);
862: VecGetArrayRead(scaling,&array);
863: VecGetArray(Dall,&array2);
864: for (i=0;i<size_active_schur;i++) array2[i] = array[idxs[i]];
865: VecRestoreArray(Dall,&array2);
866: VecRestoreArrayRead(scaling,&array);
867: ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);
868: deluxe = PETSC_FALSE;
869: }
871: /* size active schurs does not count any dirichlet or vertex dof on the interface */
872: factor_workaround = PETSC_FALSE;
873: schur_has_vertices = PETSC_FALSE;
874: cum = n_I+size_active_schur;
875: if (sub_schurs->is_dir) {
876: const PetscInt* idxs;
877: PetscInt n_dir;
879: ISGetLocalSize(sub_schurs->is_dir,&n_dir);
880: ISGetIndices(sub_schurs->is_dir,&idxs);
881: PetscMemcpy(all_local_idx_N+cum,idxs,n_dir*sizeof(PetscInt));
882: ISRestoreIndices(sub_schurs->is_dir,&idxs);
883: cum += n_dir;
884: factor_workaround = PETSC_TRUE;
885: }
886: /* include the primal vertices in the Schur complement */
887: if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) {
888: PetscInt n_v;
890: ISGetLocalSize(sub_schurs->is_vertices,&n_v);
891: if (n_v) {
892: const PetscInt* idxs;
894: ISGetIndices(sub_schurs->is_vertices,&idxs);
895: PetscMemcpy(all_local_idx_N+cum,idxs,n_v*sizeof(PetscInt));
896: ISRestoreIndices(sub_schurs->is_vertices,&idxs);
897: cum += n_v;
898: factor_workaround = PETSC_TRUE;
899: schur_has_vertices = PETSC_TRUE;
900: }
901: }
902: size_schur = cum - n_I;
903: ISCreateGeneral(PETSC_COMM_SELF,cum,all_local_idx_N,PETSC_OWN_POINTER,&is_A_all);
904: if (cum == n) {
905: ISSetPermutation(is_A_all);
906: MatPermute(sub_schurs->A,is_A_all,is_A_all,&A);
907: } else {
908: MatCreateSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A);
909: }
910: MatSetOptionsPrefix(A,sub_schurs->prefix);
911: MatAppendOptionsPrefix(A,"sub_schurs_");
913: /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory
914: n^2 instead of n^1.5 or something. This is a workaround */
915: if (benign_n) {
916: Vec v;
917: ISLocalToGlobalMapping N_to_reor;
918: IS is_p0,is_p0_p;
919: PetscScalar *cs_AIB,*AIIm1_data;
920: PetscInt sizeA;
922: ISLocalToGlobalMappingCreateIS(is_A_all,&N_to_reor);
923: ISCreateGeneral(PETSC_COMM_SELF,benign_n,benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
924: ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,is_p0,&is_p0_p);
925: ISDestroy(&is_p0);
926: MatCreateVecs(A,&v,NULL);
927: VecGetSize(v,&sizeA);
928: MatCreateSeqDense(PETSC_COMM_SELF,sizeA,benign_n,NULL,&benign_AIIm1_ones_mat);
929: MatCreateSeqDense(PETSC_COMM_SELF,size_schur,benign_n,NULL,&cs_AIB_mat);
930: MatDenseGetArray(cs_AIB_mat,&cs_AIB);
931: MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data);
932: PetscMalloc1(benign_n,&is_p_r);
933: /* compute colsum of A_IB restricted to pressures */
934: for (i=0;i<benign_n;i++) {
935: Vec benign_AIIm1_ones;
936: PetscScalar *array;
937: const PetscInt *idxs;
938: PetscInt j,nz;
940: ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,benign_zerodiag_subs[i],&is_p_r[i]);
941: ISGetLocalSize(is_p_r[i],&nz);
942: ISGetIndices(is_p_r[i],&idxs);
943: for (j=0;j<nz;j++) AIIm1_data[idxs[j]+sizeA*i] = 1.;
944: ISRestoreIndices(is_p_r[i],&idxs);
945: VecCreateSeqWithArray(PETSC_COMM_SELF,1,sizeA,AIIm1_data+sizeA*i,&benign_AIIm1_ones);
946: MatMult(A,benign_AIIm1_ones,v);
947: VecDestroy(&benign_AIIm1_ones);
948: VecGetArray(v,&array);
949: for (j=0;j<size_schur;j++) {
950: #if defined(PETSC_USE_COMPLEX)
951: cs_AIB[i*size_schur + j] = (PetscRealPart(array[j+n_I])/nz + PETSC_i*(PetscImaginaryPart(array[j+n_I])/nz));
952: #else
953: cs_AIB[i*size_schur + j] = array[j+n_I]/nz;
954: #endif
955: }
956: VecRestoreArray(v,&array);
957: }
958: MatDenseRestoreArray(cs_AIB_mat,&cs_AIB);
959: MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data);
960: VecDestroy(&v);
961: MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
962: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
963: MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
964: MatZeroRowsColumnsIS(A,is_p0_p,1.0,NULL,NULL);
965: ISDestroy(&is_p0_p);
966: ISLocalToGlobalMappingDestroy(&N_to_reor);
967: }
968: MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_symmetric);
969: MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian);
970: MatSetOption(A,MAT_SPD,sub_schurs->is_posdef);
972: /* for complexes, symmetric and hermitian at the same time implies null imaginary part */
973: use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric);
975: /* when using the benign subspace trick, the local Schur complements are SPD */
976: if (benign_trick) sub_schurs->is_posdef = PETSC_TRUE;
978: if (n_I) {
979: IS is_schur;
981: if (use_cholesky) {
982: MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_CHOLESKY,&F);
983: } else {
984: MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_LU,&F);
985: }
986: MatSetErrorIfFailure(A,PETSC_TRUE);
988: /* subsets ordered last */
989: ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is_schur);
990: MatFactorSetSchurIS(F,is_schur);
991: ISDestroy(&is_schur);
993: /* factorization step */
994: if (use_cholesky) {
995: MatCholeskyFactorSymbolic(F,A,NULL,NULL);
996: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
997: MatMumpsSetIcntl(F,19,2);
998: #endif
999: MatCholeskyFactorNumeric(F,A,NULL);
1000: S_lower_triangular = PETSC_TRUE;
1001: } else {
1002: MatLUFactorSymbolic(F,A,NULL,NULL,NULL);
1003: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1004: MatMumpsSetIcntl(F,19,3);
1005: #endif
1006: MatLUFactorNumeric(F,A,NULL);
1007: }
1008: MatViewFromOptions(F,(PetscObject)A,"-mat_factor_view");
1010: if (matl_dbg_viewer) {
1011: Mat S;
1012: IS is;
1014: PetscObjectSetName((PetscObject)A,"A");
1015: MatView(A,matl_dbg_viewer);
1016: MatFactorCreateSchurComplement(F,&S,NULL);
1017: PetscObjectSetName((PetscObject)S,"S");
1018: MatView(S,matl_dbg_viewer);
1019: MatDestroy(&S);
1020: ISCreateStride(PETSC_COMM_SELF,n_I,0,1,&is);
1021: PetscObjectSetName((PetscObject)is,"I");
1022: ISView(is,matl_dbg_viewer);
1023: ISDestroy(&is);
1024: ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is);
1025: PetscObjectSetName((PetscObject)is,"B");
1026: ISView(is,matl_dbg_viewer);
1027: ISDestroy(&is);
1028: PetscObjectSetName((PetscObject)is_A_all,"IA");
1029: ISView(is_A_all,matl_dbg_viewer);
1030: }
1032: /* get explicit Schur Complement computed during numeric factorization */
1033: MatFactorGetSchurComplement(F,&S_all,NULL);
1034: MatSetOption(S_all,MAT_SPD,sub_schurs->is_posdef);
1035: MatSetOption(S_all,MAT_HERMITIAN,sub_schurs->is_hermitian);
1037: /* we can reuse the solvers if we are not using the economic version */
1038: reuse_solvers = (PetscBool)(reuse_solvers && !economic);
1039: factor_workaround = (PetscBool)(reuse_solvers && factor_workaround);
1040: if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur)
1041: reuse_solvers = factor_workaround = PETSC_FALSE;
1043: solver_S = PETSC_TRUE;
1045: /* update the Schur complement with the change of basis on the pressures */
1046: if (benign_n) {
1047: PetscScalar *S_data,*cs_AIB,*AIIm1_data;
1048: Mat S2 = NULL,S3 = NULL; /* dbg */
1049: PetscScalar *S2_data,*S3_data; /* dbg */
1050: Vec v;
1051: PetscInt sizeA;
1053: MatDenseGetArray(S_all,&S_data);
1054: MatCreateVecs(A,&v,NULL);
1055: VecGetSize(v,&sizeA);
1056: #if defined(PETSC_HAVE_MUMPS)
1057: MatMumpsSetIcntl(F,26,0);
1058: #endif
1059: #if defined(PETSC_HAVE_MKL_PARDISO)
1060: MatMkl_PardisoSetCntl(F,70,1);
1061: #endif
1062: MatDenseGetArray(cs_AIB_mat,&cs_AIB);
1063: MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data);
1064: if (matl_dbg_viewer) {
1065: MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S2);
1066: MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S3);
1067: MatDenseGetArray(S2,&S2_data);
1068: MatDenseGetArray(S3,&S3_data);
1069: }
1070: for (i=0;i<benign_n;i++) {
1071: Vec benign_AIIm1_ones;
1072: PetscScalar *array,sum = 0.,one = 1.,*sums;
1073: const PetscInt *idxs;
1074: PetscInt k,j,nz;
1075: PetscBLASInt B_k,B_n;
1077: PetscCalloc1(benign_n,&sums);
1078: VecCreateSeqWithArray(PETSC_COMM_SELF,1,sizeA,AIIm1_data+sizeA*i,&benign_AIIm1_ones);
1079: VecCopy(benign_AIIm1_ones,v);
1080: MatSolve(F,v,benign_AIIm1_ones);
1081: /* p0 dofs (eliminated) are excluded from the sums */
1082: for (k=0;k<benign_n;k++) {
1083: ISGetLocalSize(is_p_r[k],&nz);
1084: ISGetIndices(is_p_r[k],&idxs);
1085: for (j=0;j<nz-1;j++) sums[k] -= AIIm1_data[idxs[j]+sizeA*i];
1086: ISRestoreIndices(is_p_r[k],&idxs);
1087: }
1088: MatMult(A,benign_AIIm1_ones,v);
1089: VecGetArray(v,&array);
1090: if (matl_dbg_viewer) {
1091: Vec vv;
1092: char name[16];
1094: VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array+n_I,&vv);
1095: PetscSNPrintf(name,sizeof(name),"Pvs%D",i);
1096: PetscObjectSetName((PetscObject)vv,name);
1097: VecView(vv,matl_dbg_viewer);
1098: VecDestroy(&benign_AIIm1_ones);
1099: }
1100: /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */
1101: /* cs_AIB already scaled by 1./nz */
1102: B_k = 1;
1103: for (k=0;k<benign_n;k++) {
1104: sum = sums[k];
1105: PetscBLASIntCast(size_schur,&B_n);
1107: if (PetscAbsScalar(sum) == 0.0) continue;
1108: if (k == i) {
1109: PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1110: if (matl_dbg_viewer) {
1111: PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n));
1112: }
1113: } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */
1114: sum /= 2.0;
1115: PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1116: if (matl_dbg_viewer) {
1117: PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n));
1118: }
1119: }
1120: }
1121: sum = 1.;
1122: PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1123: if (matl_dbg_viewer) {
1124: PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S2_data,&B_n));
1125: }
1126: VecRestoreArray(v,&array);
1127: /* set p0 entry of AIIm1_ones to zero */
1128: ISGetLocalSize(is_p_r[i],&nz);
1129: ISGetIndices(is_p_r[i],&idxs);
1130: for (j=0;j<benign_n;j++) AIIm1_data[idxs[nz-1]+sizeA*j] = 0.;
1131: ISRestoreIndices(is_p_r[i],&idxs);
1132: VecDestroy(&benign_AIIm1_ones);
1133: PetscFree(sums);
1134: }
1135: if (matl_dbg_viewer) {
1136: MatDenseRestoreArray(S2,&S2_data);
1137: MatDenseRestoreArray(S3,&S3_data);
1138: }
1139: if (!S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1140: PetscInt k,j;
1141: for (k=0;k<size_schur;k++) {
1142: for (j=k;j<size_schur;j++) {
1143: S_data[j*size_schur+k] = PetscConj(S_data[k*size_schur+j]);
1144: }
1145: }
1146: }
1148: /* restore defaults */
1149: #if defined(PETSC_HAVE_MUMPS)
1150: MatMumpsSetIcntl(F,26,-1);
1151: #endif
1152: #if defined(PETSC_HAVE_MKL_PARDISO)
1153: MatMkl_PardisoSetCntl(F,70,0);
1154: #endif
1155: MatDenseRestoreArray(cs_AIB_mat,&cs_AIB);
1156: MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data);
1157: VecDestroy(&v);
1158: MatDenseRestoreArray(S_all,&S_data);
1159: if (matl_dbg_viewer) {
1160: Mat S;
1162: MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED);
1163: MatFactorCreateSchurComplement(F,&S,NULL);
1164: PetscObjectSetName((PetscObject)S,"Sb");
1165: MatView(S,matl_dbg_viewer);
1166: MatDestroy(&S);
1167: PetscObjectSetName((PetscObject)S2,"S2P");
1168: MatView(S2,matl_dbg_viewer);
1169: PetscObjectSetName((PetscObject)S3,"S3P");
1170: MatView(S3,matl_dbg_viewer);
1171: PetscObjectSetName((PetscObject)cs_AIB_mat,"cs");
1172: MatView(cs_AIB_mat,matl_dbg_viewer);
1173: MatFactorGetSchurComplement(F,&S_all,NULL);
1174: }
1175: MatDestroy(&S2);
1176: MatDestroy(&S3);
1177: }
1178: if (!reuse_solvers) {
1179: for (i=0;i<benign_n;i++) {
1180: ISDestroy(&is_p_r[i]);
1181: }
1182: PetscFree(is_p_r);
1183: MatDestroy(&cs_AIB_mat);
1184: MatDestroy(&benign_AIIm1_ones_mat);
1185: }
1186: } else { /* we can't use MatFactor when size_schur == size_of_the_problem */
1187: MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all);
1188: reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */
1189: factor_workaround = PETSC_FALSE;
1190: solver_S = PETSC_FALSE;
1191: }
1193: if (reuse_solvers) {
1194: Mat A_II,Afake;
1195: Vec vec1_B;
1196: PCBDDCReuseSolvers msolv_ctx;
1197: PetscInt n_R;
1199: if (sub_schurs->reuse_solver) {
1200: PCBDDCReuseSolversReset(sub_schurs->reuse_solver);
1201: } else {
1202: PetscNew(&sub_schurs->reuse_solver);
1203: }
1204: msolv_ctx = sub_schurs->reuse_solver;
1205: MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,NULL,NULL,NULL);
1206: PetscObjectReference((PetscObject)F);
1207: msolv_ctx->F = F;
1208: MatCreateVecs(F,&msolv_ctx->sol,NULL);
1209: /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */
1210: {
1211: PetscScalar *array;
1212: PetscInt n;
1214: VecGetLocalSize(msolv_ctx->sol,&n);
1215: VecGetArray(msolv_ctx->sol,&array);
1216: VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol),1,n,array,&msolv_ctx->rhs);
1217: VecRestoreArray(msolv_ctx->sol,&array);
1218: }
1219: msolv_ctx->has_vertices = schur_has_vertices;
1221: /* interior solver */
1222: PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->interior_solver);
1223: PCSetOperators(msolv_ctx->interior_solver,A_II,A_II);
1224: PCSetType(msolv_ctx->interior_solver,PCSHELL);
1225: PCShellSetName(msolv_ctx->interior_solver,"Interior solver (w/o Schur factorization)");
1226: PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx);
1227: PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View);
1228: PCShellSetApply(msolv_ctx->interior_solver,PCBDDCReuseSolvers_Interior);
1229: PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCReuseSolvers_InteriorTranspose);
1231: /* correction solver */
1232: PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->correction_solver);
1233: PCSetType(msolv_ctx->correction_solver,PCSHELL);
1234: PCShellSetName(msolv_ctx->correction_solver,"Correction solver (with Schur factorization)");
1235: PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx);
1236: PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View);
1237: PCShellSetApply(msolv_ctx->correction_solver,PCBDDCReuseSolvers_Correction);
1238: PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCReuseSolvers_CorrectionTranspose);
1240: /* scatter and vecs for Schur complement solver */
1241: MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B);
1242: MatCreateVecs(sub_schurs->S,&vec1_B,NULL);
1243: if (!schur_has_vertices) {
1244: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B);
1245: VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B);
1246: PetscObjectReference((PetscObject)is_A_all);
1247: msolv_ctx->is_R = is_A_all;
1248: } else {
1249: IS is_B_all;
1250: const PetscInt* idxs;
1251: PetscInt dual,n_v,n;
1253: ISGetLocalSize(sub_schurs->is_vertices,&n_v);
1254: dual = size_schur - n_v;
1255: ISGetLocalSize(is_A_all,&n);
1256: ISGetIndices(is_A_all,&idxs);
1257: ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),dual,idxs+n_I,PETSC_COPY_VALUES,&is_B_all);
1258: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_B_all,&msolv_ctx->is_B);
1259: ISDestroy(&is_B_all);
1260: ISCreateStride(PetscObjectComm((PetscObject)is_A_all),dual,0,1,&is_B_all);
1261: VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,is_B_all,&msolv_ctx->correction_scatter_B);
1262: ISDestroy(&is_B_all);
1263: ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),n-n_v,idxs,PETSC_COPY_VALUES,&msolv_ctx->is_R);
1264: ISRestoreIndices(is_A_all,&idxs);
1265: }
1266: ISGetLocalSize(msolv_ctx->is_R,&n_R);
1267: MatCreateSeqAIJ(PETSC_COMM_SELF,n_R,n_R,0,NULL,&Afake);
1268: MatAssemblyBegin(Afake,MAT_FINAL_ASSEMBLY);
1269: MatAssemblyEnd(Afake,MAT_FINAL_ASSEMBLY);
1270: PCSetOperators(msolv_ctx->correction_solver,Afake,Afake);
1271: MatDestroy(&Afake);
1272: VecDestroy(&vec1_B);
1274: /* communicate benign info to solver context */
1275: if (benign_n) {
1276: PetscScalar *array;
1278: msolv_ctx->benign_n = benign_n;
1279: msolv_ctx->benign_zerodiag_subs = is_p_r;
1280: PetscMalloc1(benign_n,&msolv_ctx->benign_save_vals);
1281: msolv_ctx->benign_csAIB = cs_AIB_mat;
1282: MatCreateVecs(cs_AIB_mat,&msolv_ctx->benign_corr_work,NULL);
1283: VecGetArray(msolv_ctx->benign_corr_work,&array);
1284: VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array,&msolv_ctx->benign_dummy_schur_vec);
1285: VecRestoreArray(msolv_ctx->benign_corr_work,&array);
1286: msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat;
1287: }
1288: } else {
1289: if (sub_schurs->reuse_solver) {
1290: PCBDDCReuseSolversReset(sub_schurs->reuse_solver);
1291: }
1292: PetscFree(sub_schurs->reuse_solver);
1293: }
1294: MatDestroy(&A);
1295: ISDestroy(&is_A_all);
1297: /* Work arrays */
1298: PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);
1300: /* matrices for deluxe scaling and adaptive selection */
1301: if (compute_Stilda) {
1302: if (!sub_schurs->sum_S_Ej_tilda_all) {
1303: MatDuplicate(sub_schurs->S_Ej_all,MAT_SHARE_NONZERO_PATTERN,&sub_schurs->sum_S_Ej_tilda_all);
1304: }
1305: if (!sub_schurs->sum_S_Ej_inv_all && deluxe) {
1306: MatDuplicate(sub_schurs->S_Ej_all,MAT_SHARE_NONZERO_PATTERN,&sub_schurs->sum_S_Ej_inv_all);
1307: }
1308: }
1310: /* S_Ej_all */
1311: cum = cum2 = 0;
1312: MatDenseGetArray(S_all,&S_data);
1313: for (i=0;i<sub_schurs->n_subs;i++) {
1314: PetscInt j;
1316: /* get S_E */
1317: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
1318: if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1319: PetscInt k;
1320: for (k=0;k<subset_size;k++) {
1321: for (j=k;j<subset_size;j++) {
1322: work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
1323: work[j*subset_size+k] = PetscConj(S_data[cum2+k*size_schur+j]);
1324: }
1325: }
1326: } else { /* just copy to workspace */
1327: PetscInt k;
1328: for (k=0;k<subset_size;k++) {
1329: for (j=0;j<subset_size;j++) {
1330: work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
1331: }
1332: }
1333: }
1334: /* insert S_E values */
1335: for (j=0;j<subset_size;j++) dummy_idx[j] = cum+j;
1336: if (sub_schurs->change) {
1337: Mat change_sub,SEj,T;
1339: /* change basis */
1340: KSPGetOperators(sub_schurs->change[i],&change_sub,NULL);
1341: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj);
1342: if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1343: Mat T2;
1344: MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2);
1345: MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T);
1346: MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T);
1347: MatDestroy(&T2);
1348: } else {
1349: MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T);
1350: }
1351: MatCopy(T,SEj,SAME_NONZERO_PATTERN);
1352: MatDestroy(&T);
1353: MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1.0,NULL,NULL);
1354: MatDestroy(&SEj);
1355: }
1356: if (deluxe) {
1357: MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
1358: /* if adaptivity is requested, invert S_E blocks */
1359: if (compute_Stilda) {
1360: PetscInt k;
1362: PetscBLASIntCast(subset_size,&B_N);
1363: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1364: if (use_potr) {
1365: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,work,&B_N,&B_ierr));
1366: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
1367: PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,work,&B_N,&B_ierr));
1368: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
1369: for (k=0;k<subset_size;k++) {
1370: for (j=k;j<subset_size;j++) {
1371: work[j*subset_size+k] = work[k*subset_size+j];
1372: }
1373: }
1374: } else if (use_sytr) {
1375: PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,work,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1376: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr);
1377: PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,work,&B_N,pivots,Bwork,&B_ierr));
1378: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr);
1379: for (k=0;k<subset_size;k++) {
1380: for (j=k;j<subset_size;j++) {
1381: work[j*subset_size+k] = work[k*subset_size+j];
1382: }
1383: }
1384: } else {
1385: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,work,&B_N,pivots,&B_ierr));
1386: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
1387: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,work,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1388: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1389: }
1390: PetscLogFlops(1.0*subset_size*subset_size*subset_size);
1391: PetscFPTrapPop();
1392: MatSetValues(sub_schurs->sum_S_Ej_inv_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
1393: }
1394: } else if (compute_Stilda) { /* not using deluxe */
1395: Mat SEj;
1396: Vec D;
1397: PetscScalar *array;
1399: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj);
1400: VecGetArray(Dall,&array);
1401: VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,array+cum,&D);
1402: VecRestoreArray(Dall,&array);
1403: VecShift(D,-1.);
1404: MatDiagonalScale(SEj,D,D);
1405: MatDestroy(&SEj);
1406: VecDestroy(&D);
1407: MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
1408: }
1409: cum += subset_size;
1410: cum2 += subset_size*(size_schur + 1);
1411: }
1412: MatDenseRestoreArray(S_all,&S_data);
1414: if (solver_S) {
1415: MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED);
1416: }
1418: schur_factor = NULL;
1419: if (compute_Stilda && size_active_schur) {
1421: if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */
1422: PetscInt j;
1423: for (j=0;j<size_schur;j++) dummy_idx[j] = j;
1424: MatSetValues(sub_schurs->sum_S_Ej_tilda_all,size_schur,dummy_idx,size_schur,dummy_idx,work,INSERT_VALUES);
1425: } else {
1426: Mat S_all_inv=NULL;
1427: if (solver_S) {
1428: /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1.
1429: The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */
1430: if (factor_workaround) {/* invert without calling MatFactorInvertSchurComplement, since we are hacking */
1431: PetscScalar *data;
1432: PetscInt nd = 0;
1434: if (!use_potr) {
1435: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1436: }
1437: MatFactorGetSchurComplement(F,&S_all_inv,NULL);
1438: MatDenseGetArray(S_all_inv,&data);
1439: if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1440: ISGetLocalSize(sub_schurs->is_dir,&nd);
1441: }
1443: /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */
1444: if (schur_has_vertices) {
1445: Mat M;
1446: PetscScalar *tdata;
1447: PetscInt nv = 0, news;
1449: ISGetLocalSize(sub_schurs->is_vertices,&nv);
1450: news = size_active_schur + nv;
1451: PetscCalloc1(news*news,&tdata);
1452: for (i=0;i<size_active_schur;i++) {
1453: PetscMemcpy(tdata+i*(news+1),data+i*(size_schur+1),(size_active_schur-i)*sizeof(PetscScalar));
1454: PetscMemcpy(tdata+i*(news+1)+size_active_schur-i,data+i*size_schur+size_active_schur+nd,nv*sizeof(PetscScalar));
1455: }
1456: for (i=0;i<nv;i++) {
1457: PetscInt k = i+size_active_schur;
1458: PetscMemcpy(tdata+k*(news+1),data+(k+nd)*(size_schur+1),(nv-i)*sizeof(PetscScalar));
1459: }
1461: MatCreateSeqDense(PETSC_COMM_SELF,news,news,tdata,&M);
1462: MatSetOption(M,MAT_SPD,PETSC_TRUE);
1463: MatCholeskyFactor(M,NULL,NULL);
1464: /* save the factors */
1465: cum = 0;
1466: PetscMalloc1((size_active_schur*(size_active_schur +1))/2+nd,&schur_factor);
1467: for (i=0;i<size_active_schur;i++) {
1468: PetscMemcpy(schur_factor+cum,tdata+i*(news+1),(size_active_schur-i)*sizeof(PetscScalar));
1469: cum += size_active_schur - i;
1470: }
1471: for (i=0;i<nd;i++) schur_factor[cum+i] = PetscSqrtReal(PetscRealPart(data[(i+size_active_schur)*(size_schur+1)]));
1472: MatSeqDenseInvertFactors_Private(M);
1473: /* move back just the active dofs to the Schur complement */
1474: for (i=0;i<size_active_schur;i++) {
1475: PetscMemcpy(data+i*size_schur,tdata+i*news,size_active_schur*sizeof(PetscScalar));
1476: }
1477: PetscFree(tdata);
1478: MatDestroy(&M);
1479: } else { /* we can factorize and invert just the activedofs */
1480: Mat M;
1481: PetscScalar *aux;
1483: PetscMalloc1(nd,&aux);
1484: for (i=0;i<nd;i++) aux[i] = 1.0/data[(i+size_active_schur)*(size_schur+1)];
1485: MatCreateSeqDense(PETSC_COMM_SELF,size_active_schur,size_active_schur,data,&M);
1486: MatSeqDenseSetLDA(M,size_schur);
1487: MatSetOption(M,MAT_SPD,PETSC_TRUE);
1488: MatCholeskyFactor(M,NULL,NULL);
1489: MatSeqDenseInvertFactors_Private(M);
1490: MatDestroy(&M);
1491: MatCreateSeqDense(PETSC_COMM_SELF,size_schur,nd,data+size_active_schur*size_schur,&M);
1492: MatZeroEntries(M);
1493: MatDestroy(&M);
1494: MatCreateSeqDense(PETSC_COMM_SELF,nd,size_schur,data+size_active_schur,&M);
1495: MatSeqDenseSetLDA(M,size_schur);
1496: MatZeroEntries(M);
1497: MatDestroy(&M);
1498: for (i=0;i<nd;i++) data[(i+size_active_schur)*(size_schur+1)] = aux[i];
1499: PetscFree(aux);
1500: }
1501: MatDenseRestoreArray(S_all_inv,&data);
1502: } else { /* use MatFactor calls to invert S */
1503: MatFactorInvertSchurComplement(F);
1504: MatFactorGetSchurComplement(F,&S_all_inv,NULL);
1505: }
1506: } else { /* we need to invert explicitly since we are not using MatFactor for S */
1507: PetscObjectReference((PetscObject)S_all);
1508: S_all_inv = S_all;
1509: MatDenseGetArray(S_all_inv,&S_data);
1510: PetscBLASIntCast(size_schur,&B_N);
1511: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1512: if (use_potr) {
1513: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr));
1514: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
1515: PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr));
1516: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
1517: } else if (use_sytr) {
1518: PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1519: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr);
1520: PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_ierr));
1521: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr);
1522: } else {
1523: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr));
1524: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
1525: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1526: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1527: }
1528: PetscLogFlops(1.0*size_schur*size_schur*size_schur);
1529: PetscFPTrapPop();
1530: MatDenseRestoreArray(S_all_inv,&S_data);
1531: }
1532: /* S_Ej_tilda_all */
1533: cum = cum2 = 0;
1534: MatDenseGetArray(S_all_inv,&S_data);
1535: for (i=0;i<sub_schurs->n_subs;i++) {
1536: PetscInt j;
1538: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
1539: /* get (St^-1)_E */
1540: /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1
1541: will be properly accessed later during adaptive selection */
1542: if (S_lower_triangular) {
1543: PetscInt k;
1544: if (sub_schurs->change) {
1545: for (k=0;k<subset_size;k++) {
1546: for (j=k;j<subset_size;j++) {
1547: work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
1548: work[j*subset_size+k] = work[k*subset_size+j];
1549: }
1550: }
1551: } else {
1552: for (k=0;k<subset_size;k++) {
1553: for (j=k;j<subset_size;j++) {
1554: work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
1555: }
1556: }
1557: }
1558: } else {
1559: PetscInt k;
1560: for (k=0;k<subset_size;k++) {
1561: for (j=0;j<subset_size;j++) {
1562: work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
1563: }
1564: }
1565: }
1566: if (sub_schurs->change) {
1567: Mat change_sub,SEj,T;
1569: /* change basis */
1570: KSPGetOperators(sub_schurs->change[i],&change_sub,NULL);
1571: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj);
1572: if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1573: Mat T2;
1574: MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2);
1575: MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T);
1576: MatDestroy(&T2);
1577: MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T);
1578: } else {
1579: MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T);
1580: }
1581: MatCopy(T,SEj,SAME_NONZERO_PATTERN);
1582: MatDestroy(&T);
1583: /* set diagonal entry to a very large value to pick the basis we are eliminating as the first eigenvectors with adaptive selection */
1584: MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1./PETSC_SMALL,NULL,NULL);
1585: MatDestroy(&SEj);
1586: }
1587: for (j=0;j<subset_size;j++) dummy_idx[j] = cum+j;
1588: MatSetValues(sub_schurs->sum_S_Ej_tilda_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
1589: cum += subset_size;
1590: cum2 += subset_size*(size_schur + 1);
1591: }
1592: MatDenseRestoreArray(S_all_inv,&S_data);
1593: if (solver_S) {
1594: if (schur_has_vertices) {
1595: MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_FACTORED);
1596: } else {
1597: MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_INVERTED);
1598: }
1599: }
1600: MatDestroy(&S_all_inv);
1601: }
1603: /* move back factors if needed */
1604: if (schur_has_vertices) {
1605: Mat S_tmp;
1606: PetscInt nd = 0;
1608: if (!solver_S) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
1609: MatFactorGetSchurComplement(F,&S_tmp,NULL);
1610: if (use_potr) {
1611: PetscScalar *data;
1613: MatDenseGetArray(S_tmp,&data);
1614: PetscMemzero(data,size_schur*size_schur*sizeof(PetscScalar));
1616: if (S_lower_triangular) {
1617: cum = 0;
1618: for (i=0;i<size_active_schur;i++) {
1619: PetscMemcpy(data+i*(size_schur+1),schur_factor+cum,(size_active_schur-i)*sizeof(PetscScalar));
1620: cum += size_active_schur-i;
1621: }
1622: } else {
1623: PetscMemcpy(data,schur_factor,size_schur*size_schur*sizeof(PetscScalar));
1624: }
1625: if (sub_schurs->is_dir) {
1626: ISGetLocalSize(sub_schurs->is_dir,&nd);
1627: for (i=0;i<nd;i++) {
1628: data[(i+size_active_schur)*(size_schur+1)] = schur_factor[cum+i];
1629: }
1630: }
1631: /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions,
1632: set the diagonal entry of the Schur factor to a very large value */
1633: for (i=size_active_schur+nd;i<size_schur;i++) {
1634: data[i*(size_schur+1)] = infty;
1635: }
1636: MatDenseRestoreArray(S_tmp,&data);
1637: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1638: MatFactorRestoreSchurComplement(F,&S_tmp,MAT_FACTOR_SCHUR_FACTORED);
1639: }
1640: } else if (factor_workaround) { /* we need to eliminate any unneeded coupling */
1641: PetscScalar *data;
1642: PetscInt nd = 0;
1644: if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1645: ISGetLocalSize(sub_schurs->is_dir,&nd);
1646: }
1647: MatFactorGetSchurComplement(F,&S_all,NULL);
1648: MatDenseGetArray(S_all,&data);
1649: for (i=0;i<size_active_schur;i++) {
1650: PetscMemzero(data+i*size_schur+size_active_schur,(size_schur-size_active_schur)*sizeof(PetscScalar));
1651: }
1652: for (i=size_active_schur+nd;i<size_schur;i++) {
1653: PetscMemzero(data+i*size_schur+size_active_schur,(size_schur-size_active_schur)*sizeof(PetscScalar));
1654: data[i*(size_schur+1)] = infty;
1655: }
1656: MatDenseRestoreArray(S_all,&data);
1657: MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED);
1658: }
1659: PetscFree2(dummy_idx,work);
1660: PetscFree(schur_factor);
1661: VecDestroy(&Dall);
1662: }
1663: PetscFree(nnz);
1664: MatDestroy(&F);
1665: ISDestroy(&is_I_layer);
1666: MatDestroy(&S_all);
1667: MatDestroy(&A_BB);
1668: MatDestroy(&A_IB);
1669: MatDestroy(&A_BI);
1671: /* speed up matrix assembly */
1672: PetscMalloc1(sub_schurs->n_subs,&nnz);
1673: for (i=0;i<sub_schurs->n_subs;i++) {
1674: ISGetLocalSize(sub_schurs->is_subs[i],&nnz[i]);
1675: }
1676: ISCreateGeneral(PETSC_COMM_SELF,sub_schurs->n_subs,nnz,PETSC_OWN_POINTER,&is_I_layer);
1677: MatSetVariableBlockSizes(sub_schurs->S_Ej_all,sub_schurs->n_subs,nnz);
1678: MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
1679: MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
1680: if (compute_Stilda) {
1681: MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all,sub_schurs->n_subs,nnz);
1682: MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
1683: MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
1684: if (deluxe) {
1685: MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all,sub_schurs->n_subs,nnz);
1686: MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
1687: MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
1688: }
1689: }
1690: ISDestroy(&is_I_layer);
1692: /* Global matrix of all assembled Schur on subsets */
1693: MatISSetLocalMat(work_mat,sub_schurs->S_Ej_all);
1694: MatAssemblyBegin(work_mat,MAT_FINAL_ASSEMBLY);
1695: MatAssemblyEnd(work_mat,MAT_FINAL_ASSEMBLY);
1696: MatISSetMPIXAIJPreallocation_Private(work_mat,global_schur_subsets,PETSC_TRUE);
1697: MatConvert(work_mat,MATAIJ,MAT_REUSE_MATRIX,&global_schur_subsets);
1699: /* Get local part of (\sum_j S_Ej) */
1700: MatCreateSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_INITIAL_MATRIX,&submats);
1701: if (!sub_schurs->sum_S_Ej_all) {
1702: MatDuplicate(submats[0],MAT_COPY_VALUES,&sub_schurs->sum_S_Ej_all);
1703: } else {
1704: MatCopy(submats[0],sub_schurs->sum_S_Ej_all,SAME_NONZERO_PATTERN);
1705: }
1707: /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
1708: if (compute_Stilda) {
1709: MatISSetLocalMat(work_mat,sub_schurs->sum_S_Ej_tilda_all);
1710: MatConvert(work_mat,MATAIJ,MAT_REUSE_MATRIX,&global_schur_subsets);
1711: MatCreateSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
1712: MatCopy(submats[0],sub_schurs->sum_S_Ej_tilda_all,SAME_NONZERO_PATTERN);
1713: if (deluxe) {
1714: MatISSetLocalMat(work_mat,sub_schurs->sum_S_Ej_inv_all);
1715: MatConvert(work_mat,MATAIJ,MAT_REUSE_MATRIX,&global_schur_subsets);
1716: MatCreateSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);
1717: MatCopy(submats[0],sub_schurs->sum_S_Ej_inv_all,SAME_NONZERO_PATTERN);
1718: } else {
1719: PetscScalar *array;
1720: PetscInt cum;
1722: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&array);
1723: cum = 0;
1724: for (i=0;i<sub_schurs->n_subs;i++) {
1725: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
1726: PetscBLASIntCast(subset_size,&B_N);
1727: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1728: if (use_potr) {
1729: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
1730: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
1731: PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
1732: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
1733: } else if (use_sytr) {
1734: PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1735: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr);
1736: PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_ierr));
1737: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr);
1738: } else {
1739: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
1740: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
1741: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1742: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1743: }
1744: PetscLogFlops(1.0*subset_size*subset_size*subset_size);
1745: PetscFPTrapPop();
1746: cum += subset_size*subset_size;
1747: }
1748: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&array);
1749: PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all);
1750: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
1751: sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all;
1752: }
1753: }
1754: MatDestroySubMatrices(1,&submats);
1755: if (matl_dbg_viewer) {
1756: PetscInt cum;
1758: if (sub_schurs->S_Ej_all) {
1759: PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all,"SE");
1760: MatView(sub_schurs->S_Ej_all,matl_dbg_viewer);
1761: }
1762: if (sub_schurs->sum_S_Ej_all) {
1763: PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all,"SSE");
1764: MatView(sub_schurs->sum_S_Ej_all,matl_dbg_viewer);
1765: }
1766: if (sub_schurs->sum_S_Ej_inv_all) {
1767: PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all,"SSEm");
1768: MatView(sub_schurs->sum_S_Ej_inv_all,matl_dbg_viewer);
1769: }
1770: if (sub_schurs->sum_S_Ej_tilda_all) {
1771: PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all,"SSEt");
1772: MatView(sub_schurs->sum_S_Ej_tilda_all,matl_dbg_viewer);
1773: }
1774: for (i=0,cum=0;i<sub_schurs->n_subs;i++) {
1775: IS is;
1776: char name[16];
1778: PetscSNPrintf(name,sizeof(name),"IE%D",i);
1779: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
1780: ISCreateStride(PETSC_COMM_SELF,subset_size,cum,1,&is);
1781: PetscObjectSetName((PetscObject)is,name);
1782: ISView(is,matl_dbg_viewer);
1783: ISDestroy(&is);
1784: cum += subset_size;
1785: }
1786: }
1788: /* free workspace */
1789: PetscViewerDestroy(&matl_dbg_viewer);
1790: PetscFree(submats);
1791: PetscFree2(Bwork,pivots);
1792: MatDestroy(&global_schur_subsets);
1793: MatDestroy(&work_mat);
1794: ISDestroy(&all_subsets_n);
1795: PetscCommDestroy(&comm_n);
1796: return(0);
1797: }
1799: PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char* prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc)
1800: {
1801: IS *faces,*edges,*all_cc,vertices;
1802: PetscInt i,n_faces,n_edges,n_all_cc;
1803: PetscBool is_sorted,ispetsc;
1804: PetscErrorCode ierr;
1807: ISSorted(is_I,&is_sorted);
1808: if (!is_sorted) SETERRQ(PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted");
1809: ISSorted(is_B,&is_sorted);
1810: if (!is_sorted) SETERRQ(PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted");
1812: /* reset any previous data */
1813: PCBDDCSubSchursReset(sub_schurs);
1815: /* get index sets for faces and edges (already sorted by global ordering) */
1816: PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices);
1817: n_all_cc = n_faces+n_edges;
1818: PetscBTCreate(n_all_cc,&sub_schurs->is_edge);
1819: PetscMalloc1(n_all_cc,&all_cc);
1820: for (i=0;i<n_faces;i++) {
1821: if (copycc) {
1822: ISDuplicate(faces[i],&all_cc[i]);
1823: } else {
1824: PetscObjectReference((PetscObject)faces[i]);
1825: all_cc[i] = faces[i];
1826: }
1827: }
1828: for (i=0;i<n_edges;i++) {
1829: if (copycc) {
1830: ISDuplicate(edges[i],&all_cc[n_faces+i]);
1831: } else {
1832: PetscObjectReference((PetscObject)edges[i]);
1833: all_cc[n_faces+i] = edges[i];
1834: }
1835: PetscBTSet(sub_schurs->is_edge,n_faces+i);
1836: }
1837: PetscObjectReference((PetscObject)vertices);
1838: sub_schurs->is_vertices = vertices;
1839: PCBDDCGraphRestoreCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices);
1840: sub_schurs->is_dir = NULL;
1841: PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir);
1843: /* Determine if MatFactor can be used */
1844: PetscStrallocpy(prefix,&sub_schurs->prefix);
1845: #if defined(PETSC_HAVE_MUMPS)
1846: PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMUMPS,64);
1847: #elif defined(PETSC_HAVE_MKL_PARDISO)
1848: PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,64);
1849: #else
1850: PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERPETSC,64);
1851: #endif
1852: #if defined(PETSC_USE_COMPLEX)
1853: sub_schurs->is_hermitian = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */
1854: #else
1855: sub_schurs->is_hermitian = PETSC_TRUE;
1856: #endif
1857: sub_schurs->is_posdef = PETSC_TRUE;
1858: sub_schurs->is_symmetric = PETSC_TRUE;
1859: sub_schurs->debug = PETSC_FALSE;
1860: sub_schurs->restrict_comm = PETSC_FALSE;
1861: PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");
1862: PetscOptionsString("-sub_schurs_mat_solver_type","Specific direct solver to use",NULL,sub_schurs->mat_solver_type,sub_schurs->mat_solver_type,64,NULL);
1863: PetscOptionsBool("-sub_schurs_symmetric","Symmetric problem",NULL,sub_schurs->is_symmetric,&sub_schurs->is_symmetric,NULL);
1864: PetscOptionsBool("-sub_schurs_hermitian","Hermitian problem",NULL,sub_schurs->is_hermitian,&sub_schurs->is_hermitian,NULL);
1865: PetscOptionsBool("-sub_schurs_posdef","Positive definite problem",NULL,sub_schurs->is_posdef,&sub_schurs->is_posdef,NULL);
1866: PetscOptionsBool("-sub_schurs_restrictcomm","Restrict communicator on active processes only",NULL,sub_schurs->restrict_comm,&sub_schurs->restrict_comm,NULL);
1867: PetscOptionsBool("-sub_schurs_debug","Debug output",NULL,sub_schurs->debug,&sub_schurs->debug,NULL);
1868: PetscOptionsEnd();
1869: PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERPETSC,&ispetsc);
1870: sub_schurs->schur_explicit = (PetscBool)!ispetsc;
1872: /* for reals, symmetric and hermitian are synonims */
1873: #if !defined(PETSC_USE_COMPLEX)
1874: sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian);
1875: sub_schurs->is_hermitian = sub_schurs->is_symmetric;
1876: #endif
1878: PetscObjectReference((PetscObject)is_I);
1879: sub_schurs->is_I = is_I;
1880: PetscObjectReference((PetscObject)is_B);
1881: sub_schurs->is_B = is_B;
1882: PetscObjectReference((PetscObject)graph->l2gmap);
1883: sub_schurs->l2gmap = graph->l2gmap;
1884: PetscObjectReference((PetscObject)BtoNmap);
1885: sub_schurs->BtoNmap = BtoNmap;
1886: sub_schurs->n_subs = n_all_cc;
1887: sub_schurs->is_subs = all_cc;
1888: sub_schurs->S_Ej_all = NULL;
1889: sub_schurs->sum_S_Ej_all = NULL;
1890: sub_schurs->sum_S_Ej_inv_all = NULL;
1891: sub_schurs->sum_S_Ej_tilda_all = NULL;
1892: sub_schurs->is_Ej_all = NULL;
1893: return(0);
1894: }
1896: PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
1897: {
1898: PCBDDCSubSchurs schurs_ctx;
1899: PetscErrorCode ierr;
1902: PetscNew(&schurs_ctx);
1903: schurs_ctx->n_subs = 0;
1904: *sub_schurs = schurs_ctx;
1905: return(0);
1906: }
1908: PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
1909: {
1910: PetscInt i;
1914: if (!sub_schurs) return(0);
1915: PetscFree(sub_schurs->prefix);
1916: MatDestroy(&sub_schurs->A);
1917: MatDestroy(&sub_schurs->S);
1918: ISDestroy(&sub_schurs->is_I);
1919: ISDestroy(&sub_schurs->is_B);
1920: ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);
1921: ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);
1922: MatDestroy(&sub_schurs->S_Ej_all);
1923: MatDestroy(&sub_schurs->sum_S_Ej_all);
1924: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
1925: MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
1926: ISDestroy(&sub_schurs->is_Ej_all);
1927: ISDestroy(&sub_schurs->is_vertices);
1928: ISDestroy(&sub_schurs->is_dir);
1929: PetscBTDestroy(&sub_schurs->is_edge);
1930: for (i=0;i<sub_schurs->n_subs;i++) {
1931: ISDestroy(&sub_schurs->is_subs[i]);
1932: }
1933: if (sub_schurs->n_subs) {
1934: PetscFree(sub_schurs->is_subs);
1935: }
1936: if (sub_schurs->reuse_solver) {
1937: PCBDDCReuseSolversReset(sub_schurs->reuse_solver);
1938: }
1939: PetscFree(sub_schurs->reuse_solver);
1940: if (sub_schurs->change) {
1941: for (i=0;i<sub_schurs->n_subs;i++) {
1942: KSPDestroy(&sub_schurs->change[i]);
1943: ISDestroy(&sub_schurs->change_primal_sub[i]);
1944: }
1945: }
1946: PetscFree(sub_schurs->change);
1947: PetscFree(sub_schurs->change_primal_sub);
1948: sub_schurs->n_subs = 0;
1949: return(0);
1950: }
1952: PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs* sub_schurs)
1953: {
1957: PCBDDCSubSchursReset(*sub_schurs);
1958: PetscFree(*sub_schurs);
1959: return(0);
1960: }
1962: PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
1963: {
1964: PetscInt i,j,n;
1968: n = 0;
1969: for (i=-n_prev;i<0;i++) {
1970: PetscInt start_dof = queue_tip[i];
1971: for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
1972: PetscInt dof = adjncy[j];
1973: if (!PetscBTLookup(touched,dof)) {
1974: PetscBTSet(touched,dof);
1975: queue_tip[n] = dof;
1976: n++;
1977: }
1978: }
1979: }
1980: *n_added = n;
1981: return(0);
1982: }