Actual source code: bddcscalingbasic.c
petsc-3.11.4 2019-09-28
1: #include <../src/ksp/pc/impls/bddc/bddc.h>
2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3: #include <petscblaslapack.h>
4: #include <../src/mat/impls/dense/seq/dense.h>
6: /* prototypes for deluxe functions */
7: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
8: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
9: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
10: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
11: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);
13: static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X)
14: {
15: Mat_SeqDense *mat = (Mat_SeqDense*)A->data;
17: PetscScalar *b,*x;
18: PetscInt n;
19: PetscBLASInt nrhs,info,m;
20: PetscBool flg;
23: PetscBLASIntCast(A->rmap->n,&m);
24: PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
25: if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
26: PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
27: if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
29: MatGetSize(B,NULL,&n);
30: PetscBLASIntCast(n,&nrhs);
31: MatDenseGetArray(B,&b);
32: MatDenseGetArray(X,&x);
34: PetscMemcpy(x,b,m*nrhs*sizeof(PetscScalar));
36: if (A->factortype == MAT_FACTOR_LU) {
37: #if defined(PETSC_MISSING_LAPACK_GETRS)
38: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
39: #else
40: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info));
41: if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
42: #endif
43: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only LU factor supported");
45: MatDenseRestoreArray(B,&b);
46: MatDenseRestoreArray(X,&x);
47: PetscLogFlops(nrhs*(2.0*m*m - m));
48: return(0);
49: }
51: static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
52: {
53: PC_IS* pcis = (PC_IS*)pc->data;
54: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
58: /* Apply partition of unity */
59: VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);
60: VecSet(global_vector,0.0);
61: VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);
62: VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);
63: return(0);
64: }
66: static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
67: {
68: PC_IS* pcis=(PC_IS*)pc->data;
69: PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
70: PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
71: PetscErrorCode ierr;
74: VecSet(pcbddc->work_scaling,0.0);
75: VecSet(y,0.0);
76: if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
77: PetscInt i;
78: const PetscScalar *array_x,*array_D;
79: PetscScalar *array;
80: VecGetArrayRead(x,&array_x);
81: VecGetArrayRead(pcis->D,&array_D);
82: VecGetArray(pcbddc->work_scaling,&array);
83: for (i=0;i<deluxe_ctx->n_simple;i++) {
84: array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]];
85: }
86: VecRestoreArray(pcbddc->work_scaling,&array);
87: VecRestoreArrayRead(pcis->D,&array_D);
88: VecRestoreArrayRead(x,&array_x);
89: }
90: /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
91: if (deluxe_ctx->seq_mat) {
92: PetscInt i;
93: for (i=0;i<deluxe_ctx->seq_n;i++) {
94: if (deluxe_ctx->change) {
95: VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
96: VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
97: if (deluxe_ctx->change_with_qr) {
98: Mat change;
100: KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
101: MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
102: } else {
103: KSPSolve(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
104: }
105: } else {
106: VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
107: VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
108: }
109: MatMultTranspose(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
110: if (deluxe_ctx->seq_mat_inv_sum[i]) {
111: PetscScalar *x;
113: VecGetArray(deluxe_ctx->seq_work2[i],&x);
114: VecPlaceArray(deluxe_ctx->seq_work1[i],x);
115: VecRestoreArray(deluxe_ctx->seq_work2[i],&x);
116: MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
117: VecResetArray(deluxe_ctx->seq_work1[i]);
118: }
119: if (deluxe_ctx->change) {
120: Mat change;
122: KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
123: MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
124: VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
125: VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
126: } else {
127: VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
128: VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
129: }
130: }
131: }
132: /* put local boundary part in global vector */
133: VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);
134: VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);
135: return(0);
136: }
138: PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
139: {
140: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
147: if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!");
148: PetscUseMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));
149: return(0);
150: }
152: static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
153: {
155: PC_IS *pcis = (PC_IS*)pc->data;
158: VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);
159: VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);
160: /* Apply partition of unity */
161: VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);
162: return(0);
163: }
165: static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
166: {
167: PC_IS* pcis=(PC_IS*)pc->data;
168: PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
169: PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
170: PetscErrorCode ierr;
173: /* get local boundary part of global vector */
174: VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);
175: VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);
176: if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
177: PetscInt i;
178: PetscScalar *array_y;
179: const PetscScalar *array_D;
180: VecGetArray(y,&array_y);
181: VecGetArrayRead(pcis->D,&array_D);
182: for (i=0;i<deluxe_ctx->n_simple;i++) {
183: array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
184: }
185: VecRestoreArrayRead(pcis->D,&array_D);
186: VecRestoreArray(y,&array_y);
187: }
188: /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
189: if (deluxe_ctx->seq_mat) {
190: PetscInt i;
191: for (i=0;i<deluxe_ctx->seq_n;i++) {
192: if (deluxe_ctx->change) {
193: Mat change;
195: VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
196: VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
197: KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
198: MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
199: } else {
200: VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
201: VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
202: }
203: if (deluxe_ctx->seq_mat_inv_sum[i]) {
204: PetscScalar *x;
206: VecGetArray(deluxe_ctx->seq_work1[i],&x);
207: VecPlaceArray(deluxe_ctx->seq_work2[i],x);
208: VecRestoreArray(deluxe_ctx->seq_work1[i],&x);
209: MatSolve(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
210: VecResetArray(deluxe_ctx->seq_work2[i]);
211: }
212: MatMult(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
213: if (deluxe_ctx->change) {
214: if (deluxe_ctx->change_with_qr) {
215: Mat change;
217: KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
218: MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
219: } else {
220: KSPSolveTranspose(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
221: }
222: VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);
223: VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);
224: } else {
225: VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);
226: VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);
227: }
228: }
229: }
230: return(0);
231: }
233: PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
234: {
235: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
242: if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!");
243: PetscUseMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));
244: return(0);
245: }
247: PetscErrorCode PCBDDCScalingSetUp(PC pc)
248: {
249: PC_IS* pcis=(PC_IS*)pc->data;
250: PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
255: PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0);
256: /* create work vector for the operator */
257: VecDestroy(&pcbddc->work_scaling);
258: VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);
259: /* always rebuild pcis->D */
260: if (pcis->use_stiffness_scaling) {
261: PetscScalar *a;
262: PetscInt i,n;
264: MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);
265: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);
266: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);
267: VecAbs(pcis->D);
268: VecGetLocalSize(pcis->D,&n);
269: VecGetArray(pcis->D,&a);
270: for (i=0;i<n;i++) if (PetscAbsScalar(a[i])<PETSC_SMALL) a[i] = 1.0;
271: VecRestoreArray(pcis->D,&a);
272: }
273: VecSet(pcis->vec1_global,0.0);
274: VecScatterBegin(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
275: VecScatterEnd(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
276: VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
277: VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
278: VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);
279: /* now setup */
280: if (pcbddc->use_deluxe_scaling) {
281: if (!pcbddc->deluxe_ctx) {
282: PCBDDCScalingCreate_Deluxe(pc);
283: }
284: PCBDDCScalingSetUp_Deluxe(pc);
285: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);
286: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);
287: } else {
288: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);
289: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);
290: }
292: /* test */
293: if (pcbddc->dbg_flag) {
294: Mat B0_B = NULL;
295: Vec B0_Bv = NULL, B0_Bv2 = NULL;
296: Vec vec2_global;
297: PetscViewer viewer = pcbddc->dbg_viewer;
298: PetscReal error;
300: /* extension -> from local to parallel */
301: VecSet(pcis->vec1_global,0.0);
302: VecSetRandom(pcis->vec1_B,NULL);
303: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
304: VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
305: VecDuplicate(pcis->vec1_global,&vec2_global);
306: VecCopy(pcis->vec1_global,vec2_global);
307: VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
308: VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
309: if (pcbddc->benign_n) {
310: IS is_dummy;
312: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
313: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
314: ISDestroy(&is_dummy);
315: MatCreateVecs(B0_B,NULL,&B0_Bv);
316: VecDuplicate(B0_Bv,&B0_Bv2);
317: MatMult(B0_B,pcis->vec1_B,B0_Bv);
318: }
319: PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);
320: if (pcbddc->benign_saddle_point) {
321: PetscReal errorl = 0.;
322: VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
323: VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
324: if (pcbddc->benign_n) {
325: MatMult(B0_B,pcis->vec1_B,B0_Bv2);
326: VecAXPY(B0_Bv,-1.0,B0_Bv2);
327: VecNorm(B0_Bv,NORM_INFINITY,&errorl);
328: }
329: MPI_Allreduce(&errorl,&error,1,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)pc));
330: PetscViewerASCIIPrintf(viewer,"Error benign extension %1.14e\n",error);
331: }
332: VecAXPY(pcis->vec1_global,-1.0,vec2_global);
333: VecNorm(pcis->vec1_global,NORM_INFINITY,&error);
334: PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);
335: VecDestroy(&vec2_global);
337: /* restriction -> from parallel to local */
338: VecSet(pcis->vec1_global,0.0);
339: VecSetRandom(pcis->vec1_B,NULL);
340: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
341: VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
342: PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);
343: VecScale(pcis->vec1_B,-1.0);
344: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
345: VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
346: VecNorm(pcis->vec1_global,NORM_INFINITY,&error);
347: PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);
348: MatDestroy(&B0_B);
349: VecDestroy(&B0_Bv);
350: VecDestroy(&B0_Bv2);
351: }
352: PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0);
353: return(0);
354: }
356: PetscErrorCode PCBDDCScalingDestroy(PC pc)
357: {
358: PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
362: if (pcbddc->deluxe_ctx) {
363: PCBDDCScalingDestroy_Deluxe(pc);
364: }
365: VecDestroy(&pcbddc->work_scaling);
366: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);
367: PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);
368: return(0);
369: }
371: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
372: {
373: PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
374: PCBDDCDeluxeScaling deluxe_ctx;
375: PetscErrorCode ierr;
378: PetscNew(&deluxe_ctx);
379: pcbddc->deluxe_ctx = deluxe_ctx;
380: return(0);
381: }
383: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
384: {
385: PC_BDDC* pcbddc=(PC_BDDC*)pc->data;
386: PetscErrorCode ierr;
389: PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);
390: PetscFree(pcbddc->deluxe_ctx);
391: return(0);
392: }
394: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
395: {
396: PetscInt i;
400: PetscFree(deluxe_ctx->idx_simple_B);
401: deluxe_ctx->n_simple = 0;
402: for (i=0;i<deluxe_ctx->seq_n;i++) {
403: VecScatterDestroy(&deluxe_ctx->seq_scctx[i]);
404: VecDestroy(&deluxe_ctx->seq_work1[i]);
405: VecDestroy(&deluxe_ctx->seq_work2[i]);
406: MatDestroy(&deluxe_ctx->seq_mat[i]);
407: MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
408: }
409: PetscFree5(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2,deluxe_ctx->seq_mat,deluxe_ctx->seq_mat_inv_sum);
410: PetscFree(deluxe_ctx->workspace);
411: deluxe_ctx->seq_n = 0;
412: return(0);
413: }
415: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
416: {
417: PC_IS *pcis=(PC_IS*)pc->data;
418: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
419: PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
420: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
421: PetscErrorCode ierr;
424: /* reset data structures if the topology has changed */
425: if (pcbddc->recompute_topography) {
426: PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);
427: }
429: /* Compute data structures to solve sequential problems */
430: PCBDDCScalingSetUp_Deluxe_Private(pc);
432: /* diagonal scaling on interface dofs not contained in cc */
433: if (sub_schurs->is_vertices || sub_schurs->is_dir) {
434: PetscInt n_com,n_dir;
435: n_com = 0;
436: if (sub_schurs->is_vertices) {
437: ISGetLocalSize(sub_schurs->is_vertices,&n_com);
438: }
439: n_dir = 0;
440: if (sub_schurs->is_dir) {
441: ISGetLocalSize(sub_schurs->is_dir,&n_dir);
442: }
443: if (!deluxe_ctx->n_simple) {
444: deluxe_ctx->n_simple = n_dir + n_com;
445: PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);
446: if (sub_schurs->is_vertices) {
447: PetscInt nmap;
448: const PetscInt *idxs;
450: ISGetIndices(sub_schurs->is_vertices,&idxs);
451: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B);
452: if (nmap != n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (is_vertices)! %D != %D",nmap,n_com);
453: ISRestoreIndices(sub_schurs->is_vertices,&idxs);
454: }
455: if (sub_schurs->is_dir) {
456: PetscInt nmap;
457: const PetscInt *idxs;
459: ISGetIndices(sub_schurs->is_dir,&idxs);
460: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com);
461: if (nmap != n_dir) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (sub_schurs->is_dir)! %D != %D",nmap,n_dir);
462: ISRestoreIndices(sub_schurs->is_dir,&idxs);
463: }
464: PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);
465: } else {
466: if (deluxe_ctx->n_simple != n_dir + n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of simply scaled dofs %D is different from the previous one computed %D",n_dir + n_com,deluxe_ctx->n_simple);
467: }
468: } else {
469: deluxe_ctx->n_simple = 0;
470: deluxe_ctx->idx_simple_B = 0;
471: }
472: return(0);
473: }
475: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
476: {
477: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
478: PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
479: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
480: PetscScalar *matdata,*matdata2;
481: PetscInt i,max_subset_size,cum,cum2;
482: const PetscInt *idxs;
483: PetscBool newsetup = PETSC_FALSE;
484: PetscErrorCode ierr;
487: if (!sub_schurs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Missing PCBDDCSubSchurs");
488: if (!sub_schurs->n_subs) return(0);
490: /* Allocate arrays for subproblems */
491: if (!deluxe_ctx->seq_n) {
492: deluxe_ctx->seq_n = sub_schurs->n_subs;
493: PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);
494: newsetup = PETSC_TRUE;
495: } else if (deluxe_ctx->seq_n != sub_schurs->n_subs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %D is different from the sub_schurs %D",deluxe_ctx->seq_n,sub_schurs->n_subs);
497: /* the change of basis is just a reference to sub_schurs->change (if any) */
498: deluxe_ctx->change = sub_schurs->change;
499: deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;
501: /* Create objects for deluxe */
502: max_subset_size = 0;
503: for (i=0;i<sub_schurs->n_subs;i++) {
504: PetscInt subset_size;
505: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
506: max_subset_size = PetscMax(subset_size,max_subset_size);
507: }
508: if (newsetup) {
509: PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace);
510: }
511: cum = cum2 = 0;
512: ISGetIndices(sub_schurs->is_Ej_all,&idxs);
513: MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata);
514: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2);
515: for (i=0;i<deluxe_ctx->seq_n;i++) {
516: PetscInt subset_size;
518: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
519: if (newsetup) {
520: IS sub;
521: /* work vectors */
522: VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]);
523: VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]);
525: /* scatters */
526: ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub);
527: VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]);
528: ISDestroy(&sub);
529: }
531: /* S_E_j */
532: MatDestroy(&deluxe_ctx->seq_mat[i]);
533: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]);
535: /* \sum_k S^k_E_j */
536: MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
537: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]);
538: MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_SPD,sub_schurs->is_posdef);
539: MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_HERMITIAN,sub_schurs->is_hermitian);
540: if (sub_schurs->is_hermitian) {
541: MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL);
542: } else {
543: MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL);
544: }
545: if (pcbddc->deluxe_singlemat) {
546: Mat X,Y;
547: if (!sub_schurs->is_hermitian) {
548: MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X);
549: } else {
550: PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);
551: X = deluxe_ctx->seq_mat[i];
552: }
553: MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y);
554: if (!sub_schurs->is_hermitian) {
555: PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y);
556: } else {
557: MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y);
558: }
560: MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
561: MatDestroy(&deluxe_ctx->seq_mat[i]);
562: MatDestroy(&X);
563: if (deluxe_ctx->change) {
564: Mat C,CY;
566: if (!deluxe_ctx->change_with_qr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only QR based change of basis");
567: KSPGetOperators(deluxe_ctx->change[i],&C,NULL);
568: MatMatMult(C,Y,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&CY);
569: MatMatTransposeMult(CY,C,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);
570: MatDestroy(&CY);
571: }
572: MatTranspose(Y,MAT_INPLACE_MATRIX,&Y);
573: deluxe_ctx->seq_mat[i] = Y;
574: }
575: cum += subset_size;
576: cum2 += subset_size*subset_size;
577: }
578: ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);
579: MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata);
580: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2);
581: if (pcbddc->deluxe_singlemat) {
582: deluxe_ctx->change = NULL;
583: deluxe_ctx->change_with_qr = PETSC_FALSE;
584: }
586: if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
587: for (i=0;i<deluxe_ctx->seq_n;i++) {
588: if (newsetup) {
589: PC pc;
591: KSPGetPC(deluxe_ctx->change[i],&pc);
592: PCSetType(pc,PCLU);
593: KSPSetFromOptions(deluxe_ctx->change[i]);
594: }
595: KSPSetUp(deluxe_ctx->change[i]);
596: }
597: }
598: return(0);
599: }