Actual source code: bddcscalingbasic.c

petsc-3.11.4 2019-09-28
Report Typos and Errors
  1:  #include <../src/ksp/pc/impls/bddc/bddc.h>
  2:  #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  3:  #include <petscblaslapack.h>
  4:  #include <../src/mat/impls/dense/seq/dense.h>

  6: /* prototypes for deluxe functions */
  7: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
  8: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
  9: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
 10: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
 11: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);

 13: static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X)
 14: {
 15:   Mat_SeqDense   *mat = (Mat_SeqDense*)A->data;
 17:   PetscScalar    *b,*x;
 18:   PetscInt       n;
 19:   PetscBLASInt   nrhs,info,m;
 20:   PetscBool      flg;

 23:   PetscBLASIntCast(A->rmap->n,&m);
 24:   PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
 25:   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
 26:   PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
 27:   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

 29:   MatGetSize(B,NULL,&n);
 30:   PetscBLASIntCast(n,&nrhs);
 31:   MatDenseGetArray(B,&b);
 32:   MatDenseGetArray(X,&x);

 34:   PetscMemcpy(x,b,m*nrhs*sizeof(PetscScalar));

 36:   if (A->factortype == MAT_FACTOR_LU) {
 37: #if defined(PETSC_MISSING_LAPACK_GETRS)
 38:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
 39: #else
 40:     PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info));
 41:     if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
 42: #endif
 43:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only LU factor supported");

 45:   MatDenseRestoreArray(B,&b);
 46:   MatDenseRestoreArray(X,&x);
 47:   PetscLogFlops(nrhs*(2.0*m*m - m));
 48:   return(0);
 49: }

 51: static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
 52: {
 53:   PC_IS*         pcis = (PC_IS*)pc->data;
 54:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;

 58:   /* Apply partition of unity */
 59:   VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);
 60:   VecSet(global_vector,0.0);
 61:   VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);
 62:   VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);
 63:   return(0);
 64: }

 66: static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
 67: {
 68:   PC_IS*              pcis=(PC_IS*)pc->data;
 69:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
 70:   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
 71:   PetscErrorCode      ierr;

 74:   VecSet(pcbddc->work_scaling,0.0);
 75:   VecSet(y,0.0);
 76:   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
 77:     PetscInt          i;
 78:     const PetscScalar *array_x,*array_D;
 79:     PetscScalar       *array;
 80:     VecGetArrayRead(x,&array_x);
 81:     VecGetArrayRead(pcis->D,&array_D);
 82:     VecGetArray(pcbddc->work_scaling,&array);
 83:     for (i=0;i<deluxe_ctx->n_simple;i++) {
 84:       array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]];
 85:     }
 86:     VecRestoreArray(pcbddc->work_scaling,&array);
 87:     VecRestoreArrayRead(pcis->D,&array_D);
 88:     VecRestoreArrayRead(x,&array_x);
 89:   }
 90:   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
 91:   if (deluxe_ctx->seq_mat) {
 92:     PetscInt i;
 93:     for (i=0;i<deluxe_ctx->seq_n;i++) {
 94:       if (deluxe_ctx->change) {
 95:         VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
 96:         VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
 97:         if (deluxe_ctx->change_with_qr) {
 98:           Mat change;

100:           KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
101:           MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
102:         } else {
103:           KSPSolve(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
104:         }
105:       } else {
106:         VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
107:         VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
108:       }
109:       MatMultTranspose(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
110:       if (deluxe_ctx->seq_mat_inv_sum[i]) {
111:         PetscScalar *x;

113:         VecGetArray(deluxe_ctx->seq_work2[i],&x);
114:         VecPlaceArray(deluxe_ctx->seq_work1[i],x);
115:         VecRestoreArray(deluxe_ctx->seq_work2[i],&x);
116:         MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
117:         VecResetArray(deluxe_ctx->seq_work1[i]);
118:       }
119:       if (deluxe_ctx->change) {
120:         Mat change;

122:         KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
123:         MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
124:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
125:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
126:       } else {
127:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
128:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
129:       }
130:     }
131:   }
132:   /* put local boundary part in global vector */
133:   VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);
134:   VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);
135:   return(0);
136: }

138: PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
139: {
140:   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;

147:   if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!");
148:   PetscUseMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));
149:   return(0);
150: }

152: static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
153: {
155:   PC_IS          *pcis = (PC_IS*)pc->data;

158:   VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);
159:   VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);
160:   /* Apply partition of unity */
161:   VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);
162:   return(0);
163: }

165: static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
166: {
167:   PC_IS*              pcis=(PC_IS*)pc->data;
168:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
169:   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
170:   PetscErrorCode      ierr;

173:   /* get local boundary part of global vector */
174:   VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);
175:   VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);
176:   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
177:     PetscInt          i;
178:     PetscScalar       *array_y;
179:     const PetscScalar *array_D;
180:     VecGetArray(y,&array_y);
181:     VecGetArrayRead(pcis->D,&array_D);
182:     for (i=0;i<deluxe_ctx->n_simple;i++) {
183:       array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
184:     }
185:     VecRestoreArrayRead(pcis->D,&array_D);
186:     VecRestoreArray(y,&array_y);
187:   }
188:   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
189:   if (deluxe_ctx->seq_mat) {
190:     PetscInt i;
191:     for (i=0;i<deluxe_ctx->seq_n;i++) {
192:       if (deluxe_ctx->change) {
193:         Mat change;

195:         VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
196:         VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
197:         KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
198:         MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
199:       } else {
200:         VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
201:         VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
202:       }
203:       if (deluxe_ctx->seq_mat_inv_sum[i]) {
204:         PetscScalar *x;

206:         VecGetArray(deluxe_ctx->seq_work1[i],&x);
207:         VecPlaceArray(deluxe_ctx->seq_work2[i],x);
208:         VecRestoreArray(deluxe_ctx->seq_work1[i],&x);
209:         MatSolve(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
210:         VecResetArray(deluxe_ctx->seq_work2[i]);
211:       }
212:       MatMult(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
213:       if (deluxe_ctx->change) {
214:         if (deluxe_ctx->change_with_qr) {
215:           Mat change;

217:           KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
218:           MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
219:         } else {
220:           KSPSolveTranspose(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
221:         }
222:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);
223:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);
224:       } else {
225:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);
226:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);
227:       }
228:     }
229:   }
230:   return(0);
231: }

233: PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
234: {
235:   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;

242:   if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!");
243:   PetscUseMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));
244:   return(0);
245: }

247: PetscErrorCode PCBDDCScalingSetUp(PC pc)
248: {
249:   PC_IS*         pcis=(PC_IS*)pc->data;
250:   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;

255:   PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0);
256:   /* create work vector for the operator */
257:   VecDestroy(&pcbddc->work_scaling);
258:   VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);
259:   /* always rebuild pcis->D */
260:   if (pcis->use_stiffness_scaling) {
261:     PetscScalar *a;
262:     PetscInt    i,n;

264:     MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);
265:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);
266:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);
267:     VecAbs(pcis->D);
268:     VecGetLocalSize(pcis->D,&n);
269:     VecGetArray(pcis->D,&a);
270:     for (i=0;i<n;i++) if (PetscAbsScalar(a[i])<PETSC_SMALL) a[i] = 1.0;
271:     VecRestoreArray(pcis->D,&a);
272:   }
273:   VecSet(pcis->vec1_global,0.0);
274:   VecScatterBegin(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
275:   VecScatterEnd(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
276:   VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
277:   VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
278:   VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);
279:   /* now setup */
280:   if (pcbddc->use_deluxe_scaling) {
281:     if (!pcbddc->deluxe_ctx) {
282:       PCBDDCScalingCreate_Deluxe(pc);
283:     }
284:     PCBDDCScalingSetUp_Deluxe(pc);
285:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);
286:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);
287:   } else {
288:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);
289:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);
290:   }

292:   /* test */
293:   if (pcbddc->dbg_flag) {
294:     Mat         B0_B = NULL;
295:     Vec         B0_Bv = NULL, B0_Bv2 = NULL;
296:     Vec         vec2_global;
297:     PetscViewer viewer = pcbddc->dbg_viewer;
298:     PetscReal   error;

300:     /* extension -> from local to parallel */
301:     VecSet(pcis->vec1_global,0.0);
302:     VecSetRandom(pcis->vec1_B,NULL);
303:     VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
304:     VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
305:     VecDuplicate(pcis->vec1_global,&vec2_global);
306:     VecCopy(pcis->vec1_global,vec2_global);
307:     VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
308:     VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
309:     if (pcbddc->benign_n) {
310:       IS is_dummy;

312:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
313:       MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
314:       ISDestroy(&is_dummy);
315:       MatCreateVecs(B0_B,NULL,&B0_Bv);
316:       VecDuplicate(B0_Bv,&B0_Bv2);
317:       MatMult(B0_B,pcis->vec1_B,B0_Bv);
318:     }
319:     PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);
320:     if (pcbddc->benign_saddle_point) {
321:       PetscReal errorl = 0.;
322:       VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
323:       VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
324:       if (pcbddc->benign_n) {
325:         MatMult(B0_B,pcis->vec1_B,B0_Bv2);
326:         VecAXPY(B0_Bv,-1.0,B0_Bv2);
327:         VecNorm(B0_Bv,NORM_INFINITY,&errorl);
328:       }
329:       MPI_Allreduce(&errorl,&error,1,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)pc));
330:       PetscViewerASCIIPrintf(viewer,"Error benign extension %1.14e\n",error);
331:     }
332:     VecAXPY(pcis->vec1_global,-1.0,vec2_global);
333:     VecNorm(pcis->vec1_global,NORM_INFINITY,&error);
334:     PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);
335:     VecDestroy(&vec2_global);

337:     /* restriction -> from parallel to local */
338:     VecSet(pcis->vec1_global,0.0);
339:     VecSetRandom(pcis->vec1_B,NULL);
340:     VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
341:     VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
342:     PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);
343:     VecScale(pcis->vec1_B,-1.0);
344:     VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
345:     VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
346:     VecNorm(pcis->vec1_global,NORM_INFINITY,&error);
347:     PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);
348:     MatDestroy(&B0_B);
349:     VecDestroy(&B0_Bv);
350:     VecDestroy(&B0_Bv2);
351:   }
352:   PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0);
353:   return(0);
354: }

356: PetscErrorCode PCBDDCScalingDestroy(PC pc)
357: {
358:   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;

362:   if (pcbddc->deluxe_ctx) {
363:     PCBDDCScalingDestroy_Deluxe(pc);
364:   }
365:   VecDestroy(&pcbddc->work_scaling);
366:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);
367:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);
368:   return(0);
369: }

371: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
372: {
373:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
374:   PCBDDCDeluxeScaling deluxe_ctx;
375:   PetscErrorCode      ierr;

378:   PetscNew(&deluxe_ctx);
379:   pcbddc->deluxe_ctx = deluxe_ctx;
380:   return(0);
381: }

383: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
384: {
385:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
386:   PetscErrorCode      ierr;

389:   PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);
390:   PetscFree(pcbddc->deluxe_ctx);
391:   return(0);
392: }

394: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
395: {
396:   PetscInt       i;

400:   PetscFree(deluxe_ctx->idx_simple_B);
401:   deluxe_ctx->n_simple = 0;
402:   for (i=0;i<deluxe_ctx->seq_n;i++) {
403:     VecScatterDestroy(&deluxe_ctx->seq_scctx[i]);
404:     VecDestroy(&deluxe_ctx->seq_work1[i]);
405:     VecDestroy(&deluxe_ctx->seq_work2[i]);
406:     MatDestroy(&deluxe_ctx->seq_mat[i]);
407:     MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
408:   }
409:   PetscFree5(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2,deluxe_ctx->seq_mat,deluxe_ctx->seq_mat_inv_sum);
410:   PetscFree(deluxe_ctx->workspace);
411:   deluxe_ctx->seq_n = 0;
412:   return(0);
413: }

415: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
416: {
417:   PC_IS               *pcis=(PC_IS*)pc->data;
418:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
419:   PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
420:   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
421:   PetscErrorCode      ierr;

424:   /* reset data structures if the topology has changed */
425:   if (pcbddc->recompute_topography) {
426:     PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);
427:   }

429:   /* Compute data structures to solve sequential problems */
430:   PCBDDCScalingSetUp_Deluxe_Private(pc);

432:   /* diagonal scaling on interface dofs not contained in cc */
433:   if (sub_schurs->is_vertices || sub_schurs->is_dir) {
434:     PetscInt n_com,n_dir;
435:     n_com = 0;
436:     if (sub_schurs->is_vertices) {
437:       ISGetLocalSize(sub_schurs->is_vertices,&n_com);
438:     }
439:     n_dir = 0;
440:     if (sub_schurs->is_dir) {
441:       ISGetLocalSize(sub_schurs->is_dir,&n_dir);
442:     }
443:     if (!deluxe_ctx->n_simple) {
444:       deluxe_ctx->n_simple = n_dir + n_com;
445:       PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);
446:       if (sub_schurs->is_vertices) {
447:         PetscInt       nmap;
448:         const PetscInt *idxs;

450:         ISGetIndices(sub_schurs->is_vertices,&idxs);
451:         ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B);
452:         if (nmap != n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (is_vertices)! %D != %D",nmap,n_com);
453:         ISRestoreIndices(sub_schurs->is_vertices,&idxs);
454:       }
455:       if (sub_schurs->is_dir) {
456:         PetscInt       nmap;
457:         const PetscInt *idxs;

459:         ISGetIndices(sub_schurs->is_dir,&idxs);
460:         ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com);
461:         if (nmap != n_dir) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (sub_schurs->is_dir)! %D != %D",nmap,n_dir);
462:         ISRestoreIndices(sub_schurs->is_dir,&idxs);
463:       }
464:       PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);
465:     } else {
466:       if (deluxe_ctx->n_simple != n_dir + n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of simply scaled dofs %D is different from the previous one computed %D",n_dir + n_com,deluxe_ctx->n_simple);
467:     }
468:   } else {
469:     deluxe_ctx->n_simple = 0;
470:     deluxe_ctx->idx_simple_B = 0;
471:   }
472:   return(0);
473: }

475: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
476: {
477:   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
478:   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
479:   PCBDDCSubSchurs        sub_schurs = pcbddc->sub_schurs;
480:   PetscScalar            *matdata,*matdata2;
481:   PetscInt               i,max_subset_size,cum,cum2;
482:   const PetscInt         *idxs;
483:   PetscBool              newsetup = PETSC_FALSE;
484:   PetscErrorCode         ierr;

487:   if (!sub_schurs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Missing PCBDDCSubSchurs");
488:   if (!sub_schurs->n_subs) return(0);

490:   /* Allocate arrays for subproblems */
491:   if (!deluxe_ctx->seq_n) {
492:     deluxe_ctx->seq_n = sub_schurs->n_subs;
493:     PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);
494:     newsetup = PETSC_TRUE;
495:   } else if (deluxe_ctx->seq_n != sub_schurs->n_subs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %D is different from the sub_schurs %D",deluxe_ctx->seq_n,sub_schurs->n_subs);

497:   /* the change of basis is just a reference to sub_schurs->change (if any) */
498:   deluxe_ctx->change         = sub_schurs->change;
499:   deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;

501:   /* Create objects for deluxe */
502:   max_subset_size = 0;
503:   for (i=0;i<sub_schurs->n_subs;i++) {
504:     PetscInt subset_size;
505:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
506:     max_subset_size = PetscMax(subset_size,max_subset_size);
507:   }
508:   if (newsetup) {
509:     PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace);
510:   }
511:   cum = cum2 = 0;
512:   ISGetIndices(sub_schurs->is_Ej_all,&idxs);
513:   MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata);
514:   MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2);
515:   for (i=0;i<deluxe_ctx->seq_n;i++) {
516:     PetscInt     subset_size;

518:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
519:     if (newsetup) {
520:       IS  sub;
521:       /* work vectors */
522:       VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]);
523:       VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]);

525:       /* scatters */
526:       ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub);
527:       VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]);
528:       ISDestroy(&sub);
529:     }

531:     /* S_E_j */
532:     MatDestroy(&deluxe_ctx->seq_mat[i]);
533:     MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]);

535:     /* \sum_k S^k_E_j */
536:     MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
537:     MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]);
538:     MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_SPD,sub_schurs->is_posdef);
539:     MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_HERMITIAN,sub_schurs->is_hermitian);
540:     if (sub_schurs->is_hermitian) {
541:       MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL);
542:     } else {
543:       MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL);
544:     }
545:     if (pcbddc->deluxe_singlemat) {
546:       Mat X,Y;
547:       if (!sub_schurs->is_hermitian) {
548:         MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X);
549:       } else {
550:         PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);
551:         X    = deluxe_ctx->seq_mat[i];
552:       }
553:       MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y);
554:       if (!sub_schurs->is_hermitian) {
555:         PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y);
556:       } else {
557:         MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y);
558:       }

560:       MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
561:       MatDestroy(&deluxe_ctx->seq_mat[i]);
562:       MatDestroy(&X);
563:       if (deluxe_ctx->change) {
564:         Mat C,CY;

566:         if (!deluxe_ctx->change_with_qr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only QR based change of basis");
567:         KSPGetOperators(deluxe_ctx->change[i],&C,NULL);
568:         MatMatMult(C,Y,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&CY);
569:         MatMatTransposeMult(CY,C,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);
570:         MatDestroy(&CY);
571:       }
572:       MatTranspose(Y,MAT_INPLACE_MATRIX,&Y);
573:       deluxe_ctx->seq_mat[i] = Y;
574:     }
575:     cum += subset_size;
576:     cum2 += subset_size*subset_size;
577:   }
578:   ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);
579:   MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata);
580:   MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2);
581:   if (pcbddc->deluxe_singlemat) {
582:     deluxe_ctx->change         = NULL;
583:     deluxe_ctx->change_with_qr = PETSC_FALSE;
584:   }

586:   if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
587:     for (i=0;i<deluxe_ctx->seq_n;i++) {
588:       if (newsetup) {
589:         PC pc;

591:         KSPGetPC(deluxe_ctx->change[i],&pc);
592:         PCSetType(pc,PCLU);
593:         KSPSetFromOptions(deluxe_ctx->change[i]);
594:       }
595:       KSPSetUp(deluxe_ctx->change[i]);
596:     }
597:   }
598:   return(0);
599: }