Actual source code: bddcscalingbasic.c

petsc-3.13.6 2020-09-29
Report Typos and Errors
  1:  #include <../src/ksp/pc/impls/bddc/bddc.h>
  2:  #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  3:  #include <petscblaslapack.h>
  4:  #include <../src/mat/impls/dense/seq/dense.h>

  6: /* prototypes for deluxe functions */
  7: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
  8: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
  9: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
 10: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
 11: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);

 13: static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X)
 14: {
 15:   Mat_SeqDense      *mat = (Mat_SeqDense*)A->data;
 16:   PetscErrorCode    ierr;
 17:   const PetscScalar *b;
 18:   PetscScalar       *x;
 19:   PetscInt          n;
 20:   PetscBLASInt      nrhs,info,m;
 21:   PetscBool         flg;

 24:   PetscBLASIntCast(A->rmap->n,&m);
 25:   PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
 26:   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
 27:   PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);
 28:   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

 30:   MatGetSize(B,NULL,&n);
 31:   PetscBLASIntCast(n,&nrhs);
 32:   MatDenseGetArrayRead(B,&b);
 33:   MatDenseGetArray(X,&x);
 34:   PetscArraycpy(x,b,m*nrhs);
 35:   MatDenseRestoreArrayRead(B,&b);

 37:   if (A->factortype == MAT_FACTOR_LU) {
 38:     PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info));
 39:     if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
 40:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only LU factor supported");

 42:   MatDenseRestoreArray(X,&x);
 43:   PetscLogFlops(nrhs*(2.0*m*m - m));
 44:   return(0);
 45: }

 47: static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
 48: {
 49:   PC_IS*         pcis = (PC_IS*)pc->data;
 50:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;

 54:   /* Apply partition of unity */
 55:   VecPointwiseMult(pcbddc->work_scaling,pcis->D,local_interface_vector);
 56:   VecSet(global_vector,0.0);
 57:   VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);
 58:   VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,global_vector,ADD_VALUES,SCATTER_REVERSE);
 59:   return(0);
 60: }

 62: static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
 63: {
 64:   PC_IS*              pcis=(PC_IS*)pc->data;
 65:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
 66:   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
 67:   PetscErrorCode      ierr;

 70:   VecSet(pcbddc->work_scaling,0.0);
 71:   VecSet(y,0.0);
 72:   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
 73:     PetscInt          i;
 74:     const PetscScalar *array_x,*array_D;
 75:     PetscScalar       *array;
 76:     VecGetArrayRead(x,&array_x);
 77:     VecGetArrayRead(pcis->D,&array_D);
 78:     VecGetArray(pcbddc->work_scaling,&array);
 79:     for (i=0;i<deluxe_ctx->n_simple;i++) {
 80:       array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]]*array_D[deluxe_ctx->idx_simple_B[i]];
 81:     }
 82:     VecRestoreArray(pcbddc->work_scaling,&array);
 83:     VecRestoreArrayRead(pcis->D,&array_D);
 84:     VecRestoreArrayRead(x,&array_x);
 85:   }
 86:   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
 87:   if (deluxe_ctx->seq_mat) {
 88:     PetscInt i;
 89:     for (i=0;i<deluxe_ctx->seq_n;i++) {
 90:       if (deluxe_ctx->change) {
 91:         VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
 92:         VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
 93:         if (deluxe_ctx->change_with_qr) {
 94:           Mat change;

 96:           KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
 97:           MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
 98:         } else {
 99:           KSPSolve(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
100:         }
101:       } else {
102:         VecScatterBegin(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
103:         VecScatterEnd(deluxe_ctx->seq_scctx[i],x,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
104:       }
105:       MatMultTranspose(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
106:       if (deluxe_ctx->seq_mat_inv_sum[i]) {
107:         PetscScalar *x;

109:         VecGetArray(deluxe_ctx->seq_work2[i],&x);
110:         VecPlaceArray(deluxe_ctx->seq_work1[i],x);
111:         VecRestoreArray(deluxe_ctx->seq_work2[i],&x);
112:         MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
113:         VecResetArray(deluxe_ctx->seq_work1[i]);
114:       }
115:       if (deluxe_ctx->change) {
116:         Mat change;

118:         KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
119:         MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
120:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
121:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
122:       } else {
123:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
124:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],pcbddc->work_scaling,INSERT_VALUES,SCATTER_REVERSE);
125:       }
126:     }
127:   }
128:   /* put local boundary part in global vector */
129:   VecScatterBegin(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);
130:   VecScatterEnd(pcis->global_to_B,pcbddc->work_scaling,y,ADD_VALUES,SCATTER_REVERSE);
131:   return(0);
132: }

134: PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
135: {
136:   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;

143:   if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!");
144:   PetscUseMethod(pc,"PCBDDCScalingExtension_C",(PC,Vec,Vec),(pc,local_interface_vector,global_vector));
145:   return(0);
146: }

148: static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
149: {
151:   PC_IS          *pcis = (PC_IS*)pc->data;

154:   VecScatterBegin(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);
155:   VecScatterEnd(pcis->global_to_B,global_vector,local_interface_vector,INSERT_VALUES,SCATTER_FORWARD);
156:   /* Apply partition of unity */
157:   VecPointwiseMult(local_interface_vector,pcis->D,local_interface_vector);
158:   return(0);
159: }

161: static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
162: {
163:   PC_IS*              pcis=(PC_IS*)pc->data;
164:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
165:   PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
166:   PetscErrorCode      ierr;

169:   /* get local boundary part of global vector */
170:   VecScatterBegin(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);
171:   VecScatterEnd(pcis->global_to_B,x,y,INSERT_VALUES,SCATTER_FORWARD);
172:   if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
173:     PetscInt          i;
174:     PetscScalar       *array_y;
175:     const PetscScalar *array_D;
176:     VecGetArray(y,&array_y);
177:     VecGetArrayRead(pcis->D,&array_D);
178:     for (i=0;i<deluxe_ctx->n_simple;i++) {
179:       array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
180:     }
181:     VecRestoreArrayRead(pcis->D,&array_D);
182:     VecRestoreArray(y,&array_y);
183:   }
184:   /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */
185:   if (deluxe_ctx->seq_mat) {
186:     PetscInt i;
187:     for (i=0;i<deluxe_ctx->seq_n;i++) {
188:       if (deluxe_ctx->change) {
189:         Mat change;

191:         VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
192:         VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work2[i],INSERT_VALUES,SCATTER_FORWARD);
193:         KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
194:         MatMultTranspose(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
195:       } else {
196:         VecScatterBegin(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
197:         VecScatterEnd(deluxe_ctx->seq_scctx[i],y,deluxe_ctx->seq_work1[i],INSERT_VALUES,SCATTER_FORWARD);
198:       }
199:       if (deluxe_ctx->seq_mat_inv_sum[i]) {
200:         PetscScalar *x;

202:         VecGetArray(deluxe_ctx->seq_work1[i],&x);
203:         VecPlaceArray(deluxe_ctx->seq_work2[i],x);
204:         VecRestoreArray(deluxe_ctx->seq_work1[i],&x);
205:         MatSolve(deluxe_ctx->seq_mat_inv_sum[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
206:         VecResetArray(deluxe_ctx->seq_work2[i]);
207:       }
208:       MatMult(deluxe_ctx->seq_mat[i],deluxe_ctx->seq_work1[i],deluxe_ctx->seq_work2[i]);
209:       if (deluxe_ctx->change) {
210:         if (deluxe_ctx->change_with_qr) {
211:           Mat change;

213:           KSPGetOperators(deluxe_ctx->change[i],&change,NULL);
214:           MatMult(change,deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
215:         } else {
216:           KSPSolveTranspose(deluxe_ctx->change[i],deluxe_ctx->seq_work2[i],deluxe_ctx->seq_work1[i]);
217:         }
218:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);
219:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work1[i],y,INSERT_VALUES,SCATTER_REVERSE);
220:       } else {
221:         VecScatterBegin(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);
222:         VecScatterEnd(deluxe_ctx->seq_scctx[i],deluxe_ctx->seq_work2[i],y,INSERT_VALUES,SCATTER_REVERSE);
223:       }
224:     }
225:   }
226:   return(0);
227: }

229: PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
230: {
231:   PC_BDDC        *pcbddc=(PC_BDDC*)pc->data;

238:   if (local_interface_vector == pcbddc->work_scaling) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Local vector cannot be pcbddc->work_scaling!");
239:   PetscUseMethod(pc,"PCBDDCScalingRestriction_C",(PC,Vec,Vec),(pc,global_vector,local_interface_vector));
240:   return(0);
241: }

243: PetscErrorCode PCBDDCScalingSetUp(PC pc)
244: {
245:   PC_IS*         pcis=(PC_IS*)pc->data;
246:   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;

251:   PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0);
252:   /* create work vector for the operator */
253:   VecDestroy(&pcbddc->work_scaling);
254:   VecDuplicate(pcis->vec1_B,&pcbddc->work_scaling);
255:   /* always rebuild pcis->D */
256:   if (pcis->use_stiffness_scaling) {
257:     PetscScalar *a;
258:     PetscInt    i,n;

260:     MatGetDiagonal(pcbddc->local_mat,pcis->vec1_N);
261:     VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);
262:     VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->D,INSERT_VALUES,SCATTER_FORWARD);
263:     VecAbs(pcis->D);
264:     VecGetLocalSize(pcis->D,&n);
265:     VecGetArray(pcis->D,&a);
266:     for (i=0;i<n;i++) if (PetscAbsScalar(a[i])<PETSC_SMALL) a[i] = 1.0;
267:     VecRestoreArray(pcis->D,&a);
268:   }
269:   VecSet(pcis->vec1_global,0.0);
270:   VecScatterBegin(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
271:   VecScatterEnd(pcis->global_to_B,pcis->D,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
272:   VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
273:   VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
274:   VecPointwiseDivide(pcis->D,pcis->D,pcis->vec1_B);
275:   /* now setup */
276:   if (pcbddc->use_deluxe_scaling) {
277:     if (!pcbddc->deluxe_ctx) {
278:       PCBDDCScalingCreate_Deluxe(pc);
279:     }
280:     PCBDDCScalingSetUp_Deluxe(pc);
281:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Deluxe);
282:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Deluxe);
283:   } else {
284:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",PCBDDCScalingRestriction_Basic);
285:     PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",PCBDDCScalingExtension_Basic);
286:   }
287:   PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level],pc,0,0,0);

289:   /* test */
290:   if (pcbddc->dbg_flag) {
291:     Mat         B0_B = NULL;
292:     Vec         B0_Bv = NULL, B0_Bv2 = NULL;
293:     Vec         vec2_global;
294:     PetscViewer viewer = pcbddc->dbg_viewer;
295:     PetscReal   error;

297:     /* extension -> from local to parallel */
298:     VecSet(pcis->vec1_global,0.0);
299:     VecSetRandom(pcis->vec1_B,NULL);
300:     VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
301:     VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
302:     VecDuplicate(pcis->vec1_global,&vec2_global);
303:     VecCopy(pcis->vec1_global,vec2_global);
304:     VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
305:     VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
306:     if (pcbddc->benign_n) {
307:       IS is_dummy;

309:       ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
310:       MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
311:       ISDestroy(&is_dummy);
312:       MatCreateVecs(B0_B,NULL,&B0_Bv);
313:       VecDuplicate(B0_Bv,&B0_Bv2);
314:       MatMult(B0_B,pcis->vec1_B,B0_Bv);
315:     }
316:     PCBDDCScalingExtension(pc,pcis->vec1_B,pcis->vec1_global);
317:     if (pcbddc->benign_saddle_point) {
318:       PetscReal errorl = 0.;
319:       VecScatterBegin(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
320:       VecScatterEnd(pcis->global_to_B,pcis->vec1_global,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
321:       if (pcbddc->benign_n) {
322:         MatMult(B0_B,pcis->vec1_B,B0_Bv2);
323:         VecAXPY(B0_Bv,-1.0,B0_Bv2);
324:         VecNorm(B0_Bv,NORM_INFINITY,&errorl);
325:       }
326:       MPI_Allreduce(&errorl,&error,1,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)pc));
327:       PetscViewerASCIIPrintf(viewer,"Error benign extension %1.14e\n",error);
328:     }
329:     VecAXPY(pcis->vec1_global,-1.0,vec2_global);
330:     VecNorm(pcis->vec1_global,NORM_INFINITY,&error);
331:     PetscViewerASCIIPrintf(viewer,"Error scaling extension %1.14e\n",error);
332:     VecDestroy(&vec2_global);

334:     /* restriction -> from parallel to local */
335:     VecSet(pcis->vec1_global,0.0);
336:     VecSetRandom(pcis->vec1_B,NULL);
337:     VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
338:     VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
339:     PCBDDCScalingRestriction(pc,pcis->vec1_global,pcis->vec1_B);
340:     VecScale(pcis->vec1_B,-1.0);
341:     VecScatterBegin(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
342:     VecScatterEnd(pcis->global_to_B,pcis->vec1_B,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
343:     VecNorm(pcis->vec1_global,NORM_INFINITY,&error);
344:     PetscViewerASCIIPrintf(viewer,"Error scaling restriction %1.14e\n",error);
345:     MatDestroy(&B0_B);
346:     VecDestroy(&B0_Bv);
347:     VecDestroy(&B0_Bv2);
348:   }
349:   return(0);
350: }

352: PetscErrorCode PCBDDCScalingDestroy(PC pc)
353: {
354:   PC_BDDC*       pcbddc=(PC_BDDC*)pc->data;

358:   if (pcbddc->deluxe_ctx) {
359:     PCBDDCScalingDestroy_Deluxe(pc);
360:   }
361:   VecDestroy(&pcbddc->work_scaling);
362:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingRestriction_C",NULL);
363:   PetscObjectComposeFunction((PetscObject)pc,"PCBDDCScalingExtension_C",NULL);
364:   return(0);
365: }

367: static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
368: {
369:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
370:   PCBDDCDeluxeScaling deluxe_ctx;
371:   PetscErrorCode      ierr;

374:   PetscNew(&deluxe_ctx);
375:   pcbddc->deluxe_ctx = deluxe_ctx;
376:   return(0);
377: }

379: static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
380: {
381:   PC_BDDC*            pcbddc=(PC_BDDC*)pc->data;
382:   PetscErrorCode      ierr;

385:   PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx);
386:   PetscFree(pcbddc->deluxe_ctx);
387:   return(0);
388: }

390: static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
391: {
392:   PetscInt       i;

396:   PetscFree(deluxe_ctx->idx_simple_B);
397:   deluxe_ctx->n_simple = 0;
398:   for (i=0;i<deluxe_ctx->seq_n;i++) {
399:     VecScatterDestroy(&deluxe_ctx->seq_scctx[i]);
400:     VecDestroy(&deluxe_ctx->seq_work1[i]);
401:     VecDestroy(&deluxe_ctx->seq_work2[i]);
402:     MatDestroy(&deluxe_ctx->seq_mat[i]);
403:     MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
404:   }
405:   PetscFree5(deluxe_ctx->seq_scctx,deluxe_ctx->seq_work1,deluxe_ctx->seq_work2,deluxe_ctx->seq_mat,deluxe_ctx->seq_mat_inv_sum);
406:   PetscFree(deluxe_ctx->workspace);
407:   deluxe_ctx->seq_n = 0;
408:   return(0);
409: }

411: static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
412: {
413:   PC_IS               *pcis=(PC_IS*)pc->data;
414:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
415:   PCBDDCDeluxeScaling deluxe_ctx=pcbddc->deluxe_ctx;
416:   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
417:   PetscErrorCode      ierr;

420:   /* reset data structures if the topology has changed */
421:   if (pcbddc->recompute_topography) {
422:     PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx);
423:   }

425:   /* Compute data structures to solve sequential problems */
426:   PCBDDCScalingSetUp_Deluxe_Private(pc);

428:   /* diagonal scaling on interface dofs not contained in cc */
429:   if (sub_schurs->is_vertices || sub_schurs->is_dir) {
430:     PetscInt n_com,n_dir;
431:     n_com = 0;
432:     if (sub_schurs->is_vertices) {
433:       ISGetLocalSize(sub_schurs->is_vertices,&n_com);
434:     }
435:     n_dir = 0;
436:     if (sub_schurs->is_dir) {
437:       ISGetLocalSize(sub_schurs->is_dir,&n_dir);
438:     }
439:     if (!deluxe_ctx->n_simple) {
440:       deluxe_ctx->n_simple = n_dir + n_com;
441:       PetscMalloc1(deluxe_ctx->n_simple,&deluxe_ctx->idx_simple_B);
442:       if (sub_schurs->is_vertices) {
443:         PetscInt       nmap;
444:         const PetscInt *idxs;

446:         ISGetIndices(sub_schurs->is_vertices,&idxs);
447:         ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_com,idxs,&nmap,deluxe_ctx->idx_simple_B);
448:         if (nmap != n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (is_vertices)! %D != %D",nmap,n_com);
449:         ISRestoreIndices(sub_schurs->is_vertices,&idxs);
450:       }
451:       if (sub_schurs->is_dir) {
452:         PetscInt       nmap;
453:         const PetscInt *idxs;

455:         ISGetIndices(sub_schurs->is_dir,&idxs);
456:         ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_dir,idxs,&nmap,deluxe_ctx->idx_simple_B+n_com);
457:         if (nmap != n_dir) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error when mapping simply scaled dofs (sub_schurs->is_dir)! %D != %D",nmap,n_dir);
458:         ISRestoreIndices(sub_schurs->is_dir,&idxs);
459:       }
460:       PetscSortInt(deluxe_ctx->n_simple,deluxe_ctx->idx_simple_B);
461:     } else {
462:       if (deluxe_ctx->n_simple != n_dir + n_com) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of simply scaled dofs %D is different from the previous one computed %D",n_dir + n_com,deluxe_ctx->n_simple);
463:     }
464:   } else {
465:     deluxe_ctx->n_simple = 0;
466:     deluxe_ctx->idx_simple_B = 0;
467:   }
468:   return(0);
469: }

471: static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
472: {
473:   PC_BDDC                *pcbddc=(PC_BDDC*)pc->data;
474:   PCBDDCDeluxeScaling    deluxe_ctx=pcbddc->deluxe_ctx;
475:   PCBDDCSubSchurs        sub_schurs = pcbddc->sub_schurs;
476:   PetscScalar            *matdata,*matdata2;
477:   PetscInt               i,max_subset_size,cum,cum2;
478:   const PetscInt         *idxs;
479:   PetscBool              newsetup = PETSC_FALSE;
480:   PetscErrorCode         ierr;

483:   if (!sub_schurs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Missing PCBDDCSubSchurs");
484:   if (!sub_schurs->n_subs) return(0);

486:   /* Allocate arrays for subproblems */
487:   if (!deluxe_ctx->seq_n) {
488:     deluxe_ctx->seq_n = sub_schurs->n_subs;
489:     PetscCalloc5(deluxe_ctx->seq_n,&deluxe_ctx->seq_scctx,deluxe_ctx->seq_n,&deluxe_ctx->seq_work1,deluxe_ctx->seq_n,&deluxe_ctx->seq_work2,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat,deluxe_ctx->seq_n,&deluxe_ctx->seq_mat_inv_sum);
490:     newsetup = PETSC_TRUE;
491:   } else if (deluxe_ctx->seq_n != sub_schurs->n_subs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of deluxe subproblems %D is different from the sub_schurs %D",deluxe_ctx->seq_n,sub_schurs->n_subs);

493:   /* the change of basis is just a reference to sub_schurs->change (if any) */
494:   deluxe_ctx->change         = sub_schurs->change;
495:   deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;

497:   /* Create objects for deluxe */
498:   max_subset_size = 0;
499:   for (i=0;i<sub_schurs->n_subs;i++) {
500:     PetscInt subset_size;
501:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
502:     max_subset_size = PetscMax(subset_size,max_subset_size);
503:   }
504:   if (newsetup) {
505:     PetscMalloc1(2*max_subset_size,&deluxe_ctx->workspace);
506:   }
507:   cum = cum2 = 0;
508:   ISGetIndices(sub_schurs->is_Ej_all,&idxs);
509:   MatSeqAIJGetArray(sub_schurs->S_Ej_all,&matdata);
510:   MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&matdata2);
511:   for (i=0;i<deluxe_ctx->seq_n;i++) {
512:     PetscInt     subset_size;

514:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
515:     if (newsetup) {
516:       IS  sub;
517:       /* work vectors */
518:       VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace,&deluxe_ctx->seq_work1[i]);
519:       VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,deluxe_ctx->workspace+subset_size,&deluxe_ctx->seq_work2[i]);

521:       /* scatters */
522:       ISCreateGeneral(PETSC_COMM_SELF,subset_size,idxs+cum,PETSC_COPY_VALUES,&sub);
523:       VecScatterCreate(pcbddc->work_scaling,sub,deluxe_ctx->seq_work1[i],NULL,&deluxe_ctx->seq_scctx[i]);
524:       ISDestroy(&sub);
525:     }

527:     /* S_E_j */
528:     MatDestroy(&deluxe_ctx->seq_mat[i]);
529:     MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata+cum2,&deluxe_ctx->seq_mat[i]);

531:     /* \sum_k S^k_E_j */
532:     MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
533:     MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,matdata2+cum2,&deluxe_ctx->seq_mat_inv_sum[i]);
534:     MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_SPD,sub_schurs->is_posdef);
535:     MatSetOption(deluxe_ctx->seq_mat_inv_sum[i],MAT_HERMITIAN,sub_schurs->is_hermitian);
536:     if (sub_schurs->is_hermitian) {
537:       MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL);
538:     } else {
539:       MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i],NULL,NULL,NULL);
540:     }
541:     if (pcbddc->deluxe_singlemat) {
542:       Mat X,Y;
543:       if (!sub_schurs->is_hermitian) {
544:         MatTranspose(deluxe_ctx->seq_mat[i],MAT_INITIAL_MATRIX,&X);
545:       } else {
546:         PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]);
547:         X    = deluxe_ctx->seq_mat[i];
548:       }
549:       MatDuplicate(X,MAT_DO_NOT_COPY_VALUES,&Y);
550:       if (!sub_schurs->is_hermitian) {
551:         PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i],X,Y);
552:       } else {
553:         MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i],X,Y);
554:       }

556:       MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]);
557:       MatDestroy(&deluxe_ctx->seq_mat[i]);
558:       MatDestroy(&X);
559:       if (deluxe_ctx->change) {
560:         Mat C,CY;
561:         if (!deluxe_ctx->change_with_qr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only QR based change of basis");
562:         KSPGetOperators(deluxe_ctx->change[i],&C,NULL);
563:         MatMatMult(C,Y,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&CY);
564:         MatMatTransposeMult(CY,C,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);
565:         MatDestroy(&CY);
566:         MatProductClear(Y); /* clear internal matproduct structure of Y since CY is destroyed */
567:       }
568:       MatTranspose(Y,MAT_INPLACE_MATRIX,&Y);
569:       deluxe_ctx->seq_mat[i] = Y;
570:     }
571:     cum += subset_size;
572:     cum2 += subset_size*subset_size;
573:   }
574:   ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);
575:   MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&matdata);
576:   MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&matdata2);
577:   if (pcbddc->deluxe_singlemat) {
578:     deluxe_ctx->change         = NULL;
579:     deluxe_ctx->change_with_qr = PETSC_FALSE;
580:   }

582:   if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
583:     for (i=0;i<deluxe_ctx->seq_n;i++) {
584:       if (newsetup) {
585:         PC pc;

587:         KSPGetPC(deluxe_ctx->change[i],&pc);
588:         PCSetType(pc,PCLU);
589:         KSPSetFromOptions(deluxe_ctx->change[i]);
590:       }
591:       KSPSetUp(deluxe_ctx->change[i]);
592:     }
593:   }
594:   return(0);
595: }