Actual source code: bddcprivate.c

petsc-3.6.1 2015-08-06
Report Typos and Errors
  1: #include <../src/ksp/pc/impls/bddc/bddc.h>
  2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  3: #include <petscblaslapack.h>

  5: static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
  6: static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);

 10: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
 11: {
 12:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
 13:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
 14:   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
 15:   PetscBLASInt    *B_iwork,*B_ifail;
 16:   PetscScalar     *work,lwork;
 17:   PetscScalar     *St,*S,*eigv;
 18:   PetscScalar     *Sarray,*Starray;
 19:   PetscReal       *eigs,thresh;
 20:   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
 21:   PetscBool       allocated_S_St;
 22: #if defined(PETSC_USE_COMPLEX)
 23:   PetscReal       *rwork;
 24: #endif
 25:   PetscErrorCode  ierr;

 28:   if (!sub_schurs->use_mumps) {
 29:     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
 30:   }

 32:   if (pcbddc->dbg_flag) {
 33:     PetscViewerFlush(pcbddc->dbg_viewer);
 34:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
 35:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
 36:     PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
 37:   }

 39:   if (pcbddc->dbg_flag) {
 40:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
 41:   }

 43:   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
 44:     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
 45:   }

 47:   /* max size of subsets */
 48:   mss = 0;
 49:   for (i=0;i<sub_schurs->n_subs;i++) {
 50:     PetscInt subset_size;

 52:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
 53:     mss = PetscMax(mss,subset_size);
 54:   }

 56:   /* min/max and threshold */
 57:   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
 58:   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
 59:   nmax = PetscMax(nmin,nmax);
 60:   allocated_S_St = PETSC_FALSE;
 61:   if (nmin) {
 62:     allocated_S_St = PETSC_TRUE;
 63:   }

 65:   /* allocate lapack workspace */
 66:   cum = cum2 = 0;
 67:   maxneigs = 0;
 68:   for (i=0;i<sub_schurs->n_subs;i++) {
 69:     PetscInt n,subset_size;

 71:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
 72:     n = PetscMin(subset_size,nmax);
 73:     cum += subset_size;
 74:     cum2 += subset_size*n;
 75:     maxneigs = PetscMax(maxneigs,n);
 76:   }
 77:   if (mss) {
 78:     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
 79:       PetscBLASInt B_itype = 1;
 80:       PetscBLASInt B_N = mss;
 81:       PetscReal    zero = 0.0;
 82:       PetscReal    eps = 0.0; /* dlamch? */

 84:       B_lwork = -1;
 85:       S = NULL;
 86:       St = NULL;
 87:       eigs = NULL;
 88:       eigv = NULL;
 89:       B_iwork = NULL;
 90:       B_ifail = NULL;
 91: #if defined(PETSC_USE_COMPLEX)
 92:       rwork = NULL;
 93: #endif
 94:       thresh = 1.0;
 95:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 96: #if defined(PETSC_USE_COMPLEX)
 97:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
 98: #else
 99:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100: #endif
101:       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102:       PetscFPTrapPop();
103:     } else {
104:         /* TODO */
105:     }
106:   } else {
107:     lwork = 0;
108:   }

110:   nv = 0;
111:   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
112:     ISGetLocalSize(sub_schurs->is_vertices,&nv);
113:   }
114:   PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
115:   if (allocated_S_St) {
116:     PetscMalloc2(mss*mss,&S,mss*mss,&St);
117:   }
118:   PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
119: #if defined(PETSC_USE_COMPLEX)
120:   PetscMalloc1(7*mss,&rwork);
121: #endif
122:   PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
124:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
125:                       nv+cum,&pcbddc->adaptive_constraints_idxs,
126:                       nv+cum2,&pcbddc->adaptive_constraints_data);
127:   PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));

129:   maxneigs = 0;
130:   cum = cum2 = cumarray = 0;
131:   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
132:   pcbddc->adaptive_constraints_data_ptr[0] = 0;
133:   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
134:     const PetscInt *idxs;

136:     ISGetIndices(sub_schurs->is_vertices,&idxs);
137:     for (cum=0;cum<nv;cum++) {
138:       pcbddc->adaptive_constraints_n[cum] = 1;
139:       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
140:       pcbddc->adaptive_constraints_data[cum] = 1.0;
141:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
142:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
143:     }
144:     cum2 = cum;
145:     ISRestoreIndices(sub_schurs->is_vertices,&idxs);
146:   }

148:   if (mss) { /* multilevel */
149:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
150:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
151:   }

153:   for (i=0;i<sub_schurs->n_subs;i++) {

155:     const PetscInt *idxs;
156:     PetscReal      infty = PETSC_MAX_REAL;
157:     PetscInt       j,subset_size,eigs_start = 0;
158:     PetscBLASInt   B_N;
159:     PetscBool      same_data = PETSC_FALSE;

161:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
162:     PetscBLASIntCast(subset_size,&B_N);
163:     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
164:       if (sub_schurs->is_hermitian) {
165:         PetscInt j,k;
166:         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
167:           PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));
168:           PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));
169:         }
170:         for (j=0;j<subset_size;j++) {
171:           for (k=j;k<subset_size;k++) {
172:             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
173:             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
174:           }
175:         }
176:       } else {
177:         PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
178:         PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
179:       }
180:     } else {
181:       S = Sarray + cumarray;
182:       St = Starray + cumarray;
183:     }

185:     ISGetIndices(sub_schurs->is_subs[i],&idxs);
186:     /* see if we can save some work */
187:     if (sub_schurs->n_subs == 1) {
188:       PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);
189:     }

191:     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
192:       B_neigs = 0;
193:     } else {
194:       /* Threshold: this is an heuristic for edges */
195:       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;

197:       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
198:         PetscBLASInt B_itype = 1;
199:         PetscBLASInt B_IL, B_IU;
200:         PetscReal    eps = -1.0; /* dlamch? */
201:         PetscInt     nmin_s;

203:         /* ask for eigenvalues larger than thresh */
204:         if (pcbddc->dbg_flag) {
205:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
206:         }
207:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
208: #if defined(PETSC_USE_COMPLEX)
209:         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
210: #else
211:         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
212: #endif
213:         PetscFPTrapPop();
214:         if (B_ierr) {
215:           if (B_ierr < 0 ) {
216:             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
217:           } else if (B_ierr <= B_N) {
218:             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
219:           } else {
220:             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
221:           }
222:         }

224:         if (B_neigs > nmax) {
225:           if (pcbddc->dbg_flag) {
226:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
227:           }
228:           eigs_start = B_neigs -nmax;
229:           B_neigs = nmax;
230:         }

232:         nmin_s = PetscMin(nmin,B_N);
233:         if (B_neigs < nmin_s) {
234:           PetscBLASInt B_neigs2;

236:           B_IU = B_N - B_neigs;
237:           B_IL = B_N - nmin_s + 1;
238:           if (pcbddc->dbg_flag) {
239:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
240:           }
241:           if (sub_schurs->is_hermitian) {
242:             PetscInt j;
243:             for (j=0;j<subset_size;j++) {
244:               PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));
245:             }
246:             for (j=0;j<subset_size;j++) {
247:               PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));
248:             }
249:           } else {
250:             PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
251:             PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
252:           }
253:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
254: #if defined(PETSC_USE_COMPLEX)
255:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
256: #else
257:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
258: #endif
259:           PetscFPTrapPop();
260:           B_neigs += B_neigs2;
261:         }
262:         if (B_ierr) {
263:           if (B_ierr < 0 ) {
264:             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
265:           } else if (B_ierr <= B_N) {
266:             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
267:           } else {
268:             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
269:           }
270:         }
271:         if (pcbddc->dbg_flag) {
272:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
273:           for (j=0;j<B_neigs;j++) {
274:             if (eigs[j] == 0.0) {
275:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
276:             } else {
277:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
278:             }
279:           }
280:         }
281:       } else {
282:           /* TODO */
283:       }
284:     }
285:     maxneigs = PetscMax(B_neigs,maxneigs);
286:     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
287:     if (B_neigs) {
288:       PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));

290:       if (pcbddc->dbg_flag > 1) {
291:         PetscInt ii;
292:         for (ii=0;ii<B_neigs;ii++) {
293:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
294:           for (j=0;j<B_N;j++) {
295: #if defined(PETSC_USE_COMPLEX)
296:             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
297:             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
298:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
299: #else
300:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
301: #endif
302:           }
303:         }
304:       }
305: #if 0
306:       for (j=0;j<B_neigs;j++) {
307:         PetscBLASInt Blas_N,Blas_one = 1.0;
308:         PetscScalar norm;
309:         PetscBLASIntCast(subset_size,&Blas_N);
310:         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
311:                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
312:         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
313:           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
314:         } else {
315:           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
316:         }
317:         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
318:       }
319: #endif
320:       PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));
321:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
322:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
323:       cum++;
324:     }
325:     ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
326:     /* shift for next computation */
327:     cumarray += subset_size*subset_size;
328:   }
329:   if (pcbddc->dbg_flag) {
330:     PetscViewerFlush(pcbddc->dbg_viewer);
331:   }

333:   if (mss) {
334:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
335:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
336:     /* destroy matrices (junk) */
337:     MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
338:     MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
339:   }
340:   if (allocated_S_St) {
341:     PetscFree2(S,St);
342:   }
343:   PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
344: #if defined(PETSC_USE_COMPLEX)
345:   PetscFree(rwork);
346: #endif
347:   if (pcbddc->dbg_flag) {
348:     PetscInt maxneigs_r;
349:     MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
350:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);
351:   }
352:   return(0);
353: }

357: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
358: {
359:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
360:   PetscScalar    *coarse_submat_vals;

364:   /* Setup local scatters R_to_B and (optionally) R_to_D */
365:   /* PCBDDCSetUpLocalWorkVectors should be called first! */
366:   PCBDDCSetUpLocalScatters(pc);

368:   /* Setup local neumann solver ksp_R */
369:   /* PCBDDCSetUpLocalScatters should be called first! */
370:   PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);

372:   /* Change global null space passed in by the user if change of basis has been requested */
373:   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
374:     PCBDDCNullSpaceAdaptGlobal(pc);
375:   }

377:   /*
378:      Setup local correction and local part of coarse basis.
379:      Gives back the dense local part of the coarse matrix in column major ordering
380:   */
381:   PCBDDCSetUpCorrection(pc,&coarse_submat_vals);

383:   /* Compute total number of coarse nodes and setup coarse solver */
384:   PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);

386:   /* free */
387:   PetscFree(coarse_submat_vals);
388:   return(0);
389: }

393: PetscErrorCode PCBDDCResetCustomization(PC pc)
394: {
395:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

399:   PCBDDCGraphResetCSR(pcbddc->mat_graph);
400:   ISDestroy(&pcbddc->user_primal_vertices);
401:   MatNullSpaceDestroy(&pcbddc->NullSpace);
402:   ISDestroy(&pcbddc->NeumannBoundaries);
403:   ISDestroy(&pcbddc->NeumannBoundariesLocal);
404:   ISDestroy(&pcbddc->DirichletBoundaries);
405:   MatNullSpaceDestroy(&pcbddc->onearnullspace);
406:   PetscFree(pcbddc->onearnullvecs_state);
407:   ISDestroy(&pcbddc->DirichletBoundariesLocal);
408:   PCBDDCSetDofsSplitting(pc,0,NULL);
409:   PCBDDCSetDofsSplittingLocal(pc,0,NULL);
410:   return(0);
411: }

415: PetscErrorCode PCBDDCResetTopography(PC pc)
416: {
417:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

421:   MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
422:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
423:   MatDestroy(&pcbddc->ConstraintMatrix);
424:   PCBDDCGraphReset(pcbddc->mat_graph);
425:   PCBDDCSubSchursReset(pcbddc->sub_schurs);
426:   return(0);
427: }

431: PetscErrorCode PCBDDCResetSolvers(PC pc)
432: {
433:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
434:   PetscScalar    *array;

438:   VecDestroy(&pcbddc->coarse_vec);
439:   if (pcbddc->coarse_phi_B) {
440:     MatDenseGetArray(pcbddc->coarse_phi_B,&array);
441:     PetscFree(array);
442:   }
443:   MatDestroy(&pcbddc->coarse_phi_B);
444:   MatDestroy(&pcbddc->coarse_phi_D);
445:   MatDestroy(&pcbddc->coarse_psi_B);
446:   MatDestroy(&pcbddc->coarse_psi_D);
447:   VecDestroy(&pcbddc->vec1_P);
448:   VecDestroy(&pcbddc->vec1_C);
449:   MatDestroy(&pcbddc->local_auxmat2);
450:   MatDestroy(&pcbddc->local_auxmat1);
451:   VecDestroy(&pcbddc->vec1_R);
452:   VecDestroy(&pcbddc->vec2_R);
453:   ISDestroy(&pcbddc->is_R_local);
454:   VecScatterDestroy(&pcbddc->R_to_B);
455:   VecScatterDestroy(&pcbddc->R_to_D);
456:   VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
457:   KSPDestroy(&pcbddc->ksp_D);
458:   KSPDestroy(&pcbddc->ksp_R);
459:   KSPDestroy(&pcbddc->coarse_ksp);
460:   MatDestroy(&pcbddc->local_mat);
461:   PetscFree(pcbddc->primal_indices_local_idxs);
462:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
463:   PetscFree(pcbddc->global_primal_indices);
464:   ISDestroy(&pcbddc->coarse_subassembling);
465:   ISDestroy(&pcbddc->coarse_subassembling_init);
466:   return(0);
467: }

471: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
472: {
473:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
474:   PC_IS          *pcis = (PC_IS*)pc->data;
475:   VecType        impVecType;
476:   PetscInt       n_constraints,n_R,old_size;

480:   if (!pcbddc->ConstraintMatrix) {
481:     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
482:   }
483:   /* get sizes */
484:   n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
485:   n_R = pcis->n-pcbddc->n_vertices;
486:   VecGetType(pcis->vec1_N,&impVecType);
487:   /* local work vectors (try to avoid unneeded work)*/
488:   /* R nodes */
489:   old_size = -1;
490:   if (pcbddc->vec1_R) {
491:     VecGetSize(pcbddc->vec1_R,&old_size);
492:   }
493:   if (n_R != old_size) {
494:     VecDestroy(&pcbddc->vec1_R);
495:     VecDestroy(&pcbddc->vec2_R);
496:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
497:     VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
498:     VecSetType(pcbddc->vec1_R,impVecType);
499:     VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
500:   }
501:   /* local primal dofs */
502:   old_size = -1;
503:   if (pcbddc->vec1_P) {
504:     VecGetSize(pcbddc->vec1_P,&old_size);
505:   }
506:   if (pcbddc->local_primal_size != old_size) {
507:     VecDestroy(&pcbddc->vec1_P);
508:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
509:     VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
510:     VecSetType(pcbddc->vec1_P,impVecType);
511:   }
512:   /* local explicit constraints */
513:   old_size = -1;
514:   if (pcbddc->vec1_C) {
515:     VecGetSize(pcbddc->vec1_C,&old_size);
516:   }
517:   if (n_constraints && n_constraints != old_size) {
518:     VecDestroy(&pcbddc->vec1_C);
519:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
520:     VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
521:     VecSetType(pcbddc->vec1_C,impVecType);
522:   }
523:   return(0);
524: }

528: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
529: {
530:   PetscErrorCode  ierr;
531:   /* pointers to pcis and pcbddc */
532:   PC_IS*          pcis = (PC_IS*)pc->data;
533:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
534:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
535:   /* submatrices of local problem */
536:   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
537:   /* submatrices of local coarse problem */
538:   Mat             S_VV,S_CV,S_VC,S_CC;
539:   /* working matrices */
540:   Mat             C_CR;
541:   /* additional working stuff */
542:   PC              pc_R;
543:   Mat             F;
544:   PetscBool       isLU,isCHOL,isILU;

546:   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
547:   PetscScalar     *work;
548:   PetscInt        *idx_V_B;
549:   PetscInt        n,n_vertices,n_constraints;
550:   PetscInt        i,n_R,n_D,n_B;
551:   PetscBool       unsymmetric_check;
552:   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
553:   MatType         impMatType;
554:   /* some shortcuts to scalars */
555:   PetscScalar     one=1.0,m_one=-1.0;

558:   n_vertices = pcbddc->n_vertices;
559:   n_constraints = pcbddc->local_primal_size-n_vertices;
560:   /* Set Non-overlapping dimensions */
561:   n_B = pcis->n_B;
562:   n_D = pcis->n - n_B;
563:   n_R = pcis->n - n_vertices;

565:   /* Set types for local objects needed by BDDC precondtioner */
566:   impMatType = MATSEQDENSE;

568:   /* vertices in boundary numbering */
569:   PetscMalloc1(n_vertices,&idx_V_B);
570:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
571:   if (i != n_vertices) {
572:     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
573:   }

575:   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
576:   PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
577:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
578:   MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);
579:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
580:   MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);
581:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
582:   MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);
583:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
584:   MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);

586:   unsymmetric_check = PETSC_FALSE;
587:   /* allocate workspace */
588:   n = 0;
589:   if (n_constraints) {
590:     n += n_R*n_constraints;
591:   }
592:   if (n_vertices) {
593:     n = PetscMax(2*n_R*n_vertices,n);
594:     n = PetscMax((n_R+n_B)*n_vertices,n);
595:   }
596:   if (!pcbddc->symmetric_primal) {
597:     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
598:     unsymmetric_check = PETSC_TRUE;
599:   }
600:   PetscMalloc1(n,&work);

602:   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
603:   KSPGetPC(pcbddc->ksp_R,&pc_R);
604:   PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
605:   PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);
606:   PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
607:   if (isLU || isILU || isCHOL) {
608:     PCFactorGetMatrix(pc_R,&F);
609:   } else if (sub_schurs->reuse_mumps) {
610:     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
611:     MatFactorType type;

613:     F = reuse_mumps->F;
614:     MatGetFactorType(F,&type);
615:     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
616:   } else {
617:     F = NULL;
618:   }

620:   /* Precompute stuffs needed for preprocessing and application of BDDC*/
621:   if (n_constraints) {
622:     Mat         M1,M2,M3;
623:     Mat         auxmat;
624:     IS          is_aux;
625:     PetscScalar *array,*array2;

627:     MatDestroy(&pcbddc->local_auxmat1);
628:     MatDestroy(&pcbddc->local_auxmat2);

630:     /* Extract constraints on R nodes: C_{CR}  */
631:     ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
632:     MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
633:     MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);

635:     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
636:     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
637:     PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));
638:     for (i=0;i<n_constraints;i++) {
639:       const PetscScalar *row_cmat_values;
640:       const PetscInt    *row_cmat_indices;
641:       PetscInt          size_of_constraint,j;

643:       MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
644:       for (j=0;j<size_of_constraint;j++) {
645:         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
646:       }
647:       MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
648:     }
649:     MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);
650:     if (F) {
651:       Mat B;

653:       MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
654:       MatMatSolve(F,B,local_auxmat2_R);
655:       MatDestroy(&B);
656:     } else {
657:       PetscScalar *marr;

659:       MatDenseGetArray(local_auxmat2_R,&marr);
660:       for (i=0;i<n_constraints;i++) {
661:         VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
662:         VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);
663:         KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
664:         VecResetArray(pcbddc->vec1_R);
665:         VecResetArray(pcbddc->vec2_R);
666:       }
667:       MatDenseRestoreArray(local_auxmat2_R,&marr);
668:     }
669:     if (!pcbddc->switch_static) {
670:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
671:       MatDenseGetArray(pcbddc->local_auxmat2,&array);
672:       MatDenseGetArray(local_auxmat2_R,&array2);
673:       for (i=0;i<n_constraints;i++) {
674:         VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);
675:         VecPlaceArray(pcis->vec1_B,array+i*n_B);
676:         VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
677:         VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
678:         VecResetArray(pcis->vec1_B);
679:         VecResetArray(pcbddc->vec1_R);
680:       }
681:       MatDenseRestoreArray(local_auxmat2_R,&array2);
682:       MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
683:       MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
684:     } else {
685:       PetscObjectReference((PetscObject)local_auxmat2_R);
686:       pcbddc->local_auxmat2 = local_auxmat2_R;
687:       MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
688:     }
689:     ISDestroy(&is_aux);
690:     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
691:     MatScale(M3,m_one);
692:     MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);
693:     MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);
694:     if (isCHOL) {
695:       MatCholeskyFactor(M3,NULL,NULL);
696:     } else {
697:       MatLUFactor(M3,NULL,NULL,NULL);
698:     }
699:     VecSet(pcbddc->vec1_C,one);
700:     MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);
701:     MatMatSolve(M3,M2,M1);
702:     MatDestroy(&M2);
703:     MatDestroy(&M3);
704:     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
705:     MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
706:     MatDestroy(&auxmat);
707:     MatCopy(M1,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
708:     MatDestroy(&M1);
709:   }
710:   /* Get submatrices from subdomain matrix */
711:   if (n_vertices) {
712:     IS is_aux;

714:     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
715:       IS tis;

717:       ISDuplicate(pcbddc->is_R_local,&tis);
718:       ISSort(tis);
719:       ISComplement(tis,0,pcis->n,&is_aux);
720:       ISDestroy(&tis);
721:     } else {
722:       ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
723:     }
724:     MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
725:     MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
726:     MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
727:     ISDestroy(&is_aux);
728:   }

730:   /* Matrix of coarse basis functions (local) */
731:   if (pcbddc->coarse_phi_B) {
732:     PetscInt on_B,on_primal,on_D=n_D;
733:     if (pcbddc->coarse_phi_D) {
734:       MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
735:     }
736:     MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
737:     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
738:       PetscScalar *marray;

740:       MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
741:       PetscFree(marray);
742:       MatDestroy(&pcbddc->coarse_phi_B);
743:       MatDestroy(&pcbddc->coarse_psi_B);
744:       MatDestroy(&pcbddc->coarse_phi_D);
745:       MatDestroy(&pcbddc->coarse_psi_D);
746:     }
747:   }

749:   if (!pcbddc->coarse_phi_B) {
750:     PetscScalar *marray;

752:     n = n_B*pcbddc->local_primal_size;
753:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
754:       n += n_D*pcbddc->local_primal_size;
755:     }
756:     if (!pcbddc->symmetric_primal) {
757:       n *= 2;
758:     }
759:     PetscCalloc1(n,&marray);
760:     MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);
761:     n = n_B*pcbddc->local_primal_size;
762:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
763:       MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);
764:       n += n_D*pcbddc->local_primal_size;
765:     }
766:     if (!pcbddc->symmetric_primal) {
767:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);
768:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
769:         n = n_B*pcbddc->local_primal_size;
770:         MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);
771:       }
772:     } else {
773:       PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
774:       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
775:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
776:         PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
777:         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
778:       }
779:     }
780:   }
781:   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
782:   /* vertices */
783:   if (n_vertices) {

785:     MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);

787:     if (n_R) {
788:       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
789:       PetscBLASInt B_N,B_one = 1;
790:       PetscScalar  *x,*y;
791:       PetscBool    isseqaij;

793:       MatScale(A_RV,m_one);
794:       MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);
795:       MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
796:       if (F) { /* TODO could be optimized for symmetric problems */
797:         MatMatSolve(F,A_RV,A_RRmA_RV);
798:       } else {
799:         MatDenseGetArray(A_RV,&y);
800:         for (i=0;i<n_vertices;i++) {
801:           VecPlaceArray(pcbddc->vec1_R,y+i*n_R);
802:           VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
803:           KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
804:           VecResetArray(pcbddc->vec1_R);
805:           VecResetArray(pcbddc->vec2_R);
806:         }
807:         MatDenseRestoreArray(A_RV,&y);
808:       }
809:       MatDestroy(&A_RV);
810:       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
811:       if (n_constraints) {
812:         Mat B;

814:         PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));
815:         for (i=0;i<n_vertices;i++) {
816:           VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
817:           VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);
818:           VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
819:           VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
820:           VecResetArray(pcis->vec1_B);
821:           VecResetArray(pcbddc->vec1_R);
822:         }
823:         MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);
824:         MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);
825:         MatDestroy(&B);
826:         MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);
827:         MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
828:         MatScale(S_CV,m_one);
829:         PetscBLASIntCast(n_R*n_vertices,&B_N);
830:         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
831:         MatDestroy(&B);
832:       }
833:       PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);
834:       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
835:         MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);
836:       }
837:       MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
838:       MatDestroy(&A_RRmA_RV);
839:       PetscBLASIntCast(n_vertices*n_vertices,&B_N);
840:       MatDenseGetArray(A_VV,&x);
841:       MatDenseGetArray(S_VVt,&y);
842:       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
843:       MatDenseRestoreArray(A_VV,&x);
844:       MatDenseRestoreArray(S_VVt,&y);
845:       MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
846:       MatDestroy(&S_VVt);
847:     } else {
848:       MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
849:     }
850:     MatDestroy(&A_VV);
851:     /* coarse basis functions */
852:     for (i=0;i<n_vertices;i++) {
853:       PetscScalar *y;

855:       VecPlaceArray(pcbddc->vec1_R,work+n_R*i);
856:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
857:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
858:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
859:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
860:       y[n_B*i+idx_V_B[i]] = 1.0;
861:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
862:       VecResetArray(pcis->vec1_B);

864:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
865:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
866:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
867:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
868:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
869:         VecResetArray(pcis->vec1_D);
870:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
871:       }
872:       VecResetArray(pcbddc->vec1_R);
873:     }
874:     /* if n_R == 0 the object is not destroyed */
875:     MatDestroy(&A_RV);
876:   }

878:   if (n_constraints) {
879:     Mat B;

881:     MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
882:     MatScale(S_CC,m_one);
883:     MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
884:     MatScale(S_CC,m_one);
885:     if (n_vertices) {
886:       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
887:         MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
888:       } else {
889:         Mat S_VCt;

891:         MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
892:         MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
893:         MatDestroy(&S_VCt);
894:       }
895:     }
896:     MatDestroy(&B);
897:     /* coarse basis functions */
898:     for (i=0;i<n_constraints;i++) {
899:       PetscScalar *y;

901:       VecPlaceArray(pcbddc->vec1_R,work+n_R*i);
902:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
903:       VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
904:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
905:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
906:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
907:       VecResetArray(pcis->vec1_B);
908:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
909:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
910:         VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
911:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
912:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
913:         VecResetArray(pcis->vec1_D);
914:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
915:       }
916:       VecResetArray(pcbddc->vec1_R);
917:     }
918:   }
919:   if (n_constraints) {
920:     MatDestroy(&local_auxmat2_R);
921:   }

923:   /* compute other basis functions for non-symmetric problems */
924:   if (!pcbddc->symmetric_primal) {

926:     if (n_constraints) {
927:       Mat S_CCT,B_C;

929:       /* this is a lazy thing */
930:       MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);
931:       MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);
932:       MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
933:       MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);
934:       MatDestroy(&S_CCT);
935:       if (n_vertices) {
936:         Mat B_V,S_VCT;

938:         MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);
939:         MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
940:         MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);
941:         MatDestroy(&B_V);
942:         MatDestroy(&S_VCT);
943:       }
944:       MatDestroy(&B_C);
945:     } else { /* if there are no constraints, reset work */
946:       PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));
947:     }
948:     if (n_vertices && n_R) {
949:       Mat          A_VRT;
950:       PetscScalar  *marray;
951:       PetscBLASInt B_N,B_one = 1;

953:       MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);
954:       MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);
955:       MatDenseGetArray(A_VRT,&marray);
956:       PetscBLASIntCast(n_vertices*n_R,&B_N);
957:       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
958:       MatDenseRestoreArray(A_VRT,&marray);
959:       MatDestroy(&A_VRT);
960:     }

962:     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
963:       for (i=0;i<pcbddc->local_primal_size;i++) {
964:         VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
965:         VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);
966:         MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);
967:         VecResetArray(pcbddc->vec1_R);
968:         VecResetArray(pcbddc->vec2_R);
969:       }
970:     } else {
971:       for (i=0;i<pcbddc->local_primal_size;i++) {
972:         VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
973:         VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);
974:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
975:         VecResetArray(pcbddc->vec1_R);
976:         VecResetArray(pcbddc->vec2_R);
977:       }
978:     }
979:     /* coarse basis functions */
980:     for (i=0;i<pcbddc->local_primal_size;i++) {
981:       PetscScalar *y;

983:       VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));
984:       MatDenseGetArray(pcbddc->coarse_psi_B,&y);
985:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
986:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
987:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
988:       if (i<n_vertices) {
989:         y[n_B*i+idx_V_B[i]] = 1.0;
990:       }
991:       MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
992:       VecResetArray(pcis->vec1_B);

994:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
995:         MatDenseGetArray(pcbddc->coarse_psi_D,&y);
996:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
997:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
998:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
999:         VecResetArray(pcis->vec1_D);
1000:         MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
1001:       }
1002:       VecResetArray(pcbddc->vec1_R);
1003:     }
1004:   }
1005:   /* free memory */
1006:   PetscFree(idx_V_B);
1007:   MatDestroy(&S_VV);
1008:   MatDestroy(&S_CV);
1009:   MatDestroy(&S_VC);
1010:   MatDestroy(&S_CC);
1011:   PetscFree(work);
1012:   if (n_vertices) {
1013:     MatDestroy(&A_VR);
1014:   }
1015:   if (n_constraints) {
1016:     MatDestroy(&C_CR);
1017:   }
1018:   /* Checking coarse_sub_mat and coarse basis functios */
1019:   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1020:   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1021:   if (pcbddc->dbg_flag) {
1022:     Mat         coarse_sub_mat;
1023:     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1024:     Mat         coarse_phi_D,coarse_phi_B;
1025:     Mat         coarse_psi_D,coarse_psi_B;
1026:     Mat         A_II,A_BB,A_IB,A_BI;
1027:     Mat         C_B,CPHI;
1028:     IS          is_dummy;
1029:     Vec         mones;
1030:     MatType     checkmattype=MATSEQAIJ;
1031:     PetscReal   real_value;

1033:     MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
1034:     MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
1035:     MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
1036:     MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
1037:     MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
1038:     MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
1039:     if (unsymmetric_check) {
1040:       MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
1041:       MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
1042:     }
1043:     MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);

1045:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
1046:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
1047:     PetscViewerFlush(pcbddc->dbg_viewer);
1048:     if (unsymmetric_check) {
1049:       MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1050:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
1051:       MatDestroy(&AUXMAT);
1052:       MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1053:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
1054:       MatDestroy(&AUXMAT);
1055:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1056:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
1057:       MatDestroy(&AUXMAT);
1058:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1059:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
1060:       MatDestroy(&AUXMAT);
1061:     } else {
1062:       MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
1063:       MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
1064:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1065:       MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
1066:       MatDestroy(&AUXMAT);
1067:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1068:       MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
1069:       MatDestroy(&AUXMAT);
1070:     }
1071:     MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
1072:     MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
1073:     MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
1074:     MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);
1075:     MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
1076:     MatNorm(TM1,NORM_FROBENIUS,&real_value);
1077:     PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
1078:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);

1080:     /* check constraints */
1081:     ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);
1082:     MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1083:     MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
1084:     MatCreateVecs(CPHI,&mones,NULL);
1085:     VecSet(mones,-1.0);
1086:     MatDiagonalSet(CPHI,mones,ADD_VALUES);
1087:     MatNorm(CPHI,NORM_FROBENIUS,&real_value);
1088:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
1089:     if (unsymmetric_check) {
1090:       MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
1091:       VecSet(mones,-1.0);
1092:       MatDiagonalSet(CPHI,mones,ADD_VALUES);
1093:       MatNorm(CPHI,NORM_FROBENIUS,&real_value);
1094:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
1095:     }
1096:     MatDestroy(&C_B);
1097:     MatDestroy(&CPHI);
1098:     ISDestroy(&is_dummy);
1099:     VecDestroy(&mones);

1101:     PetscViewerFlush(pcbddc->dbg_viewer);
1102:     MatDestroy(&A_II);
1103:     MatDestroy(&A_BB);
1104:     MatDestroy(&A_IB);
1105:     MatDestroy(&A_BI);
1106:     MatDestroy(&TM1);
1107:     MatDestroy(&TM2);
1108:     MatDestroy(&TM3);
1109:     MatDestroy(&TM4);
1110:     MatDestroy(&coarse_phi_D);
1111:     MatDestroy(&coarse_phi_B);
1112:     if (unsymmetric_check) {
1113:       MatDestroy(&coarse_psi_D);
1114:       MatDestroy(&coarse_psi_B);
1115:     }
1116:     MatDestroy(&coarse_sub_mat);
1117:   }
1118:   /* get back data */
1119:   *coarse_submat_vals_n = coarse_submat_vals;
1120:   return(0);
1121: }

1125: PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1126: {
1127:   Mat            *work_mat;
1128:   IS             isrow_s,iscol_s;
1129:   PetscBool      rsorted,csorted;
1130:   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;

1134:   ISSorted(isrow,&rsorted);
1135:   ISSorted(iscol,&csorted);
1136:   ISGetLocalSize(isrow,&rsize);
1137:   ISGetLocalSize(iscol,&csize);

1139:   if (!rsorted) {
1140:     const PetscInt *idxs;
1141:     PetscInt *idxs_sorted,i;

1143:     PetscMalloc1(rsize,&idxs_perm_r);
1144:     PetscMalloc1(rsize,&idxs_sorted);
1145:     for (i=0;i<rsize;i++) {
1146:       idxs_perm_r[i] = i;
1147:     }
1148:     ISGetIndices(isrow,&idxs);
1149:     PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
1150:     for (i=0;i<rsize;i++) {
1151:       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1152:     }
1153:     ISRestoreIndices(isrow,&idxs);
1154:     ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
1155:   } else {
1156:     PetscObjectReference((PetscObject)isrow);
1157:     isrow_s = isrow;
1158:   }

1160:   if (!csorted) {
1161:     if (isrow == iscol) {
1162:       PetscObjectReference((PetscObject)isrow_s);
1163:       iscol_s = isrow_s;
1164:     } else {
1165:       const PetscInt *idxs;
1166:       PetscInt *idxs_sorted,i;

1168:       PetscMalloc1(csize,&idxs_perm_c);
1169:       PetscMalloc1(csize,&idxs_sorted);
1170:       for (i=0;i<csize;i++) {
1171:         idxs_perm_c[i] = i;
1172:       }
1173:       ISGetIndices(iscol,&idxs);
1174:       PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
1175:       for (i=0;i<csize;i++) {
1176:         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1177:       }
1178:       ISRestoreIndices(iscol,&idxs);
1179:       ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
1180:     }
1181:   } else {
1182:     PetscObjectReference((PetscObject)iscol);
1183:     iscol_s = iscol;
1184:   }

1186:   MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);

1188:   if (!rsorted || !csorted) {
1189:     Mat      new_mat;
1190:     IS       is_perm_r,is_perm_c;

1192:     if (!rsorted) {
1193:       PetscInt *idxs_r,i;
1194:       PetscMalloc1(rsize,&idxs_r);
1195:       for (i=0;i<rsize;i++) {
1196:         idxs_r[idxs_perm_r[i]] = i;
1197:       }
1198:       PetscFree(idxs_perm_r);
1199:       ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
1200:     } else {
1201:       ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
1202:     }
1203:     ISSetPermutation(is_perm_r);

1205:     if (!csorted) {
1206:       if (isrow_s == iscol_s) {
1207:         PetscObjectReference((PetscObject)is_perm_r);
1208:         is_perm_c = is_perm_r;
1209:       } else {
1210:         PetscInt *idxs_c,i;
1211:         PetscMalloc1(csize,&idxs_c);
1212:         for (i=0;i<csize;i++) {
1213:           idxs_c[idxs_perm_c[i]] = i;
1214:         }
1215:         PetscFree(idxs_perm_c);
1216:         ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
1217:       }
1218:     } else {
1219:       ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
1220:     }
1221:     ISSetPermutation(is_perm_c);

1223:     MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
1224:     MatDestroy(&work_mat[0]);
1225:     work_mat[0] = new_mat;
1226:     ISDestroy(&is_perm_r);
1227:     ISDestroy(&is_perm_c);
1228:   }

1230:   PetscObjectReference((PetscObject)work_mat[0]);
1231:   *B = work_mat[0];
1232:   MatDestroyMatrices(1,&work_mat);
1233:   ISDestroy(&isrow_s);
1234:   ISDestroy(&iscol_s);
1235:   return(0);
1236: }

1240: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1241: {
1242:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1243:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1244:   Mat            new_mat;
1245:   IS             is_local,is_global;
1246:   PetscInt       local_size;
1247:   PetscBool      isseqaij;

1251:   MatDestroy(&pcbddc->local_mat);
1252:   MatGetSize(matis->A,&local_size,NULL);
1253:   ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
1254:   ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);
1255:   ISDestroy(&is_local);
1256:   MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
1257:   ISDestroy(&is_global);

1259:   /* check */
1260:   if (pcbddc->dbg_flag) {
1261:     Vec       x,x_change;
1262:     PetscReal error;

1264:     MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
1265:     VecSetRandom(x,NULL);
1266:     MatMult(ChangeOfBasisMatrix,x,x_change);
1267:     VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
1268:     VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
1269:     MatMult(new_mat,matis->x,matis->y);
1270:     VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
1271:     VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
1272:     VecAXPY(x,-1.0,x_change);
1273:     VecNorm(x,NORM_INFINITY,&error);
1274:     PetscViewerFlush(pcbddc->dbg_viewer);
1275:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);
1276:     VecDestroy(&x);
1277:     VecDestroy(&x_change);
1278:   }

1280:   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1281:   PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
1282:   if (isseqaij) {
1283:     MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
1284:   } else {
1285:     Mat work_mat;
1286:     MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
1287:     MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
1288:     MatDestroy(&work_mat);
1289:   }
1290:   if (matis->A->symmetric_set) {
1291:     MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
1292: #if !defined(PETSC_USE_COMPLEX)
1293:     MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
1294: #endif
1295:   }
1296:   /*
1297:   PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
1298:   MatView(new_mat,(PetscViewer)0);
1299:   */
1300:   MatDestroy(&new_mat);
1301:   return(0);
1302: }

1306: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1307: {
1308:   PC_IS*          pcis = (PC_IS*)(pc->data);
1309:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1310:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1311:   PetscInt        *idx_R_local=NULL;
1312:   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1313:   PetscInt        vbs,bs;
1314:   PetscBT         bitmask=NULL;
1315:   PetscErrorCode  ierr;

1318:   /*
1319:     No need to setup local scatters if
1320:       - primal space is unchanged
1321:         AND
1322:       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1323:         AND
1324:       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1325:   */
1326:   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1327:     return(0);
1328:   }
1329:   /* destroy old objects */
1330:   ISDestroy(&pcbddc->is_R_local);
1331:   VecScatterDestroy(&pcbddc->R_to_B);
1332:   VecScatterDestroy(&pcbddc->R_to_D);
1333:   /* Set Non-overlapping dimensions */
1334:   n_B = pcis->n_B;
1335:   n_D = pcis->n - n_B;
1336:   n_vertices = pcbddc->n_vertices;

1338:   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */

1340:   /* create auxiliary bitmask and allocate workspace */
1341:   if (!sub_schurs->reuse_mumps) {
1342:     PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
1343:     PetscBTCreate(pcis->n,&bitmask);
1344:     for (i=0;i<n_vertices;i++) {
1345:       PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
1346:     }

1348:     for (i=0, n_R=0; i<pcis->n; i++) {
1349:       if (!PetscBTLookup(bitmask,i)) {
1350:         idx_R_local[n_R++] = i;
1351:       }
1352:     }
1353:   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1354:     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;

1356:     ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);
1357:     ISGetLocalSize(reuse_mumps->is_R,&n_R);
1358:   }

1360:   /* Block code */
1361:   vbs = 1;
1362:   MatGetBlockSize(pcbddc->local_mat,&bs);
1363:   if (bs>1 && !(n_vertices%bs)) {
1364:     PetscBool is_blocked = PETSC_TRUE;
1365:     PetscInt  *vary;
1366:     if (!sub_schurs->reuse_mumps) {
1367:       PetscMalloc1(pcis->n/bs,&vary);
1368:       PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
1369:       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1370:       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
1371:       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1372:       for (i=0; i<pcis->n/bs; i++) {
1373:         if (vary[i]!=0 && vary[i]!=bs) {
1374:           is_blocked = PETSC_FALSE;
1375:           break;
1376:         }
1377:       }
1378:       PetscFree(vary);
1379:     } else {
1380:       /* Verify directly the R set */
1381:       for (i=0; i<n_R/bs; i++) {
1382:         PetscInt j,node=idx_R_local[bs*i];
1383:         for (j=1; j<bs; j++) {
1384:           if (node != idx_R_local[bs*i+j]-j) {
1385:             is_blocked = PETSC_FALSE;
1386:             break;
1387:           }
1388:         }
1389:       }
1390:     }
1391:     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1392:       vbs = bs;
1393:       for (i=0;i<n_R/vbs;i++) {
1394:         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1395:       }
1396:     }
1397:   }
1398:   ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
1399:   if (sub_schurs->reuse_mumps) {
1400:     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;

1402:     ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);
1403:     ISDestroy(&reuse_mumps->is_R);
1404:     PetscObjectReference((PetscObject)pcbddc->is_R_local);
1405:     reuse_mumps->is_R = pcbddc->is_R_local;
1406:   } else {
1407:     PetscFree(idx_R_local);
1408:   }

1410:   /* print some info if requested */
1411:   if (pcbddc->dbg_flag) {
1412:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
1413:     PetscViewerFlush(pcbddc->dbg_viewer);
1414:     PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
1415:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
1416:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);
1417:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);
1418:     PetscViewerFlush(pcbddc->dbg_viewer);
1419:   }

1421:   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1422:   if (!sub_schurs->reuse_mumps) {
1423:     IS       is_aux1,is_aux2;
1424:     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;

1426:     ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
1427:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
1428:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
1429:     ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
1430:     for (i=0; i<n_D; i++) {
1431:       PetscBTSet(bitmask,is_indices[i]);
1432:     }
1433:     ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
1434:     for (i=0, j=0; i<n_R; i++) {
1435:       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1436:         aux_array1[j++] = i;
1437:       }
1438:     }
1439:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
1440:     ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
1441:     for (i=0, j=0; i<n_B; i++) {
1442:       if (!PetscBTLookup(bitmask,is_indices[i])) {
1443:         aux_array2[j++] = i;
1444:       }
1445:     }
1446:     ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
1447:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
1448:     VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
1449:     ISDestroy(&is_aux1);
1450:     ISDestroy(&is_aux2);

1452:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1453:       PetscMalloc1(n_D,&aux_array1);
1454:       for (i=0, j=0; i<n_R; i++) {
1455:         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1456:           aux_array1[j++] = i;
1457:         }
1458:       }
1459:       ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
1460:       VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
1461:       ISDestroy(&is_aux1);
1462:     }
1463:     PetscBTDestroy(&bitmask);
1464:     ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
1465:   } else {
1466:     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1467:     IS               tis;
1468:     PetscInt         schur_size;

1470:     ISGetLocalSize(reuse_mumps->is_B,&schur_size);
1471:     ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
1472:     VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);
1473:     ISDestroy(&tis);
1474:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1475:       ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
1476:       VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
1477:       ISDestroy(&tis);
1478:     }
1479:   }
1480:   return(0);
1481: }


1486: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1487: {
1488:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1489:   PC_IS          *pcis = (PC_IS*)pc->data;
1490:   PC             pc_temp;
1491:   Mat            A_RR;
1492:   MatReuse       reuse;
1493:   PetscScalar    m_one = -1.0;
1494:   PetscReal      value;
1495:   PetscInt       n_D,n_R;
1496:   PetscBool      use_exact,use_exact_reduced,issbaij;
1498:   /* prefixes stuff */
1499:   char           dir_prefix[256],neu_prefix[256],str_level[16];
1500:   size_t         len;


1504:   /* compute prefixes */
1505:   PetscStrcpy(dir_prefix,"");
1506:   PetscStrcpy(neu_prefix,"");
1507:   if (!pcbddc->current_level) {
1508:     PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);
1509:     PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);
1510:     PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
1511:     PetscStrcat(neu_prefix,"pc_bddc_neumann_");
1512:   } else {
1513:     PetscStrcpy(str_level,"");
1514:     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1515:     PetscStrlen(((PetscObject)pc)->prefix,&len);
1516:     len -= 15; /* remove "pc_bddc_coarse_" */
1517:     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1518:     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1519:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
1520:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
1521:     PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
1522:     PetscStrcat(neu_prefix,"pc_bddc_neumann_");
1523:     PetscStrcat(dir_prefix,str_level);
1524:     PetscStrcat(neu_prefix,str_level);
1525:   }

1527:   /* DIRICHLET PROBLEM */
1528:   if (dirichlet) {
1529:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1530:     if (pcbddc->local_mat->symmetric_set) {
1531:       MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
1532:     }
1533:     /* Matrix for Dirichlet problem is pcis->A_II */
1534:     n_D = pcis->n - pcis->n_B;
1535:     if (!pcbddc->ksp_D) { /* create object if not yet build */
1536:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
1537:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
1538:       /* default */
1539:       KSPSetType(pcbddc->ksp_D,KSPPREONLY);
1540:       KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
1541:       PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);
1542:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
1543:       if (issbaij) {
1544:         PCSetType(pc_temp,PCCHOLESKY);
1545:       } else {
1546:         PCSetType(pc_temp,PCLU);
1547:       }
1548:       /* Allow user's customization */
1549:       KSPSetFromOptions(pcbddc->ksp_D);
1550:       PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
1551:     }
1552:     KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);
1553:     if (sub_schurs->reuse_mumps) {
1554:       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;

1556:       KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);
1557:     }
1558:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1559:     if (!n_D) {
1560:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
1561:       PCSetType(pc_temp,PCNONE);
1562:     }
1563:     /* Set Up KSP for Dirichlet problem of BDDC */
1564:     KSPSetUp(pcbddc->ksp_D);
1565:     /* set ksp_D into pcis data */
1566:     KSPDestroy(&pcis->ksp_D);
1567:     PetscObjectReference((PetscObject)pcbddc->ksp_D);
1568:     pcis->ksp_D = pcbddc->ksp_D;
1569:   }

1571:   /* NEUMANN PROBLEM */
1572:   A_RR = 0;
1573:   if (neumann) {
1574:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1575:     PetscInt        ibs,mbs;
1576:     PetscBool       issbaij;
1577:     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
1578:     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1579:     ISGetSize(pcbddc->is_R_local,&n_R);
1580:     if (pcbddc->ksp_R) { /* already created ksp */
1581:       PetscInt nn_R;
1582:       KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
1583:       PetscObjectReference((PetscObject)A_RR);
1584:       MatGetSize(A_RR,&nn_R,NULL);
1585:       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1586:         KSPReset(pcbddc->ksp_R);
1587:         MatDestroy(&A_RR);
1588:         reuse = MAT_INITIAL_MATRIX;
1589:       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1590:         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1591:           MatDestroy(&A_RR);
1592:           reuse = MAT_INITIAL_MATRIX;
1593:         } else { /* safe to reuse the matrix */
1594:           reuse = MAT_REUSE_MATRIX;
1595:         }
1596:       }
1597:       /* last check */
1598:       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1599:         MatDestroy(&A_RR);
1600:         reuse = MAT_INITIAL_MATRIX;
1601:       }
1602:     } else { /* first time, so we need to create the matrix */
1603:       reuse = MAT_INITIAL_MATRIX;
1604:     }
1605:     /* extract A_RR */
1606:     MatGetBlockSize(pcbddc->local_mat,&mbs);
1607:     ISGetBlockSize(pcbddc->is_R_local,&ibs);
1608:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
1609:     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
1610:       if (matis->A == pcbddc->local_mat) {
1611:         MatDestroy(&pcbddc->local_mat);
1612:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
1613:       } else {
1614:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);
1615:       }
1616:     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
1617:       if (matis->A == pcbddc->local_mat) {
1618:         MatDestroy(&pcbddc->local_mat);
1619:         MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
1620:       } else {
1621:         MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);
1622:       }
1623:     }
1624:     if (!sub_schurs->reuse_mumps) {
1625:       MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
1626:       if (pcbddc->local_mat->symmetric_set) {
1627:         MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
1628:       }
1629:     } else {
1630:       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;

1632:       MatDestroy(&A_RR);
1633:       PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);
1634:       PetscObjectReference((PetscObject)A_RR);
1635:     }
1636:     if (!pcbddc->ksp_R) { /* create object if not present */
1637:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
1638:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
1639:       /* default */
1640:       KSPSetType(pcbddc->ksp_R,KSPPREONLY);
1641:       KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
1642:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
1643:       PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
1644:       if (issbaij) {
1645:         PCSetType(pc_temp,PCCHOLESKY);
1646:       } else {
1647:         PCSetType(pc_temp,PCLU);
1648:       }
1649:       /* Allow user's customization */
1650:       KSPSetFromOptions(pcbddc->ksp_R);
1651:       PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
1652:     }
1653:     KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
1654:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1655:     if (!n_R) {
1656:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
1657:       PCSetType(pc_temp,PCNONE);
1658:     }
1659:     /* Reuse MUMPS solver if it is present */
1660:     if (sub_schurs->reuse_mumps) {
1661:       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;

1663:       KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);
1664:     }
1665:     /* Set Up KSP for Neumann problem of BDDC */
1666:     KSPSetUp(pcbddc->ksp_R);
1667:   }
1668:   /* free Neumann problem's matrix */
1669:   MatDestroy(&A_RR);

1671:   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1672:   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1673:     if (pcbddc->dbg_flag) {
1674:       PetscViewerFlush(pcbddc->dbg_viewer);
1675:       PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
1676:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
1677:     }
1678:     if (dirichlet) { /* Dirichlet */
1679:       VecSetRandom(pcis->vec1_D,NULL);
1680:       MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
1681:       KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
1682:       VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
1683:       VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
1684:       /* need to be adapted? */
1685:       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1686:       MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1687:       PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);
1688:       /* print info */
1689:       if (pcbddc->dbg_flag) {
1690:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
1691:         PetscViewerFlush(pcbddc->dbg_viewer);
1692:       }
1693:       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1694:         PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);
1695:       }
1696:     }
1697:     if (neumann) { /* Neumann */
1698:       KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);
1699:       VecSetRandom(pcbddc->vec1_R,NULL);
1700:       MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
1701:       KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
1702:       VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
1703:       VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
1704:       /* need to be adapted? */
1705:       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1706:       MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1707:       /* print info */
1708:       if (pcbddc->dbg_flag) {
1709:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
1710:         PetscViewerFlush(pcbddc->dbg_viewer);
1711:       }
1712:       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1713:         PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);
1714:       }
1715:     }
1716:   }
1717:   return(0);
1718: }

1722: static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1723: {
1724:   PetscErrorCode  ierr;
1725:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1726:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

1729:   if (!sub_schurs->reuse_mumps) {
1730:     VecSet(pcbddc->vec1_R,0.);
1731:   }
1732:   if (!pcbddc->switch_static) {
1733:     if (applytranspose && pcbddc->local_auxmat1) {
1734:       MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
1735:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
1736:     }
1737:     if (!sub_schurs->reuse_mumps) {
1738:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1739:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1740:     } else {
1741:       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;

1743:       VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
1744:       VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
1745:     }
1746:   } else {
1747:     VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1748:     VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1749:     VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1750:     VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1751:     if (applytranspose && pcbddc->local_auxmat1) {
1752:       MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
1753:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
1754:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1755:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1756:     }
1757:   }
1758:   if (!sub_schurs->reuse_mumps) {
1759:     if (applytranspose) {
1760:       KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
1761:     } else {
1762:       KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
1763:     }
1764: #if defined(PETSC_HAVE_MUMPS)
1765:   } else {
1766:     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;

1768:     if (applytranspose) {
1769:       MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);
1770:     } else {
1771:       MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);
1772:     }
1773: #endif
1774:   }
1775:   VecSet(inout_B,0.);
1776:   if (!pcbddc->switch_static) {
1777:     if (!sub_schurs->reuse_mumps) {
1778:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1779:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1780:     } else {
1781:       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;

1783:       VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
1784:       VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
1785:     }
1786:     if (!applytranspose && pcbddc->local_auxmat1) {
1787:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
1788:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
1789:     }
1790:   } else {
1791:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1792:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1793:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
1794:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
1795:     if (!applytranspose && pcbddc->local_auxmat1) {
1796:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
1797:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
1798:     }
1799:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1800:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1801:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
1802:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
1803:   }
1804:   return(0);
1805: }

1807: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1810: PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1811: {
1813:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1814:   PC_IS*            pcis = (PC_IS*)  (pc->data);
1815:   const PetscScalar zero = 0.0;

1818:   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1819:   if (applytranspose) {
1820:     MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
1821:     if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
1822:   } else {
1823:     MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
1824:     if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
1825:   }
1826:   /* start communications from local primal nodes to rhs of coarse solver */
1827:   VecSet(pcbddc->coarse_vec,zero);
1828:   PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
1829:   PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);

1831:   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1832:   /* TODO remove null space when doing multilevel */
1833:   if (pcbddc->coarse_ksp) {
1834:     Vec rhs,sol;

1836:     KSPGetRhs(pcbddc->coarse_ksp,&rhs);
1837:     KSPGetSolution(pcbddc->coarse_ksp,&sol);
1838:     if (applytranspose) {
1839:       KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
1840:     } else {
1841:       KSPSolve(pcbddc->coarse_ksp,rhs,sol);
1842:     }
1843:   }

1845:   /* Local solution on R nodes */
1846:   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1847:     PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
1848:   }

1850:   /* communications from coarse sol to local primal nodes */
1851:   PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
1852:   PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);

1854:   /* Sum contributions from two levels */
1855:   if (applytranspose) {
1856:     MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
1857:     if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
1858:   } else {
1859:     MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
1860:     if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
1861:   }
1862:   return(0);
1863: }

1867: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1868: {
1870:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1871:   PetscScalar    *array;
1872:   Vec            from,to;

1875:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1876:     from = pcbddc->coarse_vec;
1877:     to = pcbddc->vec1_P;
1878:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1879:       Vec tvec;

1881:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
1882:       VecResetArray(tvec);
1883:       KSPGetSolution(pcbddc->coarse_ksp,&tvec);
1884:       VecGetArray(tvec,&array);
1885:       VecPlaceArray(from,array);
1886:       VecRestoreArray(tvec,&array);
1887:     }
1888:   } else { /* from local to global -> put data in coarse right hand side */
1889:     from = pcbddc->vec1_P;
1890:     to = pcbddc->coarse_vec;
1891:   }
1892:   VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
1893:   return(0);
1894: }

1898: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1899: {
1901:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1902:   PetscScalar    *array;
1903:   Vec            from,to;

1906:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1907:     from = pcbddc->coarse_vec;
1908:     to = pcbddc->vec1_P;
1909:   } else { /* from local to global -> put data in coarse right hand side */
1910:     from = pcbddc->vec1_P;
1911:     to = pcbddc->coarse_vec;
1912:   }
1913:   VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
1914:   if (smode == SCATTER_FORWARD) {
1915:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1916:       Vec tvec;

1918:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
1919:       VecGetArray(to,&array);
1920:       VecPlaceArray(tvec,array);
1921:       VecRestoreArray(to,&array);
1922:     }
1923:   } else {
1924:     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1925:      VecResetArray(from);
1926:     }
1927:   }
1928:   return(0);
1929: }

1931: /* uncomment for testing purposes */
1932: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1935: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1936: {
1937:   PetscErrorCode    ierr;
1938:   PC_IS*            pcis = (PC_IS*)(pc->data);
1939:   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1940:   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1941:   /* one and zero */
1942:   PetscScalar       one=1.0,zero=0.0;
1943:   /* space to store constraints and their local indices */
1944:   PetscScalar       *constraints_data;
1945:   PetscInt          *constraints_idxs,*constraints_idxs_B;
1946:   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
1947:   PetscInt          *constraints_n;
1948:   /* iterators */
1949:   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
1950:   /* BLAS integers */
1951:   PetscBLASInt      lwork,lierr;
1952:   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1953:   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1954:   /* reuse */
1955:   PetscInt          olocal_primal_size,olocal_primal_size_cc;
1956:   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
1957:   /* change of basis */
1958:   PetscBool         qr_needed;
1959:   PetscBT           change_basis,qr_needed_idx;
1960:   /* auxiliary stuff */
1961:   PetscInt          *nnz,*is_indices;
1962:   PetscInt          ncc;
1963:   /* some quantities */
1964:   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1965:   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;

1968:   /* Destroy Mat objects computed previously */
1969:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
1970:   MatDestroy(&pcbddc->ConstraintMatrix);
1971:   /* save info on constraints from previous setup (if any) */
1972:   olocal_primal_size = pcbddc->local_primal_size;
1973:   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
1974:   PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
1975:   PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));
1976:   PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));
1977:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
1978:   PetscFree(pcbddc->primal_indices_local_idxs);

1980:   /* print some info */
1981:   if (pcbddc->dbg_flag) {
1982:     IS       vertices;
1983:     PetscInt nv,nedges,nfaces;
1984:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);
1985:     ISGetSize(vertices,&nv);
1986:     ISDestroy(&vertices);
1987:     PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
1988:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
1989:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
1990:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);
1991:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);
1992:     PetscViewerFlush(pcbddc->dbg_viewer);
1993:   }

1995:   if (!pcbddc->adaptive_selection) {
1996:     IS           ISForVertices,*ISForFaces,*ISForEdges;
1997:     MatNullSpace nearnullsp;
1998:     const Vec    *nearnullvecs;
1999:     Vec          *localnearnullsp;
2000:     PetscScalar  *array;
2001:     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2002:     PetscBool    nnsp_has_cnst;
2003:     /* LAPACK working arrays for SVD or POD */
2004:     PetscBool    skip_lapack,boolforchange;
2005:     PetscScalar  *work;
2006:     PetscReal    *singular_vals;
2007: #if defined(PETSC_USE_COMPLEX)
2008:     PetscReal    *rwork;
2009: #endif
2010: #if defined(PETSC_MISSING_LAPACK_GESVD)
2011:     PetscScalar  *temp_basis,*correlation_mat;
2012: #else
2013:     PetscBLASInt dummy_int=1;
2014:     PetscScalar  dummy_scalar=1.;
2015: #endif

2017:     /* Get index sets for faces, edges and vertices from graph */
2018:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
2019:     /* free unneeded index sets */
2020:     if (!pcbddc->use_vertices) {
2021:       ISDestroy(&ISForVertices);
2022:     }
2023:     if (!pcbddc->use_edges) {
2024:       for (i=0;i<n_ISForEdges;i++) {
2025:         ISDestroy(&ISForEdges[i]);
2026:       }
2027:       PetscFree(ISForEdges);
2028:       n_ISForEdges = 0;
2029:     }
2030:     if (!pcbddc->use_faces) {
2031:       for (i=0;i<n_ISForFaces;i++) {
2032:         ISDestroy(&ISForFaces[i]);
2033:       }
2034:       PetscFree(ISForFaces);
2035:       n_ISForFaces = 0;
2036:     }

2038: #if defined(PETSC_USE_DEBUG)
2039:     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2040:        Also use_change_of_basis should be consistent among processors */
2041:     if (pcbddc->NullSpace) {
2042:       PetscBool tbool[2],gbool[2];

2044:       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2045:         pcbddc->use_change_of_basis = PETSC_TRUE;
2046:         if (!ISForEdges) {
2047:           pcbddc->use_change_on_faces = PETSC_TRUE;
2048:         }
2049:       }
2050:       tbool[0] = pcbddc->use_change_of_basis;
2051:       tbool[1] = pcbddc->use_change_on_faces;
2052:       MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2053:       pcbddc->use_change_of_basis = gbool[0];
2054:       pcbddc->use_change_on_faces = gbool[1];
2055:     }
2056: #endif

2058:     /* check if near null space is attached to global mat */
2059:     MatGetNearNullSpace(pc->pmat,&nearnullsp);
2060:     if (nearnullsp) {
2061:       MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
2062:       /* remove any stored info */
2063:       MatNullSpaceDestroy(&pcbddc->onearnullspace);
2064:       PetscFree(pcbddc->onearnullvecs_state);
2065:       /* store information for BDDC solver reuse */
2066:       PetscObjectReference((PetscObject)nearnullsp);
2067:       pcbddc->onearnullspace = nearnullsp;
2068:       PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
2069:       for (i=0;i<nnsp_size;i++) {
2070:         PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
2071:       }
2072:     } else { /* if near null space is not provided BDDC uses constants by default */
2073:       nnsp_size = 0;
2074:       nnsp_has_cnst = PETSC_TRUE;
2075:     }
2076:     /* get max number of constraints on a single cc */
2077:     max_constraints = nnsp_size;
2078:     if (nnsp_has_cnst) max_constraints++;

2080:     /*
2081:          Evaluate maximum storage size needed by the procedure
2082:          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2083:          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2084:          There can be multiple constraints per connected component
2085:                                                                                                                                                            */
2086:     n_vertices = 0;
2087:     if (ISForVertices) {
2088:       ISGetSize(ISForVertices,&n_vertices);
2089:     }
2090:     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2091:     PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);

2093:     total_counts = n_ISForFaces+n_ISForEdges;
2094:     total_counts *= max_constraints;
2095:     total_counts += n_vertices;
2096:     PetscBTCreate(total_counts,&change_basis);

2098:     total_counts = 0;
2099:     max_size_of_constraint = 0;
2100:     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2101:       IS used_is;
2102:       if (i<n_ISForEdges) {
2103:         used_is = ISForEdges[i];
2104:       } else {
2105:         used_is = ISForFaces[i-n_ISForEdges];
2106:       }
2107:       ISGetSize(used_is,&j);
2108:       total_counts += j;
2109:       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2110:     }
2111:     PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);

2113:     /* get local part of global near null space vectors */
2114:     PetscMalloc1(nnsp_size,&localnearnullsp);
2115:     for (k=0;k<nnsp_size;k++) {
2116:       VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
2117:       VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
2118:       VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
2119:     }

2121:     /* whether or not to skip lapack calls */
2122:     skip_lapack = PETSC_TRUE;
2123:     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;

2125:     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2126:     if (!skip_lapack) {
2127:       PetscScalar temp_work;

2129: #if defined(PETSC_MISSING_LAPACK_GESVD)
2130:       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2131:       PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
2132:       PetscMalloc1(max_constraints,&singular_vals);
2133:       PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
2134: #if defined(PETSC_USE_COMPLEX)
2135:       PetscMalloc1(3*max_constraints,&rwork);
2136: #endif
2137:       /* now we evaluate the optimal workspace using query with lwork=-1 */
2138:       PetscBLASIntCast(max_constraints,&Blas_N);
2139:       PetscBLASIntCast(max_constraints,&Blas_LDA);
2140:       lwork = -1;
2141:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2142: #if !defined(PETSC_USE_COMPLEX)
2143:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2144: #else
2145:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2146: #endif
2147:       PetscFPTrapPop();
2148:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2149: #else /* on missing GESVD */
2150:       /* SVD */
2151:       PetscInt max_n,min_n;
2152:       max_n = max_size_of_constraint;
2153:       min_n = max_constraints;
2154:       if (max_size_of_constraint < max_constraints) {
2155:         min_n = max_size_of_constraint;
2156:         max_n = max_constraints;
2157:       }
2158:       PetscMalloc1(min_n,&singular_vals);
2159: #if defined(PETSC_USE_COMPLEX)
2160:       PetscMalloc1(5*min_n,&rwork);
2161: #endif
2162:       /* now we evaluate the optimal workspace using query with lwork=-1 */
2163:       lwork = -1;
2164:       PetscBLASIntCast(max_n,&Blas_M);
2165:       PetscBLASIntCast(min_n,&Blas_N);
2166:       PetscBLASIntCast(max_n,&Blas_LDA);
2167:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2168: #if !defined(PETSC_USE_COMPLEX)
2169:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2170: #else
2171:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2172: #endif
2173:       PetscFPTrapPop();
2174:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2175: #endif /* on missing GESVD */
2176:       /* Allocate optimal workspace */
2177:       PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
2178:       PetscMalloc1(lwork,&work);
2179:     }
2180:     /* Now we can loop on constraining sets */
2181:     total_counts = 0;
2182:     constraints_idxs_ptr[0] = 0;
2183:     constraints_data_ptr[0] = 0;
2184:     /* vertices */
2185:     if (n_vertices) {
2186:       ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
2187:       if (nnsp_has_cnst) { /* it considers all possible vertices */
2188:         PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));
2189:         for (i=0;i<n_vertices;i++) {
2190:           constraints_n[total_counts] = 1;
2191:           constraints_data[total_counts] = 1.0;
2192:           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2193:           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2194:           total_counts++;
2195:         }
2196:       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2197:         PetscBool used_vertex;
2198:         for (i=0;i<n_vertices;i++) {
2199:           used_vertex = PETSC_FALSE;
2200:           k = 0;
2201:           while (!used_vertex && k<nnsp_size) {
2202:             VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
2203:             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2204:               constraints_n[total_counts] = 1;
2205:               constraints_idxs[total_counts] = is_indices[i];
2206:               constraints_data[total_counts] = 1.0;
2207:               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2208:               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2209:               total_counts++;
2210:               used_vertex = PETSC_TRUE;
2211:             }
2212:             VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
2213:             k++;
2214:           }
2215:         }
2216:       }
2217:       ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
2218:       n_vertices = total_counts;
2219:     }

2221:     /* edges and faces */
2222:     total_counts_cc = total_counts;
2223:     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2224:       IS        used_is;
2225:       PetscBool idxs_copied = PETSC_FALSE;

2227:       if (ncc<n_ISForEdges) {
2228:         used_is = ISForEdges[ncc];
2229:         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2230:       } else {
2231:         used_is = ISForFaces[ncc-n_ISForEdges];
2232:         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2233:       }
2234:       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */

2236:       ISGetSize(used_is,&size_of_constraint);
2237:       ISGetIndices(used_is,(const PetscInt**)&is_indices);
2238:       /* change of basis should not be performed on local periodic nodes */
2239:       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2240:       if (nnsp_has_cnst) {
2241:         PetscScalar quad_value;

2243:         PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
2244:         idxs_copied = PETSC_TRUE;

2246:         if (!pcbddc->use_nnsp_true) {
2247:           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2248:         } else {
2249:           quad_value = 1.0;
2250:         }
2251:         for (j=0;j<size_of_constraint;j++) {
2252:           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2253:         }
2254:         temp_constraints++;
2255:         total_counts++;
2256:       }
2257:       for (k=0;k<nnsp_size;k++) {
2258:         PetscReal real_value;
2259:         PetscScalar *ptr_to_data;

2261:         VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
2262:         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2263:         for (j=0;j<size_of_constraint;j++) {
2264:           ptr_to_data[j] = array[is_indices[j]];
2265:         }
2266:         VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
2267:         /* check if array is null on the connected component */
2268:         PetscBLASIntCast(size_of_constraint,&Blas_N);
2269:         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2270:         if (real_value > 0.0) { /* keep indices and values */
2271:           temp_constraints++;
2272:           total_counts++;
2273:           if (!idxs_copied) {
2274:             PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
2275:             idxs_copied = PETSC_TRUE;
2276:           }
2277:         }
2278:       }
2279:       ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
2280:       valid_constraints = temp_constraints;
2281:       if (!pcbddc->use_nnsp_true && temp_constraints) {
2282:         if (temp_constraints == 1) { /* just normalize the constraint */
2283:           PetscScalar norm,*ptr_to_data;

2285:           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2286:           PetscBLASIntCast(size_of_constraint,&Blas_N);
2287:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2288:           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2289:           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2290:         } else { /* perform SVD */
2291:           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2292:           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];

2294: #if defined(PETSC_MISSING_LAPACK_GESVD)
2295:           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2296:              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2297:              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2298:                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2299:                 from that computed using LAPACKgesvd
2300:              -> This is due to a different computation of eigenvectors in LAPACKheev
2301:              -> The quality of the POD-computed basis will be the same */
2302:           PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
2303:           /* Store upper triangular part of correlation matrix */
2304:           PetscBLASIntCast(size_of_constraint,&Blas_N);
2305:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2306:           for (j=0;j<temp_constraints;j++) {
2307:             for (k=0;k<j+1;k++) {
2308:               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2309:             }
2310:           }
2311:           /* compute eigenvalues and eigenvectors of correlation matrix */
2312:           PetscBLASIntCast(temp_constraints,&Blas_N);
2313:           PetscBLASIntCast(temp_constraints,&Blas_LDA);
2314: #if !defined(PETSC_USE_COMPLEX)
2315:           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2316: #else
2317:           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2318: #endif
2319:           PetscFPTrapPop();
2320:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2321:           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2322:           j = 0;
2323:           while (j < temp_constraints && singular_vals[j] < tol) j++;
2324:           total_counts = total_counts-j;
2325:           valid_constraints = temp_constraints-j;
2326:           /* scale and copy POD basis into used quadrature memory */
2327:           PetscBLASIntCast(size_of_constraint,&Blas_M);
2328:           PetscBLASIntCast(temp_constraints,&Blas_N);
2329:           PetscBLASIntCast(temp_constraints,&Blas_K);
2330:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2331:           PetscBLASIntCast(temp_constraints,&Blas_LDB);
2332:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
2333:           if (j<temp_constraints) {
2334:             PetscInt ii;
2335:             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2336:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2337:             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2338:             PetscFPTrapPop();
2339:             for (k=0;k<temp_constraints-j;k++) {
2340:               for (ii=0;ii<size_of_constraint;ii++) {
2341:                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2342:               }
2343:             }
2344:           }
2345: #else  /* on missing GESVD */
2346:           PetscBLASIntCast(size_of_constraint,&Blas_M);
2347:           PetscBLASIntCast(temp_constraints,&Blas_N);
2348:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2349:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2350: #if !defined(PETSC_USE_COMPLEX)
2351:           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2352: #else
2353:           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2354: #endif
2355:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2356:           PetscFPTrapPop();
2357:           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2358:           k = temp_constraints;
2359:           if (k > size_of_constraint) k = size_of_constraint;
2360:           j = 0;
2361:           while (j < k && singular_vals[k-j-1] < tol) j++;
2362:           valid_constraints = k-j;
2363:           total_counts = total_counts-temp_constraints+valid_constraints;
2364: #endif /* on missing GESVD */
2365:         }
2366:       }
2367:       /* update pointers information */
2368:       if (valid_constraints) {
2369:         constraints_n[total_counts_cc] = valid_constraints;
2370:         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2371:         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2372:         /* set change_of_basis flag */
2373:         if (boolforchange) {
2374:           PetscBTSet(change_basis,total_counts_cc);
2375:         }
2376:         total_counts_cc++;
2377:       }
2378:     }
2379:     /* free workspace */
2380:     if (!skip_lapack) {
2381:       PetscFree(work);
2382: #if defined(PETSC_USE_COMPLEX)
2383:       PetscFree(rwork);
2384: #endif
2385:       PetscFree(singular_vals);
2386: #if defined(PETSC_MISSING_LAPACK_GESVD)
2387:       PetscFree(correlation_mat);
2388:       PetscFree(temp_basis);
2389: #endif
2390:     }
2391:     for (k=0;k<nnsp_size;k++) {
2392:       VecDestroy(&localnearnullsp[k]);
2393:     }
2394:     PetscFree(localnearnullsp);
2395:     /* free index sets of faces, edges and vertices */
2396:     for (i=0;i<n_ISForFaces;i++) {
2397:       ISDestroy(&ISForFaces[i]);
2398:     }
2399:     if (n_ISForFaces) {
2400:       PetscFree(ISForFaces);
2401:     }
2402:     for (i=0;i<n_ISForEdges;i++) {
2403:       ISDestroy(&ISForEdges[i]);
2404:     }
2405:     if (n_ISForEdges) {
2406:       PetscFree(ISForEdges);
2407:     }
2408:     ISDestroy(&ISForVertices);
2409:   } else {
2410:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

2412:     total_counts = 0;
2413:     n_vertices = 0;
2414:     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2415:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
2416:     }
2417:     max_constraints = 0;
2418:     total_counts_cc = 0;
2419:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2420:       total_counts += pcbddc->adaptive_constraints_n[i];
2421:       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2422:       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2423:     }
2424:     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2425:     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2426:     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2427:     constraints_data = pcbddc->adaptive_constraints_data;
2428:     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2429:     PetscMalloc1(total_counts_cc,&constraints_n);
2430:     total_counts_cc = 0;
2431:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2432:       if (pcbddc->adaptive_constraints_n[i]) {
2433:         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2434:       }
2435:     }
2436: #if 0
2437:     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2438:     for (i=0;i<total_counts_cc;i++) {
2439:       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2440:       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2441:       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2442:         printf(" %d",constraints_idxs[j]);
2443:       }
2444:       printf("\n");
2445:       printf("number of cc: %d\n",constraints_n[i]);
2446:     }
2447:     for (i=0;i<n_vertices;i++) {
2448:       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2449:     }
2450:     for (i=0;i<sub_schurs->n_subs;i++) {
2451:       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2452:     }
2453: #endif

2455:     max_size_of_constraint = 0;
2456:     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2457:     PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
2458:     /* Change of basis */
2459:     PetscBTCreate(total_counts_cc,&change_basis);
2460:     if (pcbddc->use_change_of_basis) {
2461:       for (i=0;i<sub_schurs->n_subs;i++) {
2462:         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2463:           PetscBTSet(change_basis,i+n_vertices);
2464:         }
2465:       }
2466:     }
2467:   }
2468:   pcbddc->local_primal_size = total_counts;
2469:   PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);

2471:   /* map constraints_idxs in boundary numbering */
2472:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
2473:   if (i != constraints_idxs_ptr[total_counts_cc]) {
2474:     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2475:   }

2477:   /* Create constraint matrix */
2478:   MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
2479:   MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
2480:   MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);

2482:   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2483:   /* determine if a QR strategy is needed for change of basis */
2484:   qr_needed = PETSC_FALSE;
2485:   PetscBTCreate(total_counts_cc,&qr_needed_idx);
2486:   total_primal_vertices=0;
2487:   pcbddc->local_primal_size_cc = 0;
2488:   for (i=0;i<total_counts_cc;i++) {
2489:     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2490:     if (size_of_constraint == 1) {
2491:       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2492:       pcbddc->local_primal_size_cc += 1;
2493:     } else if (PetscBTLookup(change_basis,i)) {
2494:       for (k=0;k<constraints_n[i];k++) {
2495:         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2496:       }
2497:       pcbddc->local_primal_size_cc += constraints_n[i];
2498:       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2499:         PetscBTSet(qr_needed_idx,i);
2500:         qr_needed = PETSC_TRUE;
2501:       }
2502:     } else {
2503:       pcbddc->local_primal_size_cc += 1;
2504:     }
2505:   }
2506:   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2507:   pcbddc->n_vertices = total_primal_vertices;
2508:   /* permute indices in order to have a sorted set of vertices */
2509:   PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);

2511:   PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);
2512:   PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));
2513:   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;

2515:   /* nonzero structure of constraint matrix */
2516:   /* and get reference dof for local constraints */
2517:   PetscMalloc1(pcbddc->local_primal_size,&nnz);
2518:   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;

2520:   j = total_primal_vertices;
2521:   total_counts = total_primal_vertices;
2522:   cum = total_primal_vertices;
2523:   for (i=n_vertices;i<total_counts_cc;i++) {
2524:     if (!PetscBTLookup(change_basis,i)) {
2525:       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2526:       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2527:       cum++;
2528:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2529:       for (k=0;k<constraints_n[i];k++) {
2530:         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2531:         nnz[j+k] = size_of_constraint;
2532:       }
2533:       j += constraints_n[i];
2534:     }
2535:   }
2536:   MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
2537:   PetscFree(nnz);

2539:   /* set values in constraint matrix */
2540:   for (i=0;i<total_primal_vertices;i++) {
2541:     MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
2542:   }
2543:   total_counts = total_primal_vertices;
2544:   for (i=n_vertices;i<total_counts_cc;i++) {
2545:     if (!PetscBTLookup(change_basis,i)) {
2546:       PetscInt *cols;

2548:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2549:       cols = constraints_idxs+constraints_idxs_ptr[i];
2550:       for (k=0;k<constraints_n[i];k++) {
2551:         PetscInt    row = total_counts+k;
2552:         PetscScalar *vals;

2554:         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2555:         MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
2556:       }
2557:       total_counts += constraints_n[i];
2558:     }
2559:   }
2560:   /* assembling */
2561:   MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
2562:   MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);

2564:   /*
2565:   PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
2566:   MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);
2567:   */
2568:   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2569:   if (pcbddc->use_change_of_basis) {
2570:     /* dual and primal dofs on a single cc */
2571:     PetscInt     dual_dofs,primal_dofs;
2572:     /* working stuff for GEQRF */
2573:     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2574:     PetscBLASInt lqr_work;
2575:     /* working stuff for UNGQR */
2576:     PetscScalar  *gqr_work,lgqr_work_t;
2577:     PetscBLASInt lgqr_work;
2578:     /* working stuff for TRTRS */
2579:     PetscScalar  *trs_rhs;
2580:     PetscBLASInt Blas_NRHS;
2581:     /* pointers for values insertion into change of basis matrix */
2582:     PetscInt     *start_rows,*start_cols;
2583:     PetscScalar  *start_vals;
2584:     /* working stuff for values insertion */
2585:     PetscBT      is_primal;
2586:     PetscInt     *aux_primal_numbering_B;
2587:     /* matrix sizes */
2588:     PetscInt     global_size,local_size;
2589:     /* temporary change of basis */
2590:     Mat          localChangeOfBasisMatrix;
2591:     /* extra space for debugging */
2592:     PetscScalar  *dbg_work;

2594:     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2595:     MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
2596:     MatSetType(localChangeOfBasisMatrix,MATAIJ);
2597:     MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
2598:     /* nonzeros for local mat */
2599:     PetscMalloc1(pcis->n,&nnz);
2600:     for (i=0;i<pcis->n;i++) nnz[i]=1;
2601:     for (i=n_vertices;i<total_counts_cc;i++) {
2602:       if (PetscBTLookup(change_basis,i)) {
2603:         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2604:         if (PetscBTLookup(qr_needed_idx,i)) {
2605:           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2606:         } else {
2607:           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2608:           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2609:         }
2610:       }
2611:     }
2612:     MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
2613:     PetscFree(nnz);
2614:     /* Set initial identity in the matrix */
2615:     for (i=0;i<pcis->n;i++) {
2616:       MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
2617:     }

2619:     if (pcbddc->dbg_flag) {
2620:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
2621:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
2622:     }


2625:     /* Now we loop on the constraints which need a change of basis */
2626:     /*
2627:        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2628:        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)

2630:        Basic blocks of change of basis matrix T computed by

2632:           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)

2634:             | 1        0   ...        0         s_1/S |
2635:             | 0        1   ...        0         s_2/S |
2636:             |              ...                        |
2637:             | 0        ...            1     s_{n-1}/S |
2638:             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |

2640:             with S = \sum_{i=1}^n s_i^2
2641:             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2642:                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering

2644:           - QR decomposition of constraints otherwise
2645:     */
2646:     if (qr_needed) {
2647:       /* space to store Q */
2648:       PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
2649:       /* first we issue queries for optimal work */
2650:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
2651:       PetscBLASIntCast(max_constraints,&Blas_N);
2652:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
2653:       lqr_work = -1;
2654:       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2655:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2656:       PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
2657:       PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
2658:       lgqr_work = -1;
2659:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
2660:       PetscBLASIntCast(max_size_of_constraint,&Blas_N);
2661:       PetscBLASIntCast(max_constraints,&Blas_K);
2662:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
2663:       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2664:       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2665:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2666:       PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
2667:       PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
2668:       /* array to store scaling factors for reflectors */
2669:       PetscMalloc1(max_constraints,&qr_tau);
2670:       /* array to store rhs and solution of triangular solver */
2671:       PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
2672:       /* allocating workspace for check */
2673:       if (pcbddc->dbg_flag) {
2674:         PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
2675:       }
2676:     }
2677:     /* array to store whether a node is primal or not */
2678:     PetscBTCreate(pcis->n_B,&is_primal);
2679:     PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
2680:     ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
2681:     if (i != total_primal_vertices) {
2682:       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2683:     }
2684:     for (i=0;i<total_primal_vertices;i++) {
2685:       PetscBTSet(is_primal,aux_primal_numbering_B[i]);
2686:     }
2687:     PetscFree(aux_primal_numbering_B);

2689:     /* loop on constraints and see whether or not they need a change of basis and compute it */
2690:     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2691:       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2692:       if (PetscBTLookup(change_basis,total_counts)) {
2693:         /* get constraint info */
2694:         primal_dofs = constraints_n[total_counts];
2695:         dual_dofs = size_of_constraint-primal_dofs;

2697:         if (pcbddc->dbg_flag) {
2698:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);
2699:         }

2701:         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */

2703:           /* copy quadrature constraints for change of basis check */
2704:           if (pcbddc->dbg_flag) {
2705:             PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
2706:           }
2707:           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2708:           PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

2710:           /* compute QR decomposition of constraints */
2711:           PetscBLASIntCast(size_of_constraint,&Blas_M);
2712:           PetscBLASIntCast(primal_dofs,&Blas_N);
2713:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2714:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2715:           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2716:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2717:           PetscFPTrapPop();

2719:           /* explictly compute R^-T */
2720:           PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
2721:           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2722:           PetscBLASIntCast(primal_dofs,&Blas_N);
2723:           PetscBLASIntCast(primal_dofs,&Blas_NRHS);
2724:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2725:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
2726:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2727:           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2728:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2729:           PetscFPTrapPop();

2731:           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2732:           PetscBLASIntCast(size_of_constraint,&Blas_M);
2733:           PetscBLASIntCast(size_of_constraint,&Blas_N);
2734:           PetscBLASIntCast(primal_dofs,&Blas_K);
2735:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2736:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2737:           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2738:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2739:           PetscFPTrapPop();

2741:           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2742:              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2743:              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2744:           PetscBLASIntCast(size_of_constraint,&Blas_M);
2745:           PetscBLASIntCast(primal_dofs,&Blas_N);
2746:           PetscBLASIntCast(primal_dofs,&Blas_K);
2747:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2748:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
2749:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
2750:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2751:           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2752:           PetscFPTrapPop();
2753:           PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

2755:           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2756:           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2757:           /* insert cols for primal dofs */
2758:           for (j=0;j<primal_dofs;j++) {
2759:             start_vals = &qr_basis[j*size_of_constraint];
2760:             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2761:             MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
2762:           }
2763:           /* insert cols for dual dofs */
2764:           for (j=0,k=0;j<dual_dofs;k++) {
2765:             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2766:               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2767:               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2768:               MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
2769:               j++;
2770:             }
2771:           }

2773:           /* check change of basis */
2774:           if (pcbddc->dbg_flag) {
2775:             PetscInt   ii,jj;
2776:             PetscBool valid_qr=PETSC_TRUE;
2777:             PetscBLASIntCast(primal_dofs,&Blas_M);
2778:             PetscBLASIntCast(size_of_constraint,&Blas_N);
2779:             PetscBLASIntCast(size_of_constraint,&Blas_K);
2780:             PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2781:             PetscBLASIntCast(size_of_constraint,&Blas_LDB);
2782:             PetscBLASIntCast(primal_dofs,&Blas_LDC);
2783:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2784:             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2785:             PetscFPTrapPop();
2786:             for (jj=0;jj<size_of_constraint;jj++) {
2787:               for (ii=0;ii<primal_dofs;ii++) {
2788:                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2789:                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2790:               }
2791:             }
2792:             if (!valid_qr) {
2793:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
2794:               for (jj=0;jj<size_of_constraint;jj++) {
2795:                 for (ii=0;ii<primal_dofs;ii++) {
2796:                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2797:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2798:                   }
2799:                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2800:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2801:                   }
2802:                 }
2803:               }
2804:             } else {
2805:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
2806:             }
2807:           }
2808:         } else { /* simple transformation block */
2809:           PetscInt    row,col;
2810:           PetscScalar val,norm;

2812:           PetscBLASIntCast(size_of_constraint,&Blas_N);
2813:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2814:           for (j=0;j<size_of_constraint;j++) {
2815:             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2816:             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2817:             if (!PetscBTLookup(is_primal,row_B)) {
2818:               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2819:               MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
2820:               MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
2821:             } else {
2822:               for (k=0;k<size_of_constraint;k++) {
2823:                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2824:                 if (row != col) {
2825:                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2826:                 } else {
2827:                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2828:                 }
2829:                 MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
2830:               }
2831:             }
2832:           }
2833:           if (pcbddc->dbg_flag) {
2834:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
2835:           }
2836:         }
2837:       } else {
2838:         if (pcbddc->dbg_flag) {
2839:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);
2840:         }
2841:       }
2842:     }

2844:     /* free workspace */
2845:     if (qr_needed) {
2846:       if (pcbddc->dbg_flag) {
2847:         PetscFree(dbg_work);
2848:       }
2849:       PetscFree(trs_rhs);
2850:       PetscFree(qr_tau);
2851:       PetscFree(qr_work);
2852:       PetscFree(gqr_work);
2853:       PetscFree(qr_basis);
2854:     }
2855:     PetscBTDestroy(&is_primal);
2856:     MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
2857:     MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);

2859:     /* assembling of global change of variable */
2860:     {
2861:       Mat      tmat;
2862:       PetscInt bs;

2864:       VecGetSize(pcis->vec1_global,&global_size);
2865:       VecGetLocalSize(pcis->vec1_global,&local_size);
2866:       MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
2867:       MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
2868:       MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
2869:       MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
2870:       MatGetBlockSize(pc->pmat,&bs);
2871:       MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
2872:       MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
2873:       MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
2874:       MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
2875:       MatDestroy(&tmat);
2876:       VecSet(pcis->vec1_global,0.0);
2877:       VecSet(pcis->vec1_N,1.0);
2878:       VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2879:       VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2880:       VecReciprocal(pcis->vec1_global);
2881:       MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);
2882:     }
2883:     /* check */
2884:     if (pcbddc->dbg_flag) {
2885:       PetscReal error;
2886:       Vec       x,x_change;

2888:       VecDuplicate(pcis->vec1_global,&x);
2889:       VecDuplicate(pcis->vec1_global,&x_change);
2890:       VecSetRandom(x,NULL);
2891:       VecCopy(x,pcis->vec1_global);
2892:       VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
2893:       VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
2894:       MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
2895:       VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
2896:       VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
2897:       MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
2898:       VecAXPY(x,-1.0,x_change);
2899:       VecNorm(x,NORM_INFINITY,&error);
2900:       PetscViewerFlush(pcbddc->dbg_viewer);
2901:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);
2902:       VecDestroy(&x);
2903:       VecDestroy(&x_change);
2904:     }

2906:     /* adapt sub_schurs computed (if any) */
2907:     if (pcbddc->use_deluxe_scaling) {
2908:       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2909:       if (sub_schurs->S_Ej_all) {
2910:         Mat S_new,tmat;
2911:         IS is_all_N;

2913:         ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
2914:         MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
2915:         ISDestroy(&is_all_N);
2916:         MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
2917:         MatDestroy(&sub_schurs->S_Ej_all);
2918:         PetscObjectReference((PetscObject)S_new);
2919:         sub_schurs->S_Ej_all = S_new;
2920:         MatDestroy(&S_new);
2921:         if (sub_schurs->sum_S_Ej_all) {
2922:           MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
2923:           MatDestroy(&sub_schurs->sum_S_Ej_all);
2924:           PetscObjectReference((PetscObject)S_new);
2925:           sub_schurs->sum_S_Ej_all = S_new;
2926:           MatDestroy(&S_new);
2927:         }
2928:         MatDestroy(&tmat);
2929:       }
2930:     }
2931:     MatDestroy(&localChangeOfBasisMatrix);
2932:   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2933:     PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
2934:     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2935:   }

2937:   /* set up change of basis context */
2938:   if (pcbddc->ChangeOfBasisMatrix) {
2939:     PCBDDCChange_ctx change_ctx;

2941:     if (!pcbddc->new_global_mat) {
2942:       PetscInt global_size,local_size;

2944:       VecGetSize(pcis->vec1_global,&global_size);
2945:       VecGetLocalSize(pcis->vec1_global,&local_size);
2946:       MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);
2947:       MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);
2948:       MatSetType(pcbddc->new_global_mat,MATSHELL);
2949:       MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);
2950:       MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);
2951:       PetscNew(&change_ctx);
2952:       MatShellSetContext(pcbddc->new_global_mat,change_ctx);
2953:     } else {
2954:       MatShellGetContext(pcbddc->new_global_mat,&change_ctx);
2955:       MatDestroy(&change_ctx->global_change);
2956:       VecDestroyVecs(2,&change_ctx->work);
2957:     }
2958:     if (!pcbddc->user_ChangeOfBasisMatrix) {
2959:       PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);
2960:       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2961:     } else {
2962:       PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
2963:       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2964:     }
2965:     VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);
2966:     MatSetUp(pcbddc->new_global_mat);
2967:     MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);
2968:     MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);
2969:   }

2971:   /* check if a new primal space has been introduced */
2972:   pcbddc->new_primal_space_local = PETSC_TRUE;
2973:   if (olocal_primal_size == pcbddc->local_primal_size) {
2974:     PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);
2975:     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2976:     if (!pcbddc->new_primal_space_local) {
2977:       PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);
2978:       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2979:     }
2980:   }
2981:   PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);
2982:   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2983:   MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));

2985:   /* flush dbg viewer */
2986:   if (pcbddc->dbg_flag) {
2987:     PetscViewerFlush(pcbddc->dbg_viewer);
2988:   }

2990:   /* free workspace */
2991:   PetscBTDestroy(&qr_needed_idx);
2992:   PetscBTDestroy(&change_basis);
2993:   if (!pcbddc->adaptive_selection) {
2994:     PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
2995:     PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
2996:   } else {
2997:     PetscFree5(pcbddc->adaptive_constraints_n,
2998:                       pcbddc->adaptive_constraints_idxs_ptr,
2999:                       pcbddc->adaptive_constraints_data_ptr,
3000:                       pcbddc->adaptive_constraints_idxs,
3001:                       pcbddc->adaptive_constraints_data);
3002:     PetscFree(constraints_n);
3003:     PetscFree(constraints_idxs_B);
3004:   }
3005:   return(0);
3006: }

3010: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3011: {
3012:   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3013:   PC_IS       *pcis = (PC_IS*)pc->data;
3014:   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3015:   PetscInt    ierr,i,vertex_size,N;
3016:   PetscViewer viewer=pcbddc->dbg_viewer;

3019:   /* Reset previously computed graph */
3020:   PCBDDCGraphReset(pcbddc->mat_graph);
3021:   /* Init local Graph struct */
3022:   MatGetSize(pc->pmat,&N,NULL);
3023:   PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);

3025:   /* Check validity of the csr graph passed in by the user */
3026:   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3027:     PCBDDCGraphResetCSR(pcbddc->mat_graph);
3028:   }

3030:   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3031:   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3032:     PetscInt  *xadj,*adjncy;
3033:     PetscInt  nvtxs;
3034:     PetscBool flg_row=PETSC_FALSE;

3036:     if (pcbddc->use_local_adj) {

3038:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
3039:       if (flg_row) {
3040:         PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
3041:         pcbddc->computed_rowadj = PETSC_TRUE;
3042:       }
3043:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
3044:     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3045:       IS                     is_dummy;
3046:       ISLocalToGlobalMapping l2gmap_dummy;
3047:       PetscInt               j,sum;
3048:       PetscInt               *cxadj,*cadjncy;
3049:       const PetscInt         *idxs;
3050:       PCBDDCGraph            graph;
3051:       PetscBT                is_on_boundary;

3053:       ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);
3054:       ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
3055:       ISDestroy(&is_dummy);
3056:       PCBDDCGraphCreate(&graph);
3057:       PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);
3058:       ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
3059:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
3060:       if (flg_row) {
3061:         graph->xadj = xadj;
3062:         graph->adjncy = adjncy;
3063:       }
3064:       PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
3065:       PCBDDCGraphComputeConnectedComponents(graph);
3066:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);

3068:       if (pcbddc->dbg_flag) {
3069:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);
3070:         for (i=0;i<graph->ncc;i++) {
3071:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);
3072:         }
3073:       }

3075:       PetscBTCreate(pcis->n,&is_on_boundary);
3076:       ISGetIndices(pcis->is_B_local,&idxs);
3077:       for (i=0;i<pcis->n_B;i++) {
3078:         PetscBTSet(is_on_boundary,idxs[i]);
3079:       }
3080:       ISRestoreIndices(pcis->is_B_local,&idxs);

3082:       PetscCalloc1(pcis->n+1,&cxadj);
3083:       sum = 0;
3084:       for (i=0;i<graph->ncc;i++) {
3085:         PetscInt sizecc = 0;
3086:         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3087:           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3088:             sizecc++;
3089:           }
3090:         }
3091:         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3092:           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3093:             cxadj[graph->queue[j]] = sizecc;
3094:           }
3095:         }
3096:         sum += sizecc*sizecc;
3097:       }
3098:       PetscMalloc1(sum,&cadjncy);
3099:       sum = 0;
3100:       for (i=0;i<pcis->n;i++) {
3101:         PetscInt temp = cxadj[i];
3102:         cxadj[i] = sum;
3103:         sum += temp;
3104:       }
3105:       cxadj[pcis->n] = sum;
3106:       for (i=0;i<graph->ncc;i++) {
3107:         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3108:           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3109:             PetscInt k,sizecc = 0;
3110:             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3111:               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3112:                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3113:                 sizecc++;
3114:               }
3115:             }
3116:           }
3117:         }
3118:       }
3119:       if (sum) {
3120:         PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);
3121:       } else {
3122:         PetscFree(cxadj);
3123:         PetscFree(cadjncy);
3124:       }
3125:       graph->xadj = 0;
3126:       graph->adjncy = 0;
3127:       PCBDDCGraphDestroy(&graph);
3128:       PetscBTDestroy(&is_on_boundary);
3129:     }
3130:   }
3131:   if (pcbddc->dbg_flag) {
3132:     PetscViewerFlush(pcbddc->dbg_viewer);
3133:   }

3135:   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3136:   vertex_size = 1;
3137:   if (pcbddc->user_provided_isfordofs) {
3138:     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3139:       PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
3140:       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3141:         PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
3142:         ISDestroy(&pcbddc->ISForDofs[i]);
3143:       }
3144:       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3145:       pcbddc->n_ISForDofs = 0;
3146:       PetscFree(pcbddc->ISForDofs);
3147:     }
3148:     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3149:     MatGetBlockSize(matis->A,&vertex_size);
3150:   } else {
3151:     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3152:       MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);
3153:       PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
3154:       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3155:         ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
3156:       }
3157:     }
3158:   }

3160:   /* Setup of Graph */
3161:   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3162:     PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
3163:   }
3164:   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3165:     PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
3166:   }
3167:   PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);

3169:   /* Graph's connected components analysis */
3170:   PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);

3172:   /* print some info to stdout */
3173:   if (pcbddc->dbg_flag) {
3174:     PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
3175:   }

3177:   /* mark topography has done */
3178:   pcbddc->recompute_topography = PETSC_FALSE;
3179:   return(0);
3180: }

3182: /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3185: PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3186: {
3187:   PetscSF        sf;
3188:   PetscLayout    map;
3189:   const PetscInt *idxs;
3190:   PetscInt       *leaf_data,*root_data,*gidxs;
3191:   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3192:   PetscInt       n_n,nlocals,start,first_index;
3193:   PetscMPIInt    commsize;
3194:   PetscBool      first_found;

3198:   ISGetLocalSize(subset,&n);
3199:   if (subset_mult) {
3201:     ISGetLocalSize(subset,&i);
3202:     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3203:   }
3204:   /* create workspace layout for computing global indices of subset */
3205:   ISGetIndices(subset,&idxs);
3206:   lbounds[0] = lbounds[1] = 0;
3207:   for (i=0;i<n;i++) {
3208:     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3209:     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3210:   }
3211:   lbounds[0] = -lbounds[0];
3212:   MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));
3213:   gbounds[0] = -gbounds[0];
3214:   N = gbounds[1] - gbounds[0] + 1;
3215:   PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);
3216:   PetscLayoutSetBlockSize(map,1);
3217:   PetscLayoutSetSize(map,N);
3218:   PetscLayoutSetUp(map);
3219:   PetscLayoutGetLocalSize(map,&Nl);

3221:   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
3222:   PetscMalloc2(n,&leaf_data,Nl,&root_data);
3223:   if (subset_mult) {
3224:     const PetscInt* idxs_mult;

3226:     ISGetIndices(subset_mult,&idxs_mult);
3227:     PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));
3228:     ISRestoreIndices(subset_mult,&idxs_mult);
3229:   } else {
3230:     for (i=0;i<n;i++) leaf_data[i] = 1;
3231:   }
3232:   /* local size of new subset */
3233:   n_n = 0;
3234:   for (i=0;i<n;i++) n_n += leaf_data[i];

3236:   /* global indexes in layout */
3237:   PetscMalloc1(n_n,&gidxs); /* allocating possibly extra space in gidxs which will be used later */
3238:   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
3239:   ISRestoreIndices(subset,&idxs);
3240:   PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);
3241:   PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);
3242:   PetscLayoutDestroy(&map);

3244:   /* reduce from leaves to roots */
3245:   PetscMemzero(root_data,Nl*sizeof(PetscInt));
3246:   PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);
3247:   PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);

3249:   /* count indexes in local part of layout */
3250:   nlocals = 0;
3251:   first_index = -1;
3252:   first_found = PETSC_FALSE;
3253:   for (i=0;i<Nl;i++) {
3254:     if (!first_found && root_data[i]) {
3255:       first_found = PETSC_TRUE;
3256:       first_index = i;
3257:     }
3258:     nlocals += root_data[i];
3259:   }

3261:   /* cumulative of number of indexes and size of subset without holes */
3262: #if defined(PETSC_HAVE_MPI_EXSCAN)
3263:   start = 0;
3264:   MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));
3265: #else
3266:   MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));
3267:   start = start-nlocals;
3268: #endif

3270:   if (N_n) { /* compute total size of new subset if requested */
3271:     *N_n = start + nlocals;
3272:     MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);
3273:     MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));
3274:   }

3276:   /* adapt root data with cumulative */
3277:   if (first_found) {
3278:     PetscInt old_index;

3280:     root_data[first_index] += start;
3281:     old_index = first_index;
3282:     for (i=first_index+1;i<Nl;i++) {
3283:       if (root_data[i]) {
3284:         root_data[i] += root_data[old_index];
3285:         old_index = i;
3286:       }
3287:     }
3288:   }

3290:   /* from roots to leaves */
3291:   PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);
3292:   PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);
3293:   PetscSFDestroy(&sf);

3295:   /* create new IS with global indexes without holes */
3296:   if (subset_mult) {
3297:     const PetscInt* idxs_mult;
3298:     PetscInt        cum;

3300:     cum = 0;
3301:     ISGetIndices(subset_mult,&idxs_mult);
3302:     for (i=0;i<n;i++) {
3303:       PetscInt j;
3304:       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
3305:     }
3306:     ISRestoreIndices(subset_mult,&idxs_mult);
3307:   } else {
3308:     for (i=0;i<n;i++) {
3309:       gidxs[i] = leaf_data[i]-1;
3310:     }
3311:   }
3312:   ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);
3313:   PetscFree2(leaf_data,root_data);
3314:   return(0);
3315: }

3319: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3320: {
3321:   PetscInt       i,j;
3322:   PetscScalar    *alphas;

3326:   /* this implements stabilized Gram-Schmidt */
3327:   PetscMalloc1(n,&alphas);
3328:   for (i=0;i<n;i++) {
3329:     VecNormalize(vecs[i],NULL);
3330:     if (i<n) { VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]); }
3331:     for (j=i+1;j<n;j++) { VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]); }
3332:   }
3333:   PetscFree(alphas);
3334:   return(0);
3335: }

3339: PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
3340: {
3341:   Mat_IS         *matis;
3342:   IS             ranks_send_to;
3343:   PetscInt       n_neighs,*neighs,*n_shared,**shared;
3344:   PetscMPIInt    size,rank,color;
3345:   PetscInt       *xadj,*adjncy;
3346:   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
3347:   PetscInt       i,local_size,threshold=0;
3348:   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3349:   PetscSubcomm   subcomm;

3353:   PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);
3354:   PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
3355:   PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);

3357:   /* Get info on mapping */
3358:   matis = (Mat_IS*)(mat->data);
3359:   ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);
3360:   ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);

3362:   /* build local CSR graph of subdomains' connectivity */
3363:   PetscMalloc1(2,&xadj);
3364:   xadj[0] = 0;
3365:   xadj[1] = PetscMax(n_neighs-1,0);
3366:   PetscMalloc1(xadj[1],&adjncy);
3367:   PetscMalloc1(xadj[1],&adjncy_wgt);

3369:   if (threshold) {
3370:     PetscInt xadj_count = 0;
3371:     for (i=1;i<n_neighs;i++) {
3372:       if (n_shared[i] > threshold) {
3373:         adjncy[xadj_count] = neighs[i];
3374:         adjncy_wgt[xadj_count] = n_shared[i];
3375:         xadj_count++;
3376:       }
3377:     }
3378:     xadj[1] = xadj_count;
3379:   } else {
3380:     if (xadj[1]) {
3381:       PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));
3382:       PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));
3383:     }
3384:   }
3385:   ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);
3386:   if (use_square) {
3387:     for (i=0;i<xadj[1];i++) {
3388:       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3389:     }
3390:   }
3391:   PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);

3393:   PetscMalloc1(1,&ranks_send_to_idx);

3395:   /*
3396:     Restrict work on active processes only.
3397:   */
3398:   PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);
3399:   PetscSubcommSetNumber(subcomm,2); /* 2 groups, active process and not active processes */
3400:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
3401:   PetscMPIIntCast(!local_size,&color);
3402:   PetscSubcommSetTypeGeneral(subcomm,color,rank);
3403:   if (color) {
3404:     PetscFree(xadj);
3405:     PetscFree(adjncy);
3406:     PetscFree(adjncy_wgt);
3407:   } else {
3408:     Mat             subdomain_adj;
3409:     IS              new_ranks,new_ranks_contig;
3410:     MatPartitioning partitioner;
3411:     PetscInt        prank,rstart=0,rend=0;
3412:     PetscInt        *is_indices,*oldranks;
3413:     PetscBool       aggregate;

3415:     MPI_Comm_size(PetscSubcommChild(subcomm),&size);
3416:     PetscMalloc1(size,&oldranks);
3417:     prank = rank;
3418:     MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));
3419:     /*
3420:     for (i=0;i<size;i++) {
3421:       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3422:     }
3423:     */
3424:     for (i=0;i<xadj[1];i++) {
3425:       PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
3426:     }
3427:     PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
3428:     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
3429:     if (aggregate) {
3430:       PetscInt    lrows,row,ncols,*cols;
3431:       PetscMPIInt nrank;
3432:       PetscScalar *vals;

3434:       MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);
3435:       lrows = 0;
3436:       if (nrank<redprocs) {
3437:         lrows = size/redprocs;
3438:         if (nrank<size%redprocs) lrows++;
3439:       }
3440:       MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
3441:       MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
3442:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
3443:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
3444:       row = nrank;
3445:       ncols = xadj[1]-xadj[0];
3446:       cols = adjncy;
3447:       PetscMalloc1(ncols,&vals);
3448:       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
3449:       MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
3450:       MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
3451:       MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
3452:       PetscFree(xadj);
3453:       PetscFree(adjncy);
3454:       PetscFree(adjncy_wgt);
3455:       PetscFree(vals);
3456:     } else {
3457:       MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
3458:     }
3459:     /* MatView(subdomain_adj,0); */

3461:     /* Partition */
3462:     MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);
3463:     MatPartitioningSetAdjacency(partitioner,subdomain_adj);
3464:     if (use_vwgt) {
3465:       PetscMalloc1(1,&v_wgt);
3466:       v_wgt[0] = local_size;
3467:       MatPartitioningSetVertexWeights(partitioner,v_wgt);
3468:     }
3469:     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3470:     MatPartitioningSetNParts(partitioner,n_subdomains);
3471:     MatPartitioningSetFromOptions(partitioner);
3472:     MatPartitioningApply(partitioner,&new_ranks);
3473:     /* MatPartitioningView(partitioner,0); */

3475:     /* renumber new_ranks to avoid "holes" in new set of processors */
3476:     PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);
3477:     ISDestroy(&new_ranks);
3478:     ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
3479:     if (!redprocs) {
3480:       ranks_send_to_idx[0] = oldranks[is_indices[0]];
3481:     } else {
3482:       PetscInt    idxs[1];
3483:       PetscMPIInt tag;
3484:       MPI_Request *reqs;

3486:       PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
3487:       PetscMalloc1(rend-rstart,&reqs);
3488:       for (i=rstart;i<rend;i++) {
3489:         MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);
3490:       }
3491:       MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);
3492:       MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
3493:       PetscFree(reqs);
3494:       ranks_send_to_idx[0] = oldranks[idxs[0]];
3495:     }
3496:     ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
3497:     /* clean up */
3498:     PetscFree(oldranks);
3499:     ISDestroy(&new_ranks_contig);
3500:     MatDestroy(&subdomain_adj);
3501:     MatPartitioningDestroy(&partitioner);
3502:   }
3503:   PetscSubcommDestroy(&subcomm);

3505:   /* assemble parallel IS for sends */
3506:   i = 1;
3507:   if (color) i=0;
3508:   ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);
3509:   /* get back IS */
3510:   *is_sends = ranks_send_to;
3511:   return(0);
3512: }

3514: typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;

3518: PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3519: {
3520:   Mat                    local_mat;
3521:   Mat_IS                 *matis;
3522:   IS                     is_sends_internal;
3523:   PetscInt               rows,cols,new_local_rows;
3524:   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3525:   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3526:   ISLocalToGlobalMapping l2gmap;
3527:   PetscInt*              l2gmap_indices;
3528:   const PetscInt*        is_indices;
3529:   MatType                new_local_type;
3530:   /* buffers */
3531:   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3532:   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3533:   PetscInt               *recv_buffer_idxs_local;
3534:   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3535:   /* MPI */
3536:   MPI_Comm               comm,comm_n;
3537:   PetscSubcomm           subcomm;
3538:   PetscMPIInt            n_sends,n_recvs,commsize;
3539:   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3540:   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3541:   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3542:   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3543:   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3544:   PetscErrorCode         ierr;

3547:   /* TODO: add missing checks */
3552:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
3553:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3554:   MatISGetLocalMat(mat,&local_mat);
3555:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
3556:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3557:   MatGetSize(local_mat,&rows,&cols);
3558:   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3559:   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3560:     PetscInt mrows,mcols,mnrows,mncols;
3561:     PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
3562:     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3563:     MatGetSize(mat,&mrows,&mcols);
3564:     MatGetSize(*mat_n,&mnrows,&mncols);
3565:     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3566:     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3567:   }
3568:   MatGetBlockSize(local_mat,&bs);
3570:   /* prepare IS for sending if not provided */
3571:   if (!is_sends) {
3572:     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3573:     MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);
3574:   } else {
3575:     PetscObjectReference((PetscObject)is_sends);
3576:     is_sends_internal = is_sends;
3577:   }

3579:   /* get pointer of MATIS data */
3580:   matis = (Mat_IS*)mat->data;

3582:   /* get comm */
3583:   PetscObjectGetComm((PetscObject)mat,&comm);

3585:   /* compute number of sends */
3586:   ISGetLocalSize(is_sends_internal,&i);
3587:   PetscMPIIntCast(i,&n_sends);

3589:   /* compute number of receives */
3590:   MPI_Comm_size(comm,&commsize);
3591:   PetscMalloc1(commsize,&iflags);
3592:   PetscMemzero(iflags,commsize*sizeof(*iflags));
3593:   ISGetIndices(is_sends_internal,&is_indices);
3594:   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3595:   PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
3596:   PetscFree(iflags);

3598:   /* restrict comm if requested */
3599:   subcomm = 0;
3600:   destroy_mat = PETSC_FALSE;
3601:   if (restrict_comm) {
3602:     PetscMPIInt color,subcommsize;

3604:     color = 0;
3605:     if (restrict_full) {
3606:       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
3607:     } else {
3608:       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
3609:     }
3610:     MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
3611:     subcommsize = commsize - subcommsize;
3612:     /* check if reuse has been requested */
3613:     if (reuse == MAT_REUSE_MATRIX) {
3614:       if (*mat_n) {
3615:         PetscMPIInt subcommsize2;
3616:         MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
3617:         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3618:         comm_n = PetscObjectComm((PetscObject)*mat_n);
3619:       } else {
3620:         comm_n = PETSC_COMM_SELF;
3621:       }
3622:     } else { /* MAT_INITIAL_MATRIX */
3623:       PetscMPIInt rank;

3625:       MPI_Comm_rank(comm,&rank);
3626:       PetscSubcommCreate(comm,&subcomm);
3627:       PetscSubcommSetNumber(subcomm,2);
3628:       PetscSubcommSetTypeGeneral(subcomm,color,rank);
3629:       comm_n = PetscSubcommChild(subcomm);
3630:     }
3631:     /* flag to destroy *mat_n if not significative */
3632:     if (color) destroy_mat = PETSC_TRUE;
3633:   } else {
3634:     comm_n = comm;
3635:   }

3637:   /* prepare send/receive buffers */
3638:   PetscMalloc1(commsize,&ilengths_idxs);
3639:   PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));
3640:   PetscMalloc1(commsize,&ilengths_vals);
3641:   PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));
3642:   if (nis) {
3643:     PetscCalloc1(commsize,&ilengths_idxs_is);
3644:   }

3646:   /* Get data from local matrices */
3647:   if (!isdense) {
3648:     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3649:     /* TODO: See below some guidelines on how to prepare the local buffers */
3650:     /*
3651:        send_buffer_vals should contain the raw values of the local matrix
3652:        send_buffer_idxs should contain:
3653:        - MatType_PRIVATE type
3654:        - PetscInt        size_of_l2gmap
3655:        - PetscInt        global_row_indices[size_of_l2gmap]
3656:        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3657:     */
3658:   } else {
3659:     MatDenseGetArray(local_mat,&send_buffer_vals);
3660:     ISLocalToGlobalMappingGetSize(matis->mapping,&i);
3661:     PetscMalloc1(i+2,&send_buffer_idxs);
3662:     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3663:     send_buffer_idxs[1] = i;
3664:     ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);
3665:     PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
3666:     ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);
3667:     PetscMPIIntCast(i,&len);
3668:     for (i=0;i<n_sends;i++) {
3669:       ilengths_vals[is_indices[i]] = len*len;
3670:       ilengths_idxs[is_indices[i]] = len+2;
3671:     }
3672:   }
3673:   PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
3674:   /* additional is (if any) */
3675:   if (nis) {
3676:     PetscMPIInt psum;
3677:     PetscInt j;
3678:     for (j=0,psum=0;j<nis;j++) {
3679:       PetscInt plen;
3680:       ISGetLocalSize(isarray[j],&plen);
3681:       PetscMPIIntCast(plen,&len);
3682:       psum += len+1; /* indices + lenght */
3683:     }
3684:     PetscMalloc1(psum,&send_buffer_idxs_is);
3685:     for (j=0,psum=0;j<nis;j++) {
3686:       PetscInt plen;
3687:       const PetscInt *is_array_idxs;
3688:       ISGetLocalSize(isarray[j],&plen);
3689:       send_buffer_idxs_is[psum] = plen;
3690:       ISGetIndices(isarray[j],&is_array_idxs);
3691:       PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));
3692:       ISRestoreIndices(isarray[j],&is_array_idxs);
3693:       psum += plen+1; /* indices + lenght */
3694:     }
3695:     for (i=0;i<n_sends;i++) {
3696:       ilengths_idxs_is[is_indices[i]] = psum;
3697:     }
3698:     PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
3699:   }

3701:   buf_size_idxs = 0;
3702:   buf_size_vals = 0;
3703:   buf_size_idxs_is = 0;
3704:   for (i=0;i<n_recvs;i++) {
3705:     buf_size_idxs += (PetscInt)olengths_idxs[i];
3706:     buf_size_vals += (PetscInt)olengths_vals[i];
3707:     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3708:   }
3709:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
3710:   PetscMalloc1(buf_size_vals,&recv_buffer_vals);
3711:   PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);

3713:   /* get new tags for clean communications */
3714:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
3715:   PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
3716:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);

3718:   /* allocate for requests */
3719:   PetscMalloc1(n_sends,&send_req_idxs);
3720:   PetscMalloc1(n_sends,&send_req_vals);
3721:   PetscMalloc1(n_sends,&send_req_idxs_is);
3722:   PetscMalloc1(n_recvs,&recv_req_idxs);
3723:   PetscMalloc1(n_recvs,&recv_req_vals);
3724:   PetscMalloc1(n_recvs,&recv_req_idxs_is);

3726:   /* communications */
3727:   ptr_idxs = recv_buffer_idxs;
3728:   ptr_vals = recv_buffer_vals;
3729:   ptr_idxs_is = recv_buffer_idxs_is;
3730:   for (i=0;i<n_recvs;i++) {
3731:     source_dest = onodes[i];
3732:     MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
3733:     MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
3734:     ptr_idxs += olengths_idxs[i];
3735:     ptr_vals += olengths_vals[i];
3736:     if (nis) {
3737:       MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
3738:       ptr_idxs_is += olengths_idxs_is[i];
3739:     }
3740:   }
3741:   for (i=0;i<n_sends;i++) {
3742:     PetscMPIIntCast(is_indices[i],&source_dest);
3743:     MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
3744:     MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
3745:     if (nis) {
3746:       MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
3747:     }
3748:   }
3749:   ISRestoreIndices(is_sends_internal,&is_indices);
3750:   ISDestroy(&is_sends_internal);

3752:   /* assemble new l2g map */
3753:   MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
3754:   ptr_idxs = recv_buffer_idxs;
3755:   new_local_rows = 0;
3756:   for (i=0;i<n_recvs;i++) {
3757:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3758:     ptr_idxs += olengths_idxs[i];
3759:   }
3760:   PetscMalloc1(new_local_rows,&l2gmap_indices);
3761:   ptr_idxs = recv_buffer_idxs;
3762:   new_local_rows = 0;
3763:   for (i=0;i<n_recvs;i++) {
3764:     PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
3765:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3766:     ptr_idxs += olengths_idxs[i];
3767:   }
3768:   PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
3769:   ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
3770:   PetscFree(l2gmap_indices);

3772:   /* infer new local matrix type from received local matrices type */
3773:   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3774:   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3775:   if (n_recvs) {
3776:     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3777:     ptr_idxs = recv_buffer_idxs;
3778:     for (i=0;i<n_recvs;i++) {
3779:       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3780:         new_local_type_private = MATAIJ_PRIVATE;
3781:         break;
3782:       }
3783:       ptr_idxs += olengths_idxs[i];
3784:     }
3785:     switch (new_local_type_private) {
3786:       case MATDENSE_PRIVATE:
3787:         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3788:           new_local_type = MATSEQAIJ;
3789:           bs = 1;
3790:         } else { /* if I receive only 1 dense matrix */
3791:           new_local_type = MATSEQDENSE;
3792:           bs = 1;
3793:         }
3794:         break;
3795:       case MATAIJ_PRIVATE:
3796:         new_local_type = MATSEQAIJ;
3797:         bs = 1;
3798:         break;
3799:       case MATBAIJ_PRIVATE:
3800:         new_local_type = MATSEQBAIJ;
3801:         break;
3802:       case MATSBAIJ_PRIVATE:
3803:         new_local_type = MATSEQSBAIJ;
3804:         break;
3805:       default:
3806:         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3807:         break;
3808:     }
3809:   } else { /* by default, new_local_type is seqdense */
3810:     new_local_type = MATSEQDENSE;
3811:     bs = 1;
3812:   }

3814:   /* create MATIS object if needed */
3815:   if (reuse == MAT_INITIAL_MATRIX) {
3816:     MatGetSize(mat,&rows,&cols);
3817:     MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);
3818:   } else {
3819:     /* it also destroys the local matrices */
3820:     MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
3821:   }
3822:   MatISGetLocalMat(*mat_n,&local_mat);
3823:   MatSetType(local_mat,new_local_type);

3825:   MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);

3827:   /* Global to local map of received indices */
3828:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
3829:   ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
3830:   ISLocalToGlobalMappingDestroy(&l2gmap);

3832:   /* restore attributes -> type of incoming data and its size */
3833:   buf_size_idxs = 0;
3834:   for (i=0;i<n_recvs;i++) {
3835:     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3836:     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3837:     buf_size_idxs += (PetscInt)olengths_idxs[i];
3838:   }
3839:   PetscFree(recv_buffer_idxs);

3841:   /* set preallocation */
3842:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
3843:   if (!newisdense) {
3844:     PetscInt *new_local_nnz=0;

3846:     ptr_vals = recv_buffer_vals;
3847:     ptr_idxs = recv_buffer_idxs_local;
3848:     if (n_recvs) {
3849:       PetscCalloc1(new_local_rows,&new_local_nnz);
3850:     }
3851:     for (i=0;i<n_recvs;i++) {
3852:       PetscInt j;
3853:       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3854:         for (j=0;j<*(ptr_idxs+1);j++) {
3855:           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3856:         }
3857:       } else {
3858:         /* TODO */
3859:       }
3860:       ptr_idxs += olengths_idxs[i];
3861:     }
3862:     if (new_local_nnz) {
3863:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3864:       MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
3865:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3866:       MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
3867:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3868:       MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
3869:     } else {
3870:       MatSetUp(local_mat);
3871:     }
3872:     PetscFree(new_local_nnz);
3873:   } else {
3874:     MatSetUp(local_mat);
3875:   }

3877:   /* set values */
3878:   ptr_vals = recv_buffer_vals;
3879:   ptr_idxs = recv_buffer_idxs_local;
3880:   for (i=0;i<n_recvs;i++) {
3881:     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3882:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
3883:       MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
3884:       MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
3885:       MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
3886:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
3887:     } else {
3888:       /* TODO */
3889:     }
3890:     ptr_idxs += olengths_idxs[i];
3891:     ptr_vals += olengths_vals[i];
3892:   }
3893:   MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
3894:   MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
3895:   MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
3896:   MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
3897:   PetscFree(recv_buffer_vals);
3898:   PetscFree(recv_buffer_idxs_local);

3900: #if 0
3901:   if (!restrict_comm) { /* check */
3902:     Vec       lvec,rvec;
3903:     PetscReal infty_error;

3905:     MatCreateVecs(mat,&rvec,&lvec);
3906:     VecSetRandom(rvec,NULL);
3907:     MatMult(mat,rvec,lvec);
3908:     VecScale(lvec,-1.0);
3909:     MatMultAdd(*mat_n,rvec,lvec,lvec);
3910:     VecNorm(lvec,NORM_INFINITY,&infty_error);
3911:     PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3912:     VecDestroy(&rvec);
3913:     VecDestroy(&lvec);
3914:   }
3915: #endif

3917:   /* assemble new additional is (if any) */
3918:   if (nis) {
3919:     PetscInt **temp_idxs,*count_is,j,psum;

3921:     MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
3922:     PetscCalloc1(nis,&count_is);
3923:     ptr_idxs = recv_buffer_idxs_is;
3924:     psum = 0;
3925:     for (i=0;i<n_recvs;i++) {
3926:       for (j=0;j<nis;j++) {
3927:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3928:         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3929:         psum += plen;
3930:         ptr_idxs += plen+1; /* shift pointer to received data */
3931:       }
3932:     }
3933:     PetscMalloc1(nis,&temp_idxs);
3934:     PetscMalloc1(psum,&temp_idxs[0]);
3935:     for (i=1;i<nis;i++) {
3936:       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3937:     }
3938:     PetscMemzero(count_is,nis*sizeof(PetscInt));
3939:     ptr_idxs = recv_buffer_idxs_is;
3940:     for (i=0;i<n_recvs;i++) {
3941:       for (j=0;j<nis;j++) {
3942:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3943:         PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));
3944:         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3945:         ptr_idxs += plen+1; /* shift pointer to received data */
3946:       }
3947:     }
3948:     for (i=0;i<nis;i++) {
3949:       ISDestroy(&isarray[i]);
3950:       PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
3951:       ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
3952:     }
3953:     PetscFree(count_is);
3954:     PetscFree(temp_idxs[0]);
3955:     PetscFree(temp_idxs);
3956:   }
3957:   /* free workspace */
3958:   PetscFree(recv_buffer_idxs_is);
3959:   MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
3960:   PetscFree(send_buffer_idxs);
3961:   MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
3962:   if (isdense) {
3963:     MatISGetLocalMat(mat,&local_mat);
3964:     MatDenseRestoreArray(local_mat,&send_buffer_vals);
3965:   } else {
3966:     /* PetscFree(send_buffer_vals); */
3967:   }
3968:   if (nis) {
3969:     MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
3970:     PetscFree(send_buffer_idxs_is);
3971:   }
3972:   PetscFree(recv_req_idxs);
3973:   PetscFree(recv_req_vals);
3974:   PetscFree(recv_req_idxs_is);
3975:   PetscFree(send_req_idxs);
3976:   PetscFree(send_req_vals);
3977:   PetscFree(send_req_idxs_is);
3978:   PetscFree(ilengths_vals);
3979:   PetscFree(ilengths_idxs);
3980:   PetscFree(olengths_vals);
3981:   PetscFree(olengths_idxs);
3982:   PetscFree(onodes);
3983:   if (nis) {
3984:     PetscFree(ilengths_idxs_is);
3985:     PetscFree(olengths_idxs_is);
3986:     PetscFree(onodes_is);
3987:   }
3988:   PetscSubcommDestroy(&subcomm);
3989:   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3990:     MatDestroy(mat_n);
3991:     for (i=0;i<nis;i++) {
3992:       ISDestroy(&isarray[i]);
3993:     }
3994:     *mat_n = NULL;
3995:   }
3996:   return(0);
3997: }

3999: /* temporary hack into ksp private data structure */
4000: #include <petsc/private/kspimpl.h>

4004: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4005: {
4006:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4007:   PC_IS                  *pcis = (PC_IS*)pc->data;
4008:   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4009:   MatNullSpace           CoarseNullSpace=NULL;
4010:   ISLocalToGlobalMapping coarse_islg;
4011:   IS                     coarse_is,*isarray;
4012:   PetscInt               i,im_active=-1,active_procs=-1;
4013:   PetscInt               nis,nisdofs,nisneu;
4014:   PC                     pc_temp;
4015:   PCType                 coarse_pc_type;
4016:   KSPType                coarse_ksp_type;
4017:   PetscBool              multilevel_requested,multilevel_allowed;
4018:   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4019:   Mat                    t_coarse_mat_is;
4020:   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4021:   PetscMPIInt            all_procs;
4022:   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4023:   PetscBool              compute_vecs = PETSC_FALSE;
4024:   PetscScalar            *array;
4025:   PetscErrorCode         ierr;

4028:   /* Assign global numbering to coarse dofs */
4029:   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4030:     PetscInt ocoarse_size;
4031:     compute_vecs = PETSC_TRUE;
4032:     ocoarse_size = pcbddc->coarse_size;
4033:     PetscFree(pcbddc->global_primal_indices);
4034:     PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
4035:     /* see if we can avoid some work */
4036:     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4037:       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4038:       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4039:         PC        pc;
4040:         PetscBool isbddc;

4042:         /* temporary workaround since PCBDDC does not have a reset method so far */
4043:         KSPGetPC(pcbddc->coarse_ksp,&pc);
4044:         PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);
4045:         if (isbddc) {
4046:           PCDestroy(&pc);
4047:         }
4048:         KSPReset(pcbddc->coarse_ksp);
4049:         coarse_reuse = PETSC_FALSE;
4050:       } else { /* we can safely reuse already computed coarse matrix */
4051:         coarse_reuse = PETSC_TRUE;
4052:       }
4053:     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4054:       coarse_reuse = PETSC_FALSE;
4055:     }
4056:     /* reset any subassembling information */
4057:     ISDestroy(&pcbddc->coarse_subassembling);
4058:     ISDestroy(&pcbddc->coarse_subassembling_init);
4059:   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4060:     coarse_reuse = PETSC_TRUE;
4061:   }

4063:   /* count "active" (i.e. with positive local size) and "void" processes */
4064:   im_active = !!(pcis->n);
4065:   MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
4066:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);
4067:   void_procs = all_procs-active_procs;
4068:   csin_type_simple = PETSC_TRUE;
4069:   redist = PETSC_FALSE;
4070:   if (pcbddc->current_level && void_procs) {
4071:     csin_ml = PETSC_TRUE;
4072:     ncoarse_ml = void_procs;
4073:     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4074:     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4075:       csin_ds = PETSC_TRUE;
4076:       ncoarse_ds = pcbddc->redistribute_coarse;
4077:       redist = PETSC_TRUE;
4078:     } else {
4079:       csin_ds = PETSC_TRUE;
4080:       ncoarse_ds = active_procs;
4081:       redist = PETSC_TRUE;
4082:     }
4083:   } else {
4084:     csin_ml = PETSC_FALSE;
4085:     ncoarse_ml = all_procs;
4086:     if (void_procs) {
4087:       csin_ds = PETSC_TRUE;
4088:       ncoarse_ds = void_procs;
4089:       csin_type_simple = PETSC_FALSE;
4090:     } else {
4091:       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4092:         csin_ds = PETSC_TRUE;
4093:         ncoarse_ds = pcbddc->redistribute_coarse;
4094:         redist = PETSC_TRUE;
4095:       } else {
4096:         csin_ds = PETSC_FALSE;
4097:         ncoarse_ds = all_procs;
4098:       }
4099:     }
4100:   }

4102:   /*
4103:     test if we can go multilevel: three conditions must be satisfied:
4104:     - we have not exceeded the number of levels requested
4105:     - we can actually subassemble the active processes
4106:     - we can find a suitable number of MPI processes where we can place the subassembled problem
4107:   */
4108:   multilevel_allowed = PETSC_FALSE;
4109:   multilevel_requested = PETSC_FALSE;
4110:   if (pcbddc->current_level < pcbddc->max_levels) {
4111:     multilevel_requested = PETSC_TRUE;
4112:     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4113:       multilevel_allowed = PETSC_FALSE;
4114:     } else {
4115:       multilevel_allowed = PETSC_TRUE;
4116:     }
4117:   }
4118:   /* determine number of process partecipating to coarse solver */
4119:   if (multilevel_allowed) {
4120:     ncoarse = ncoarse_ml;
4121:     csin = csin_ml;
4122:     redist = PETSC_FALSE;
4123:   } else {
4124:     ncoarse = ncoarse_ds;
4125:     csin = csin_ds;
4126:   }

4128:   /* creates temporary l2gmap and IS for coarse indexes */
4129:   ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
4130:   ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);

4132:   /* creates temporary MATIS object for coarse matrix */
4133:   MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);
4134:   MatDenseGetArray(coarse_submat_dense,&array);
4135:   PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);
4136:   MatDenseRestoreArray(coarse_submat_dense,&array);
4137: #if 0
4138:   {
4139:     PetscViewer viewer;
4140:     char filename[256];
4141:     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4142:     PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4143:     PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4144:     MatView(coarse_submat_dense,viewer);
4145:     PetscViewerDestroy(&viewer);
4146:   }
4147: #endif
4148:   MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);
4149:   MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
4150:   MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
4151:   MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
4152:   MatDestroy(&coarse_submat_dense);

4154:   /* compute dofs splitting and neumann boundaries for coarse dofs */
4155:   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4156:     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4157:     const PetscInt         *idxs;
4158:     ISLocalToGlobalMapping tmap;

4160:     /* create map between primal indices (in local representative ordering) and local primal numbering */
4161:     ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
4162:     /* allocate space for temporary storage */
4163:     PetscMalloc1(pcbddc->local_primal_size,&tidxs);
4164:     PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
4165:     /* allocate for IS array */
4166:     nisdofs = pcbddc->n_ISForDofsLocal;
4167:     nisneu = !!pcbddc->NeumannBoundariesLocal;
4168:     nis = nisdofs + nisneu;
4169:     PetscMalloc1(nis,&isarray);
4170:     /* dofs splitting */
4171:     for (i=0;i<nisdofs;i++) {
4172:       /* ISView(pcbddc->ISForDofsLocal[i],0); */
4173:       ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
4174:       ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
4175:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
4176:       ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
4177:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
4178:       ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
4179:       /* ISView(isarray[i],0); */
4180:     }
4181:     /* neumann boundaries */
4182:     if (pcbddc->NeumannBoundariesLocal) {
4183:       /* ISView(pcbddc->NeumannBoundariesLocal,0); */
4184:       ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
4185:       ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
4186:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
4187:       ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
4188:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
4189:       ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
4190:       /* ISView(isarray[nisdofs],0); */
4191:     }
4192:     /* free memory */
4193:     PetscFree(tidxs);
4194:     PetscFree(tidxs2);
4195:     ISLocalToGlobalMappingDestroy(&tmap);
4196:   } else {
4197:     nis = 0;
4198:     nisdofs = 0;
4199:     nisneu = 0;
4200:     isarray = NULL;
4201:   }
4202:   /* destroy no longer needed map */
4203:   ISLocalToGlobalMappingDestroy(&coarse_islg);

4205:   /* restrict on coarse candidates (if needed) */
4206:   coarse_mat_is = NULL;
4207:   if (csin) {
4208:     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4209:       if (redist) {
4210:         PetscMPIInt rank;
4211:         PetscInt    spc,n_spc_p1,dest[1],destsize;

4213:         MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
4214:         spc = active_procs/ncoarse;
4215:         n_spc_p1 = active_procs%ncoarse;
4216:         if (im_active) {
4217:           destsize = 1;
4218:           if (rank > n_spc_p1*(spc+1)-1) {
4219:             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4220:           } else {
4221:             dest[0] = rank/(spc+1);
4222:           }
4223:         } else {
4224:           destsize = 0;
4225:         }
4226:         ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);
4227:       } else if (csin_type_simple) {
4228:         PetscMPIInt rank;
4229:         PetscInt    issize,isidx;

4231:         MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
4232:         if (im_active) {
4233:           issize = 1;
4234:           isidx = (PetscInt)rank;
4235:         } else {
4236:           issize = 0;
4237:           isidx = -1;
4238:         }
4239:         ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);
4240:       } else { /* get a suitable subassembling pattern from MATIS code */
4241:         MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);
4242:       }

4244:       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4245:       if (!redist || ncoarse <= void_procs) {
4246:         PetscInt ncoarse_cand,tissize,*nisindices;
4247:         PetscInt *coarse_candidates;
4248:         const PetscInt* tisindices;

4250:         /* get coarse candidates' ranks in pc communicator */
4251:         PetscMalloc1(all_procs,&coarse_candidates);
4252:         MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));
4253:         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4254:           if (!coarse_candidates[i]) {
4255:             coarse_candidates[ncoarse_cand++]=i;
4256:           }
4257:         }
4258:         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);


4261:         if (pcbddc->dbg_flag) {
4262:           PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4263:           PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");
4264:           ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);
4265:           PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");
4266:           for (i=0;i<ncoarse_cand;i++) {
4267:             PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);
4268:           }
4269:           PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");
4270:           PetscViewerFlush(pcbddc->dbg_viewer);
4271:         }
4272:         /* shift the pattern on coarse candidates */
4273:         ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);
4274:         ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);
4275:         PetscMalloc1(tissize,&nisindices);
4276:         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4277:         ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);
4278:         ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);
4279:         PetscFree(coarse_candidates);
4280:       }
4281:       if (pcbddc->dbg_flag) {
4282:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4283:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");
4284:         ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);
4285:         PetscViewerFlush(pcbddc->dbg_viewer);
4286:       }
4287:     }
4288:     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4289:     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
4290:       MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);
4291:     } else { /* this is the last level, so use just receiving processes in subcomm */
4292:       MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);
4293:     }
4294:   } else {
4295:     if (pcbddc->dbg_flag) {
4296:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4297:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");
4298:       PetscViewerFlush(pcbddc->dbg_viewer);
4299:     }
4300:     PetscObjectReference((PetscObject)t_coarse_mat_is);
4301:     coarse_mat_is = t_coarse_mat_is;
4302:   }

4304:   /* create local to global scatters for coarse problem */
4305:   if (compute_vecs) {
4306:     PetscInt lrows;
4307:     VecDestroy(&pcbddc->coarse_vec);
4308:     if (coarse_mat_is) {
4309:       MatGetLocalSize(coarse_mat_is,&lrows,NULL);
4310:     } else {
4311:       lrows = 0;
4312:     }
4313:     VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
4314:     VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
4315:     VecSetType(pcbddc->coarse_vec,VECSTANDARD);
4316:     VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
4317:     VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
4318:   }
4319:   ISDestroy(&coarse_is);
4320:   MatDestroy(&t_coarse_mat_is);

4322:   /* set defaults for coarse KSP and PC */
4323:   if (multilevel_allowed) {
4324:     coarse_ksp_type = KSPRICHARDSON;
4325:     coarse_pc_type = PCBDDC;
4326:   } else {
4327:     coarse_ksp_type = KSPPREONLY;
4328:     coarse_pc_type = PCREDUNDANT;
4329:   }

4331:   /* print some info if requested */
4332:   if (pcbddc->dbg_flag) {
4333:     if (!multilevel_allowed) {
4334:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4335:       if (multilevel_requested) {
4336:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
4337:       } else if (pcbddc->max_levels) {
4338:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);
4339:       }
4340:       PetscViewerFlush(pcbddc->dbg_viewer);
4341:     }
4342:   }

4344:   /* create the coarse KSP object only once with defaults */
4345:   if (coarse_mat_is) {
4346:     MatReuse coarse_mat_reuse;
4347:     PetscViewer dbg_viewer = NULL;
4348:     if (pcbddc->dbg_flag) {
4349:       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4350:       PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
4351:     }
4352:     if (!pcbddc->coarse_ksp) {
4353:       char prefix[256],str_level[16];
4354:       size_t len;
4355:       KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);
4356:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
4357:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
4358:       KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
4359:       KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);
4360:       KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
4361:       KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
4362:       KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
4363:       PCSetType(pc_temp,coarse_pc_type);
4364:       /* prefix */
4365:       PetscStrcpy(prefix,"");
4366:       PetscStrcpy(str_level,"");
4367:       if (!pcbddc->current_level) {
4368:         PetscStrcpy(prefix,((PetscObject)pc)->prefix);
4369:         PetscStrcat(prefix,"pc_bddc_coarse_");
4370:       } else {
4371:         PetscStrlen(((PetscObject)pc)->prefix,&len);
4372:         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4373:         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4374:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
4375:         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4376:         PetscStrcat(prefix,str_level);
4377:       }
4378:       KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
4379:       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4380:       PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
4381:       PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
4382:       PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
4383:       /* allow user customization */
4384:       KSPSetFromOptions(pcbddc->coarse_ksp);
4385:     }
4386:     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4387:     KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
4388:     if (nisdofs) {
4389:       PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
4390:       for (i=0;i<nisdofs;i++) {
4391:         ISDestroy(&isarray[i]);
4392:       }
4393:     }
4394:     if (nisneu) {
4395:       PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
4396:       ISDestroy(&isarray[nisdofs]);
4397:     }

4399:     /* get some info after set from options */
4400:     PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
4401:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
4402:     PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
4403:     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4404:       PCSetType(pc_temp,coarse_pc_type);
4405:       isbddc = PETSC_FALSE;
4406:     }
4407:     PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
4408:     if (isredundant) {
4409:       KSP inner_ksp;
4410:       PC  inner_pc;
4411:       PCRedundantGetKSP(pc_temp,&inner_ksp);
4412:       KSPGetPC(inner_ksp,&inner_pc);
4413:       PCFactorSetReuseFill(inner_pc,PETSC_TRUE);
4414:     }

4416:     /* assemble coarse matrix */
4417:     if (coarse_reuse) {
4418:       KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
4419:       PetscObjectReference((PetscObject)coarse_mat);
4420:       coarse_mat_reuse = MAT_REUSE_MATRIX;
4421:     } else {
4422:       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4423:     }
4424:     if (isbddc || isnn) {
4425:       if (pcbddc->coarsening_ratio > 1) {
4426:         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4427:           MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);
4428:           if (pcbddc->dbg_flag) {
4429:             PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");
4430:             PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");
4431:             ISView(pcbddc->coarse_subassembling,dbg_viewer);
4432:             PetscViewerFlush(dbg_viewer);
4433:           }
4434:         }
4435:         MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);
4436:       } else {
4437:         PetscObjectReference((PetscObject)coarse_mat_is);
4438:         coarse_mat = coarse_mat_is;
4439:       }
4440:     } else {
4441:       MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);
4442:     }
4443:     MatDestroy(&coarse_mat_is);

4445:     /* propagate symmetry info of coarse matrix */
4446:     MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
4447:     if (pc->pmat->symmetric_set) {
4448:       MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
4449:     }
4450:     if (pc->pmat->hermitian_set) {
4451:       MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
4452:     }
4453:     if (pc->pmat->spd_set) {
4454:       MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
4455:     }
4456:     /* set operators */
4457:     KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
4458:     if (pcbddc->dbg_flag) {
4459:       PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
4460:     }
4461:   } else { /* processes non partecipating to coarse solver (if any) */
4462:     coarse_mat = 0;
4463:   }
4464:   PetscFree(isarray);
4465: #if 0
4466:   {
4467:     PetscViewer viewer;
4468:     char filename[256];
4469:     sprintf(filename,"coarse_mat.m");
4470:     PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);
4471:     PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4472:     MatView(coarse_mat,viewer);
4473:     PetscViewerDestroy(&viewer);
4474:   }
4475: #endif

4477:   /* Compute coarse null space (special handling by BDDC only) */
4478: #if 0
4479:   if (pcbddc->NullSpace) {
4480:     PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);
4481:   }
4482: #endif

4484:   if (pcbddc->coarse_ksp) {
4485:     Vec crhs,csol;
4486:     PetscBool ispreonly;

4488:     if (CoarseNullSpace) {
4489:       if (isbddc) {
4490:         PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);
4491:       } else {
4492:         MatSetNullSpace(coarse_mat,CoarseNullSpace);
4493:       }
4494:     }
4495:     /* setup coarse ksp */
4496:     KSPSetUp(pcbddc->coarse_ksp);
4497:     KSPGetSolution(pcbddc->coarse_ksp,&csol);
4498:     KSPGetRhs(pcbddc->coarse_ksp,&crhs);
4499:     /* hack */
4500:     if (!csol) {
4501:       MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
4502:     }
4503:     if (!crhs) {
4504:       MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
4505:     }
4506:     /* Check coarse problem if in debug mode or if solving with an iterative method */
4507:     PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
4508:     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4509:       KSP       check_ksp;
4510:       KSPType   check_ksp_type;
4511:       PC        check_pc;
4512:       Vec       check_vec,coarse_vec;
4513:       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4514:       PetscInt  its;
4515:       PetscBool compute_eigs;
4516:       PetscReal *eigs_r,*eigs_c;
4517:       PetscInt  neigs;
4518:       const char *prefix;

4520:       /* Create ksp object suitable for estimation of extreme eigenvalues */
4521:       KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
4522:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
4523:       KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
4524:       KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
4525:       if (ispreonly) {
4526:         check_ksp_type = KSPPREONLY;
4527:         compute_eigs = PETSC_FALSE;
4528:       } else {
4529:         check_ksp_type = KSPGMRES;
4530:         compute_eigs = PETSC_TRUE;
4531:       }
4532:       KSPSetType(check_ksp,check_ksp_type);
4533:       KSPSetComputeSingularValues(check_ksp,compute_eigs);
4534:       KSPSetComputeEigenvalues(check_ksp,compute_eigs);
4535:       KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
4536:       KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
4537:       KSPSetOptionsPrefix(check_ksp,prefix);
4538:       KSPAppendOptionsPrefix(check_ksp,"check_");
4539:       KSPSetFromOptions(check_ksp);
4540:       KSPSetUp(check_ksp);
4541:       KSPGetPC(pcbddc->coarse_ksp,&check_pc);
4542:       KSPSetPC(check_ksp,check_pc);
4543:       /* create random vec */
4544:       KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);
4545:       VecDuplicate(coarse_vec,&check_vec);
4546:       VecSetRandom(check_vec,NULL);
4547:       if (CoarseNullSpace) {
4548:         MatNullSpaceRemove(CoarseNullSpace,check_vec);
4549:       }
4550:       MatMult(coarse_mat,check_vec,coarse_vec);
4551:       /* solve coarse problem */
4552:       KSPSolve(check_ksp,coarse_vec,coarse_vec);
4553:       if (CoarseNullSpace) {
4554:         MatNullSpaceRemove(CoarseNullSpace,coarse_vec);
4555:       }
4556:       /* set eigenvalue estimation if preonly has not been requested */
4557:       if (compute_eigs) {
4558:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
4559:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
4560:         KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
4561:         lambda_max = eigs_r[neigs-1];
4562:         lambda_min = eigs_r[0];
4563:         if (pcbddc->use_coarse_estimates) {
4564:           if (lambda_max>lambda_min) {
4565:             KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);
4566:             KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
4567:           }
4568:         }
4569:       }

4571:       /* check coarse problem residual error */
4572:       if (pcbddc->dbg_flag) {
4573:         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4574:         PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
4575:         VecAXPY(check_vec,-1.0,coarse_vec);
4576:         VecNorm(check_vec,NORM_INFINITY,&infty_error);
4577:         MatMult(coarse_mat,check_vec,coarse_vec);
4578:         VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
4579:         VecDestroy(&check_vec);
4580:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
4581:         PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
4582:         PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
4583:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);
4584:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
4585:         if (compute_eigs) {
4586:           PetscReal lambda_max_s,lambda_min_s;
4587:           KSPGetType(check_ksp,&check_ksp_type);
4588:           KSPGetIterationNumber(check_ksp,&its);
4589:           KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
4590:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
4591:           for (i=0;i<neigs;i++) {
4592:             PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
4593:           }
4594:         }
4595:         PetscViewerFlush(dbg_viewer);
4596:         PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
4597:       }
4598:       KSPDestroy(&check_ksp);
4599:       if (compute_eigs) {
4600:         PetscFree(eigs_r);
4601:         PetscFree(eigs_c);
4602:       }
4603:     }
4604:   }
4605:   /* print additional info */
4606:   if (pcbddc->dbg_flag) {
4607:     /* waits until all processes reaches this point */
4608:     PetscBarrier((PetscObject)pc);
4609:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);
4610:     PetscViewerFlush(pcbddc->dbg_viewer);
4611:   }

4613:   /* free memory */
4614:   MatNullSpaceDestroy(&CoarseNullSpace);
4615:   MatDestroy(&coarse_mat);
4616:   return(0);
4617: }

4621: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4622: {
4623:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4624:   PC_IS*         pcis = (PC_IS*)pc->data;
4625:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4626:   IS             subset,subset_mult,subset_n;
4627:   PetscInt       local_size,coarse_size=0;
4628:   PetscInt       *local_primal_indices=NULL;
4629:   const PetscInt *t_local_primal_indices;

4633:   /* Compute global number of coarse dofs */
4634:   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4635:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4636:   }
4637:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
4638:   ISLocalToGlobalMappingApplyIS(matis->mapping,subset_n,&subset);
4639:   ISDestroy(&subset_n);
4640:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
4641:   PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);
4642:   ISDestroy(&subset);
4643:   ISDestroy(&subset_mult);
4644:   ISGetLocalSize(subset_n,&local_size);
4645:   if (local_size != pcbddc->local_primal_size) {
4646:     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4647:   }
4648:   PetscMalloc1(local_size,&local_primal_indices);
4649:   ISGetIndices(subset_n,&t_local_primal_indices);
4650:   PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));
4651:   ISRestoreIndices(subset_n,&t_local_primal_indices);
4652:   ISDestroy(&subset_n);

4654:   /* check numbering */
4655:   if (pcbddc->dbg_flag) {
4656:     PetscScalar coarsesum,*array;
4657:     PetscInt    i;
4658:     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;

4660:     PetscViewerFlush(pcbddc->dbg_viewer);
4661:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4662:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
4663:     PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);
4664:     VecSet(pcis->vec1_N,0.0);
4665:     for (i=0;i<pcbddc->local_primal_size;i++) {
4666:       VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
4667:     }
4668:     VecAssemblyBegin(pcis->vec1_N);
4669:     VecAssemblyEnd(pcis->vec1_N);
4670:     VecSet(pcis->vec1_global,0.0);
4671:     VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4672:     VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4673:     VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
4674:     VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
4675:     VecGetArray(pcis->vec1_N,&array);
4676:     for (i=0;i<pcis->n;i++) {
4677:       if (array[i] == 1.0) {
4678:         set_error = PETSC_TRUE;
4679:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);
4680:       }
4681:     }
4682:     MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
4683:     PetscViewerFlush(pcbddc->dbg_viewer);
4684:     for (i=0;i<pcis->n;i++) {
4685:       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4686:     }
4687:     VecRestoreArray(pcis->vec1_N,&array);
4688:     VecSet(pcis->vec1_global,0.0);
4689:     VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4690:     VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4691:     VecSum(pcis->vec1_global,&coarsesum);
4692:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));
4693:     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4694:       PetscInt *gidxs;

4696:       PetscMalloc1(pcbddc->local_primal_size,&gidxs);
4697:       ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
4698:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
4699:       PetscViewerFlush(pcbddc->dbg_viewer);
4700:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
4701:       for (i=0;i<pcbddc->local_primal_size;i++) {
4702:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
4703:       }
4704:       PetscViewerFlush(pcbddc->dbg_viewer);
4705:       PetscFree(gidxs);
4706:     }
4707:     PetscViewerFlush(pcbddc->dbg_viewer);
4708:     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4709:   }
4710:   /* PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size); */
4711:   /* get back data */
4712:   *coarse_size_n = coarse_size;
4713:   *local_primal_indices_n = local_primal_indices;
4714:   return(0);
4715: }

4719: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4720: {
4721:   IS             localis_t;
4722:   PetscInt       i,lsize,*idxs,n;
4723:   PetscScalar    *vals;

4727:   /* get indices in local ordering exploiting local to global map */
4728:   ISGetLocalSize(globalis,&lsize);
4729:   PetscMalloc1(lsize,&vals);
4730:   for (i=0;i<lsize;i++) vals[i] = 1.0;
4731:   ISGetIndices(globalis,(const PetscInt**)&idxs);
4732:   VecSet(gwork,0.0);
4733:   VecSet(lwork,0.0);
4734:   if (idxs) { /* multilevel guard */
4735:     VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
4736:   }
4737:   VecAssemblyBegin(gwork);
4738:   ISRestoreIndices(globalis,(const PetscInt**)&idxs);
4739:   PetscFree(vals);
4740:   VecAssemblyEnd(gwork);
4741:   /* now compute set in local ordering */
4742:   VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
4743:   VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
4744:   VecGetArrayRead(lwork,(const PetscScalar**)&vals);
4745:   VecGetSize(lwork,&n);
4746:   for (i=0,lsize=0;i<n;i++) {
4747:     if (PetscRealPart(vals[i]) > 0.5) {
4748:       lsize++;
4749:     }
4750:   }
4751:   PetscMalloc1(lsize,&idxs);
4752:   for (i=0,lsize=0;i<n;i++) {
4753:     if (PetscRealPart(vals[i]) > 0.5) {
4754:       idxs[lsize++] = i;
4755:     }
4756:   }
4757:   VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
4758:   ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
4759:   *localis = localis_t;
4760:   return(0);
4761: }

4763: /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4766: static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4767: {
4768:   PCBDDCChange_ctx change_ctx;
4769:   PetscErrorCode   ierr;

4772:   MatShellGetContext(A,&change_ctx);
4773:   MatMult(change_ctx->global_change,x,change_ctx->work[0]);
4774:   MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);
4775:   MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);
4776:   return(0);
4777: }

4781: static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4782: {
4783:   PCBDDCChange_ctx change_ctx;
4784:   PetscErrorCode   ierr;

4787:   MatShellGetContext(A,&change_ctx);
4788:   MatMult(change_ctx->global_change,x,change_ctx->work[0]);
4789:   MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);
4790:   MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);
4791:   return(0);
4792: }

4796: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4797: {
4798:   PC_IS               *pcis=(PC_IS*)pc->data;
4799:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4800:   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4801:   Mat                 S_j;
4802:   PetscInt            *used_xadj,*used_adjncy;
4803:   PetscBool           free_used_adj;
4804:   PetscErrorCode      ierr;

4807:   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4808:   free_used_adj = PETSC_FALSE;
4809:   if (pcbddc->sub_schurs_layers == -1) {
4810:     used_xadj = NULL;
4811:     used_adjncy = NULL;
4812:   } else {
4813:     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4814:       used_xadj = pcbddc->mat_graph->xadj;
4815:       used_adjncy = pcbddc->mat_graph->adjncy;
4816:     } else if (pcbddc->computed_rowadj) {
4817:       used_xadj = pcbddc->mat_graph->xadj;
4818:       used_adjncy = pcbddc->mat_graph->adjncy;
4819:     } else {
4820:       PetscBool      flg_row=PETSC_FALSE;
4821:       const PetscInt *xadj,*adjncy;
4822:       PetscInt       nvtxs;

4824:       MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
4825:       if (flg_row) {
4826:         PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
4827:         PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));
4828:         PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));
4829:         free_used_adj = PETSC_TRUE;
4830:       } else {
4831:         pcbddc->sub_schurs_layers = -1;
4832:         used_xadj = NULL;
4833:         used_adjncy = NULL;
4834:       }
4835:       MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
4836:     }
4837:   }

4839:   /* setup sub_schurs data */
4840:   MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
4841:   if (!sub_schurs->use_mumps) {
4842:     /* pcbddc->ksp_D up to date only if not using MUMPS */
4843:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
4844:     PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);
4845:   } else {
4846:     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
4847:     PetscBool isseqaij;
4848:     if (!pcbddc->use_vertices && reuse_solvers) {
4849:       PetscInt n_vertices;

4851:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
4852:       reuse_solvers = (PetscBool)!n_vertices;
4853:     }
4854:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);
4855:     if (!isseqaij) {
4856:       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
4857:       if (matis->A == pcbddc->local_mat) {
4858:         MatDestroy(&pcbddc->local_mat);
4859:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
4860:       } else {
4861:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);
4862:       }
4863:     }
4864:     PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);
4865:   }
4866:   MatDestroy(&S_j);

4868:   /* free adjacency */
4869:   if (free_used_adj) {
4870:     PetscFree2(used_xadj,used_adjncy);
4871:   }
4872:   return(0);
4873: }

4877: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4878: {
4879:   PC_IS               *pcis=(PC_IS*)pc->data;
4880:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4881:   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4882:   PCBDDCGraph         graph;
4883:   PetscErrorCode      ierr;

4886:   /* attach interface graph for determining subsets */
4887:   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4888:     IS       verticesIS,verticescomm;
4889:     PetscInt vsize,*idxs;

4891:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
4892:     ISGetSize(verticesIS,&vsize);
4893:     ISGetIndices(verticesIS,(const PetscInt**)&idxs);
4894:     ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
4895:     ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
4896:     ISDestroy(&verticesIS);
4897:     PCBDDCGraphCreate(&graph);
4898:     PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);
4899:     PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
4900:     ISDestroy(&verticescomm);
4901:     PCBDDCGraphComputeConnectedComponents(graph);
4902: /*
4903:     if (pcbddc->dbg_flag) {
4904:       PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
4905:     }
4906: */
4907:   } else {
4908:     graph = pcbddc->mat_graph;
4909:   }

4911:   /* sub_schurs init */
4912:   PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);

4914:   /* free graph struct */
4915:   if (pcbddc->sub_schurs_rebuild) {
4916:     PCBDDCGraphDestroy(&graph);
4917:   }
4918:   return(0);
4919: }