Actual source code: bddcprivate.c

petsc-3.7.3 2016-08-01
Report Typos and Errors
  1: #include <../src/ksp/pc/impls/bddc/bddc.h>
  2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  3: #include <petscblaslapack.h>

  5: static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
  6: static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);

 10: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
 11: {
 12:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
 13:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
 14:   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
 15:   PetscBLASInt    *B_iwork,*B_ifail;
 16:   PetscScalar     *work,lwork;
 17:   PetscScalar     *St,*S,*eigv;
 18:   PetscScalar     *Sarray,*Starray;
 19:   PetscReal       *eigs,thresh;
 20:   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
 21:   PetscBool       allocated_S_St;
 22: #if defined(PETSC_USE_COMPLEX)
 23:   PetscReal       *rwork;
 24: #endif
 25:   PetscErrorCode  ierr;

 28:   if (!sub_schurs->use_mumps) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");

 30:   if (pcbddc->dbg_flag) {
 31:     PetscViewerFlush(pcbddc->dbg_viewer);
 32:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
 33:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
 34:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
 35:   }

 37:   if (pcbddc->dbg_flag) {
 38:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
 39:   }

 41:   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);

 43:   /* max size of subsets */
 44:   mss = 0;
 45:   for (i=0;i<sub_schurs->n_subs;i++) {
 46:     PetscInt subset_size;

 48:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
 49:     mss = PetscMax(mss,subset_size);
 50:   }

 52:   /* min/max and threshold */
 53:   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
 54:   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
 55:   nmax = PetscMax(nmin,nmax);
 56:   allocated_S_St = PETSC_FALSE;
 57:   if (nmin) {
 58:     allocated_S_St = PETSC_TRUE;
 59:   }

 61:   /* allocate lapack workspace */
 62:   cum = cum2 = 0;
 63:   maxneigs = 0;
 64:   for (i=0;i<sub_schurs->n_subs;i++) {
 65:     PetscInt n,subset_size;

 67:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
 68:     n = PetscMin(subset_size,nmax);
 69:     cum += subset_size;
 70:     cum2 += subset_size*n;
 71:     maxneigs = PetscMax(maxneigs,n);
 72:   }
 73:   if (mss) {
 74:     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
 75:       PetscBLASInt B_itype = 1;
 76:       PetscBLASInt B_N = mss;
 77:       PetscReal    zero = 0.0;
 78:       PetscReal    eps = 0.0; /* dlamch? */

 80:       B_lwork = -1;
 81:       S = NULL;
 82:       St = NULL;
 83:       eigs = NULL;
 84:       eigv = NULL;
 85:       B_iwork = NULL;
 86:       B_ifail = NULL;
 87: #if defined(PETSC_USE_COMPLEX)
 88:       rwork = NULL;
 89: #endif
 90:       thresh = 1.0;
 91:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 92: #if defined(PETSC_USE_COMPLEX)
 93:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
 94: #else
 95:       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
 96: #endif
 97:       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
 98:       PetscFPTrapPop();
 99:     } else {
100:         /* TODO */
101:     }
102:   } else {
103:     lwork = 0;
104:   }

106:   nv = 0;
107:   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
108:     ISGetLocalSize(sub_schurs->is_vertices,&nv);
109:   }
110:   PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
111:   if (allocated_S_St) {
112:     PetscMalloc2(mss*mss,&S,mss*mss,&St);
113:   }
114:   PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
115: #if defined(PETSC_USE_COMPLEX)
116:   PetscMalloc1(7*mss,&rwork);
117: #endif
118:   PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
119:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
120:                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
121:                       nv+cum,&pcbddc->adaptive_constraints_idxs,
122:                       nv+cum2,&pcbddc->adaptive_constraints_data);
123:   PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));

125:   maxneigs = 0;
126:   cum = cumarray = 0;
127:   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
128:   pcbddc->adaptive_constraints_data_ptr[0] = 0;
129:   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
130:     const PetscInt *idxs;

132:     ISGetIndices(sub_schurs->is_vertices,&idxs);
133:     for (cum=0;cum<nv;cum++) {
134:       pcbddc->adaptive_constraints_n[cum] = 1;
135:       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
136:       pcbddc->adaptive_constraints_data[cum] = 1.0;
137:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
138:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
139:     }
140:     ISRestoreIndices(sub_schurs->is_vertices,&idxs);
141:   }

143:   if (mss) { /* multilevel */
144:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
145:     MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
146:   }

148:   for (i=0;i<sub_schurs->n_subs;i++) {

150:     const PetscInt *idxs;
151:     PetscReal      infty = PETSC_MAX_REAL;
152:     PetscInt       j,subset_size,eigs_start = 0;
153:     PetscBLASInt   B_N;
154:     PetscBool      same_data = PETSC_FALSE;

156:     ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
157:     PetscBLASIntCast(subset_size,&B_N);
158:     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
159:       if (sub_schurs->is_hermitian) {
160:         PetscInt j,k;
161:         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
162:           PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));
163:           PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));
164:         }
165:         for (j=0;j<subset_size;j++) {
166:           for (k=j;k<subset_size;k++) {
167:             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
168:             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
169:           }
170:         }
171:       } else {
172:         PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
173:         PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
174:       }
175:     } else {
176:       S = Sarray + cumarray;
177:       St = Starray + cumarray;
178:     }

180:     ISGetIndices(sub_schurs->is_subs[i],&idxs);
181:     /* see if we can save some work */
182:     if (sub_schurs->n_subs == 1) {
183:       PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);
184:     }

186:     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
187:       B_neigs = 0;
188:     } else {
189:       /* Threshold: this is an heuristic for edges */
190:       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;

192:       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
193:         PetscBLASInt B_itype = 1;
194:         PetscBLASInt B_IL, B_IU;
195:         PetscReal    eps = -1.0; /* dlamch? */
196:         PetscInt     nmin_s;

198:         /* ask for eigenvalues larger than thresh */
199:         if (pcbddc->dbg_flag) {
200:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
201:         }
202:         PetscFPTrapPush(PETSC_FP_TRAP_OFF);
203: #if defined(PETSC_USE_COMPLEX)
204:         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
205: #else
206:         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
207: #endif
208:         PetscFPTrapPop();
209:         if (B_ierr) {
210:           if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
211:           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
212:           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
213:         }

215:         if (B_neigs > nmax) {
216:           if (pcbddc->dbg_flag) {
217:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
218:           }
219:           eigs_start = B_neigs -nmax;
220:           B_neigs = nmax;
221:         }

223:         nmin_s = PetscMin(nmin,B_N);
224:         if (B_neigs < nmin_s) {
225:           PetscBLASInt B_neigs2;

227:           B_IU = B_N - B_neigs;
228:           B_IL = B_N - nmin_s + 1;
229:           if (pcbddc->dbg_flag) {
230:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
231:           }
232:           if (sub_schurs->is_hermitian) {
233:             PetscInt j;
234:             for (j=0;j<subset_size;j++) {
235:               PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));
236:             }
237:             for (j=0;j<subset_size;j++) {
238:               PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));
239:             }
240:           } else {
241:             PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
242:             PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));
243:           }
244:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
245: #if defined(PETSC_USE_COMPLEX)
246:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
247: #else
248:           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
249: #endif
250:           PetscFPTrapPop();
251:           B_neigs += B_neigs2;
252:         }
253:         if (B_ierr) {
254:           if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
255:           else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
256:           else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
257:         }
258:         if (pcbddc->dbg_flag) {
259:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
260:           for (j=0;j<B_neigs;j++) {
261:             if (eigs[j] == 0.0) {
262:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
263:             } else {
264:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
265:             }
266:           }
267:         }
268:       } else {
269:           /* TODO */
270:       }
271:     }
272:     maxneigs = PetscMax(B_neigs,maxneigs);
273:     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
274:     if (B_neigs) {
275:       PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));

277:       if (pcbddc->dbg_flag > 1) {
278:         PetscInt ii;
279:         for (ii=0;ii<B_neigs;ii++) {
280:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
281:           for (j=0;j<B_N;j++) {
282: #if defined(PETSC_USE_COMPLEX)
283:             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
284:             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
285:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);
286: #else
287:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
288: #endif
289:           }
290:         }
291:       }
292: #if 0
293:       for (j=0;j<B_neigs;j++) {
294:         PetscBLASInt Blas_N,Blas_one = 1.0;
295:         PetscScalar norm;
296:         PetscBLASIntCast(subset_size,&Blas_N);
297:         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
298:                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
299:         if (pcbddc->adaptive_constraints_data[cum] > 0.0) {
300:           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
301:         } else {
302:           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
303:         }
304:         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
305:       }
306: #endif
307:       PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));
308:       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
309:       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
310:       cum++;
311:     }
312:     ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
313:     /* shift for next computation */
314:     cumarray += subset_size*subset_size;
315:   }
316:   if (pcbddc->dbg_flag) {
317:     PetscViewerFlush(pcbddc->dbg_viewer);
318:   }

320:   if (mss) {
321:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
322:     MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
323:     /* destroy matrices (junk) */
324:     MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
325:     MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
326:   }
327:   if (allocated_S_St) {
328:     PetscFree2(S,St);
329:   }
330:   PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
331: #if defined(PETSC_USE_COMPLEX)
332:   PetscFree(rwork);
333: #endif
334:   if (pcbddc->dbg_flag) {
335:     PetscInt maxneigs_r;
336:     MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
337:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);
338:   }
339:   return(0);
340: }

344: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
345: {
346:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
347:   PetscScalar    *coarse_submat_vals;

351:   /* Setup local scatters R_to_B and (optionally) R_to_D */
352:   /* PCBDDCSetUpLocalWorkVectors should be called first! */
353:   PCBDDCSetUpLocalScatters(pc);

355:   /* Setup local neumann solver ksp_R */
356:   /* PCBDDCSetUpLocalScatters should be called first! */
357:   PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);

359:   /* Change global null space passed in by the user if change of basis has been requested */
360:   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
361:     PCBDDCNullSpaceAdaptGlobal(pc);
362:   }

364:   /*
365:      Setup local correction and local part of coarse basis.
366:      Gives back the dense local part of the coarse matrix in column major ordering
367:   */
368:   PCBDDCSetUpCorrection(pc,&coarse_submat_vals);

370:   /* Compute total number of coarse nodes and setup coarse solver */
371:   PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);

373:   /* free */
374:   PetscFree(coarse_submat_vals);
375:   return(0);
376: }

380: PetscErrorCode PCBDDCResetCustomization(PC pc)
381: {
382:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

386:   PCBDDCGraphResetCSR(pcbddc->mat_graph);
387:   ISDestroy(&pcbddc->user_primal_vertices);
388:   MatNullSpaceDestroy(&pcbddc->NullSpace);
389:   ISDestroy(&pcbddc->NeumannBoundaries);
390:   ISDestroy(&pcbddc->NeumannBoundariesLocal);
391:   ISDestroy(&pcbddc->DirichletBoundaries);
392:   MatNullSpaceDestroy(&pcbddc->onearnullspace);
393:   PetscFree(pcbddc->onearnullvecs_state);
394:   ISDestroy(&pcbddc->DirichletBoundariesLocal);
395:   PCBDDCSetDofsSplitting(pc,0,NULL);
396:   PCBDDCSetDofsSplittingLocal(pc,0,NULL);
397:   return(0);
398: }

402: PetscErrorCode PCBDDCResetTopography(PC pc)
403: {
404:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;

408:   MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
409:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
410:   MatDestroy(&pcbddc->ConstraintMatrix);
411:   PCBDDCGraphReset(pcbddc->mat_graph);
412:   PCBDDCSubSchursReset(pcbddc->sub_schurs);
413:   return(0);
414: }

418: PetscErrorCode PCBDDCResetSolvers(PC pc)
419: {
420:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
421:   PetscScalar    *array;

425:   VecDestroy(&pcbddc->coarse_vec);
426:   if (pcbddc->coarse_phi_B) {
427:     MatDenseGetArray(pcbddc->coarse_phi_B,&array);
428:     PetscFree(array);
429:   }
430:   MatDestroy(&pcbddc->coarse_phi_B);
431:   MatDestroy(&pcbddc->coarse_phi_D);
432:   MatDestroy(&pcbddc->coarse_psi_B);
433:   MatDestroy(&pcbddc->coarse_psi_D);
434:   VecDestroy(&pcbddc->vec1_P);
435:   VecDestroy(&pcbddc->vec1_C);
436:   MatDestroy(&pcbddc->local_auxmat2);
437:   MatDestroy(&pcbddc->local_auxmat1);
438:   VecDestroy(&pcbddc->vec1_R);
439:   VecDestroy(&pcbddc->vec2_R);
440:   ISDestroy(&pcbddc->is_R_local);
441:   VecScatterDestroy(&pcbddc->R_to_B);
442:   VecScatterDestroy(&pcbddc->R_to_D);
443:   VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
444:   KSPDestroy(&pcbddc->ksp_D);
445:   KSPDestroy(&pcbddc->ksp_R);
446:   KSPDestroy(&pcbddc->coarse_ksp);
447:   MatDestroy(&pcbddc->local_mat);
448:   PetscFree(pcbddc->primal_indices_local_idxs);
449:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
450:   PetscFree(pcbddc->global_primal_indices);
451:   ISDestroy(&pcbddc->coarse_subassembling);
452:   ISDestroy(&pcbddc->coarse_subassembling_init);
453:   return(0);
454: }

458: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
459: {
460:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
461:   PC_IS          *pcis = (PC_IS*)pc->data;
462:   VecType        impVecType;
463:   PetscInt       n_constraints,n_R,old_size;

467:   if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
468:   /* get sizes */
469:   n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
470:   n_R = pcis->n-pcbddc->n_vertices;
471:   VecGetType(pcis->vec1_N,&impVecType);
472:   /* local work vectors (try to avoid unneeded work)*/
473:   /* R nodes */
474:   old_size = -1;
475:   if (pcbddc->vec1_R) {
476:     VecGetSize(pcbddc->vec1_R,&old_size);
477:   }
478:   if (n_R != old_size) {
479:     VecDestroy(&pcbddc->vec1_R);
480:     VecDestroy(&pcbddc->vec2_R);
481:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
482:     VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
483:     VecSetType(pcbddc->vec1_R,impVecType);
484:     VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
485:   }
486:   /* local primal dofs */
487:   old_size = -1;
488:   if (pcbddc->vec1_P) {
489:     VecGetSize(pcbddc->vec1_P,&old_size);
490:   }
491:   if (pcbddc->local_primal_size != old_size) {
492:     VecDestroy(&pcbddc->vec1_P);
493:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
494:     VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
495:     VecSetType(pcbddc->vec1_P,impVecType);
496:   }
497:   /* local explicit constraints */
498:   old_size = -1;
499:   if (pcbddc->vec1_C) {
500:     VecGetSize(pcbddc->vec1_C,&old_size);
501:   }
502:   if (n_constraints && n_constraints != old_size) {
503:     VecDestroy(&pcbddc->vec1_C);
504:     VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
505:     VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
506:     VecSetType(pcbddc->vec1_C,impVecType);
507:   }
508:   return(0);
509: }

513: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
514: {
515:   PetscErrorCode  ierr;
516:   /* pointers to pcis and pcbddc */
517:   PC_IS*          pcis = (PC_IS*)pc->data;
518:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
519:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
520:   /* submatrices of local problem */
521:   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
522:   /* submatrices of local coarse problem */
523:   Mat             S_VV,S_CV,S_VC,S_CC;
524:   /* working matrices */
525:   Mat             C_CR;
526:   /* additional working stuff */
527:   PC              pc_R;
528:   Mat             F;
529:   PetscBool       isLU,isCHOL,isILU;

531:   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
532:   PetscScalar     *work;
533:   PetscInt        *idx_V_B;
534:   PetscInt        n,n_vertices,n_constraints;
535:   PetscInt        i,n_R,n_D,n_B;
536:   PetscBool       unsymmetric_check;
537:   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
538:   MatType         impMatType;
539:   /* some shortcuts to scalars */
540:   PetscScalar     one=1.0,m_one=-1.0;

543:   n_vertices = pcbddc->n_vertices;
544:   n_constraints = pcbddc->local_primal_size-n_vertices;
545:   /* Set Non-overlapping dimensions */
546:   n_B = pcis->n_B;
547:   n_D = pcis->n - n_B;
548:   n_R = pcis->n - n_vertices;

550:   /* Set types for local objects needed by BDDC precondtioner */
551:   impMatType = MATSEQDENSE;

553:   /* vertices in boundary numbering */
554:   PetscMalloc1(n_vertices,&idx_V_B);
555:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
556:   if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i);

558:   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
559:   PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
560:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
561:   MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);
562:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
563:   MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);
564:   MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
565:   MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);
566:   MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
567:   MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);

569:   unsymmetric_check = PETSC_FALSE;
570:   /* allocate workspace */
571:   n = 0;
572:   if (n_constraints) {
573:     n += n_R*n_constraints;
574:   }
575:   if (n_vertices) {
576:     n = PetscMax(2*n_R*n_vertices,n);
577:     n = PetscMax((n_R+n_B)*n_vertices,n);
578:   }
579:   if (!pcbddc->symmetric_primal) {
580:     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
581:     unsymmetric_check = PETSC_TRUE;
582:   }
583:   PetscMalloc1(n,&work);

585:   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
586:   KSPGetPC(pcbddc->ksp_R,&pc_R);
587:   PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
588:   PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);
589:   PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
590:   if (isLU || isILU || isCHOL) {
591:     PCFactorGetMatrix(pc_R,&F);
592:   } else if (sub_schurs->reuse_mumps) {
593:     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
594:     MatFactorType type;

596:     F = reuse_mumps->F;
597:     MatGetFactorType(F,&type);
598:     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
599:   } else {
600:     F = NULL;
601:   }

603:   /* Precompute stuffs needed for preprocessing and application of BDDC*/
604:   if (n_constraints) {
605:     Mat         M1,M2,M3;
606:     Mat         auxmat;
607:     IS          is_aux;
608:     PetscScalar *array,*array2;

610:     MatDestroy(&pcbddc->local_auxmat1);
611:     MatDestroy(&pcbddc->local_auxmat2);

613:     /* Extract constraints on R nodes: C_{CR}  */
614:     ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
615:     MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
616:     MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);

618:     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
619:     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
620:     PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));
621:     for (i=0;i<n_constraints;i++) {
622:       const PetscScalar *row_cmat_values;
623:       const PetscInt    *row_cmat_indices;
624:       PetscInt          size_of_constraint,j;

626:       MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
627:       for (j=0;j<size_of_constraint;j++) {
628:         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
629:       }
630:       MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
631:     }
632:     MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);
633:     if (F) {
634:       Mat B;

636:       MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
637:       MatMatSolve(F,B,local_auxmat2_R);
638:       MatDestroy(&B);
639:     } else {
640:       PetscScalar *marr;

642:       MatDenseGetArray(local_auxmat2_R,&marr);
643:       for (i=0;i<n_constraints;i++) {
644:         VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
645:         VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);
646:         KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
647:         VecResetArray(pcbddc->vec1_R);
648:         VecResetArray(pcbddc->vec2_R);
649:       }
650:       MatDenseRestoreArray(local_auxmat2_R,&marr);
651:     }
652:     if (!pcbddc->switch_static) {
653:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
654:       MatDenseGetArray(pcbddc->local_auxmat2,&array);
655:       MatDenseGetArray(local_auxmat2_R,&array2);
656:       for (i=0;i<n_constraints;i++) {
657:         VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);
658:         VecPlaceArray(pcis->vec1_B,array+i*n_B);
659:         VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
660:         VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
661:         VecResetArray(pcis->vec1_B);
662:         VecResetArray(pcbddc->vec1_R);
663:       }
664:       MatDenseRestoreArray(local_auxmat2_R,&array2);
665:       MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
666:       MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
667:     } else {
668:       PetscObjectReference((PetscObject)local_auxmat2_R);
669:       pcbddc->local_auxmat2 = local_auxmat2_R;
670:       MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
671:     }
672:     ISDestroy(&is_aux);
673:     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
674:     MatScale(M3,m_one);
675:     MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);
676:     MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);
677:     if (isCHOL) {
678:       MatCholeskyFactor(M3,NULL,NULL);
679:     } else {
680:       MatLUFactor(M3,NULL,NULL,NULL);
681:     }
682:     VecSet(pcbddc->vec1_C,one);
683:     MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);
684:     MatMatSolve(M3,M2,M1);
685:     MatDestroy(&M2);
686:     MatDestroy(&M3);
687:     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
688:     MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
689:     MatDestroy(&auxmat);
690:     MatCopy(M1,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
691:     MatDestroy(&M1);
692:   }
693:   /* Get submatrices from subdomain matrix */
694:   if (n_vertices) {
695:     IS is_aux;

697:     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
698:       IS tis;

700:       ISDuplicate(pcbddc->is_R_local,&tis);
701:       ISSort(tis);
702:       ISComplement(tis,0,pcis->n,&is_aux);
703:       ISDestroy(&tis);
704:     } else {
705:       ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
706:     }
707:     MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
708:     MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
709:     MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
710:     ISDestroy(&is_aux);
711:   }

713:   /* Matrix of coarse basis functions (local) */
714:   if (pcbddc->coarse_phi_B) {
715:     PetscInt on_B,on_primal,on_D=n_D;
716:     if (pcbddc->coarse_phi_D) {
717:       MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
718:     }
719:     MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
720:     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
721:       PetscScalar *marray;

723:       MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
724:       PetscFree(marray);
725:       MatDestroy(&pcbddc->coarse_phi_B);
726:       MatDestroy(&pcbddc->coarse_psi_B);
727:       MatDestroy(&pcbddc->coarse_phi_D);
728:       MatDestroy(&pcbddc->coarse_psi_D);
729:     }
730:   }

732:   if (!pcbddc->coarse_phi_B) {
733:     PetscScalar *marray;

735:     n = n_B*pcbddc->local_primal_size;
736:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
737:       n += n_D*pcbddc->local_primal_size;
738:     }
739:     if (!pcbddc->symmetric_primal) {
740:       n *= 2;
741:     }
742:     PetscCalloc1(n,&marray);
743:     MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);
744:     n = n_B*pcbddc->local_primal_size;
745:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
746:       MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);
747:       n += n_D*pcbddc->local_primal_size;
748:     }
749:     if (!pcbddc->symmetric_primal) {
750:       MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);
751:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
752:         n = n_B*pcbddc->local_primal_size;
753:         MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);
754:       }
755:     } else {
756:       PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
757:       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
758:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
759:         PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
760:         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
761:       }
762:     }
763:   }
764:   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
765:   /* vertices */
766:   if (n_vertices) {

768:     MatConvert(A_VV,impMatType,MAT_INPLACE_MATRIX,&A_VV);

770:     if (n_R) {
771:       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
772:       PetscBLASInt B_N,B_one = 1;
773:       PetscScalar  *x,*y;
774:       PetscBool    isseqaij;

776:       MatScale(A_RV,m_one);
777:       MatConvert(A_RV,impMatType,MAT_INPLACE_MATRIX,&A_RV);
778:       MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
779:       if (F) { /* TODO could be optimized for symmetric problems */
780:         MatMatSolve(F,A_RV,A_RRmA_RV);
781:       } else {
782:         MatDenseGetArray(A_RV,&y);
783:         for (i=0;i<n_vertices;i++) {
784:           VecPlaceArray(pcbddc->vec1_R,y+i*n_R);
785:           VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
786:           KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
787:           VecResetArray(pcbddc->vec1_R);
788:           VecResetArray(pcbddc->vec2_R);
789:         }
790:         MatDenseRestoreArray(A_RV,&y);
791:       }
792:       MatDestroy(&A_RV);
793:       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
794:       if (n_constraints) {
795:         Mat B;

797:         PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));
798:         for (i=0;i<n_vertices;i++) {
799:           VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
800:           VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);
801:           VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
802:           VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
803:           VecResetArray(pcis->vec1_B);
804:           VecResetArray(pcbddc->vec1_R);
805:         }
806:         MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);
807:         MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);
808:         MatDestroy(&B);
809:         MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);
810:         MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
811:         MatScale(S_CV,m_one);
812:         PetscBLASIntCast(n_R*n_vertices,&B_N);
813:         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
814:         MatDestroy(&B);
815:       }
816:       PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);
817:       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
818:         MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
819:       }
820:       MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
821:       MatDestroy(&A_RRmA_RV);
822:       PetscBLASIntCast(n_vertices*n_vertices,&B_N);
823:       MatDenseGetArray(A_VV,&x);
824:       MatDenseGetArray(S_VVt,&y);
825:       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
826:       MatDenseRestoreArray(A_VV,&x);
827:       MatDenseRestoreArray(S_VVt,&y);
828:       MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
829:       MatDestroy(&S_VVt);
830:     } else {
831:       MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
832:     }
833:     MatDestroy(&A_VV);
834:     /* coarse basis functions */
835:     for (i=0;i<n_vertices;i++) {
836:       PetscScalar *y;

838:       VecPlaceArray(pcbddc->vec1_R,work+n_R*i);
839:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
840:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
841:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
842:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
843:       y[n_B*i+idx_V_B[i]] = 1.0;
844:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
845:       VecResetArray(pcis->vec1_B);

847:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
848:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
849:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
850:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
851:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
852:         VecResetArray(pcis->vec1_D);
853:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
854:       }
855:       VecResetArray(pcbddc->vec1_R);
856:     }
857:     /* if n_R == 0 the object is not destroyed */
858:     MatDestroy(&A_RV);
859:   }

861:   if (n_constraints) {
862:     Mat B;

864:     MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
865:     MatScale(S_CC,m_one);
866:     MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);
867:     MatScale(S_CC,m_one);
868:     if (n_vertices) {
869:       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
870:         MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
871:       } else {
872:         Mat S_VCt;

874:         MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
875:         MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
876:         MatDestroy(&S_VCt);
877:       }
878:     }
879:     MatDestroy(&B);
880:     /* coarse basis functions */
881:     for (i=0;i<n_constraints;i++) {
882:       PetscScalar *y;

884:       VecPlaceArray(pcbddc->vec1_R,work+n_R*i);
885:       MatDenseGetArray(pcbddc->coarse_phi_B,&y);
886:       VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
887:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
888:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
889:       MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
890:       VecResetArray(pcis->vec1_B);
891:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
892:         MatDenseGetArray(pcbddc->coarse_phi_D,&y);
893:         VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
894:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
895:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
896:         VecResetArray(pcis->vec1_D);
897:         MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
898:       }
899:       VecResetArray(pcbddc->vec1_R);
900:     }
901:   }
902:   if (n_constraints) {
903:     MatDestroy(&local_auxmat2_R);
904:   }

906:   /* compute other basis functions for non-symmetric problems */
907:   if (!pcbddc->symmetric_primal) {

909:     if (n_constraints) {
910:       Mat S_CCT,B_C;

912:       /* this is a lazy thing */
913:       MatConvert(C_CR,impMatType,MAT_INPLACE_MATRIX,&C_CR);
914:       MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);
915:       MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
916:       MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);
917:       MatDestroy(&S_CCT);
918:       if (n_vertices) {
919:         Mat B_V,S_VCT;

921:         MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);
922:         MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
923:         MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);
924:         MatDestroy(&B_V);
925:         MatDestroy(&S_VCT);
926:       }
927:       MatDestroy(&B_C);
928:     } else { /* if there are no constraints, reset work */
929:       PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));
930:     }
931:     if (n_vertices && n_R) {
932:       Mat          A_VRT;
933:       PetscScalar  *marray;
934:       PetscBLASInt B_N,B_one = 1;

936:       MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);
937:       MatConvert(A_VRT,impMatType,MAT_INPLACE_MATRIX,&A_VRT);
938:       MatDenseGetArray(A_VRT,&marray);
939:       PetscBLASIntCast(n_vertices*n_R,&B_N);
940:       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
941:       MatDenseRestoreArray(A_VRT,&marray);
942:       MatDestroy(&A_VRT);
943:     }

945:     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
946:       for (i=0;i<pcbddc->local_primal_size;i++) {
947:         VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
948:         VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);
949:         MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);
950:         VecResetArray(pcbddc->vec1_R);
951:         VecResetArray(pcbddc->vec2_R);
952:       }
953:     } else {
954:       for (i=0;i<pcbddc->local_primal_size;i++) {
955:         VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
956:         VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);
957:         KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
958:         VecResetArray(pcbddc->vec1_R);
959:         VecResetArray(pcbddc->vec2_R);
960:       }
961:     }
962:     /* coarse basis functions */
963:     for (i=0;i<pcbddc->local_primal_size;i++) {
964:       PetscScalar *y;

966:       VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));
967:       MatDenseGetArray(pcbddc->coarse_psi_B,&y);
968:       VecPlaceArray(pcis->vec1_B,y+n_B*i);
969:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
970:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
971:       if (i<n_vertices) {
972:         y[n_B*i+idx_V_B[i]] = 1.0;
973:       }
974:       MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
975:       VecResetArray(pcis->vec1_B);

977:       if (pcbddc->switch_static || pcbddc->dbg_flag) {
978:         MatDenseGetArray(pcbddc->coarse_psi_D,&y);
979:         VecPlaceArray(pcis->vec1_D,y+n_D*i);
980:         VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
981:         VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
982:         VecResetArray(pcis->vec1_D);
983:         MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
984:       }
985:       VecResetArray(pcbddc->vec1_R);
986:     }
987:   }
988:   /* free memory */
989:   PetscFree(idx_V_B);
990:   MatDestroy(&S_VV);
991:   MatDestroy(&S_CV);
992:   MatDestroy(&S_VC);
993:   MatDestroy(&S_CC);
994:   PetscFree(work);
995:   if (n_vertices) {
996:     MatDestroy(&A_VR);
997:   }
998:   if (n_constraints) {
999:     MatDestroy(&C_CR);
1000:   }
1001:   /* Checking coarse_sub_mat and coarse basis functios */
1002:   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1003:   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1004:   if (pcbddc->dbg_flag) {
1005:     Mat         coarse_sub_mat;
1006:     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1007:     Mat         coarse_phi_D,coarse_phi_B;
1008:     Mat         coarse_psi_D,coarse_psi_B;
1009:     Mat         A_II,A_BB,A_IB,A_BI;
1010:     Mat         C_B,CPHI;
1011:     IS          is_dummy;
1012:     Vec         mones;
1013:     MatType     checkmattype=MATSEQAIJ;
1014:     PetscReal   real_value;

1016:     MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
1017:     MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
1018:     MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
1019:     MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
1020:     MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
1021:     MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
1022:     if (unsymmetric_check) {
1023:       MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
1024:       MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
1025:     }
1026:     MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);

1028:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
1029:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
1030:     PetscViewerFlush(pcbddc->dbg_viewer);
1031:     if (unsymmetric_check) {
1032:       MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1033:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
1034:       MatDestroy(&AUXMAT);
1035:       MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1036:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
1037:       MatDestroy(&AUXMAT);
1038:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1039:       MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
1040:       MatDestroy(&AUXMAT);
1041:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1042:       MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
1043:       MatDestroy(&AUXMAT);
1044:     } else {
1045:       MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
1046:       MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
1047:       MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1048:       MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
1049:       MatDestroy(&AUXMAT);
1050:       MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
1051:       MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
1052:       MatDestroy(&AUXMAT);
1053:     }
1054:     MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
1055:     MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
1056:     MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
1057:     MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
1058:     MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
1059:     MatNorm(TM1,NORM_FROBENIUS,&real_value);
1060:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
1061:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);

1063:     /* check constraints */
1064:     ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);
1065:     MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1066:     MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
1067:     MatCreateVecs(CPHI,&mones,NULL);
1068:     VecSet(mones,-1.0);
1069:     MatDiagonalSet(CPHI,mones,ADD_VALUES);
1070:     MatNorm(CPHI,NORM_FROBENIUS,&real_value);
1071:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
1072:     if (unsymmetric_check) {
1073:       MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
1074:       VecSet(mones,-1.0);
1075:       MatDiagonalSet(CPHI,mones,ADD_VALUES);
1076:       MatNorm(CPHI,NORM_FROBENIUS,&real_value);
1077:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
1078:     }
1079:     MatDestroy(&C_B);
1080:     MatDestroy(&CPHI);
1081:     ISDestroy(&is_dummy);
1082:     VecDestroy(&mones);

1084:     PetscViewerFlush(pcbddc->dbg_viewer);
1085:     MatDestroy(&A_II);
1086:     MatDestroy(&A_BB);
1087:     MatDestroy(&A_IB);
1088:     MatDestroy(&A_BI);
1089:     MatDestroy(&TM1);
1090:     MatDestroy(&TM2);
1091:     MatDestroy(&TM3);
1092:     MatDestroy(&TM4);
1093:     MatDestroy(&coarse_phi_D);
1094:     MatDestroy(&coarse_phi_B);
1095:     if (unsymmetric_check) {
1096:       MatDestroy(&coarse_psi_D);
1097:       MatDestroy(&coarse_psi_B);
1098:     }
1099:     MatDestroy(&coarse_sub_mat);
1100:   }
1101:   /* get back data */
1102:   *coarse_submat_vals_n = coarse_submat_vals;
1103:   return(0);
1104: }

1108: PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1109: {
1110:   Mat            *work_mat;
1111:   IS             isrow_s,iscol_s;
1112:   PetscBool      rsorted,csorted;
1113:   PetscInt       rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;

1117:   ISSorted(isrow,&rsorted);
1118:   ISSorted(iscol,&csorted);
1119:   ISGetLocalSize(isrow,&rsize);
1120:   ISGetLocalSize(iscol,&csize);

1122:   if (!rsorted) {
1123:     const PetscInt *idxs;
1124:     PetscInt *idxs_sorted,i;

1126:     PetscMalloc1(rsize,&idxs_perm_r);
1127:     PetscMalloc1(rsize,&idxs_sorted);
1128:     for (i=0;i<rsize;i++) {
1129:       idxs_perm_r[i] = i;
1130:     }
1131:     ISGetIndices(isrow,&idxs);
1132:     PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
1133:     for (i=0;i<rsize;i++) {
1134:       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1135:     }
1136:     ISRestoreIndices(isrow,&idxs);
1137:     ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
1138:   } else {
1139:     PetscObjectReference((PetscObject)isrow);
1140:     isrow_s = isrow;
1141:   }

1143:   if (!csorted) {
1144:     if (isrow == iscol) {
1145:       PetscObjectReference((PetscObject)isrow_s);
1146:       iscol_s = isrow_s;
1147:     } else {
1148:       const PetscInt *idxs;
1149:       PetscInt       *idxs_sorted,i;

1151:       PetscMalloc1(csize,&idxs_perm_c);
1152:       PetscMalloc1(csize,&idxs_sorted);
1153:       for (i=0;i<csize;i++) {
1154:         idxs_perm_c[i] = i;
1155:       }
1156:       ISGetIndices(iscol,&idxs);
1157:       PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
1158:       for (i=0;i<csize;i++) {
1159:         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1160:       }
1161:       ISRestoreIndices(iscol,&idxs);
1162:       ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
1163:     }
1164:   } else {
1165:     PetscObjectReference((PetscObject)iscol);
1166:     iscol_s = iscol;
1167:   }

1169:   MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);

1171:   if (!rsorted || !csorted) {
1172:     Mat      new_mat;
1173:     IS       is_perm_r,is_perm_c;

1175:     if (!rsorted) {
1176:       PetscInt *idxs_r,i;
1177:       PetscMalloc1(rsize,&idxs_r);
1178:       for (i=0;i<rsize;i++) {
1179:         idxs_r[idxs_perm_r[i]] = i;
1180:       }
1181:       PetscFree(idxs_perm_r);
1182:       ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
1183:     } else {
1184:       ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
1185:     }
1186:     ISSetPermutation(is_perm_r);

1188:     if (!csorted) {
1189:       if (isrow_s == iscol_s) {
1190:         PetscObjectReference((PetscObject)is_perm_r);
1191:         is_perm_c = is_perm_r;
1192:       } else {
1193:         PetscInt *idxs_c,i;
1194:         PetscMalloc1(csize,&idxs_c);
1195:         for (i=0;i<csize;i++) {
1196:           idxs_c[idxs_perm_c[i]] = i;
1197:         }
1198:         PetscFree(idxs_perm_c);
1199:         ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
1200:       }
1201:     } else {
1202:       ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
1203:     }
1204:     ISSetPermutation(is_perm_c);

1206:     MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
1207:     MatDestroy(&work_mat[0]);
1208:     work_mat[0] = new_mat;
1209:     ISDestroy(&is_perm_r);
1210:     ISDestroy(&is_perm_c);
1211:   }

1213:   PetscObjectReference((PetscObject)work_mat[0]);
1214:   *B = work_mat[0];
1215:   MatDestroyMatrices(1,&work_mat);
1216:   ISDestroy(&isrow_s);
1217:   ISDestroy(&iscol_s);
1218:   return(0);
1219: }

1223: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1224: {
1225:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1226:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1227:   Mat            new_mat;
1228:   IS             is_local,is_global;
1229:   PetscInt       local_size;
1230:   PetscBool      isseqaij;

1234:   MatDestroy(&pcbddc->local_mat);
1235:   MatGetSize(matis->A,&local_size,NULL);
1236:   ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
1237:   ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);
1238:   ISDestroy(&is_local);
1239:   MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
1240:   ISDestroy(&is_global);

1242:   /* check */
1243:   if (pcbddc->dbg_flag) {
1244:     Vec       x,x_change;
1245:     PetscReal error;

1247:     MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
1248:     VecSetRandom(x,NULL);
1249:     MatMult(ChangeOfBasisMatrix,x,x_change);
1250:     VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
1251:     VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
1252:     MatMult(new_mat,matis->x,matis->y);
1253:     VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
1254:     VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
1255:     VecAXPY(x,-1.0,x_change);
1256:     VecNorm(x,NORM_INFINITY,&error);
1257:     PetscViewerFlush(pcbddc->dbg_viewer);
1258:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);
1259:     VecDestroy(&x);
1260:     VecDestroy(&x_change);
1261:   }

1263:   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1264:   PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
1265:   if (isseqaij) {
1266:     MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
1267:   } else {
1268:     Mat work_mat;
1269:     MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
1270:     MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
1271:     MatDestroy(&work_mat);
1272:   }
1273:   if (matis->A->symmetric_set) {
1274:     MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
1275: #if !defined(PETSC_USE_COMPLEX)
1276:     MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
1277: #endif
1278:   }
1279:   MatDestroy(&new_mat);
1280:   return(0);
1281: }

1285: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1286: {
1287:   PC_IS*          pcis = (PC_IS*)(pc->data);
1288:   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1289:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1290:   PetscInt        *idx_R_local=NULL;
1291:   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1292:   PetscInt        vbs,bs;
1293:   PetscBT         bitmask=NULL;
1294:   PetscErrorCode  ierr;

1297:   /*
1298:     No need to setup local scatters if
1299:       - primal space is unchanged
1300:         AND
1301:       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1302:         AND
1303:       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1304:   */
1305:   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1306:     return(0);
1307:   }
1308:   /* destroy old objects */
1309:   ISDestroy(&pcbddc->is_R_local);
1310:   VecScatterDestroy(&pcbddc->R_to_B);
1311:   VecScatterDestroy(&pcbddc->R_to_D);
1312:   /* Set Non-overlapping dimensions */
1313:   n_B = pcis->n_B;
1314:   n_D = pcis->n - n_B;
1315:   n_vertices = pcbddc->n_vertices;

1317:   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */

1319:   /* create auxiliary bitmask and allocate workspace */
1320:   if (!sub_schurs->reuse_mumps) {
1321:     PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
1322:     PetscBTCreate(pcis->n,&bitmask);
1323:     for (i=0;i<n_vertices;i++) {
1324:       PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
1325:     }

1327:     for (i=0, n_R=0; i<pcis->n; i++) {
1328:       if (!PetscBTLookup(bitmask,i)) {
1329:         idx_R_local[n_R++] = i;
1330:       }
1331:     }
1332:   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1333:     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;

1335:     ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);
1336:     ISGetLocalSize(reuse_mumps->is_R,&n_R);
1337:   }

1339:   /* Block code */
1340:   vbs = 1;
1341:   MatGetBlockSize(pcbddc->local_mat,&bs);
1342:   if (bs>1 && !(n_vertices%bs)) {
1343:     PetscBool is_blocked = PETSC_TRUE;
1344:     PetscInt  *vary;
1345:     if (!sub_schurs->reuse_mumps) {
1346:       PetscMalloc1(pcis->n/bs,&vary);
1347:       PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));
1348:       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1349:       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
1350:       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1351:       for (i=0; i<pcis->n/bs; i++) {
1352:         if (vary[i]!=0 && vary[i]!=bs) {
1353:           is_blocked = PETSC_FALSE;
1354:           break;
1355:         }
1356:       }
1357:       PetscFree(vary);
1358:     } else {
1359:       /* Verify directly the R set */
1360:       for (i=0; i<n_R/bs; i++) {
1361:         PetscInt j,node=idx_R_local[bs*i];
1362:         for (j=1; j<bs; j++) {
1363:           if (node != idx_R_local[bs*i+j]-j) {
1364:             is_blocked = PETSC_FALSE;
1365:             break;
1366:           }
1367:         }
1368:       }
1369:     }
1370:     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1371:       vbs = bs;
1372:       for (i=0;i<n_R/vbs;i++) {
1373:         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1374:       }
1375:     }
1376:   }
1377:   ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
1378:   if (sub_schurs->reuse_mumps) {
1379:     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;

1381:     ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);
1382:     ISDestroy(&reuse_mumps->is_R);
1383:     PetscObjectReference((PetscObject)pcbddc->is_R_local);
1384:     reuse_mumps->is_R = pcbddc->is_R_local;
1385:   } else {
1386:     PetscFree(idx_R_local);
1387:   }

1389:   /* print some info if requested */
1390:   if (pcbddc->dbg_flag) {
1391:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
1392:     PetscViewerFlush(pcbddc->dbg_viewer);
1393:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
1394:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
1395:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);
1396:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);
1397:     PetscViewerFlush(pcbddc->dbg_viewer);
1398:   }

1400:   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1401:   if (!sub_schurs->reuse_mumps) {
1402:     IS       is_aux1,is_aux2;
1403:     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;

1405:     ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
1406:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
1407:     PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
1408:     ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
1409:     for (i=0; i<n_D; i++) {
1410:       PetscBTSet(bitmask,is_indices[i]);
1411:     }
1412:     ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
1413:     for (i=0, j=0; i<n_R; i++) {
1414:       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1415:         aux_array1[j++] = i;
1416:       }
1417:     }
1418:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
1419:     ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
1420:     for (i=0, j=0; i<n_B; i++) {
1421:       if (!PetscBTLookup(bitmask,is_indices[i])) {
1422:         aux_array2[j++] = i;
1423:       }
1424:     }
1425:     ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
1426:     ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
1427:     VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
1428:     ISDestroy(&is_aux1);
1429:     ISDestroy(&is_aux2);

1431:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1432:       PetscMalloc1(n_D,&aux_array1);
1433:       for (i=0, j=0; i<n_R; i++) {
1434:         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1435:           aux_array1[j++] = i;
1436:         }
1437:       }
1438:       ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
1439:       VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
1440:       ISDestroy(&is_aux1);
1441:     }
1442:     PetscBTDestroy(&bitmask);
1443:     ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
1444:   } else {
1445:     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1446:     IS               tis;
1447:     PetscInt         schur_size;

1449:     ISGetLocalSize(reuse_mumps->is_B,&schur_size);
1450:     ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
1451:     VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);
1452:     ISDestroy(&tis);
1453:     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1454:       ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
1455:       VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
1456:       ISDestroy(&tis);
1457:     }
1458:   }
1459:   return(0);
1460: }


1465: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1466: {
1467:   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1468:   PC_IS          *pcis = (PC_IS*)pc->data;
1469:   PC             pc_temp;
1470:   Mat            A_RR;
1471:   MatReuse       reuse;
1472:   PetscScalar    m_one = -1.0;
1473:   PetscReal      value;
1474:   PetscInt       n_D,n_R;
1475:   PetscBool      use_exact,use_exact_reduced,issbaij;
1477:   /* prefixes stuff */
1478:   char           dir_prefix[256],neu_prefix[256],str_level[16];
1479:   size_t         len;


1483:   /* compute prefixes */
1484:   PetscStrcpy(dir_prefix,"");
1485:   PetscStrcpy(neu_prefix,"");
1486:   if (!pcbddc->current_level) {
1487:     PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);
1488:     PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);
1489:     PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
1490:     PetscStrcat(neu_prefix,"pc_bddc_neumann_");
1491:   } else {
1492:     PetscStrcpy(str_level,"");
1493:     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1494:     PetscStrlen(((PetscObject)pc)->prefix,&len);
1495:     len -= 15; /* remove "pc_bddc_coarse_" */
1496:     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1497:     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1498:     PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
1499:     PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
1500:     PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");
1501:     PetscStrcat(neu_prefix,"pc_bddc_neumann_");
1502:     PetscStrcat(dir_prefix,str_level);
1503:     PetscStrcat(neu_prefix,str_level);
1504:   }

1506:   /* DIRICHLET PROBLEM */
1507:   if (dirichlet) {
1508:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1509:     if (pcbddc->local_mat->symmetric_set) {
1510:       MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
1511:     }
1512:     /* Matrix for Dirichlet problem is pcis->A_II */
1513:     n_D = pcis->n - pcis->n_B;
1514:     if (!pcbddc->ksp_D) { /* create object if not yet build */
1515:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
1516:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
1517:       /* default */
1518:       KSPSetType(pcbddc->ksp_D,KSPPREONLY);
1519:       KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
1520:       PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);
1521:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
1522:       if (issbaij) {
1523:         PCSetType(pc_temp,PCCHOLESKY);
1524:       } else {
1525:         PCSetType(pc_temp,PCLU);
1526:       }
1527:       /* Allow user's customization */
1528:       KSPSetFromOptions(pcbddc->ksp_D);
1529:       PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
1530:     }
1531:     KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);
1532:     if (sub_schurs->reuse_mumps) {
1533:       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;

1535:       KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);
1536:     }
1537:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1538:     if (!n_D) {
1539:       KSPGetPC(pcbddc->ksp_D,&pc_temp);
1540:       PCSetType(pc_temp,PCNONE);
1541:     }
1542:     /* Set Up KSP for Dirichlet problem of BDDC */
1543:     KSPSetUp(pcbddc->ksp_D);
1544:     /* set ksp_D into pcis data */
1545:     KSPDestroy(&pcis->ksp_D);
1546:     PetscObjectReference((PetscObject)pcbddc->ksp_D);
1547:     pcis->ksp_D = pcbddc->ksp_D;
1548:   }

1550:   /* NEUMANN PROBLEM */
1551:   A_RR = 0;
1552:   if (neumann) {
1553:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1554:     PetscInt        ibs,mbs;
1555:     PetscBool       issbaij;
1556:     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
1557:     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1558:     ISGetSize(pcbddc->is_R_local,&n_R);
1559:     if (pcbddc->ksp_R) { /* already created ksp */
1560:       PetscInt nn_R;
1561:       KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
1562:       PetscObjectReference((PetscObject)A_RR);
1563:       MatGetSize(A_RR,&nn_R,NULL);
1564:       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1565:         KSPReset(pcbddc->ksp_R);
1566:         MatDestroy(&A_RR);
1567:         reuse = MAT_INITIAL_MATRIX;
1568:       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1569:         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1570:           MatDestroy(&A_RR);
1571:           reuse = MAT_INITIAL_MATRIX;
1572:         } else { /* safe to reuse the matrix */
1573:           reuse = MAT_REUSE_MATRIX;
1574:         }
1575:       }
1576:       /* last check */
1577:       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1578:         MatDestroy(&A_RR);
1579:         reuse = MAT_INITIAL_MATRIX;
1580:       }
1581:     } else { /* first time, so we need to create the matrix */
1582:       reuse = MAT_INITIAL_MATRIX;
1583:     }
1584:     /* extract A_RR */
1585:     MatGetBlockSize(pcbddc->local_mat,&mbs);
1586:     ISGetBlockSize(pcbddc->is_R_local,&ibs);
1587:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
1588:     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
1589:       if (matis->A == pcbddc->local_mat) {
1590:         MatDestroy(&pcbddc->local_mat);
1591:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
1592:       } else {
1593:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
1594:       }
1595:     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
1596:       if (matis->A == pcbddc->local_mat) {
1597:         MatDestroy(&pcbddc->local_mat);
1598:         MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
1599:       } else {
1600:         MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
1601:       }
1602:     }
1603:     if (!sub_schurs->reuse_mumps) {
1604:       MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
1605:       if (pcbddc->local_mat->symmetric_set) {
1606:         MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);
1607:       }
1608:     } else {
1609:       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;

1611:       MatDestroy(&A_RR);
1612:       PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);
1613:       PetscObjectReference((PetscObject)A_RR);
1614:     }
1615:     if (!pcbddc->ksp_R) { /* create object if not present */
1616:       KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
1617:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
1618:       /* default */
1619:       KSPSetType(pcbddc->ksp_R,KSPPREONLY);
1620:       KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
1621:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
1622:       PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
1623:       if (issbaij) {
1624:         PCSetType(pc_temp,PCCHOLESKY);
1625:       } else {
1626:         PCSetType(pc_temp,PCLU);
1627:       }
1628:       /* Allow user's customization */
1629:       KSPSetFromOptions(pcbddc->ksp_R);
1630:       PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
1631:     }
1632:     KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
1633:     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1634:     if (!n_R) {
1635:       KSPGetPC(pcbddc->ksp_R,&pc_temp);
1636:       PCSetType(pc_temp,PCNONE);
1637:     }
1638:     /* Reuse MUMPS solver if it is present */
1639:     if (sub_schurs->reuse_mumps) {
1640:       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;

1642:       KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);
1643:     }
1644:     /* Set Up KSP for Neumann problem of BDDC */
1645:     KSPSetUp(pcbddc->ksp_R);
1646:   }
1647:   /* free Neumann problem's matrix */
1648:   MatDestroy(&A_RR);

1650:   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1651:   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1652:     if (pcbddc->dbg_flag) {
1653:       PetscViewerFlush(pcbddc->dbg_viewer);
1654:       PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
1655:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
1656:     }
1657:     if (dirichlet) { /* Dirichlet */
1658:       VecSetRandom(pcis->vec1_D,NULL);
1659:       MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
1660:       KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
1661:       VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
1662:       VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
1663:       /* need to be adapted? */
1664:       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1665:       MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1666:       PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);
1667:       /* print info */
1668:       if (pcbddc->dbg_flag) {
1669:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
1670:         PetscViewerFlush(pcbddc->dbg_viewer);
1671:       }
1672:       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1673:         PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);
1674:       }
1675:     }
1676:     if (neumann) { /* Neumann */
1677:       KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);
1678:       VecSetRandom(pcbddc->vec1_R,NULL);
1679:       MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
1680:       KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
1681:       VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
1682:       VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
1683:       /* need to be adapted? */
1684:       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1685:       MPIU_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1686:       /* print info */
1687:       if (pcbddc->dbg_flag) {
1688:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
1689:         PetscViewerFlush(pcbddc->dbg_viewer);
1690:       }
1691:       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1692:         PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);
1693:       }
1694:     }
1695:   }
1696:   return(0);
1697: }

1701: static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1702: {
1703:   PetscErrorCode  ierr;
1704:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1705:   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

1708:   if (!sub_schurs->reuse_mumps) {
1709:     VecSet(pcbddc->vec1_R,0.);
1710:   }
1711:   if (!pcbddc->switch_static) {
1712:     if (applytranspose && pcbddc->local_auxmat1) {
1713:       MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
1714:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
1715:     }
1716:     if (!sub_schurs->reuse_mumps) {
1717:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1718:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1719:     } else {
1720:       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;

1722:       VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
1723:       VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
1724:     }
1725:   } else {
1726:     VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1727:     VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1728:     VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1729:     VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1730:     if (applytranspose && pcbddc->local_auxmat1) {
1731:       MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
1732:       MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
1733:       VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1734:       VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
1735:     }
1736:   }
1737:   if (!sub_schurs->reuse_mumps) {
1738:     if (applytranspose) {
1739:       KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
1740:     } else {
1741:       KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
1742:     }
1743:   } else {
1744:     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;

1746:     if (applytranspose) {
1747:       MatFactorSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);
1748:     } else {
1749:       MatFactorSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);
1750:     }
1751:   }
1752:   VecSet(inout_B,0.);
1753:   if (!pcbddc->switch_static) {
1754:     if (!sub_schurs->reuse_mumps) {
1755:       VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1756:       VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1757:     } else {
1758:       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;

1760:       VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
1761:       VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
1762:     }
1763:     if (!applytranspose && pcbddc->local_auxmat1) {
1764:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
1765:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
1766:     }
1767:   } else {
1768:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1769:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1770:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
1771:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
1772:     if (!applytranspose && pcbddc->local_auxmat1) {
1773:       MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
1774:       MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
1775:     }
1776:     VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1777:     VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
1778:     VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
1779:     VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
1780:   }
1781:   return(0);
1782: }

1784: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1787: PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1788: {
1790:   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1791:   PC_IS*            pcis = (PC_IS*)  (pc->data);
1792:   const PetscScalar zero = 0.0;

1795:   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1796:   if (applytranspose) {
1797:     MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
1798:     if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
1799:   } else {
1800:     MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
1801:     if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
1802:   }
1803:   /* start communications from local primal nodes to rhs of coarse solver */
1804:   VecSet(pcbddc->coarse_vec,zero);
1805:   PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
1806:   PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);

1808:   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1809:   /* TODO remove null space when doing multilevel */
1810:   if (pcbddc->coarse_ksp) {
1811:     Vec rhs,sol;

1813:     KSPGetRhs(pcbddc->coarse_ksp,&rhs);
1814:     KSPGetSolution(pcbddc->coarse_ksp,&sol);
1815:     if (applytranspose) {
1816:       KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
1817:     } else {
1818:       KSPSolve(pcbddc->coarse_ksp,rhs,sol);
1819:     }
1820:   }

1822:   /* Local solution on R nodes */
1823:   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1824:     PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
1825:   }

1827:   /* communications from coarse sol to local primal nodes */
1828:   PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
1829:   PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);

1831:   /* Sum contributions from two levels */
1832:   if (applytranspose) {
1833:     MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
1834:     if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
1835:   } else {
1836:     MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
1837:     if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
1838:   }
1839:   return(0);
1840: }

1844: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1845: {
1847:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1848:   PetscScalar    *array;
1849:   Vec            from,to;

1852:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1853:     from = pcbddc->coarse_vec;
1854:     to = pcbddc->vec1_P;
1855:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1856:       Vec tvec;

1858:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
1859:       VecResetArray(tvec);
1860:       KSPGetSolution(pcbddc->coarse_ksp,&tvec);
1861:       VecGetArray(tvec,&array);
1862:       VecPlaceArray(from,array);
1863:       VecRestoreArray(tvec,&array);
1864:     }
1865:   } else { /* from local to global -> put data in coarse right hand side */
1866:     from = pcbddc->vec1_P;
1867:     to = pcbddc->coarse_vec;
1868:   }
1869:   VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
1870:   return(0);
1871: }

1875: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1876: {
1878:   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1879:   PetscScalar    *array;
1880:   Vec            from,to;

1883:   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1884:     from = pcbddc->coarse_vec;
1885:     to = pcbddc->vec1_P;
1886:   } else { /* from local to global -> put data in coarse right hand side */
1887:     from = pcbddc->vec1_P;
1888:     to = pcbddc->coarse_vec;
1889:   }
1890:   VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
1891:   if (smode == SCATTER_FORWARD) {
1892:     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1893:       Vec tvec;

1895:       KSPGetRhs(pcbddc->coarse_ksp,&tvec);
1896:       VecGetArray(to,&array);
1897:       VecPlaceArray(tvec,array);
1898:       VecRestoreArray(to,&array);
1899:     }
1900:   } else {
1901:     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1902:      VecResetArray(from);
1903:     }
1904:   }
1905:   return(0);
1906: }

1908: /* uncomment for testing purposes */
1909: /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1912: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1913: {
1914:   PetscErrorCode    ierr;
1915:   PC_IS*            pcis = (PC_IS*)(pc->data);
1916:   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1917:   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1918:   /* one and zero */
1919:   PetscScalar       one=1.0,zero=0.0;
1920:   /* space to store constraints and their local indices */
1921:   PetscScalar       *constraints_data;
1922:   PetscInt          *constraints_idxs,*constraints_idxs_B;
1923:   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
1924:   PetscInt          *constraints_n;
1925:   /* iterators */
1926:   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
1927:   /* BLAS integers */
1928:   PetscBLASInt      lwork,lierr;
1929:   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1930:   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1931:   /* reuse */
1932:   PetscInt          olocal_primal_size,olocal_primal_size_cc;
1933:   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
1934:   /* change of basis */
1935:   PetscBool         qr_needed;
1936:   PetscBT           change_basis,qr_needed_idx;
1937:   /* auxiliary stuff */
1938:   PetscInt          *nnz,*is_indices;
1939:   PetscInt          ncc;
1940:   /* some quantities */
1941:   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1942:   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;

1945:   /* Destroy Mat objects computed previously */
1946:   MatDestroy(&pcbddc->ChangeOfBasisMatrix);
1947:   MatDestroy(&pcbddc->ConstraintMatrix);
1948:   /* save info on constraints from previous setup (if any) */
1949:   olocal_primal_size = pcbddc->local_primal_size;
1950:   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
1951:   PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
1952:   PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));
1953:   PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));
1954:   PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
1955:   PetscFree(pcbddc->primal_indices_local_idxs);

1957:   /* print some info */
1958:   if (pcbddc->dbg_flag) {
1959:     IS       vertices;
1960:     PetscInt nv,nedges,nfaces;
1961:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);
1962:     ISGetSize(vertices,&nv);
1963:     ISDestroy(&vertices);
1964:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
1965:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
1966:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
1967:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);
1968:     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);
1969:     PetscViewerFlush(pcbddc->dbg_viewer);
1970:     PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
1971:   }

1973:   if (!pcbddc->adaptive_selection) {
1974:     IS           ISForVertices,*ISForFaces,*ISForEdges;
1975:     MatNullSpace nearnullsp;
1976:     const Vec    *nearnullvecs;
1977:     Vec          *localnearnullsp;
1978:     PetscScalar  *array;
1979:     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1980:     PetscBool    nnsp_has_cnst;
1981:     /* LAPACK working arrays for SVD or POD */
1982:     PetscBool    skip_lapack,boolforchange;
1983:     PetscScalar  *work;
1984:     PetscReal    *singular_vals;
1985: #if defined(PETSC_USE_COMPLEX)
1986:     PetscReal    *rwork;
1987: #endif
1988: #if defined(PETSC_MISSING_LAPACK_GESVD)
1989:     PetscScalar  *temp_basis,*correlation_mat;
1990: #else
1991:     PetscBLASInt dummy_int=1;
1992:     PetscScalar  dummy_scalar=1.;
1993: #endif

1995:     /* Get index sets for faces, edges and vertices from graph */
1996:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1997:     /* free unneeded index sets */
1998:     if (!pcbddc->use_vertices) {
1999:       ISDestroy(&ISForVertices);
2000:     }
2001:     if (!pcbddc->use_edges) {
2002:       for (i=0;i<n_ISForEdges;i++) {
2003:         ISDestroy(&ISForEdges[i]);
2004:       }
2005:       PetscFree(ISForEdges);
2006:       n_ISForEdges = 0;
2007:     }
2008:     if (!pcbddc->use_faces) {
2009:       for (i=0;i<n_ISForFaces;i++) {
2010:         ISDestroy(&ISForFaces[i]);
2011:       }
2012:       PetscFree(ISForFaces);
2013:       n_ISForFaces = 0;
2014:     }

2016: #if defined(PETSC_USE_DEBUG)
2017:     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2018:        Also use_change_of_basis should be consistent among processors */
2019:     if (pcbddc->NullSpace) {
2020:       PetscBool tbool[2],gbool[2];

2022:       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2023:         pcbddc->use_change_of_basis = PETSC_TRUE;
2024:         if (!ISForEdges) {
2025:           pcbddc->use_change_on_faces = PETSC_TRUE;
2026:         }
2027:       }
2028:       tbool[0] = pcbddc->use_change_of_basis;
2029:       tbool[1] = pcbddc->use_change_on_faces;
2030:       MPIU_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2031:       pcbddc->use_change_of_basis = gbool[0];
2032:       pcbddc->use_change_on_faces = gbool[1];
2033:     }
2034: #endif

2036:     /* check if near null space is attached to global mat */
2037:     MatGetNearNullSpace(pc->pmat,&nearnullsp);
2038:     if (nearnullsp) {
2039:       MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
2040:       /* remove any stored info */
2041:       MatNullSpaceDestroy(&pcbddc->onearnullspace);
2042:       PetscFree(pcbddc->onearnullvecs_state);
2043:       /* store information for BDDC solver reuse */
2044:       PetscObjectReference((PetscObject)nearnullsp);
2045:       pcbddc->onearnullspace = nearnullsp;
2046:       PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
2047:       for (i=0;i<nnsp_size;i++) {
2048:         PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
2049:       }
2050:     } else { /* if near null space is not provided BDDC uses constants by default */
2051:       nnsp_size = 0;
2052:       nnsp_has_cnst = PETSC_TRUE;
2053:     }
2054:     /* get max number of constraints on a single cc */
2055:     max_constraints = nnsp_size;
2056:     if (nnsp_has_cnst) max_constraints++;

2058:     /*
2059:          Evaluate maximum storage size needed by the procedure
2060:          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2061:          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2062:          There can be multiple constraints per connected component
2063:                                                                                                                                                            */
2064:     n_vertices = 0;
2065:     if (ISForVertices) {
2066:       ISGetSize(ISForVertices,&n_vertices);
2067:     }
2068:     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2069:     PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);

2071:     total_counts = n_ISForFaces+n_ISForEdges;
2072:     total_counts *= max_constraints;
2073:     total_counts += n_vertices;
2074:     PetscBTCreate(total_counts,&change_basis);

2076:     total_counts = 0;
2077:     max_size_of_constraint = 0;
2078:     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2079:       IS used_is;
2080:       if (i<n_ISForEdges) {
2081:         used_is = ISForEdges[i];
2082:       } else {
2083:         used_is = ISForFaces[i-n_ISForEdges];
2084:       }
2085:       ISGetSize(used_is,&j);
2086:       total_counts += j;
2087:       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2088:     }
2089:     PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);

2091:     /* get local part of global near null space vectors */
2092:     PetscMalloc1(nnsp_size,&localnearnullsp);
2093:     for (k=0;k<nnsp_size;k++) {
2094:       VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
2095:       VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
2096:       VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
2097:     }

2099:     /* whether or not to skip lapack calls */
2100:     skip_lapack = PETSC_TRUE;
2101:     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;

2103:     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2104:     if (!skip_lapack) {
2105:       PetscScalar temp_work;

2107: #if defined(PETSC_MISSING_LAPACK_GESVD)
2108:       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2109:       PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
2110:       PetscMalloc1(max_constraints,&singular_vals);
2111:       PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
2112: #if defined(PETSC_USE_COMPLEX)
2113:       PetscMalloc1(3*max_constraints,&rwork);
2114: #endif
2115:       /* now we evaluate the optimal workspace using query with lwork=-1 */
2116:       PetscBLASIntCast(max_constraints,&Blas_N);
2117:       PetscBLASIntCast(max_constraints,&Blas_LDA);
2118:       lwork = -1;
2119:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2120: #if !defined(PETSC_USE_COMPLEX)
2121:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2122: #else
2123:       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2124: #endif
2125:       PetscFPTrapPop();
2126:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2127: #else /* on missing GESVD */
2128:       /* SVD */
2129:       PetscInt max_n,min_n;
2130:       max_n = max_size_of_constraint;
2131:       min_n = max_constraints;
2132:       if (max_size_of_constraint < max_constraints) {
2133:         min_n = max_size_of_constraint;
2134:         max_n = max_constraints;
2135:       }
2136:       PetscMalloc1(min_n,&singular_vals);
2137: #if defined(PETSC_USE_COMPLEX)
2138:       PetscMalloc1(5*min_n,&rwork);
2139: #endif
2140:       /* now we evaluate the optimal workspace using query with lwork=-1 */
2141:       lwork = -1;
2142:       PetscBLASIntCast(max_n,&Blas_M);
2143:       PetscBLASIntCast(min_n,&Blas_N);
2144:       PetscBLASIntCast(max_n,&Blas_LDA);
2145:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2146: #if !defined(PETSC_USE_COMPLEX)
2147:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2148: #else
2149:       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2150: #endif
2151:       PetscFPTrapPop();
2152:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2153: #endif /* on missing GESVD */
2154:       /* Allocate optimal workspace */
2155:       PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
2156:       PetscMalloc1(lwork,&work);
2157:     }
2158:     /* Now we can loop on constraining sets */
2159:     total_counts = 0;
2160:     constraints_idxs_ptr[0] = 0;
2161:     constraints_data_ptr[0] = 0;
2162:     /* vertices */
2163:     if (n_vertices) {
2164:       ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
2165:       if (nnsp_has_cnst) { /* it considers all possible vertices */
2166:         PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));
2167:         for (i=0;i<n_vertices;i++) {
2168:           constraints_n[total_counts] = 1;
2169:           constraints_data[total_counts] = 1.0;
2170:           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2171:           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2172:           total_counts++;
2173:         }
2174:       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2175:         PetscBool used_vertex;
2176:         for (i=0;i<n_vertices;i++) {
2177:           used_vertex = PETSC_FALSE;
2178:           k = 0;
2179:           while (!used_vertex && k<nnsp_size) {
2180:             VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
2181:             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2182:               constraints_n[total_counts] = 1;
2183:               constraints_idxs[total_counts] = is_indices[i];
2184:               constraints_data[total_counts] = 1.0;
2185:               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2186:               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2187:               total_counts++;
2188:               used_vertex = PETSC_TRUE;
2189:             }
2190:             VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
2191:             k++;
2192:           }
2193:         }
2194:       }
2195:       ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
2196:       n_vertices = total_counts;
2197:     }

2199:     /* edges and faces */
2200:     total_counts_cc = total_counts;
2201:     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2202:       IS        used_is;
2203:       PetscBool idxs_copied = PETSC_FALSE;

2205:       if (ncc<n_ISForEdges) {
2206:         used_is = ISForEdges[ncc];
2207:         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2208:       } else {
2209:         used_is = ISForFaces[ncc-n_ISForEdges];
2210:         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2211:       }
2212:       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */

2214:       ISGetSize(used_is,&size_of_constraint);
2215:       ISGetIndices(used_is,(const PetscInt**)&is_indices);
2216:       /* change of basis should not be performed on local periodic nodes */
2217:       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2218:       if (nnsp_has_cnst) {
2219:         PetscScalar quad_value;

2221:         PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
2222:         idxs_copied = PETSC_TRUE;

2224:         if (!pcbddc->use_nnsp_true) {
2225:           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2226:         } else {
2227:           quad_value = 1.0;
2228:         }
2229:         for (j=0;j<size_of_constraint;j++) {
2230:           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2231:         }
2232:         temp_constraints++;
2233:         total_counts++;
2234:       }
2235:       for (k=0;k<nnsp_size;k++) {
2236:         PetscReal real_value;
2237:         PetscScalar *ptr_to_data;

2239:         VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
2240:         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2241:         for (j=0;j<size_of_constraint;j++) {
2242:           ptr_to_data[j] = array[is_indices[j]];
2243:         }
2244:         VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
2245:         /* check if array is null on the connected component */
2246:         PetscBLASIntCast(size_of_constraint,&Blas_N);
2247:         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2248:         if (real_value > 0.0) { /* keep indices and values */
2249:           temp_constraints++;
2250:           total_counts++;
2251:           if (!idxs_copied) {
2252:             PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));
2253:             idxs_copied = PETSC_TRUE;
2254:           }
2255:         }
2256:       }
2257:       ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
2258:       valid_constraints = temp_constraints;
2259:       if (!pcbddc->use_nnsp_true && temp_constraints) {
2260:         if (temp_constraints == 1) { /* just normalize the constraint */
2261:           PetscScalar norm,*ptr_to_data;

2263:           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2264:           PetscBLASIntCast(size_of_constraint,&Blas_N);
2265:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2266:           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2267:           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2268:         } else { /* perform SVD */
2269:           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2270:           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];

2272: #if defined(PETSC_MISSING_LAPACK_GESVD)
2273:           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2274:              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2275:              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2276:                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2277:                 from that computed using LAPACKgesvd
2278:              -> This is due to a different computation of eigenvectors in LAPACKheev
2279:              -> The quality of the POD-computed basis will be the same */
2280:           PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));
2281:           /* Store upper triangular part of correlation matrix */
2282:           PetscBLASIntCast(size_of_constraint,&Blas_N);
2283:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2284:           for (j=0;j<temp_constraints;j++) {
2285:             for (k=0;k<j+1;k++) {
2286:               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2287:             }
2288:           }
2289:           /* compute eigenvalues and eigenvectors of correlation matrix */
2290:           PetscBLASIntCast(temp_constraints,&Blas_N);
2291:           PetscBLASIntCast(temp_constraints,&Blas_LDA);
2292: #if !defined(PETSC_USE_COMPLEX)
2293:           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2294: #else
2295:           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2296: #endif
2297:           PetscFPTrapPop();
2298:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2299:           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2300:           j = 0;
2301:           while (j < temp_constraints && singular_vals[j] < tol) j++;
2302:           total_counts = total_counts-j;
2303:           valid_constraints = temp_constraints-j;
2304:           /* scale and copy POD basis into used quadrature memory */
2305:           PetscBLASIntCast(size_of_constraint,&Blas_M);
2306:           PetscBLASIntCast(temp_constraints,&Blas_N);
2307:           PetscBLASIntCast(temp_constraints,&Blas_K);
2308:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2309:           PetscBLASIntCast(temp_constraints,&Blas_LDB);
2310:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
2311:           if (j<temp_constraints) {
2312:             PetscInt ii;
2313:             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2314:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2315:             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2316:             PetscFPTrapPop();
2317:             for (k=0;k<temp_constraints-j;k++) {
2318:               for (ii=0;ii<size_of_constraint;ii++) {
2319:                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2320:               }
2321:             }
2322:           }
2323: #else  /* on missing GESVD */
2324:           PetscBLASIntCast(size_of_constraint,&Blas_M);
2325:           PetscBLASIntCast(temp_constraints,&Blas_N);
2326:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2327:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2328: #if !defined(PETSC_USE_COMPLEX)
2329:           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2330: #else
2331:           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2332: #endif
2333:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2334:           PetscFPTrapPop();
2335:           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2336:           k = temp_constraints;
2337:           if (k > size_of_constraint) k = size_of_constraint;
2338:           j = 0;
2339:           while (j < k && singular_vals[k-j-1] < tol) j++;
2340:           valid_constraints = k-j;
2341:           total_counts = total_counts-temp_constraints+valid_constraints;
2342: #endif /* on missing GESVD */
2343:         }
2344:       }
2345:       /* update pointers information */
2346:       if (valid_constraints) {
2347:         constraints_n[total_counts_cc] = valid_constraints;
2348:         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2349:         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2350:         /* set change_of_basis flag */
2351:         if (boolforchange) {
2352:           PetscBTSet(change_basis,total_counts_cc);
2353:         }
2354:         total_counts_cc++;
2355:       }
2356:     }
2357:     /* free workspace */
2358:     if (!skip_lapack) {
2359:       PetscFree(work);
2360: #if defined(PETSC_USE_COMPLEX)
2361:       PetscFree(rwork);
2362: #endif
2363:       PetscFree(singular_vals);
2364: #if defined(PETSC_MISSING_LAPACK_GESVD)
2365:       PetscFree(correlation_mat);
2366:       PetscFree(temp_basis);
2367: #endif
2368:     }
2369:     for (k=0;k<nnsp_size;k++) {
2370:       VecDestroy(&localnearnullsp[k]);
2371:     }
2372:     PetscFree(localnearnullsp);
2373:     /* free index sets of faces, edges and vertices */
2374:     for (i=0;i<n_ISForFaces;i++) {
2375:       ISDestroy(&ISForFaces[i]);
2376:     }
2377:     if (n_ISForFaces) {
2378:       PetscFree(ISForFaces);
2379:     }
2380:     for (i=0;i<n_ISForEdges;i++) {
2381:       ISDestroy(&ISForEdges[i]);
2382:     }
2383:     if (n_ISForEdges) {
2384:       PetscFree(ISForEdges);
2385:     }
2386:     ISDestroy(&ISForVertices);
2387:   } else {
2388:     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;

2390:     total_counts = 0;
2391:     n_vertices = 0;
2392:     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2393:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
2394:     }
2395:     max_constraints = 0;
2396:     total_counts_cc = 0;
2397:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2398:       total_counts += pcbddc->adaptive_constraints_n[i];
2399:       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2400:       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2401:     }
2402:     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2403:     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2404:     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2405:     constraints_data = pcbddc->adaptive_constraints_data;
2406:     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2407:     PetscMalloc1(total_counts_cc,&constraints_n);
2408:     total_counts_cc = 0;
2409:     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2410:       if (pcbddc->adaptive_constraints_n[i]) {
2411:         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2412:       }
2413:     }
2414: #if 0
2415:     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2416:     for (i=0;i<total_counts_cc;i++) {
2417:       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2418:       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2419:       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2420:         printf(" %d",constraints_idxs[j]);
2421:       }
2422:       printf("\n");
2423:       printf("number of cc: %d\n",constraints_n[i]);
2424:     }
2425:     for (i=0;i<n_vertices;i++) {
2426:       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2427:     }
2428:     for (i=0;i<sub_schurs->n_subs;i++) {
2429:       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2430:     }
2431: #endif

2433:     max_size_of_constraint = 0;
2434:     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2435:     PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
2436:     /* Change of basis */
2437:     PetscBTCreate(total_counts_cc,&change_basis);
2438:     if (pcbddc->use_change_of_basis) {
2439:       for (i=0;i<sub_schurs->n_subs;i++) {
2440:         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2441:           PetscBTSet(change_basis,i+n_vertices);
2442:         }
2443:       }
2444:     }
2445:   }
2446:   pcbddc->local_primal_size = total_counts;
2447:   PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);

2449:   /* map constraints_idxs in boundary numbering */
2450:   ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
2451:   if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i);

2453:   /* Create constraint matrix */
2454:   MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
2455:   MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
2456:   MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);

2458:   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2459:   /* determine if a QR strategy is needed for change of basis */
2460:   qr_needed = PETSC_FALSE;
2461:   PetscBTCreate(total_counts_cc,&qr_needed_idx);
2462:   total_primal_vertices=0;
2463:   pcbddc->local_primal_size_cc = 0;
2464:   for (i=0;i<total_counts_cc;i++) {
2465:     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2466:     if (size_of_constraint == 1) {
2467:       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2468:       pcbddc->local_primal_size_cc += 1;
2469:     } else if (PetscBTLookup(change_basis,i)) {
2470:       for (k=0;k<constraints_n[i];k++) {
2471:         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2472:       }
2473:       pcbddc->local_primal_size_cc += constraints_n[i];
2474:       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2475:         PetscBTSet(qr_needed_idx,i);
2476:         qr_needed = PETSC_TRUE;
2477:       }
2478:     } else {
2479:       pcbddc->local_primal_size_cc += 1;
2480:     }
2481:   }
2482:   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2483:   pcbddc->n_vertices = total_primal_vertices;
2484:   /* permute indices in order to have a sorted set of vertices */
2485:   PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);

2487:   PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);
2488:   PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));
2489:   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;

2491:   /* nonzero structure of constraint matrix */
2492:   /* and get reference dof for local constraints */
2493:   PetscMalloc1(pcbddc->local_primal_size,&nnz);
2494:   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;

2496:   j = total_primal_vertices;
2497:   total_counts = total_primal_vertices;
2498:   cum = total_primal_vertices;
2499:   for (i=n_vertices;i<total_counts_cc;i++) {
2500:     if (!PetscBTLookup(change_basis,i)) {
2501:       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2502:       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2503:       cum++;
2504:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2505:       for (k=0;k<constraints_n[i];k++) {
2506:         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2507:         nnz[j+k] = size_of_constraint;
2508:       }
2509:       j += constraints_n[i];
2510:     }
2511:   }
2512:   MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
2513:   PetscFree(nnz);

2515:   /* set values in constraint matrix */
2516:   for (i=0;i<total_primal_vertices;i++) {
2517:     MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
2518:   }
2519:   total_counts = total_primal_vertices;
2520:   for (i=n_vertices;i<total_counts_cc;i++) {
2521:     if (!PetscBTLookup(change_basis,i)) {
2522:       PetscInt *cols;

2524:       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2525:       cols = constraints_idxs+constraints_idxs_ptr[i];
2526:       for (k=0;k<constraints_n[i];k++) {
2527:         PetscInt    row = total_counts+k;
2528:         PetscScalar *vals;

2530:         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2531:         MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
2532:       }
2533:       total_counts += constraints_n[i];
2534:     }
2535:   }
2536:   /* assembling */
2537:   MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
2538:   MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);

2540:   /*
2541:   PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
2542:   MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);
2543:   PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);
2544:   */
2545:   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2546:   if (pcbddc->use_change_of_basis) {
2547:     /* dual and primal dofs on a single cc */
2548:     PetscInt     dual_dofs,primal_dofs;
2549:     /* working stuff for GEQRF */
2550:     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2551:     PetscBLASInt lqr_work;
2552:     /* working stuff for UNGQR */
2553:     PetscScalar  *gqr_work,lgqr_work_t;
2554:     PetscBLASInt lgqr_work;
2555:     /* working stuff for TRTRS */
2556:     PetscScalar  *trs_rhs;
2557:     PetscBLASInt Blas_NRHS;
2558:     /* pointers for values insertion into change of basis matrix */
2559:     PetscInt     *start_rows,*start_cols;
2560:     PetscScalar  *start_vals;
2561:     /* working stuff for values insertion */
2562:     PetscBT      is_primal;
2563:     PetscInt     *aux_primal_numbering_B;
2564:     /* matrix sizes */
2565:     PetscInt     global_size,local_size;
2566:     /* temporary change of basis */
2567:     Mat          localChangeOfBasisMatrix;
2568:     /* extra space for debugging */
2569:     PetscScalar  *dbg_work;

2571:     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2572:     MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
2573:     MatSetType(localChangeOfBasisMatrix,MATAIJ);
2574:     MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
2575:     /* nonzeros for local mat */
2576:     PetscMalloc1(pcis->n,&nnz);
2577:     for (i=0;i<pcis->n;i++) nnz[i]=1;
2578:     for (i=n_vertices;i<total_counts_cc;i++) {
2579:       if (PetscBTLookup(change_basis,i)) {
2580:         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2581:         if (PetscBTLookup(qr_needed_idx,i)) {
2582:           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2583:         } else {
2584:           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2585:           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2586:         }
2587:       }
2588:     }
2589:     MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
2590:     PetscFree(nnz);
2591:     /* Set initial identity in the matrix */
2592:     for (i=0;i<pcis->n;i++) {
2593:       MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
2594:     }

2596:     if (pcbddc->dbg_flag) {
2597:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
2598:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
2599:     }


2602:     /* Now we loop on the constraints which need a change of basis */
2603:     /*
2604:        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2605:        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)

2607:        Basic blocks of change of basis matrix T computed by

2609:           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)

2611:             | 1        0   ...        0         s_1/S |
2612:             | 0        1   ...        0         s_2/S |
2613:             |              ...                        |
2614:             | 0        ...            1     s_{n-1}/S |
2615:             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |

2617:             with S = \sum_{i=1}^n s_i^2
2618:             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2619:                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering

2621:           - QR decomposition of constraints otherwise
2622:     */
2623:     if (qr_needed) {
2624:       /* space to store Q */
2625:       PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
2626:       /* first we issue queries for optimal work */
2627:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
2628:       PetscBLASIntCast(max_constraints,&Blas_N);
2629:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
2630:       lqr_work = -1;
2631:       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2632:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2633:       PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
2634:       PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
2635:       lgqr_work = -1;
2636:       PetscBLASIntCast(max_size_of_constraint,&Blas_M);
2637:       PetscBLASIntCast(max_size_of_constraint,&Blas_N);
2638:       PetscBLASIntCast(max_constraints,&Blas_K);
2639:       PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
2640:       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2641:       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2642:       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2643:       PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
2644:       PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
2645:       /* array to store scaling factors for reflectors */
2646:       PetscMalloc1(max_constraints,&qr_tau);
2647:       /* array to store rhs and solution of triangular solver */
2648:       PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
2649:       /* allocating workspace for check */
2650:       if (pcbddc->dbg_flag) {
2651:         PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
2652:       }
2653:     }
2654:     /* array to store whether a node is primal or not */
2655:     PetscBTCreate(pcis->n_B,&is_primal);
2656:     PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
2657:     ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
2658:     if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i);
2659:     for (i=0;i<total_primal_vertices;i++) {
2660:       PetscBTSet(is_primal,aux_primal_numbering_B[i]);
2661:     }
2662:     PetscFree(aux_primal_numbering_B);

2664:     /* loop on constraints and see whether or not they need a change of basis and compute it */
2665:     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2666:       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2667:       if (PetscBTLookup(change_basis,total_counts)) {
2668:         /* get constraint info */
2669:         primal_dofs = constraints_n[total_counts];
2670:         dual_dofs = size_of_constraint-primal_dofs;

2672:         if (pcbddc->dbg_flag) {
2673:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);
2674:         }

2676:         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */

2678:           /* copy quadrature constraints for change of basis check */
2679:           if (pcbddc->dbg_flag) {
2680:             PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));
2681:           }
2682:           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2683:           PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

2685:           /* compute QR decomposition of constraints */
2686:           PetscBLASIntCast(size_of_constraint,&Blas_M);
2687:           PetscBLASIntCast(primal_dofs,&Blas_N);
2688:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2689:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2690:           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2691:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2692:           PetscFPTrapPop();

2694:           /* explictly compute R^-T */
2695:           PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));
2696:           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2697:           PetscBLASIntCast(primal_dofs,&Blas_N);
2698:           PetscBLASIntCast(primal_dofs,&Blas_NRHS);
2699:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2700:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
2701:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2702:           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2703:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2704:           PetscFPTrapPop();

2706:           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2707:           PetscBLASIntCast(size_of_constraint,&Blas_M);
2708:           PetscBLASIntCast(size_of_constraint,&Blas_N);
2709:           PetscBLASIntCast(primal_dofs,&Blas_K);
2710:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2711:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2712:           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2713:           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2714:           PetscFPTrapPop();

2716:           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2717:              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2718:              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2719:           PetscBLASIntCast(size_of_constraint,&Blas_M);
2720:           PetscBLASIntCast(primal_dofs,&Blas_N);
2721:           PetscBLASIntCast(primal_dofs,&Blas_K);
2722:           PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2723:           PetscBLASIntCast(primal_dofs,&Blas_LDB);
2724:           PetscBLASIntCast(size_of_constraint,&Blas_LDC);
2725:           PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2726:           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2727:           PetscFPTrapPop();
2728:           PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));

2730:           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2731:           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2732:           /* insert cols for primal dofs */
2733:           for (j=0;j<primal_dofs;j++) {
2734:             start_vals = &qr_basis[j*size_of_constraint];
2735:             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2736:             MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
2737:           }
2738:           /* insert cols for dual dofs */
2739:           for (j=0,k=0;j<dual_dofs;k++) {
2740:             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2741:               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2742:               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2743:               MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
2744:               j++;
2745:             }
2746:           }

2748:           /* check change of basis */
2749:           if (pcbddc->dbg_flag) {
2750:             PetscInt   ii,jj;
2751:             PetscBool valid_qr=PETSC_TRUE;
2752:             PetscBLASIntCast(primal_dofs,&Blas_M);
2753:             PetscBLASIntCast(size_of_constraint,&Blas_N);
2754:             PetscBLASIntCast(size_of_constraint,&Blas_K);
2755:             PetscBLASIntCast(size_of_constraint,&Blas_LDA);
2756:             PetscBLASIntCast(size_of_constraint,&Blas_LDB);
2757:             PetscBLASIntCast(primal_dofs,&Blas_LDC);
2758:             PetscFPTrapPush(PETSC_FP_TRAP_OFF);
2759:             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2760:             PetscFPTrapPop();
2761:             for (jj=0;jj<size_of_constraint;jj++) {
2762:               for (ii=0;ii<primal_dofs;ii++) {
2763:                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2764:                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2765:               }
2766:             }
2767:             if (!valid_qr) {
2768:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
2769:               for (jj=0;jj<size_of_constraint;jj++) {
2770:                 for (ii=0;ii<primal_dofs;ii++) {
2771:                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2772:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2773:                   }
2774:                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2775:                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2776:                   }
2777:                 }
2778:               }
2779:             } else {
2780:               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
2781:             }
2782:           }
2783:         } else { /* simple transformation block */
2784:           PetscInt    row,col;
2785:           PetscScalar val,norm;

2787:           PetscBLASIntCast(size_of_constraint,&Blas_N);
2788:           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2789:           for (j=0;j<size_of_constraint;j++) {
2790:             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2791:             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2792:             if (!PetscBTLookup(is_primal,row_B)) {
2793:               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2794:               MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
2795:               MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
2796:             } else {
2797:               for (k=0;k<size_of_constraint;k++) {
2798:                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2799:                 if (row != col) {
2800:                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2801:                 } else {
2802:                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2803:                 }
2804:                 MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
2805:               }
2806:             }
2807:           }
2808:           if (pcbddc->dbg_flag) {
2809:             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
2810:           }
2811:         }
2812:       } else {
2813:         if (pcbddc->dbg_flag) {
2814:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);
2815:         }
2816:       }
2817:     }

2819:     /* free workspace */
2820:     if (qr_needed) {
2821:       if (pcbddc->dbg_flag) {
2822:         PetscFree(dbg_work);
2823:       }
2824:       PetscFree(trs_rhs);
2825:       PetscFree(qr_tau);
2826:       PetscFree(qr_work);
2827:       PetscFree(gqr_work);
2828:       PetscFree(qr_basis);
2829:     }
2830:     PetscBTDestroy(&is_primal);
2831:     MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
2832:     MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);

2834:     /* assembling of global change of variable */
2835:     {
2836:       Mat      tmat;
2837:       PetscInt bs;

2839:       VecGetSize(pcis->vec1_global,&global_size);
2840:       VecGetLocalSize(pcis->vec1_global,&local_size);
2841:       MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
2842:       MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
2843:       MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
2844:       MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
2845:       MatGetBlockSize(pc->pmat,&bs);
2846:       MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
2847:       MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
2848:       MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
2849:       MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
2850:       MatDestroy(&tmat);
2851:       VecSet(pcis->vec1_global,0.0);
2852:       VecSet(pcis->vec1_N,1.0);
2853:       VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2854:       VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
2855:       VecReciprocal(pcis->vec1_global);
2856:       MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);
2857:     }
2858:     /* check */
2859:     if (pcbddc->dbg_flag) {
2860:       PetscReal error;
2861:       Vec       x,x_change;

2863:       VecDuplicate(pcis->vec1_global,&x);
2864:       VecDuplicate(pcis->vec1_global,&x_change);
2865:       VecSetRandom(x,NULL);
2866:       VecCopy(x,pcis->vec1_global);
2867:       VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
2868:       VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
2869:       MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
2870:       VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
2871:       VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
2872:       MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
2873:       VecAXPY(x,-1.0,x_change);
2874:       VecNorm(x,NORM_INFINITY,&error);
2875:       PetscViewerFlush(pcbddc->dbg_viewer);
2876:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);
2877:       VecDestroy(&x);
2878:       VecDestroy(&x_change);
2879:     }

2881:     /* adapt sub_schurs computed (if any) */
2882:     if (pcbddc->use_deluxe_scaling) {
2883:       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2884:       if (sub_schurs->S_Ej_all) {
2885:         Mat S_new,tmat;
2886:         IS is_all_N;

2888:         ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
2889:         MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
2890:         ISDestroy(&is_all_N);
2891:         MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
2892:         MatDestroy(&sub_schurs->S_Ej_all);
2893:         PetscObjectReference((PetscObject)S_new);
2894:         sub_schurs->S_Ej_all = S_new;
2895:         MatDestroy(&S_new);
2896:         if (sub_schurs->sum_S_Ej_all) {
2897:           MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
2898:           MatDestroy(&sub_schurs->sum_S_Ej_all);
2899:           PetscObjectReference((PetscObject)S_new);
2900:           sub_schurs->sum_S_Ej_all = S_new;
2901:           MatDestroy(&S_new);
2902:         }
2903:         MatDestroy(&tmat);
2904:       }
2905:     }
2906:     MatDestroy(&localChangeOfBasisMatrix);
2907:   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2908:     PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
2909:     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2910:   }

2912:   /* set up change of basis context */
2913:   if (pcbddc->ChangeOfBasisMatrix) {
2914:     PCBDDCChange_ctx change_ctx;

2916:     if (!pcbddc->new_global_mat) {
2917:       PetscInt global_size,local_size;

2919:       VecGetSize(pcis->vec1_global,&global_size);
2920:       VecGetLocalSize(pcis->vec1_global,&local_size);
2921:       MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);
2922:       MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);
2923:       MatSetType(pcbddc->new_global_mat,MATSHELL);
2924:       MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);
2925:       MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);
2926:       PetscNew(&change_ctx);
2927:       MatShellSetContext(pcbddc->new_global_mat,change_ctx);
2928:     } else {
2929:       MatShellGetContext(pcbddc->new_global_mat,&change_ctx);
2930:       MatDestroy(&change_ctx->global_change);
2931:       VecDestroyVecs(2,&change_ctx->work);
2932:     }
2933:     if (!pcbddc->user_ChangeOfBasisMatrix) {
2934:       PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);
2935:       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2936:     } else {
2937:       PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
2938:       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2939:     }
2940:     VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);
2941:     MatSetUp(pcbddc->new_global_mat);
2942:     MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);
2943:     MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);
2944:   }

2946:   /* check if a new primal space has been introduced */
2947:   pcbddc->new_primal_space_local = PETSC_TRUE;
2948:   if (olocal_primal_size == pcbddc->local_primal_size) {
2949:     PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);
2950:     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2951:     if (!pcbddc->new_primal_space_local) {
2952:       PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);
2953:       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2954:     }
2955:   }
2956:   PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);
2957:   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2958:   MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));

2960:   /* flush dbg viewer */
2961:   if (pcbddc->dbg_flag) {
2962:     PetscViewerFlush(pcbddc->dbg_viewer);
2963:   }

2965:   /* free workspace */
2966:   PetscBTDestroy(&qr_needed_idx);
2967:   PetscBTDestroy(&change_basis);
2968:   if (!pcbddc->adaptive_selection) {
2969:     PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
2970:     PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
2971:   } else {
2972:     PetscFree5(pcbddc->adaptive_constraints_n,
2973:                       pcbddc->adaptive_constraints_idxs_ptr,
2974:                       pcbddc->adaptive_constraints_data_ptr,
2975:                       pcbddc->adaptive_constraints_idxs,
2976:                       pcbddc->adaptive_constraints_data);
2977:     PetscFree(constraints_n);
2978:     PetscFree(constraints_idxs_B);
2979:   }
2980:   return(0);
2981: }

2985: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2986: {
2987:   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2988:   PC_IS       *pcis = (PC_IS*)pc->data;
2989:   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2990:   PetscInt    ierr,i,vertex_size,N;
2991:   PetscViewer viewer=pcbddc->dbg_viewer;

2994:   /* Reset previously computed graph */
2995:   PCBDDCGraphReset(pcbddc->mat_graph);
2996:   /* Init local Graph struct */
2997:   MatGetSize(pc->pmat,&N,NULL);
2998:   PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);

3000:   /* Check validity of the csr graph passed in by the user */
3001:   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3002:     PCBDDCGraphResetCSR(pcbddc->mat_graph);
3003:   }

3005:   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3006:   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3007:     PetscInt  *xadj,*adjncy;
3008:     PetscInt  nvtxs;
3009:     PetscBool flg_row=PETSC_FALSE;

3011:     if (pcbddc->use_local_adj) {

3013:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
3014:       if (flg_row) {
3015:         PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
3016:         pcbddc->computed_rowadj = PETSC_TRUE;
3017:       }
3018:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
3019:     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3020:       IS                     is_dummy;
3021:       ISLocalToGlobalMapping l2gmap_dummy;
3022:       PetscInt               j,sum;
3023:       PetscInt               *cxadj,*cadjncy;
3024:       const PetscInt         *idxs;
3025:       PCBDDCGraph            graph;
3026:       PetscBT                is_on_boundary;

3028:       ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);
3029:       ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
3030:       ISDestroy(&is_dummy);
3031:       PCBDDCGraphCreate(&graph);
3032:       PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);
3033:       ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
3034:       MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
3035:       if (flg_row) {
3036:         graph->xadj = xadj;
3037:         graph->adjncy = adjncy;
3038:       }
3039:       PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
3040:       PCBDDCGraphComputeConnectedComponents(graph);
3041:       MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);

3043:       if (pcbddc->dbg_flag) {
3044:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);
3045:         for (i=0;i<graph->ncc;i++) {
3046:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);
3047:         }
3048:       }

3050:       PetscBTCreate(pcis->n,&is_on_boundary);
3051:       ISGetIndices(pcis->is_B_local,&idxs);
3052:       for (i=0;i<pcis->n_B;i++) {
3053:         PetscBTSet(is_on_boundary,idxs[i]);
3054:       }
3055:       ISRestoreIndices(pcis->is_B_local,&idxs);

3057:       PetscCalloc1(pcis->n+1,&cxadj);
3058:       sum = 0;
3059:       for (i=0;i<graph->ncc;i++) {
3060:         PetscInt sizecc = 0;
3061:         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3062:           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3063:             sizecc++;
3064:           }
3065:         }
3066:         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3067:           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3068:             cxadj[graph->queue[j]] = sizecc;
3069:           }
3070:         }
3071:         sum += sizecc*sizecc;
3072:       }
3073:       PetscMalloc1(sum,&cadjncy);
3074:       sum = 0;
3075:       for (i=0;i<pcis->n;i++) {
3076:         PetscInt temp = cxadj[i];
3077:         cxadj[i] = sum;
3078:         sum += temp;
3079:       }
3080:       cxadj[pcis->n] = sum;
3081:       for (i=0;i<graph->ncc;i++) {
3082:         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3083:           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3084:             PetscInt k,sizecc = 0;
3085:             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3086:               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3087:                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3088:                 sizecc++;
3089:               }
3090:             }
3091:           }
3092:         }
3093:       }
3094:       if (sum) {
3095:         PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);
3096:       } else {
3097:         PetscFree(cxadj);
3098:         PetscFree(cadjncy);
3099:       }
3100:       graph->xadj = 0;
3101:       graph->adjncy = 0;
3102:       PCBDDCGraphDestroy(&graph);
3103:       PetscBTDestroy(&is_on_boundary);
3104:     }
3105:   }
3106:   if (pcbddc->dbg_flag) {
3107:     PetscViewerFlush(pcbddc->dbg_viewer);
3108:   }

3110:   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3111:   vertex_size = 1;
3112:   if (pcbddc->user_provided_isfordofs) {
3113:     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3114:       PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
3115:       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3116:         PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
3117:         ISDestroy(&pcbddc->ISForDofs[i]);
3118:       }
3119:       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3120:       pcbddc->n_ISForDofs = 0;
3121:       PetscFree(pcbddc->ISForDofs);
3122:     }
3123:     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3124:     MatGetBlockSize(matis->A,&vertex_size);
3125:   } else {
3126:     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3127:       MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);
3128:       PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
3129:       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3130:         ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
3131:       }
3132:     }
3133:   }

3135:   /* Setup of Graph */
3136:   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3137:     PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
3138:   }
3139:   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3140:     PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
3141:   }
3142:   PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);

3144:   /* Graph's connected components analysis */
3145:   PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);

3147:   /* print some info to stdout */
3148:   if (pcbddc->dbg_flag) {
3149:     PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
3150:   }

3152:   /* mark topography has done */
3153:   pcbddc->recompute_topography = PETSC_FALSE;
3154:   return(0);
3155: }

3157: /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3160: PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3161: {
3162:   PetscSF        sf;
3163:   PetscLayout    map;
3164:   const PetscInt *idxs;
3165:   PetscInt       *leaf_data,*root_data,*gidxs;
3166:   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3167:   PetscInt       n_n,nlocals,start,first_index;
3168:   PetscMPIInt    commsize;
3169:   PetscBool      first_found;

3173:   ISGetLocalSize(subset,&n);
3174:   if (subset_mult) {
3176:     ISGetLocalSize(subset,&i);
3177:     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3178:   }
3179:   /* create workspace layout for computing global indices of subset */
3180:   ISGetIndices(subset,&idxs);
3181:   lbounds[0] = lbounds[1] = 0;
3182:   for (i=0;i<n;i++) {
3183:     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3184:     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3185:   }
3186:   lbounds[0] = -lbounds[0];
3187:   MPIU_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));
3188:   gbounds[0] = -gbounds[0];
3189:   N = gbounds[1] - gbounds[0] + 1;
3190:   PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);
3191:   PetscLayoutSetBlockSize(map,1);
3192:   PetscLayoutSetSize(map,N);
3193:   PetscLayoutSetUp(map);
3194:   PetscLayoutGetLocalSize(map,&Nl);

3196:   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
3197:   PetscMalloc2(n,&leaf_data,Nl,&root_data);
3198:   if (subset_mult) {
3199:     const PetscInt* idxs_mult;

3201:     ISGetIndices(subset_mult,&idxs_mult);
3202:     PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));
3203:     ISRestoreIndices(subset_mult,&idxs_mult);
3204:   } else {
3205:     for (i=0;i<n;i++) leaf_data[i] = 1;
3206:   }
3207:   /* local size of new subset */
3208:   n_n = 0;
3209:   for (i=0;i<n;i++) n_n += leaf_data[i];

3211:   /* global indexes in layout */
3212:   PetscMalloc1(n_n,&gidxs); /* allocating possibly extra space in gidxs which will be used later */
3213:   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
3214:   ISRestoreIndices(subset,&idxs);
3215:   PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);
3216:   PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);
3217:   PetscLayoutDestroy(&map);

3219:   /* reduce from leaves to roots */
3220:   PetscMemzero(root_data,Nl*sizeof(PetscInt));
3221:   PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);
3222:   PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);

3224:   /* count indexes in local part of layout */
3225:   nlocals = 0;
3226:   first_index = -1;
3227:   first_found = PETSC_FALSE;
3228:   for (i=0;i<Nl;i++) {
3229:     if (!first_found && root_data[i]) {
3230:       first_found = PETSC_TRUE;
3231:       first_index = i;
3232:     }
3233:     nlocals += root_data[i];
3234:   }

3236:   /* cumulative of number of indexes and size of subset without holes */
3237: #if defined(PETSC_HAVE_MPI_EXSCAN)
3238:   start = 0;
3239:   MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));
3240: #else
3241:   MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));
3242:   start = start-nlocals;
3243: #endif

3245:   if (N_n) { /* compute total size of new subset if requested */
3246:     *N_n = start + nlocals;
3247:     MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);
3248:     MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));
3249:   }

3251:   /* adapt root data with cumulative */
3252:   if (first_found) {
3253:     PetscInt old_index;

3255:     root_data[first_index] += start;
3256:     old_index = first_index;
3257:     for (i=first_index+1;i<Nl;i++) {
3258:       if (root_data[i]) {
3259:         root_data[i] += root_data[old_index];
3260:         old_index = i;
3261:       }
3262:     }
3263:   }

3265:   /* from roots to leaves */
3266:   PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);
3267:   PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);
3268:   PetscSFDestroy(&sf);

3270:   /* create new IS with global indexes without holes */
3271:   if (subset_mult) {
3272:     const PetscInt* idxs_mult;
3273:     PetscInt        cum;

3275:     cum = 0;
3276:     ISGetIndices(subset_mult,&idxs_mult);
3277:     for (i=0;i<n;i++) {
3278:       PetscInt j;
3279:       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
3280:     }
3281:     ISRestoreIndices(subset_mult,&idxs_mult);
3282:   } else {
3283:     for (i=0;i<n;i++) {
3284:       gidxs[i] = leaf_data[i]-1;
3285:     }
3286:   }
3287:   ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);
3288:   PetscFree2(leaf_data,root_data);
3289:   return(0);
3290: }

3294: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3295: {
3296:   PetscInt       i,j;
3297:   PetscScalar    *alphas;

3301:   /* this implements stabilized Gram-Schmidt */
3302:   PetscMalloc1(n,&alphas);
3303:   for (i=0;i<n;i++) {
3304:     VecNormalize(vecs[i],NULL);
3305:     if (i<n) { VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]); }
3306:     for (j=i+1;j<n;j++) { VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]); }
3307:   }
3308:   PetscFree(alphas);
3309:   return(0);
3310: }

3314: PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
3315: {
3316:   IS             ranks_send_to;
3317:   PetscInt       n_neighs,*neighs,*n_shared,**shared;
3318:   PetscMPIInt    size,rank,color;
3319:   PetscInt       *xadj,*adjncy;
3320:   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
3321:   PetscInt       i,local_size,threshold=0;
3322:   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3323:   PetscSubcomm   subcomm;

3327:   PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_square",&use_square,NULL);
3328:   PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
3329:   PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);

3331:   /* Get info on mapping */
3332:   ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);
3333:   ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);

3335:   /* build local CSR graph of subdomains' connectivity */
3336:   PetscMalloc1(2,&xadj);
3337:   xadj[0] = 0;
3338:   xadj[1] = PetscMax(n_neighs-1,0);
3339:   PetscMalloc1(xadj[1],&adjncy);
3340:   PetscMalloc1(xadj[1],&adjncy_wgt);

3342:   if (threshold) {
3343:     PetscInt xadj_count = 0;
3344:     for (i=1;i<n_neighs;i++) {
3345:       if (n_shared[i] > threshold) {
3346:         adjncy[xadj_count] = neighs[i];
3347:         adjncy_wgt[xadj_count] = n_shared[i];
3348:         xadj_count++;
3349:       }
3350:     }
3351:     xadj[1] = xadj_count;
3352:   } else {
3353:     if (xadj[1]) {
3354:       PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));
3355:       PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));
3356:     }
3357:   }
3358:   ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
3359:   if (use_square) {
3360:     for (i=0;i<xadj[1];i++) {
3361:       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3362:     }
3363:   }
3364:   PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);

3366:   PetscMalloc1(1,&ranks_send_to_idx);

3368:   /*
3369:     Restrict work on active processes only.
3370:   */
3371:   PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);
3372:   PetscSubcommSetNumber(subcomm,2); /* 2 groups, active process and not active processes */
3373:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
3374:   PetscMPIIntCast(!local_size,&color);
3375:   PetscSubcommSetTypeGeneral(subcomm,color,rank);
3376:   if (color) {
3377:     PetscFree(xadj);
3378:     PetscFree(adjncy);
3379:     PetscFree(adjncy_wgt);
3380:   } else {
3381:     Mat             subdomain_adj;
3382:     IS              new_ranks,new_ranks_contig;
3383:     MatPartitioning partitioner;
3384:     PetscInt        prank,rstart=0,rend=0;
3385:     PetscInt        *is_indices,*oldranks;
3386:     PetscBool       aggregate;

3388:     MPI_Comm_size(PetscSubcommChild(subcomm),&size);
3389:     PetscMalloc1(size,&oldranks);
3390:     prank = rank;
3391:     MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));
3392:     /*
3393:     for (i=0;i<size;i++) {
3394:       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3395:     }
3396:     */
3397:     for (i=0;i<xadj[1];i++) {
3398:       PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
3399:     }
3400:     PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
3401:     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
3402:     if (aggregate) {
3403:       PetscInt    lrows,row,ncols,*cols;
3404:       PetscMPIInt nrank;
3405:       PetscScalar *vals;

3407:       MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);
3408:       lrows = 0;
3409:       if (nrank<redprocs) {
3410:         lrows = size/redprocs;
3411:         if (nrank<size%redprocs) lrows++;
3412:       }
3413:       MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
3414:       MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
3415:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
3416:       MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
3417:       row = nrank;
3418:       ncols = xadj[1]-xadj[0];
3419:       cols = adjncy;
3420:       PetscMalloc1(ncols,&vals);
3421:       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
3422:       MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
3423:       MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
3424:       MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
3425:       PetscFree(xadj);
3426:       PetscFree(adjncy);
3427:       PetscFree(adjncy_wgt);
3428:       PetscFree(vals);
3429:     } else {
3430:       MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
3431:     }
3432:     /* MatView(subdomain_adj,0); */

3434:     /* Partition */
3435:     MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);
3436:     MatPartitioningSetAdjacency(partitioner,subdomain_adj);
3437:     if (use_vwgt) {
3438:       PetscMalloc1(1,&v_wgt);
3439:       v_wgt[0] = local_size;
3440:       MatPartitioningSetVertexWeights(partitioner,v_wgt);
3441:     }
3442:     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3443:     MatPartitioningSetNParts(partitioner,n_subdomains);
3444:     MatPartitioningSetFromOptions(partitioner);
3445:     MatPartitioningApply(partitioner,&new_ranks);
3446:     /* MatPartitioningView(partitioner,0); */

3448:     /* renumber new_ranks to avoid "holes" in new set of processors */
3449:     PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);
3450:     ISDestroy(&new_ranks);
3451:     ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
3452:     if (!redprocs) {
3453:       ranks_send_to_idx[0] = oldranks[is_indices[0]];
3454:     } else {
3455:       PetscInt    idxs[1];
3456:       PetscMPIInt tag;
3457:       MPI_Request *reqs;

3459:       PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
3460:       PetscMalloc1(rend-rstart,&reqs);
3461:       for (i=rstart;i<rend;i++) {
3462:         MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);
3463:       }
3464:       MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);
3465:       MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
3466:       PetscFree(reqs);
3467:       ranks_send_to_idx[0] = oldranks[idxs[0]];
3468:     }
3469:     ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
3470:     /* clean up */
3471:     PetscFree(oldranks);
3472:     ISDestroy(&new_ranks_contig);
3473:     MatDestroy(&subdomain_adj);
3474:     MatPartitioningDestroy(&partitioner);
3475:   }
3476:   PetscSubcommDestroy(&subcomm);

3478:   /* assemble parallel IS for sends */
3479:   i = 1;
3480:   if (color) i=0;
3481:   ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);
3482:   /* get back IS */
3483:   *is_sends = ranks_send_to;
3484:   return(0);
3485: }

3487: typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;

3491: PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3492: {
3493:   Mat                    local_mat;
3494:   IS                     is_sends_internal;
3495:   PetscInt               rows,cols,new_local_rows;
3496:   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3497:   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3498:   ISLocalToGlobalMapping l2gmap;
3499:   PetscInt*              l2gmap_indices;
3500:   const PetscInt*        is_indices;
3501:   MatType                new_local_type;
3502:   /* buffers */
3503:   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3504:   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3505:   PetscInt               *recv_buffer_idxs_local;
3506:   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3507:   /* MPI */
3508:   MPI_Comm               comm,comm_n;
3509:   PetscSubcomm           subcomm;
3510:   PetscMPIInt            n_sends,n_recvs,commsize;
3511:   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3512:   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3513:   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3514:   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3515:   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3516:   PetscErrorCode         ierr;

3519:   /* TODO: add missing checks */
3524:   PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
3525:   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3526:   MatISGetLocalMat(mat,&local_mat);
3527:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
3528:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3529:   MatGetSize(local_mat,&rows,&cols);
3530:   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3531:   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3532:     PetscInt mrows,mcols,mnrows,mncols;
3533:     PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
3534:     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3535:     MatGetSize(mat,&mrows,&mcols);
3536:     MatGetSize(*mat_n,&mnrows,&mncols);
3537:     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3538:     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3539:   }
3540:   MatGetBlockSize(local_mat,&bs);
3542:   /* prepare IS for sending if not provided */
3543:   if (!is_sends) {
3544:     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3545:     MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);
3546:   } else {
3547:     PetscObjectReference((PetscObject)is_sends);
3548:     is_sends_internal = is_sends;
3549:   }

3551:   /* get comm */
3552:   PetscObjectGetComm((PetscObject)mat,&comm);

3554:   /* compute number of sends */
3555:   ISGetLocalSize(is_sends_internal,&i);
3556:   PetscMPIIntCast(i,&n_sends);

3558:   /* compute number of receives */
3559:   MPI_Comm_size(comm,&commsize);
3560:   PetscMalloc1(commsize,&iflags);
3561:   PetscMemzero(iflags,commsize*sizeof(*iflags));
3562:   ISGetIndices(is_sends_internal,&is_indices);
3563:   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3564:   PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
3565:   PetscFree(iflags);

3567:   /* restrict comm if requested */
3568:   subcomm = 0;
3569:   destroy_mat = PETSC_FALSE;
3570:   if (restrict_comm) {
3571:     PetscMPIInt color,subcommsize;

3573:     color = 0;
3574:     if (restrict_full) {
3575:       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
3576:     } else {
3577:       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
3578:     }
3579:     MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
3580:     subcommsize = commsize - subcommsize;
3581:     /* check if reuse has been requested */
3582:     if (reuse == MAT_REUSE_MATRIX) {
3583:       if (*mat_n) {
3584:         PetscMPIInt subcommsize2;
3585:         MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
3586:         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3587:         comm_n = PetscObjectComm((PetscObject)*mat_n);
3588:       } else {
3589:         comm_n = PETSC_COMM_SELF;
3590:       }
3591:     } else { /* MAT_INITIAL_MATRIX */
3592:       PetscMPIInt rank;

3594:       MPI_Comm_rank(comm,&rank);
3595:       PetscSubcommCreate(comm,&subcomm);
3596:       PetscSubcommSetNumber(subcomm,2);
3597:       PetscSubcommSetTypeGeneral(subcomm,color,rank);
3598:       comm_n = PetscSubcommChild(subcomm);
3599:     }
3600:     /* flag to destroy *mat_n if not significative */
3601:     if (color) destroy_mat = PETSC_TRUE;
3602:   } else {
3603:     comm_n = comm;
3604:   }

3606:   /* prepare send/receive buffers */
3607:   PetscMalloc1(commsize,&ilengths_idxs);
3608:   PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));
3609:   PetscMalloc1(commsize,&ilengths_vals);
3610:   PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));
3611:   if (nis) {
3612:     PetscCalloc1(commsize,&ilengths_idxs_is);
3613:   }

3615:   /* Get data from local matrices */
3616:   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3617:     /* TODO: See below some guidelines on how to prepare the local buffers */
3618:     /*
3619:        send_buffer_vals should contain the raw values of the local matrix
3620:        send_buffer_idxs should contain:
3621:        - MatType_PRIVATE type
3622:        - PetscInt        size_of_l2gmap
3623:        - PetscInt        global_row_indices[size_of_l2gmap]
3624:        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3625:     */
3626:   else {
3627:     MatDenseGetArray(local_mat,&send_buffer_vals);
3628:     ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);
3629:     PetscMalloc1(i+2,&send_buffer_idxs);
3630:     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3631:     send_buffer_idxs[1] = i;
3632:     ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
3633:     PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));
3634:     ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
3635:     PetscMPIIntCast(i,&len);
3636:     for (i=0;i<n_sends;i++) {
3637:       ilengths_vals[is_indices[i]] = len*len;
3638:       ilengths_idxs[is_indices[i]] = len+2;
3639:     }
3640:   }
3641:   PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
3642:   /* additional is (if any) */
3643:   if (nis) {
3644:     PetscMPIInt psum;
3645:     PetscInt j;
3646:     for (j=0,psum=0;j<nis;j++) {
3647:       PetscInt plen;
3648:       ISGetLocalSize(isarray[j],&plen);
3649:       PetscMPIIntCast(plen,&len);
3650:       psum += len+1; /* indices + lenght */
3651:     }
3652:     PetscMalloc1(psum,&send_buffer_idxs_is);
3653:     for (j=0,psum=0;j<nis;j++) {
3654:       PetscInt plen;
3655:       const PetscInt *is_array_idxs;
3656:       ISGetLocalSize(isarray[j],&plen);
3657:       send_buffer_idxs_is[psum] = plen;
3658:       ISGetIndices(isarray[j],&is_array_idxs);
3659:       PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));
3660:       ISRestoreIndices(isarray[j],&is_array_idxs);
3661:       psum += plen+1; /* indices + lenght */
3662:     }
3663:     for (i=0;i<n_sends;i++) {
3664:       ilengths_idxs_is[is_indices[i]] = psum;
3665:     }
3666:     PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
3667:   }

3669:   buf_size_idxs = 0;
3670:   buf_size_vals = 0;
3671:   buf_size_idxs_is = 0;
3672:   for (i=0;i<n_recvs;i++) {
3673:     buf_size_idxs += (PetscInt)olengths_idxs[i];
3674:     buf_size_vals += (PetscInt)olengths_vals[i];
3675:     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3676:   }
3677:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
3678:   PetscMalloc1(buf_size_vals,&recv_buffer_vals);
3679:   PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);

3681:   /* get new tags for clean communications */
3682:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
3683:   PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
3684:   PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);

3686:   /* allocate for requests */
3687:   PetscMalloc1(n_sends,&send_req_idxs);
3688:   PetscMalloc1(n_sends,&send_req_vals);
3689:   PetscMalloc1(n_sends,&send_req_idxs_is);
3690:   PetscMalloc1(n_recvs,&recv_req_idxs);
3691:   PetscMalloc1(n_recvs,&recv_req_vals);
3692:   PetscMalloc1(n_recvs,&recv_req_idxs_is);

3694:   /* communications */
3695:   ptr_idxs = recv_buffer_idxs;
3696:   ptr_vals = recv_buffer_vals;
3697:   ptr_idxs_is = recv_buffer_idxs_is;
3698:   for (i=0;i<n_recvs;i++) {
3699:     source_dest = onodes[i];
3700:     MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
3701:     MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
3702:     ptr_idxs += olengths_idxs[i];
3703:     ptr_vals += olengths_vals[i];
3704:     if (nis) {
3705:       MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
3706:       ptr_idxs_is += olengths_idxs_is[i];
3707:     }
3708:   }
3709:   for (i=0;i<n_sends;i++) {
3710:     PetscMPIIntCast(is_indices[i],&source_dest);
3711:     MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
3712:     MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
3713:     if (nis) {
3714:       MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
3715:     }
3716:   }
3717:   ISRestoreIndices(is_sends_internal,&is_indices);
3718:   ISDestroy(&is_sends_internal);

3720:   /* assemble new l2g map */
3721:   MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
3722:   ptr_idxs = recv_buffer_idxs;
3723:   new_local_rows = 0;
3724:   for (i=0;i<n_recvs;i++) {
3725:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3726:     ptr_idxs += olengths_idxs[i];
3727:   }
3728:   PetscMalloc1(new_local_rows,&l2gmap_indices);
3729:   ptr_idxs = recv_buffer_idxs;
3730:   new_local_rows = 0;
3731:   for (i=0;i<n_recvs;i++) {
3732:     PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));
3733:     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3734:     ptr_idxs += olengths_idxs[i];
3735:   }
3736:   PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
3737:   ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
3738:   PetscFree(l2gmap_indices);

3740:   /* infer new local matrix type from received local matrices type */
3741:   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3742:   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3743:   if (n_recvs) {
3744:     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3745:     ptr_idxs = recv_buffer_idxs;
3746:     for (i=0;i<n_recvs;i++) {
3747:       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3748:         new_local_type_private = MATAIJ_PRIVATE;
3749:         break;
3750:       }
3751:       ptr_idxs += olengths_idxs[i];
3752:     }
3753:     switch (new_local_type_private) {
3754:       case MATDENSE_PRIVATE:
3755:         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3756:           new_local_type = MATSEQAIJ;
3757:           bs = 1;
3758:         } else { /* if I receive only 1 dense matrix */
3759:           new_local_type = MATSEQDENSE;
3760:           bs = 1;
3761:         }
3762:         break;
3763:       case MATAIJ_PRIVATE:
3764:         new_local_type = MATSEQAIJ;
3765:         bs = 1;
3766:         break;
3767:       case MATBAIJ_PRIVATE:
3768:         new_local_type = MATSEQBAIJ;
3769:         break;
3770:       case MATSBAIJ_PRIVATE:
3771:         new_local_type = MATSEQSBAIJ;
3772:         break;
3773:       default:
3774:         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3775:         break;
3776:     }
3777:   } else { /* by default, new_local_type is seqdense */
3778:     new_local_type = MATSEQDENSE;
3779:     bs = 1;
3780:   }

3782:   /* create MATIS object if needed */
3783:   if (reuse == MAT_INITIAL_MATRIX) {
3784:     MatGetSize(mat,&rows,&cols);
3785:     MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
3786:   } else {
3787:     /* it also destroys the local matrices */
3788:     MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
3789:   }
3790:   MatISGetLocalMat(*mat_n,&local_mat);
3791:   MatSetType(local_mat,new_local_type);

3793:   MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);

3795:   /* Global to local map of received indices */
3796:   PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
3797:   ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
3798:   ISLocalToGlobalMappingDestroy(&l2gmap);

3800:   /* restore attributes -> type of incoming data and its size */
3801:   buf_size_idxs = 0;
3802:   for (i=0;i<n_recvs;i++) {
3803:     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3804:     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3805:     buf_size_idxs += (PetscInt)olengths_idxs[i];
3806:   }
3807:   PetscFree(recv_buffer_idxs);

3809:   /* set preallocation */
3810:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
3811:   if (!newisdense) {
3812:     PetscInt *new_local_nnz=0;

3814:     ptr_idxs = recv_buffer_idxs_local;
3815:     if (n_recvs) {
3816:       PetscCalloc1(new_local_rows,&new_local_nnz);
3817:     }
3818:     for (i=0;i<n_recvs;i++) {
3819:       PetscInt j;
3820:       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3821:         for (j=0;j<*(ptr_idxs+1);j++) {
3822:           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3823:         }
3824:       } else {
3825:         /* TODO */
3826:       }
3827:       ptr_idxs += olengths_idxs[i];
3828:     }
3829:     if (new_local_nnz) {
3830:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3831:       MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
3832:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3833:       MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
3834:       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3835:       MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
3836:     } else {
3837:       MatSetUp(local_mat);
3838:     }
3839:     PetscFree(new_local_nnz);
3840:   } else {
3841:     MatSetUp(local_mat);
3842:   }

3844:   /* set values */
3845:   ptr_vals = recv_buffer_vals;
3846:   ptr_idxs = recv_buffer_idxs_local;
3847:   for (i=0;i<n_recvs;i++) {
3848:     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3849:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
3850:       MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
3851:       MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
3852:       MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
3853:       MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
3854:     } else {
3855:       /* TODO */
3856:     }
3857:     ptr_idxs += olengths_idxs[i];
3858:     ptr_vals += olengths_vals[i];
3859:   }
3860:   MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
3861:   MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
3862:   MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
3863:   MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
3864:   PetscFree(recv_buffer_vals);
3865:   PetscFree(recv_buffer_idxs_local);

3867: #if 0
3868:   if (!restrict_comm) { /* check */
3869:     Vec       lvec,rvec;
3870:     PetscReal infty_error;

3872:     MatCreateVecs(mat,&rvec,&lvec);
3873:     VecSetRandom(rvec,NULL);
3874:     MatMult(mat,rvec,lvec);
3875:     VecScale(lvec,-1.0);
3876:     MatMultAdd(*mat_n,rvec,lvec,lvec);
3877:     VecNorm(lvec,NORM_INFINITY,&infty_error);
3878:     PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3879:     VecDestroy(&rvec);
3880:     VecDestroy(&lvec);
3881:   }
3882: #endif

3884:   /* assemble new additional is (if any) */
3885:   if (nis) {
3886:     PetscInt **temp_idxs,*count_is,j,psum;

3888:     MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
3889:     PetscCalloc1(nis,&count_is);
3890:     ptr_idxs = recv_buffer_idxs_is;
3891:     psum = 0;
3892:     for (i=0;i<n_recvs;i++) {
3893:       for (j=0;j<nis;j++) {
3894:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3895:         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3896:         psum += plen;
3897:         ptr_idxs += plen+1; /* shift pointer to received data */
3898:       }
3899:     }
3900:     PetscMalloc1(nis,&temp_idxs);
3901:     PetscMalloc1(psum,&temp_idxs[0]);
3902:     for (i=1;i<nis;i++) {
3903:       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3904:     }
3905:     PetscMemzero(count_is,nis*sizeof(PetscInt));
3906:     ptr_idxs = recv_buffer_idxs_is;
3907:     for (i=0;i<n_recvs;i++) {
3908:       for (j=0;j<nis;j++) {
3909:         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3910:         PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));
3911:         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3912:         ptr_idxs += plen+1; /* shift pointer to received data */
3913:       }
3914:     }
3915:     for (i=0;i<nis;i++) {
3916:       ISDestroy(&isarray[i]);
3917:       PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
3918:       ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
3919:     }
3920:     PetscFree(count_is);
3921:     PetscFree(temp_idxs[0]);
3922:     PetscFree(temp_idxs);
3923:   }
3924:   /* free workspace */
3925:   PetscFree(recv_buffer_idxs_is);
3926:   MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
3927:   PetscFree(send_buffer_idxs);
3928:   MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
3929:   if (isdense) {
3930:     MatISGetLocalMat(mat,&local_mat);
3931:     MatDenseRestoreArray(local_mat,&send_buffer_vals);
3932:   } else {
3933:     /* PetscFree(send_buffer_vals); */
3934:   }
3935:   if (nis) {
3936:     MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
3937:     PetscFree(send_buffer_idxs_is);
3938:   }
3939:   PetscFree(recv_req_idxs);
3940:   PetscFree(recv_req_vals);
3941:   PetscFree(recv_req_idxs_is);
3942:   PetscFree(send_req_idxs);
3943:   PetscFree(send_req_vals);
3944:   PetscFree(send_req_idxs_is);
3945:   PetscFree(ilengths_vals);
3946:   PetscFree(ilengths_idxs);
3947:   PetscFree(olengths_vals);
3948:   PetscFree(olengths_idxs);
3949:   PetscFree(onodes);
3950:   if (nis) {
3951:     PetscFree(ilengths_idxs_is);
3952:     PetscFree(olengths_idxs_is);
3953:     PetscFree(onodes_is);
3954:   }
3955:   PetscSubcommDestroy(&subcomm);
3956:   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3957:     MatDestroy(mat_n);
3958:     for (i=0;i<nis;i++) {
3959:       ISDestroy(&isarray[i]);
3960:     }
3961:     *mat_n = NULL;
3962:   }
3963:   return(0);
3964: }

3966: /* temporary hack into ksp private data structure */
3967: #include <petsc/private/kspimpl.h>

3971: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3972: {
3973:   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3974:   PC_IS                  *pcis = (PC_IS*)pc->data;
3975:   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3976:   MatNullSpace           CoarseNullSpace=NULL;
3977:   ISLocalToGlobalMapping coarse_islg;
3978:   IS                     coarse_is,*isarray;
3979:   PetscInt               i,im_active=-1,active_procs=-1;
3980:   PetscInt               nis,nisdofs,nisneu;
3981:   PC                     pc_temp;
3982:   PCType                 coarse_pc_type;
3983:   KSPType                coarse_ksp_type;
3984:   PetscBool              multilevel_requested,multilevel_allowed;
3985:   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3986:   Mat                    t_coarse_mat_is;
3987:   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3988:   PetscMPIInt            all_procs;
3989:   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3990:   PetscBool              compute_vecs = PETSC_FALSE;
3991:   PetscScalar            *array;
3992:   PetscErrorCode         ierr;

3995:   /* Assign global numbering to coarse dofs */
3996:   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3997:     PetscInt ocoarse_size;
3998:     compute_vecs = PETSC_TRUE;
3999:     ocoarse_size = pcbddc->coarse_size;
4000:     PetscFree(pcbddc->global_primal_indices);
4001:     PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
4002:     /* see if we can avoid some work */
4003:     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4004:       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4005:       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4006:         PC        pc;
4007:         PetscBool isbddc;

4009:         /* temporary workaround since PCBDDC does not have a reset method so far */
4010:         KSPGetPC(pcbddc->coarse_ksp,&pc);
4011:         PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);
4012:         if (isbddc) {
4013:           PCDestroy(&pc);
4014:         }
4015:         KSPReset(pcbddc->coarse_ksp);
4016:         coarse_reuse = PETSC_FALSE;
4017:       } else { /* we can safely reuse already computed coarse matrix */
4018:         coarse_reuse = PETSC_TRUE;
4019:       }
4020:     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4021:       coarse_reuse = PETSC_FALSE;
4022:     }
4023:     /* reset any subassembling information */
4024:     ISDestroy(&pcbddc->coarse_subassembling);
4025:     ISDestroy(&pcbddc->coarse_subassembling_init);
4026:   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4027:     coarse_reuse = PETSC_TRUE;
4028:   }

4030:   /* count "active" (i.e. with positive local size) and "void" processes */
4031:   im_active = !!(pcis->n);
4032:   MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
4033:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);
4034:   void_procs = all_procs-active_procs;
4035:   csin_type_simple = PETSC_TRUE;
4036:   redist = PETSC_FALSE;
4037:   if (pcbddc->current_level && void_procs) {
4038:     csin_ml = PETSC_TRUE;
4039:     ncoarse_ml = void_procs;
4040:     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4041:     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4042:       csin_ds = PETSC_TRUE;
4043:       ncoarse_ds = pcbddc->redistribute_coarse;
4044:       redist = PETSC_TRUE;
4045:     } else {
4046:       csin_ds = PETSC_TRUE;
4047:       ncoarse_ds = active_procs;
4048:       redist = PETSC_TRUE;
4049:     }
4050:   } else {
4051:     csin_ml = PETSC_FALSE;
4052:     ncoarse_ml = all_procs;
4053:     if (void_procs) {
4054:       csin_ds = PETSC_TRUE;
4055:       ncoarse_ds = void_procs;
4056:       csin_type_simple = PETSC_FALSE;
4057:     } else {
4058:       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4059:         csin_ds = PETSC_TRUE;
4060:         ncoarse_ds = pcbddc->redistribute_coarse;
4061:         redist = PETSC_TRUE;
4062:       } else {
4063:         csin_ds = PETSC_FALSE;
4064:         ncoarse_ds = all_procs;
4065:       }
4066:     }
4067:   }

4069:   /*
4070:     test if we can go multilevel: three conditions must be satisfied:
4071:     - we have not exceeded the number of levels requested
4072:     - we can actually subassemble the active processes
4073:     - we can find a suitable number of MPI processes where we can place the subassembled problem
4074:   */
4075:   multilevel_allowed = PETSC_FALSE;
4076:   multilevel_requested = PETSC_FALSE;
4077:   if (pcbddc->current_level < pcbddc->max_levels) {
4078:     multilevel_requested = PETSC_TRUE;
4079:     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4080:       multilevel_allowed = PETSC_FALSE;
4081:     } else {
4082:       multilevel_allowed = PETSC_TRUE;
4083:     }
4084:   }
4085:   /* determine number of process partecipating to coarse solver */
4086:   if (multilevel_allowed) {
4087:     ncoarse = ncoarse_ml;
4088:     csin = csin_ml;
4089:     redist = PETSC_FALSE;
4090:   } else {
4091:     ncoarse = ncoarse_ds;
4092:     csin = csin_ds;
4093:   }

4095:   /* creates temporary l2gmap and IS for coarse indexes */
4096:   ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
4097:   ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);

4099:   /* creates temporary MATIS object for coarse matrix */
4100:   MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);
4101:   MatDenseGetArray(coarse_submat_dense,&array);
4102:   PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);
4103:   MatDenseRestoreArray(coarse_submat_dense,&array);
4104: #if 0
4105:   {
4106:     PetscViewer viewer;
4107:     char filename[256];
4108:     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4109:     PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4110:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4111:     MatView(coarse_submat_dense,viewer);
4112:     PetscViewerPopFormat(viewer);
4113:     PetscViewerDestroy(&viewer);
4114:   }
4115: #endif
4116:   MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);
4117:   MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
4118:   MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
4119:   MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
4120:   MatDestroy(&coarse_submat_dense);

4122:   /* compute dofs splitting and neumann boundaries for coarse dofs */
4123:   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4124:     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4125:     const PetscInt         *idxs;
4126:     ISLocalToGlobalMapping tmap;

4128:     /* create map between primal indices (in local representative ordering) and local primal numbering */
4129:     ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
4130:     /* allocate space for temporary storage */
4131:     PetscMalloc1(pcbddc->local_primal_size,&tidxs);
4132:     PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
4133:     /* allocate for IS array */
4134:     nisdofs = pcbddc->n_ISForDofsLocal;
4135:     nisneu = !!pcbddc->NeumannBoundariesLocal;
4136:     nis = nisdofs + nisneu;
4137:     PetscMalloc1(nis,&isarray);
4138:     /* dofs splitting */
4139:     for (i=0;i<nisdofs;i++) {
4140:       /* ISView(pcbddc->ISForDofsLocal[i],0); */
4141:       ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
4142:       ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
4143:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
4144:       ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
4145:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
4146:       ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
4147:       /* ISView(isarray[i],0); */
4148:     }
4149:     /* neumann boundaries */
4150:     if (pcbddc->NeumannBoundariesLocal) {
4151:       /* ISView(pcbddc->NeumannBoundariesLocal,0); */
4152:       ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
4153:       ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
4154:       ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
4155:       ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
4156:       ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
4157:       ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
4158:       /* ISView(isarray[nisdofs],0); */
4159:     }
4160:     /* free memory */
4161:     PetscFree(tidxs);
4162:     PetscFree(tidxs2);
4163:     ISLocalToGlobalMappingDestroy(&tmap);
4164:   } else {
4165:     nis = 0;
4166:     nisdofs = 0;
4167:     nisneu = 0;
4168:     isarray = NULL;
4169:   }
4170:   /* destroy no longer needed map */
4171:   ISLocalToGlobalMappingDestroy(&coarse_islg);

4173:   /* restrict on coarse candidates (if needed) */
4174:   coarse_mat_is = NULL;
4175:   if (csin) {
4176:     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4177:       if (redist) {
4178:         PetscMPIInt rank;
4179:         PetscInt    spc,n_spc_p1,dest[1],destsize;

4181:         MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
4182:         spc = active_procs/ncoarse;
4183:         n_spc_p1 = active_procs%ncoarse;
4184:         if (im_active) {
4185:           destsize = 1;
4186:           if (rank > n_spc_p1*(spc+1)-1) {
4187:             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4188:           } else {
4189:             dest[0] = rank/(spc+1);
4190:           }
4191:         } else {
4192:           destsize = 0;
4193:         }
4194:         ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);
4195:       } else if (csin_type_simple) {
4196:         PetscMPIInt rank;
4197:         PetscInt    issize,isidx;

4199:         MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
4200:         if (im_active) {
4201:           issize = 1;
4202:           isidx = (PetscInt)rank;
4203:         } else {
4204:           issize = 0;
4205:           isidx = -1;
4206:         }
4207:         ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);
4208:       } else { /* get a suitable subassembling pattern from MATIS code */
4209:         MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);
4210:       }

4212:       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4213:       if (!redist || ncoarse <= void_procs) {
4214:         PetscInt ncoarse_cand,tissize,*nisindices;
4215:         PetscInt *coarse_candidates;
4216:         const PetscInt* tisindices;

4218:         /* get coarse candidates' ranks in pc communicator */
4219:         PetscMalloc1(all_procs,&coarse_candidates);
4220:         MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));
4221:         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4222:           if (!coarse_candidates[i]) {
4223:             coarse_candidates[ncoarse_cand++]=i;
4224:           }
4225:         }
4226:         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);


4229:         if (pcbddc->dbg_flag) {
4230:           PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4231:           PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");
4232:           ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);
4233:           PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");
4234:           for (i=0;i<ncoarse_cand;i++) {
4235:             PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);
4236:           }
4237:           PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");
4238:           PetscViewerFlush(pcbddc->dbg_viewer);
4239:         }
4240:         /* shift the pattern on coarse candidates */
4241:         ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);
4242:         ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);
4243:         PetscMalloc1(tissize,&nisindices);
4244:         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4245:         ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);
4246:         ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);
4247:         PetscFree(coarse_candidates);
4248:       }
4249:       if (pcbddc->dbg_flag) {
4250:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4251:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");
4252:         ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);
4253:         PetscViewerFlush(pcbddc->dbg_viewer);
4254:       }
4255:     }
4256:     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4257:     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
4258:       MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);
4259:     } else { /* this is the last level, so use just receiving processes in subcomm */
4260:       MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);
4261:     }
4262:   } else {
4263:     if (pcbddc->dbg_flag) {
4264:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4265:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");
4266:       PetscViewerFlush(pcbddc->dbg_viewer);
4267:     }
4268:     PetscObjectReference((PetscObject)t_coarse_mat_is);
4269:     coarse_mat_is = t_coarse_mat_is;
4270:   }

4272:   /* create local to global scatters for coarse problem */
4273:   if (compute_vecs) {
4274:     PetscInt lrows;
4275:     VecDestroy(&pcbddc->coarse_vec);
4276:     if (coarse_mat_is) {
4277:       MatGetLocalSize(coarse_mat_is,&lrows,NULL);
4278:     } else {
4279:       lrows = 0;
4280:     }
4281:     VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
4282:     VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
4283:     VecSetType(pcbddc->coarse_vec,VECSTANDARD);
4284:     VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
4285:     VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
4286:   }
4287:   ISDestroy(&coarse_is);
4288:   MatDestroy(&t_coarse_mat_is);

4290:   /* set defaults for coarse KSP and PC */
4291:   if (multilevel_allowed) {
4292:     coarse_ksp_type = KSPRICHARDSON;
4293:     coarse_pc_type = PCBDDC;
4294:   } else {
4295:     coarse_ksp_type = KSPPREONLY;
4296:     coarse_pc_type = PCREDUNDANT;
4297:   }

4299:   /* print some info if requested */
4300:   if (pcbddc->dbg_flag) {
4301:     if (!multilevel_allowed) {
4302:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4303:       if (multilevel_requested) {
4304:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
4305:       } else if (pcbddc->max_levels) {
4306:         PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);
4307:       }
4308:       PetscViewerFlush(pcbddc->dbg_viewer);
4309:     }
4310:   }

4312:   /* create the coarse KSP object only once with defaults */
4313:   if (coarse_mat_is) {
4314:     MatReuse coarse_mat_reuse;
4315:     PetscViewer dbg_viewer = NULL;
4316:     if (pcbddc->dbg_flag) {
4317:       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4318:       PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
4319:     }
4320:     if (!pcbddc->coarse_ksp) {
4321:       char prefix[256],str_level[16];
4322:       size_t len;
4323:       KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);
4324:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
4325:       PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
4326:       KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
4327:       KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);
4328:       KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
4329:       KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
4330:       KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
4331:       PCSetType(pc_temp,coarse_pc_type);
4332:       /* prefix */
4333:       PetscStrcpy(prefix,"");
4334:       PetscStrcpy(str_level,"");
4335:       if (!pcbddc->current_level) {
4336:         PetscStrcpy(prefix,((PetscObject)pc)->prefix);
4337:         PetscStrcat(prefix,"pc_bddc_coarse_");
4338:       } else {
4339:         PetscStrlen(((PetscObject)pc)->prefix,&len);
4340:         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4341:         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4342:         PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
4343:         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4344:         PetscStrcat(prefix,str_level);
4345:       }
4346:       KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
4347:       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4348:       PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
4349:       PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
4350:       PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
4351:       /* allow user customization */
4352:       KSPSetFromOptions(pcbddc->coarse_ksp);
4353:     }
4354:     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4355:     KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
4356:     if (nisdofs) {
4357:       PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
4358:       for (i=0;i<nisdofs;i++) {
4359:         ISDestroy(&isarray[i]);
4360:       }
4361:     }
4362:     if (nisneu) {
4363:       PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
4364:       ISDestroy(&isarray[nisdofs]);
4365:     }

4367:     /* get some info after set from options */
4368:     PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);
4369:     PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
4370:     PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
4371:     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4372:       PCSetType(pc_temp,coarse_pc_type);
4373:       isbddc = PETSC_FALSE;
4374:     }
4375:     PCFactorSetReuseFill(pc_temp,PETSC_TRUE);
4376:     if (isredundant) {
4377:       KSP inner_ksp;
4378:       PC  inner_pc;
4379:       PCRedundantGetKSP(pc_temp,&inner_ksp);
4380:       KSPGetPC(inner_ksp,&inner_pc);
4381:       PCFactorSetReuseFill(inner_pc,PETSC_TRUE);
4382:     }

4384:     /* assemble coarse matrix */
4385:     if (coarse_reuse) {
4386:       KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
4387:       PetscObjectReference((PetscObject)coarse_mat);
4388:       coarse_mat_reuse = MAT_REUSE_MATRIX;
4389:     } else {
4390:       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4391:     }
4392:     if (isbddc || isnn) {
4393:       if (pcbddc->coarsening_ratio > 1) {
4394:         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4395:           MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);
4396:           if (pcbddc->dbg_flag) {
4397:             PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");
4398:             PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");
4399:             ISView(pcbddc->coarse_subassembling,dbg_viewer);
4400:             PetscViewerFlush(dbg_viewer);
4401:           }
4402:         }
4403:         MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);
4404:       } else {
4405:         PetscObjectReference((PetscObject)coarse_mat_is);
4406:         coarse_mat = coarse_mat_is;
4407:       }
4408:     } else {
4409:       MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);
4410:     }
4411:     MatDestroy(&coarse_mat_is);

4413:     /* propagate symmetry info of coarse matrix */
4414:     MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
4415:     if (pc->pmat->symmetric_set) {
4416:       MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
4417:     }
4418:     if (pc->pmat->hermitian_set) {
4419:       MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
4420:     }
4421:     if (pc->pmat->spd_set) {
4422:       MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
4423:     }
4424:     /* set operators */
4425:     KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
4426:     if (pcbddc->dbg_flag) {
4427:       PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
4428:     }
4429:   } else { /* processes non partecipating to coarse solver (if any) */
4430:     coarse_mat = 0;
4431:   }
4432:   PetscFree(isarray);
4433: #if 0
4434:   {
4435:     PetscViewer viewer;
4436:     char filename[256];
4437:     sprintf(filename,"coarse_mat.m");
4438:     PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);
4439:     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4440:     MatView(coarse_mat,viewer);
4441:     PetscViewerPopFormat(viewer);
4442:     PetscViewerDestroy(&viewer);
4443:   }
4444: #endif

4446:   /* Compute coarse null space (special handling by BDDC only) */
4447: #if 0
4448:   if (pcbddc->NullSpace) {
4449:     PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);
4450:   }
4451: #endif

4453:   if (pcbddc->coarse_ksp) {
4454:     Vec crhs,csol;
4455:     PetscBool ispreonly;

4457:     if (CoarseNullSpace) {
4458:       if (isbddc) {
4459:         PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);
4460:       } else {
4461:         MatSetNullSpace(coarse_mat,CoarseNullSpace);
4462:       }
4463:     }
4464:     /* setup coarse ksp */
4465:     KSPSetUp(pcbddc->coarse_ksp);
4466:     KSPGetSolution(pcbddc->coarse_ksp,&csol);
4467:     KSPGetRhs(pcbddc->coarse_ksp,&crhs);
4468:     /* hack */
4469:     if (!csol) {
4470:       MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
4471:     }
4472:     if (!crhs) {
4473:       MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
4474:     }
4475:     /* Check coarse problem if in debug mode or if solving with an iterative method */
4476:     PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
4477:     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4478:       KSP       check_ksp;
4479:       KSPType   check_ksp_type;
4480:       PC        check_pc;
4481:       Vec       check_vec,coarse_vec;
4482:       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4483:       PetscInt  its;
4484:       PetscBool compute_eigs;
4485:       PetscReal *eigs_r,*eigs_c;
4486:       PetscInt  neigs;
4487:       const char *prefix;

4489:       /* Create ksp object suitable for estimation of extreme eigenvalues */
4490:       KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
4491:       KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
4492:       KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
4493:       KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
4494:       if (ispreonly) {
4495:         check_ksp_type = KSPPREONLY;
4496:         compute_eigs = PETSC_FALSE;
4497:       } else {
4498:         check_ksp_type = KSPGMRES;
4499:         compute_eigs = PETSC_TRUE;
4500:       }
4501:       KSPSetType(check_ksp,check_ksp_type);
4502:       KSPSetComputeSingularValues(check_ksp,compute_eigs);
4503:       KSPSetComputeEigenvalues(check_ksp,compute_eigs);
4504:       KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
4505:       KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
4506:       KSPSetOptionsPrefix(check_ksp,prefix);
4507:       KSPAppendOptionsPrefix(check_ksp,"check_");
4508:       KSPSetFromOptions(check_ksp);
4509:       KSPSetUp(check_ksp);
4510:       KSPGetPC(pcbddc->coarse_ksp,&check_pc);
4511:       KSPSetPC(check_ksp,check_pc);
4512:       /* create random vec */
4513:       KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);
4514:       VecDuplicate(coarse_vec,&check_vec);
4515:       VecSetRandom(check_vec,NULL);
4516:       if (CoarseNullSpace) {
4517:         MatNullSpaceRemove(CoarseNullSpace,check_vec);
4518:       }
4519:       MatMult(coarse_mat,check_vec,coarse_vec);
4520:       /* solve coarse problem */
4521:       KSPSolve(check_ksp,coarse_vec,coarse_vec);
4522:       if (CoarseNullSpace) {
4523:         MatNullSpaceRemove(CoarseNullSpace,coarse_vec);
4524:       }
4525:       /* set eigenvalue estimation if preonly has not been requested */
4526:       if (compute_eigs) {
4527:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
4528:         PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
4529:         KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
4530:         lambda_max = eigs_r[neigs-1];
4531:         lambda_min = eigs_r[0];
4532:         if (pcbddc->use_coarse_estimates) {
4533:           if (lambda_max>lambda_min) {
4534:             KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);
4535:             KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
4536:           }
4537:         }
4538:       }

4540:       /* check coarse problem residual error */
4541:       if (pcbddc->dbg_flag) {
4542:         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4543:         PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
4544:         VecAXPY(check_vec,-1.0,coarse_vec);
4545:         VecNorm(check_vec,NORM_INFINITY,&infty_error);
4546:         MatMult(coarse_mat,check_vec,coarse_vec);
4547:         VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
4548:         VecDestroy(&check_vec);
4549:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
4550:         PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
4551:         PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
4552:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);
4553:         PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
4554:         if (compute_eigs) {
4555:           PetscReal lambda_max_s,lambda_min_s;
4556:           KSPGetType(check_ksp,&check_ksp_type);
4557:           KSPGetIterationNumber(check_ksp,&its);
4558:           KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
4559:           PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
4560:           for (i=0;i<neigs;i++) {
4561:             PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
4562:           }
4563:         }
4564:         PetscViewerFlush(dbg_viewer);
4565:         PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
4566:       }
4567:       KSPDestroy(&check_ksp);
4568:       if (compute_eigs) {
4569:         PetscFree(eigs_r);
4570:         PetscFree(eigs_c);
4571:       }
4572:     }
4573:   }
4574:   /* print additional info */
4575:   if (pcbddc->dbg_flag) {
4576:     /* waits until all processes reaches this point */
4577:     PetscBarrier((PetscObject)pc);
4578:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);
4579:     PetscViewerFlush(pcbddc->dbg_viewer);
4580:   }

4582:   /* free memory */
4583:   MatNullSpaceDestroy(&CoarseNullSpace);
4584:   MatDestroy(&coarse_mat);
4585:   return(0);
4586: }

4590: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4591: {
4592:   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4593:   PC_IS*         pcis = (PC_IS*)pc->data;
4594:   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4595:   IS             subset,subset_mult,subset_n;
4596:   PetscInt       local_size,coarse_size=0;
4597:   PetscInt       *local_primal_indices=NULL;
4598:   const PetscInt *t_local_primal_indices;

4602:   /* Compute global number of coarse dofs */
4603:   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4604:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
4605:   ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
4606:   ISDestroy(&subset_n);
4607:   ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
4608:   PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);
4609:   ISDestroy(&subset);
4610:   ISDestroy(&subset_mult);
4611:   ISGetLocalSize(subset_n,&local_size);
4612:   if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
4613:   PetscMalloc1(local_size,&local_primal_indices);
4614:   ISGetIndices(subset_n,&t_local_primal_indices);
4615:   PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));
4616:   ISRestoreIndices(subset_n,&t_local_primal_indices);
4617:   ISDestroy(&subset_n);

4619:   /* check numbering */
4620:   if (pcbddc->dbg_flag) {
4621:     PetscScalar coarsesum,*array;
4622:     PetscInt    i;
4623:     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;

4625:     PetscViewerFlush(pcbddc->dbg_viewer);
4626:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4627:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
4628:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4629:     VecSet(pcis->vec1_N,0.0);
4630:     for (i=0;i<pcbddc->local_primal_size;i++) {
4631:       VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
4632:     }
4633:     VecAssemblyBegin(pcis->vec1_N);
4634:     VecAssemblyEnd(pcis->vec1_N);
4635:     VecSet(pcis->vec1_global,0.0);
4636:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4637:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4638:     VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
4639:     VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
4640:     VecGetArray(pcis->vec1_N,&array);
4641:     for (i=0;i<pcis->n;i++) {
4642:       if (array[i] == 1.0) {
4643:         set_error = PETSC_TRUE;
4644:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);
4645:       }
4646:     }
4647:     MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
4648:     PetscViewerFlush(pcbddc->dbg_viewer);
4649:     for (i=0;i<pcis->n;i++) {
4650:       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4651:     }
4652:     VecRestoreArray(pcis->vec1_N,&array);
4653:     VecSet(pcis->vec1_global,0.0);
4654:     VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4655:     VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
4656:     VecSum(pcis->vec1_global,&coarsesum);
4657:     PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));
4658:     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4659:       PetscInt *gidxs;

4661:       PetscMalloc1(pcbddc->local_primal_size,&gidxs);
4662:       ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
4663:       PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
4664:       PetscViewerFlush(pcbddc->dbg_viewer);
4665:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
4666:       for (i=0;i<pcbddc->local_primal_size;i++) {
4667:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
4668:       }
4669:       PetscViewerFlush(pcbddc->dbg_viewer);
4670:       PetscFree(gidxs);
4671:     }
4672:     PetscViewerFlush(pcbddc->dbg_viewer);
4673:     PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4674:     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4675:   }
4676:   /* PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size); */
4677:   /* get back data */
4678:   *coarse_size_n = coarse_size;
4679:   *local_primal_indices_n = local_primal_indices;
4680:   return(0);
4681: }

4685: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4686: {
4687:   IS             localis_t;
4688:   PetscInt       i,lsize,*idxs,n;
4689:   PetscScalar    *vals;

4693:   /* get indices in local ordering exploiting local to global map */
4694:   ISGetLocalSize(globalis,&lsize);
4695:   PetscMalloc1(lsize,&vals);
4696:   for (i=0;i<lsize;i++) vals[i] = 1.0;
4697:   ISGetIndices(globalis,(const PetscInt**)&idxs);
4698:   VecSet(gwork,0.0);
4699:   VecSet(lwork,0.0);
4700:   if (idxs) { /* multilevel guard */
4701:     VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
4702:   }
4703:   VecAssemblyBegin(gwork);
4704:   ISRestoreIndices(globalis,(const PetscInt**)&idxs);
4705:   PetscFree(vals);
4706:   VecAssemblyEnd(gwork);
4707:   /* now compute set in local ordering */
4708:   VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
4709:   VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
4710:   VecGetArrayRead(lwork,(const PetscScalar**)&vals);
4711:   VecGetSize(lwork,&n);
4712:   for (i=0,lsize=0;i<n;i++) {
4713:     if (PetscRealPart(vals[i]) > 0.5) {
4714:       lsize++;
4715:     }
4716:   }
4717:   PetscMalloc1(lsize,&idxs);
4718:   for (i=0,lsize=0;i<n;i++) {
4719:     if (PetscRealPart(vals[i]) > 0.5) {
4720:       idxs[lsize++] = i;
4721:     }
4722:   }
4723:   VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
4724:   ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
4725:   *localis = localis_t;
4726:   return(0);
4727: }

4729: /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4732: static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4733: {
4734:   PCBDDCChange_ctx change_ctx;
4735:   PetscErrorCode   ierr;

4738:   MatShellGetContext(A,&change_ctx);
4739:   MatMult(change_ctx->global_change,x,change_ctx->work[0]);
4740:   MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);
4741:   MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);
4742:   return(0);
4743: }

4747: static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4748: {
4749:   PCBDDCChange_ctx change_ctx;
4750:   PetscErrorCode   ierr;

4753:   MatShellGetContext(A,&change_ctx);
4754:   MatMult(change_ctx->global_change,x,change_ctx->work[0]);
4755:   MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);
4756:   MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);
4757:   return(0);
4758: }

4762: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4763: {
4764:   PC_IS               *pcis=(PC_IS*)pc->data;
4765:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4766:   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4767:   Mat                 S_j;
4768:   PetscInt            *used_xadj,*used_adjncy;
4769:   PetscBool           free_used_adj;
4770:   PetscErrorCode      ierr;

4773:   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4774:   free_used_adj = PETSC_FALSE;
4775:   if (pcbddc->sub_schurs_layers == -1) {
4776:     used_xadj = NULL;
4777:     used_adjncy = NULL;
4778:   } else {
4779:     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4780:       used_xadj = pcbddc->mat_graph->xadj;
4781:       used_adjncy = pcbddc->mat_graph->adjncy;
4782:     } else if (pcbddc->computed_rowadj) {
4783:       used_xadj = pcbddc->mat_graph->xadj;
4784:       used_adjncy = pcbddc->mat_graph->adjncy;
4785:     } else {
4786:       PetscBool      flg_row=PETSC_FALSE;
4787:       const PetscInt *xadj,*adjncy;
4788:       PetscInt       nvtxs;

4790:       MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
4791:       if (flg_row) {
4792:         PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
4793:         PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));
4794:         PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));
4795:         free_used_adj = PETSC_TRUE;
4796:       } else {
4797:         pcbddc->sub_schurs_layers = -1;
4798:         used_xadj = NULL;
4799:         used_adjncy = NULL;
4800:       }
4801:       MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
4802:     }
4803:   }

4805:   /* setup sub_schurs data */
4806:   MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
4807:   if (!sub_schurs->use_mumps) {
4808:     /* pcbddc->ksp_D up to date only if not using MUMPS */
4809:     MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
4810:     PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE);
4811:   } else {
4812:     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
4813:     PetscBool isseqaij;
4814:     if (!pcbddc->use_vertices && reuse_solvers) {
4815:       PetscInt n_vertices;

4817:       ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
4818:       reuse_solvers = (PetscBool)!n_vertices;
4819:     }
4820:     PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);
4821:     if (!isseqaij) {
4822:       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
4823:       if (matis->A == pcbddc->local_mat) {
4824:         MatDestroy(&pcbddc->local_mat);
4825:         MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
4826:       } else {
4827:         MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
4828:       }
4829:     }
4830:     PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers);
4831:   }
4832:   MatDestroy(&S_j);

4834:   /* free adjacency */
4835:   if (free_used_adj) {
4836:     PetscFree2(used_xadj,used_adjncy);
4837:   }
4838:   return(0);
4839: }

4843: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4844: {
4845:   PC_IS               *pcis=(PC_IS*)pc->data;
4846:   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4847:   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4848:   PCBDDCGraph         graph;
4849:   PetscErrorCode      ierr;

4852:   /* attach interface graph for determining subsets */
4853:   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4854:     IS       verticesIS,verticescomm;
4855:     PetscInt vsize,*idxs;

4857:     PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
4858:     ISGetSize(verticesIS,&vsize);
4859:     ISGetIndices(verticesIS,(const PetscInt**)&idxs);
4860:     ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
4861:     ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
4862:     ISDestroy(&verticesIS);
4863:     PCBDDCGraphCreate(&graph);
4864:     PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);
4865:     PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
4866:     ISDestroy(&verticescomm);
4867:     PCBDDCGraphComputeConnectedComponents(graph);
4868: /*
4869:     if (pcbddc->dbg_flag) {
4870:       PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
4871:     }
4872: */
4873:   } else {
4874:     graph = pcbddc->mat_graph;
4875:   }

4877:   /* sub_schurs init */
4878:   PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);

4880:   /* free graph struct */
4881:   if (pcbddc->sub_schurs_rebuild) {
4882:     PCBDDCGraphDestroy(&graph);
4883:   }
4884:   return(0);
4885: }