Actual source code: nn.c
petsc-3.13.6 2020-09-29
2: #include <../src/ksp/pc/impls/is/nn/nn.h>
4: /* -------------------------------------------------------------------------- */
5: /*
6: PCSetUp_NN - Prepares for the use of the NN preconditioner
7: by setting data structures and options.
9: Input Parameter:
10: . pc - the preconditioner context
12: Application Interface Routine: PCSetUp()
14: Notes:
15: The interface routine PCSetUp() is not usually called directly by
16: the user, but instead is called by PCApply() if necessary.
17: */
18: static PetscErrorCode PCSetUp_NN(PC pc)
19: {
23: if (!pc->setupcalled) {
24: /* Set up all the "iterative substructuring" common block */
25: PCISSetUp(pc,PETSC_TRUE,PETSC_TRUE);
26: /* Create the coarse matrix. */
27: PCNNCreateCoarseMatrix(pc);
28: }
29: return(0);
30: }
32: /* -------------------------------------------------------------------------- */
33: /*
34: PCApply_NN - Applies the NN preconditioner to a vector.
36: Input Parameters:
37: . pc - the preconditioner context
38: . r - input vector (global)
40: Output Parameter:
41: . z - output vector (global)
43: Application Interface Routine: PCApply()
44: */
45: static PetscErrorCode PCApply_NN(PC pc,Vec r,Vec z)
46: {
47: PC_IS *pcis = (PC_IS*)(pc->data);
49: PetscScalar m_one = -1.0;
50: Vec w = pcis->vec1_global;
53: /*
54: Dirichlet solvers.
55: Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
56: Storing the local results at vec2_D
57: */
58: VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
59: VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
60: KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);
62: /*
63: Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
64: Storing the result in the interface portion of the global vector w.
65: */
66: MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);
67: VecScale(pcis->vec1_B,m_one);
68: VecCopy(r,w);
69: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);
70: VecScatterEnd (pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);
72: /*
73: Apply the interface preconditioner
74: */
75: PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
76: pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);
78: /*
79: Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
80: The result is stored in vec1_D.
81: */
82: VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
83: VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
84: MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);
86: /*
87: Dirichlet solvers.
88: Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
89: $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
90: */
91: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
92: VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
93: KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);
94: VecScale(pcis->vec2_D,m_one);
95: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);
96: VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);
97: return(0);
98: }
100: /* -------------------------------------------------------------------------- */
101: /*
102: PCDestroy_NN - Destroys the private context for the NN preconditioner
103: that was created with PCCreate_NN().
105: Input Parameter:
106: . pc - the preconditioner context
108: Application Interface Routine: PCDestroy()
109: */
110: static PetscErrorCode PCDestroy_NN(PC pc)
111: {
112: PC_NN *pcnn = (PC_NN*)pc->data;
116: PCISDestroy(pc);
118: MatDestroy(&pcnn->coarse_mat);
119: VecDestroy(&pcnn->coarse_x);
120: VecDestroy(&pcnn->coarse_b);
121: KSPDestroy(&pcnn->ksp_coarse);
122: if (pcnn->DZ_IN) {
123: PetscFree(pcnn->DZ_IN[0]);
124: PetscFree(pcnn->DZ_IN);
125: }
127: /*
128: Free the private data structure that was hanging off the PC
129: */
130: PetscFree(pc->data);
131: return(0);
132: }
134: /* -------------------------------------------------------------------------- */
135: /*MC
136: PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.
138: Options Database Keys:
139: + -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
140: (this skips the first coarse grid solve in the preconditioner)
141: . -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems
142: (this skips the second coarse grid solve in the preconditioner)
143: . -pc_is_damp_fixed <fact> -
144: . -pc_is_remove_nullspace_fixed -
145: . -pc_is_set_damping_factor_floating <fact> -
146: . -pc_is_not_damp_floating -
147: - -pc_is_not_remove_nullspace_floating -
149: Level: intermediate
151: Notes:
152: The matrix used with this preconditioner must be of type MATIS
154: Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the
155: degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
156: on the subdomains; though in our experience using approximate solvers is slower.).
158: Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
159: Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
160: Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx
162: Contributed by Paulo Goldfeld
164: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS
165: M*/
167: PETSC_EXTERN PetscErrorCode PCCreate_NN(PC pc)
168: {
170: PC_NN *pcnn;
173: /*
174: Creates the private data structure for this preconditioner and
175: attach it to the PC object.
176: */
177: PetscNewLog(pc,&pcnn);
178: pc->data = (void*)pcnn;
180: PCISCreate(pc);
181: pcnn->coarse_mat = 0;
182: pcnn->coarse_x = 0;
183: pcnn->coarse_b = 0;
184: pcnn->ksp_coarse = 0;
185: pcnn->DZ_IN = 0;
187: /*
188: Set the pointers for the functions that are provided above.
189: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
190: are called, they will automatically call these functions. Note we
191: choose not to provide a couple of these functions since they are
192: not needed.
193: */
194: pc->ops->apply = PCApply_NN;
195: pc->ops->applytranspose = 0;
196: pc->ops->setup = PCSetUp_NN;
197: pc->ops->destroy = PCDestroy_NN;
198: pc->ops->view = 0;
199: pc->ops->applyrichardson = 0;
200: pc->ops->applysymmetricleft = 0;
201: pc->ops->applysymmetricright = 0;
202: return(0);
203: }
205: /* -------------------------------------------------------------------------- */
206: /*
207: PCNNCreateCoarseMatrix -
208: */
209: PetscErrorCode PCNNCreateCoarseMatrix(PC pc)
210: {
211: MPI_Request *send_request, *recv_request;
213: PetscInt i, j, k;
214: PetscScalar *mat; /* Sub-matrix with this subdomain's contribution to the coarse matrix */
215: PetscScalar **DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */
217: /* aliasing some names */
218: PC_IS *pcis = (PC_IS*)(pc->data);
219: PC_NN *pcnn = (PC_NN*)pc->data;
220: PetscInt n_neigh = pcis->n_neigh;
221: PetscInt *neigh = pcis->neigh;
222: PetscInt *n_shared = pcis->n_shared;
223: PetscInt **shared = pcis->shared;
224: PetscScalar **DZ_IN; /* Must be initialized after memory allocation. */
227: /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
228: PetscMalloc1(n_neigh*n_neigh+1,&mat);
230: /* Allocate memory for DZ */
231: /* Notice that DZ_OUT[0] is allocated some space that is never used. */
232: /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */
233: {
234: PetscInt size_of_Z = 0;
235: PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);
236: DZ_IN = pcnn->DZ_IN;
237: PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);
238: for (i=0; i<n_neigh; i++) size_of_Z += n_shared[i];
239: PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);
240: PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);
241: }
242: for (i=1; i<n_neigh; i++) {
243: DZ_IN[i] = DZ_IN [i-1] + n_shared[i-1];
244: DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
245: }
247: /* Set the values of DZ_OUT, in order to send this info to the neighbours */
248: /* First, set the auxiliary array pcis->work_N. */
249: PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);
250: for (i=1; i<n_neigh; i++) {
251: for (j=0; j<n_shared[i]; j++) {
252: DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
253: }
254: }
256: /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
257: /* Notice that send_request[] and recv_request[] could have one less element. */
258: /* We make them longer to have request[i] corresponding to neigh[i]. */
259: {
260: PetscMPIInt tag;
261: PetscObjectGetNewTag((PetscObject)pc,&tag);
262: PetscMalloc2(n_neigh+1,&send_request,n_neigh+1,&recv_request);
263: for (i=1; i<n_neigh; i++) {
264: MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,PetscObjectComm((PetscObject)pc),&(send_request[i]));
265: MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,PetscObjectComm((PetscObject)pc),&(recv_request[i]));
266: }
267: }
269: /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
270: for (j=0; j<n_shared[0]; j++) DZ_IN[0][j] = pcis->work_N[shared[0][j]];
272: /* Start computing with local D*Z while communication goes on. */
273: /* Apply Schur complement. The result is "stored" in vec (more */
274: /* precisely, vec points to the result, stored in pc_nn->vec1_B) */
275: /* and also scattered to pcnn->work_N. */
276: PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
277: pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
279: /* Compute the first column, while completing the receiving. */
280: for (i=0; i<n_neigh; i++) {
281: MPI_Status stat;
282: PetscMPIInt ind=0;
283: if (i>0) { MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat); ind++;}
284: mat[ind*n_neigh+0] = 0.0;
285: for (k=0; k<n_shared[ind]; k++) mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
286: }
288: /* Compute the remaining of the columns */
289: for (j=1; j<n_neigh; j++) {
290: PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
291: pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
292: for (i=0; i<n_neigh; i++) {
293: mat[i*n_neigh+j] = 0.0;
294: for (k=0; k<n_shared[i]; k++) mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
295: }
296: }
298: /* Complete the sending. */
299: if (n_neigh>1) {
300: MPI_Status *stat;
301: PetscMalloc1(n_neigh-1,&stat);
302: if (n_neigh-1) {MPI_Waitall(n_neigh-1,&(send_request[1]),stat);}
303: PetscFree(stat);
304: }
306: /* Free the memory for the MPI requests */
307: PetscFree2(send_request,recv_request);
309: /* Free the memory for DZ_OUT */
310: if (DZ_OUT) {
311: PetscFree(DZ_OUT[0]);
312: PetscFree(DZ_OUT);
313: }
315: {
316: PetscMPIInt size;
317: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
318: /* Create the global coarse vectors (rhs and solution). */
319: VecCreateMPI(PetscObjectComm((PetscObject)pc),1,size,&(pcnn->coarse_b));
320: VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));
321: /* Create and set the global coarse AIJ matrix. */
322: MatCreate(PetscObjectComm((PetscObject)pc),&(pcnn->coarse_mat));
323: MatSetSizes(pcnn->coarse_mat,1,1,size,size);
324: MatSetType(pcnn->coarse_mat,MATAIJ);
325: MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,NULL);
326: MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,NULL,n_neigh,NULL);
327: MatSetOption(pcnn->coarse_mat,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
328: MatSetOption(pcnn->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
329: MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);
330: MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
331: MatAssemblyEnd (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
332: }
334: {
335: PetscMPIInt rank;
336: PetscScalar one = 1.0;
337: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
338: /* "Zero out" rows of not-purely-Neumann subdomains */
339: if (pcis->pure_neumann) { /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
340: MatZeroRows(pcnn->coarse_mat,0,NULL,one,0,0);
341: } else { /* here it DOES zero the row, since it's not a floating subdomain. */
342: PetscInt row = (PetscInt) rank;
343: MatZeroRows(pcnn->coarse_mat,1,&row,one,0,0);
344: }
345: }
347: /* Create the coarse linear solver context */
348: {
349: PC pc_ctx, inner_pc;
350: KSP inner_ksp;
352: KSPCreate(PetscObjectComm((PetscObject)pc),&pcnn->ksp_coarse);
353: PetscObjectIncrementTabLevel((PetscObject)pcnn->ksp_coarse,(PetscObject)pc,2);
354: KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat);
355: KSPGetPC(pcnn->ksp_coarse,&pc_ctx);
356: PCSetType(pc_ctx,PCREDUNDANT);
357: KSPSetType(pcnn->ksp_coarse,KSPPREONLY);
358: PCRedundantGetKSP(pc_ctx,&inner_ksp);
359: KSPGetPC(inner_ksp,&inner_pc);
360: PCSetType(inner_pc,PCLU);
361: KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");
362: KSPSetFromOptions(pcnn->ksp_coarse);
363: /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */
364: KSPSetUp(pcnn->ksp_coarse);
365: }
367: /* Free the memory for mat */
368: PetscFree(mat);
370: /* for DEBUGGING, save the coarse matrix to a file. */
371: {
372: PetscBool flg = PETSC_FALSE;
373: PetscOptionsGetBool(NULL,NULL,"-pc_nn_save_coarse_matrix",&flg,NULL);
374: if (flg) {
375: PetscViewer viewer;
376: PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);
377: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
378: MatView(pcnn->coarse_mat,viewer);
379: PetscViewerPopFormat(viewer);
380: PetscViewerDestroy(&viewer);
381: }
382: }
384: /* Set the variable pcnn->factor_coarse_rhs. */
385: pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;
387: /* See historical note 02, at the bottom of this file. */
388: return(0);
389: }
391: /* -------------------------------------------------------------------------- */
392: /*
393: PCNNApplySchurToChunk -
395: Input parameters:
396: . pcnn
397: . n - size of chunk
398: . idx - indices of chunk
399: . chunk - values
401: Output parameters:
402: . array_N - result of Schur complement applied to chunk, scattered to big array
403: . vec1_B - result of Schur complement applied to chunk
404: . vec2_B - garbage (used as work space)
405: . vec1_D - garbage (used as work space)
406: . vec2_D - garbage (used as work space)
408: */
409: PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt *idx, PetscScalar *chunk, PetscScalar *array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
410: {
412: PetscInt i;
413: PC_IS *pcis = (PC_IS*)(pc->data);
416: PetscArrayzero(array_N, pcis->n);
417: for (i=0; i<n; i++) array_N[idx[i]] = chunk[i];
418: PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
419: PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
420: PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);
421: return(0);
422: }
424: /* -------------------------------------------------------------------------- */
425: /*
426: PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
427: the preconditioner for the Schur complement.
429: Input parameter:
430: . r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.
432: Output parameters:
433: . z - global vector of interior and interface nodes. The values on the interface are the result of
434: the Section 1.5 Writing Application Codes with PETSc of the interface preconditioner to the interface part of r. The values on the
435: interior nodes are garbage.
436: . work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
437: . vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
438: . vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
439: . vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
440: . vec1_D - vector of local interior nodes; returns garbage (used as work space)
441: . vec2_D - vector of local interior nodes; returns garbage (used as work space)
442: . vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
443: . vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
445: */
446: PetscErrorCode PCNNApplyInterfacePreconditioner(PC pc, Vec r, Vec z, PetscScalar *work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,Vec vec2_D, Vec vec1_N, Vec vec2_N)
447: {
449: PC_IS *pcis = (PC_IS*)(pc->data);
452: /*
453: First balancing step.
454: */
455: {
456: PetscBool flg = PETSC_FALSE;
457: PetscOptionsGetBool(NULL,NULL,"-pc_nn_turn_off_first_balancing",&flg,NULL);
458: if (!flg) {
459: PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);
460: } else {
461: VecCopy(r,z);
462: }
463: }
465: /*
466: Extract the local interface part of z and scale it by D
467: */
468: VecScatterBegin(pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);
469: VecScatterEnd (pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);
470: VecPointwiseMult(vec2_B,pcis->D,vec1_B);
472: /* Neumann Solver */
473: PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);
475: /*
476: Second balancing step.
477: */
478: {
479: PetscBool flg = PETSC_FALSE;
480: PetscOptionsGetBool(NULL,NULL,"-pc_turn_off_second_balancing",&flg,NULL);
481: if (!flg) {
482: PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);
483: } else {
484: VecPointwiseMult(vec2_B,pcis->D,vec1_B);
485: VecSet(z,0.0);
486: VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
487: VecScatterEnd (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
488: }
489: }
490: return(0);
491: }
493: /* -------------------------------------------------------------------------- */
494: /*
495: PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
496: input argument u is provided), or s, as given in equations
497: (12) and (13), if the input argument u is a null vector.
498: Notice that the input argument u plays the role of u_i in
499: equation (14). The equation numbers refer to [Man93].
501: Input Parameters:
502: . pcnn - NN preconditioner context.
503: . r - MPI vector of all nodes (interior and interface). It's preserved.
504: . u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.
506: Output Parameters:
507: . z - MPI vector of interior and interface nodes. Returns s or z (see description above).
508: . vec1_B - Sequential vector of local interface nodes. Workspace.
509: . vec2_B - Sequential vector of local interface nodes. Workspace.
510: . vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
511: . vec1_D - Sequential vector of local interior nodes. Workspace.
512: . vec2_D - Sequential vector of local interior nodes. Workspace.
513: . work_N - Array of all local nodes (interior and interface). Workspace.
515: */
516: PetscErrorCode PCNNBalancing(PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
517: {
519: PetscInt k;
520: PetscScalar value;
521: PetscScalar *lambda;
522: PC_NN *pcnn = (PC_NN*)(pc->data);
523: PC_IS *pcis = (PC_IS*)(pc->data);
526: PetscLogEventBegin(PC_ApplyCoarse,pc,0,0,0);
527: if (u) {
528: if (!vec3_B) vec3_B = u;
529: VecPointwiseMult(vec1_B,pcis->D,u);
530: VecSet(z,0.0);
531: VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
532: VecScatterEnd (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
533: VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
534: VecScatterEnd (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
535: PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);
536: VecScale(vec3_B,-1.0);
537: VecCopy(r,z);
538: VecScatterBegin(pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);
539: VecScatterEnd (pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);
540: } else {
541: VecCopy(r,z);
542: }
543: VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
544: VecScatterEnd (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
545: PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);
546: for (k=0, value=0.0; k<pcis->n_shared[0]; k++) value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]];
547: value *= pcnn->factor_coarse_rhs; /* This factor is set in CreateCoarseMatrix(). */
548: {
549: PetscMPIInt rank;
550: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
551: VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);
552: /*
553: Since we are only inserting local values (one value actually) we don't need to do the
554: reduction that tells us there is no data that needs to be moved. Hence we comment out these
555: VecAssemblyBegin(pcnn->coarse_b);
556: VecAssemblyEnd (pcnn->coarse_b);
557: */
558: }
559: KSPSolve(pcnn->ksp_coarse,pcnn->coarse_b,pcnn->coarse_x);
560: if (!u) { VecScale(pcnn->coarse_x,-1.0); }
561: VecGetArray(pcnn->coarse_x,&lambda);
562: for (k=0; k<pcis->n_shared[0]; k++) work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k];
563: VecRestoreArray(pcnn->coarse_x,&lambda);
564: PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
565: VecSet(z,0.0);
566: VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
567: VecScatterEnd (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
568: if (!u) {
569: VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
570: VecScatterEnd (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
571: PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
572: VecCopy(r,z);
573: }
574: VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
575: VecScatterEnd (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
576: PetscLogEventEnd(PC_ApplyCoarse,pc,0,0,0);
577: return(0);
578: }
583: /* ------- E N D O F T H E C O D E ------- */
584: /* */
585: /* From now on, "footnotes" (or "historical notes"). */
586: /* */
587: /* ------------------------------------------------- */
591: /* --------------------------------------------------------------------------
592: Historical note 01
593: -------------------------------------------------------------------------- */
594: /*
595: We considered the possibility of an alternative D_i that would still
596: provide a partition of unity (i.e., $ \sum_i N_i D_i N_i^T = I $).
597: The basic principle was still the pseudo-inverse of the counting
598: function; the difference was that we would not count subdomains
599: that do not contribute to the coarse space (i.e., not pure-Neumann
600: subdomains).
602: This turned out to be a bad idea: we would solve trivial Neumann
603: problems in the not pure-Neumann subdomains, since we would be scaling
604: the balanced residual by zero.
605: */
610: /* --------------------------------------------------------------------------
611: Historical note 02
612: -------------------------------------------------------------------------- */
613: /*
614: We tried an alternative coarse problem, that would eliminate exactly a
615: constant error. Turned out not to improve the overall convergence.
616: */