Actual source code: nn.c
2: #include <../src/ksp/pc/impls/is/nn/nn.h>
4: /* -------------------------------------------------------------------------- */
5: /*
6: PCSetUp_NN - Prepares for the use of the NN preconditioner
7: by setting data structures and options.
9: Input Parameter:
10: . pc - the preconditioner context
12: Application Interface Routine: PCSetUp()
14: Notes:
15: The interface routine PCSetUp() is not usually called directly by
16: the user, but instead is called by PCApply() if necessary.
17: */
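/*
   PCISSetUp() assembles the data shared by all iterative-substructuring
   preconditioners and used throughout this file: the local Dirichlet solver
   ksp_D, the scatters global_to_D and global_to_B, the scaling vector D and
   the local work vectors/arrays. PCNNCreateCoarseMatrix() then builds the
   one-degree-of-freedom-per-subdomain coarse problem used for balancing.
*/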
18: static PetscErrorCode PCSetUp_NN(PC pc)
19: {
20: if (!pc->setupcalled) {
21: /* Set up all the "iterative substructuring" common block */
22: PCISSetUp(pc,PETSC_TRUE,PETSC_TRUE);
23: /* Create the coarse matrix. */
24: PCNNCreateCoarseMatrix(pc);
25: }
26: return 0;
27: }
29: /* -------------------------------------------------------------------------- */
30: /*
31: PCApply_NN - Applies the NN preconditioner to a vector.
33: Input Parameters:
34: . pc - the preconditioner context
35: . r - input vector (global)
37: Output Parameter:
38: . z - output vector (global)
40: Application Interface Routine: PCApply()
41: */
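/*
   In summary (see the step-by-step comments in the body below), the result z
   satisfies

      $ z_B = M^{-1} ( r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} B_I^{(j)} r_I^{(j)} ) $
      $ z_I^{(i)} = B_I^{(i)} ( r_I^{(i)} - A_{IB}^{(i)} \tilde R_i z_B ) $

   where $ M^{-1} $ denotes the balancing Neumann-Neumann preconditioner for the
   interface Schur complement, applied by PCNNApplyInterfacePreconditioner().
*/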
42: static PetscErrorCode PCApply_NN(PC pc,Vec r,Vec z)
43: {
44: PC_IS *pcis = (PC_IS*)(pc->data);
46: PetscScalar m_one = -1.0;
47: Vec w = pcis->vec1_global;
49: /*
50: Dirichlet solvers.
51: Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
52: Storing the local results in vec2_D.
53: */
54: VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
55: VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
56: KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);
58: /*
59: Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
60: Storing the result in the interface portion of the global vector w.
61: */
62: MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);
63: VecScale(pcis->vec1_B,m_one);
64: VecCopy(r,w);
65: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);
66: VecScatterEnd (pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);
68: /*
69: Apply the interface preconditioner
70: */
71: PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
72: pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);
74: /*
75: Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
76: The result is stored in vec1_D.
77: */
78: VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
79: VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
80: MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);
82: /*
83: Dirichlet solvers.
84: Computing $ B_I^{(i)}t_I^{(i)} $ and inserting the blocks
85: $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $ into the global vector z.
86: */
87: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
88: VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
89: KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);
90: VecScale(pcis->vec2_D,m_one);
91: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);
92: VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);
93: return 0;
94: }
96: /* -------------------------------------------------------------------------- */
97: /*
98: PCDestroy_NN - Destroys the private context for the NN preconditioner
99: that was created with PCCreate_NN().
101: Input Parameter:
102: . pc - the preconditioner context
104: Application Interface Routine: PCDestroy()
105: */
106: static PetscErrorCode PCDestroy_NN(PC pc)
107: {
108: PC_NN *pcnn = (PC_NN*)pc->data;
110: PCISDestroy(pc);
112: MatDestroy(&pcnn->coarse_mat);
113: VecDestroy(&pcnn->coarse_x);
114: VecDestroy(&pcnn->coarse_b);
115: KSPDestroy(&pcnn->ksp_coarse);
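  /* DZ_IN[1],...,DZ_IN[n_neigh-1] point into the single buffer allocated at
     DZ_IN[0] (see PCNNCreateCoarseMatrix()), so only DZ_IN[0] and the array
     of pointers itself need to be freed. */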
116: if (pcnn->DZ_IN) {
117: PetscFree(pcnn->DZ_IN[0]);
118: PetscFree(pcnn->DZ_IN);
119: }
121: /*
122: Free the private data structure that was hanging off the PC
123: */
124: PetscFree(pc->data);
125: return 0;
126: }
128: /* -------------------------------------------------------------------------- */
129: /*MC
130: PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.
132: Options Database Keys:
133: + -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
134: (this skips the first coarse grid solve in the preconditioner)
135: . -pc_nn_turn_off_second_balancing - do not balance the solution after solving the local Neumann problems
136: (this skips the second coarse grid solve in the preconditioner)
137: . -pc_is_damp_fixed <fact> -
138: . -pc_is_remove_nullspace_fixed -
139: . -pc_is_set_damping_factor_floating <fact> -
140: . -pc_is_not_damp_floating -
141: - -pc_is_not_remove_nullspace_floating -
143: Level: intermediate
145: Notes:
146: The matrix used with this preconditioner must be of type MATIS
148: Unlike more 'conventional' Neumann-Neumann preconditioners, this iterates over ALL the
149: degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
150: on the subdomains, though in our experience using approximate solvers is slower).
152: Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
153: Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
154: Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx
156: Contributed by Paulo Goldfeld
158: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS
159: M*/
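/*
   Example (a minimal usage sketch, not taken from an actual PETSc example; it
   assumes A is an already assembled matrix of type MATIS and that b and x are
   matching right-hand side and solution vectors):

      KSP ksp;
      PC  pc;

      KSPCreate(PETSC_COMM_WORLD,&ksp);
      KSPSetOperators(ksp,A,A);
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCNN);
      KSPSetFromOptions(ksp);
      KSPSolve(ksp,b,x);

   Equivalently, PCNN can be selected at run time with -pc_type nn; the coarse
   and local solvers are then controlled with the -nn_coarse_, -is_localD_ and
   -is_localN_ prefixed options listed above.
*/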
161: PETSC_EXTERN PetscErrorCode PCCreate_NN(PC pc)
162: {
163: PC_NN *pcnn;
165: /*
166: Creates the private data structure for this preconditioner and
167: attaches it to the PC object.
168: */
169: PetscNewLog(pc,&pcnn);
170: pc->data = (void*)pcnn;
172: PCISCreate(pc);
173: pcnn->coarse_mat = NULL;
174: pcnn->coarse_x = NULL;
175: pcnn->coarse_b = NULL;
176: pcnn->ksp_coarse = NULL;
177: pcnn->DZ_IN = NULL;
179: /*
180: Set the pointers for the functions that are provided above.
181: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
182: are called, they will automatically call these functions. Note we
183: choose not to provide a couple of these functions since they are
184: not needed.
185: */
186: pc->ops->apply = PCApply_NN;
187: pc->ops->applytranspose = NULL;
188: pc->ops->setup = PCSetUp_NN;
189: pc->ops->destroy = PCDestroy_NN;
190: pc->ops->view = NULL;
191: pc->ops->applyrichardson = NULL;
192: pc->ops->applysymmetricleft = NULL;
193: pc->ops->applysymmetricright = NULL;
194: return 0;
195: }
197: /* -------------------------------------------------------------------------- */
198: /*
199: PCNNCreateCoarseMatrix -
200: */
201: PetscErrorCode PCNNCreateCoarseMatrix(PC pc)
202: {
203: MPI_Request *send_request, *recv_request;
204: PetscInt i, j, k;
205: PetscScalar *mat; /* Sub-matrix with this subdomain's contribution to the coarse matrix */
206: PetscScalar **DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */
208: /* aliasing some names */
209: PC_IS *pcis = (PC_IS*)(pc->data);
210: PC_NN *pcnn = (PC_NN*)pc->data;
211: PetscInt n_neigh = pcis->n_neigh;
212: PetscInt *neigh = pcis->neigh;
213: PetscInt *n_shared = pcis->n_shared;
214: PetscInt **shared = pcis->shared;
215: PetscScalar **DZ_IN; /* Must be initialized after memory allocation. */
217: /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
218: PetscMalloc1(n_neigh*n_neigh+1,&mat);
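  /* mat is used as a dense, row-major n_neigh x n_neigh block: mat[i*n_neigh+j]
     receives this subdomain's contribution coupling neighbours neigh[i] and
     neigh[j], and is assembled into the global coarse matrix below with
     MatSetValues(...,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES). */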
220: /* Allocate memory for DZ */
221: /* Notice that DZ_OUT[0] is allocated some space that is never used. */
222: /* This is just so that DZ_OUT and DZ_IN have exactly the same form. */
223: {
224: PetscInt size_of_Z = 0;
225: PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);
226: DZ_IN = pcnn->DZ_IN;
227: PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);
228: for (i=0; i<n_neigh; i++) size_of_Z += n_shared[i];
229: PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);
230: PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);
231: }
232: for (i=1; i<n_neigh; i++) {
233: DZ_IN[i] = DZ_IN [i-1] + n_shared[i-1];
234: DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
235: }
237: /* Set the values of DZ_OUT, in order to send this info to the neighbours */
238: /* First, set the auxiliary array pcis->work_N. */
239: PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);
240: for (i=1; i<n_neigh; i++) {
241: for (j=0; j<n_shared[i]; j++) {
242: DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
243: }
244: }
246: /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
247: /* Notice that send_request[] and recv_request[] could have one less element. */
248: /* We make them longer so that request[i] corresponds to neigh[i]. */
249: {
250: PetscMPIInt tag;
251: PetscObjectGetNewTag((PetscObject)pc,&tag);
252: PetscMalloc2(n_neigh+1,&send_request,n_neigh+1,&recv_request);
253: for (i=1; i<n_neigh; i++) {
254: MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,PetscObjectComm((PetscObject)pc),&(send_request[i]));
255: MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,PetscObjectComm((PetscObject)pc),&(recv_request[i]));
256: }
257: }
259: /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
260: for (j=0; j<n_shared[0]; j++) DZ_IN[0][j] = pcis->work_N[shared[0][j]];
262: /* Start computing with local D*Z while communication goes on. */
263: /* Apply the local Schur complement. The result is stored in */
264: /* pcis->vec1_B and also scattered back to the array */
265: /* pcis->work_N. */
266: PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
268: /* Compute the first column, while completing the receiving. */
269: for (i=0; i<n_neigh; i++) {
270: MPI_Status stat;
271: PetscMPIInt ind=0;
272: if (i>0) {MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat); ind++;}
273: mat[ind*n_neigh+0] = 0.0;
274: for (k=0; k<n_shared[ind]; k++) mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
275: }
277: /* Compute the remaining columns. */
278: for (j=1; j<n_neigh; j++) {
279: PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
280: for (i=0; i<n_neigh; i++) {
281: mat[i*n_neigh+j] = 0.0;
282: for (k=0; k<n_shared[i]; k++) mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
283: }
284: }
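  /* At this point mat[i*n_neigh+j] holds this subdomain's contribution
     DZ_IN[i]^T * S * DZ_IN[j], where S is the local Schur complement and the
     dot product runs over the interface nodes shared with neighbour neigh[i];
     the complete coarse entries are obtained when every subdomain adds its
     contribution with ADD_VALUES below. */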
286: /* Complete the sending. */
287: if (n_neigh>1) {
288: MPI_Status *stat;
289: PetscMalloc1(n_neigh-1,&stat);
290: if (n_neigh-1) MPI_Waitall(n_neigh-1,&(send_request[1]),stat);
291: PetscFree(stat);
292: }
294: /* Free the memory for the MPI requests */
295: PetscFree2(send_request,recv_request);
297: /* Free the memory for DZ_OUT */
298: if (DZ_OUT) {
299: PetscFree(DZ_OUT[0]);
300: PetscFree(DZ_OUT);
301: }
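  /* The coarse problem has exactly one unknown per subdomain, so the coarse
     vectors and the coarse matrix are created with local size 1 and global
     size equal to the number of MPI ranks. */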
303: {
304: PetscMPIInt size;
305: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
306: /* Create the global coarse vectors (rhs and solution). */
307: VecCreateMPI(PetscObjectComm((PetscObject)pc),1,size,&(pcnn->coarse_b));
308: VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));
309: /* Create and set the global coarse AIJ matrix. */
310: MatCreate(PetscObjectComm((PetscObject)pc),&(pcnn->coarse_mat));
311: MatSetSizes(pcnn->coarse_mat,1,1,size,size);
312: MatSetType(pcnn->coarse_mat,MATAIJ);
313: MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,NULL);
314: MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,NULL,n_neigh,NULL);
315: MatSetOption(pcnn->coarse_mat,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
316: MatSetOption(pcnn->coarse_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
317: MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);
318: MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
319: MatAssemblyEnd (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
320: }
322: {
323: PetscMPIInt rank;
324: PetscScalar one = 1.0;
325: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
326: /* "Zero out" rows of not-purely-Neumann subdomains */
327: if (pcis->pure_neumann) { /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
328: MatZeroRows(pcnn->coarse_mat,0,NULL,one,NULL,NULL);
329: } else { /* here it DOES zero the row, since it's not a floating subdomain. */
330: PetscInt row = (PetscInt) rank;
331: MatZeroRows(pcnn->coarse_mat,1,&row,one,NULL,NULL);
332: }
333: }
335: /* Create the coarse linear solver context */
336: {
337: PC pc_ctx, inner_pc;
338: KSP inner_ksp;
340: KSPCreate(PetscObjectComm((PetscObject)pc),&pcnn->ksp_coarse);
341: PetscObjectIncrementTabLevel((PetscObject)pcnn->ksp_coarse,(PetscObject)pc,2);
342: KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat);
343: KSPGetPC(pcnn->ksp_coarse,&pc_ctx);
344: PCSetType(pc_ctx,PCREDUNDANT);
345: KSPSetType(pcnn->ksp_coarse,KSPPREONLY);
346: PCRedundantGetKSP(pc_ctx,&inner_ksp);
347: KSPGetPC(inner_ksp,&inner_pc);
348: PCSetType(inner_pc,PCLU);
349: KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");
350: KSPSetFromOptions(pcnn->ksp_coarse);
351: /* set up the coarse solver now, so that it is ready before the first application of the preconditioner */
352: KSPSetUp(pcnn->ksp_coarse);
353: }
355: /* Free the memory for mat */
356: PetscFree(mat);
358: /* for DEBUGGING, save the coarse matrix to a file. */
359: {
360: PetscBool flg = PETSC_FALSE;
361: PetscOptionsGetBool(NULL,NULL,"-pc_nn_save_coarse_matrix",&flg,NULL);
362: if (flg) {
363: PetscViewer viewer;
364: PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);
365: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
366: MatView(pcnn->coarse_mat,viewer);
367: PetscViewerPopFormat(viewer);
368: PetscViewerDestroy(&viewer);
369: }
370: }
372: /* Set the variable pcnn->factor_coarse_rhs. */
373: pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;
375: /* See historical note 02, at the bottom of this file. */
376: return 0;
377: }
379: /* -------------------------------------------------------------------------- */
380: /*
381: PCNNApplySchurToChunk -
383: Input parameters:
384: . pcnn
385: . n - size of chunk
386: . idx - indices of chunk
387: . chunk - values
389: Output parameters:
390: . array_N - result of Schur complement applied to chunk, scattered to big array
391: . vec1_B - result of Schur complement applied to chunk
392: . vec2_B - garbage (used as work space)
393: . vec1_D - garbage (used as work space)
394: . vec2_D - garbage (used as work space)
396: */
397: PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt *idx, PetscScalar *chunk, PetscScalar *array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
398: {
399: PetscInt i;
400: PC_IS *pcis = (PC_IS*)(pc->data);
402: PetscArrayzero(array_N, pcis->n);
403: for (i=0; i<n; i++) array_N[idx[i]] = chunk[i];
404: PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
405: PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
406: PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);
407: return 0;
408: }
410: /* -------------------------------------------------------------------------- */
411: /*
412: PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
413: the preconditioner for the Schur complement.
415: Input parameter:
416: . r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.
418: Output parameters:
419: . z - global vector of interior and interface nodes. The values on the interface are the result of
420: the application of the interface preconditioner to the interface part of r. The values on the
421: interior nodes are garbage.
422: . work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
423: . vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
424: . vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
425: . vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
426: . vec1_D - vector of local interior nodes; returns garbage (used as work space)
427: . vec2_D - vector of local interior nodes; returns garbage (used as work space)
428: . vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
429: . vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
431: */
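/*
   In summary (see the comments in the body below): the interface part of r is
   balanced with a coarse solve (unless -pc_nn_turn_off_first_balancing is set),
   scaled by the partition-of-unity matrix D, passed through the local Neumann
   solves (PCISApplyInvSchur()), and balanced again with a second coarse solve
   (unless -pc_nn_turn_off_second_balancing is set); the result is scattered
   back into the interface part of z.
*/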
432: PetscErrorCode PCNNApplyInterfacePreconditioner(PC pc, Vec r, Vec z, PetscScalar *work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,Vec vec2_D, Vec vec1_N, Vec vec2_N)
433: {
434: PC_IS *pcis = (PC_IS*)(pc->data);
436: /*
437: First balancing step.
438: */
439: {
440: PetscBool flg = PETSC_FALSE;
441: PetscOptionsGetBool(NULL,NULL,"-pc_nn_turn_off_first_balancing",&flg,NULL);
442: if (!flg) {
443: PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);
444: } else {
445: VecCopy(r,z);
446: }
447: }
449: /*
450: Extract the local interface part of z and scale it by D
451: */
452: VecScatterBegin(pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);
453: VecScatterEnd (pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);
454: VecPointwiseMult(vec2_B,pcis->D,vec1_B);
456: /* Neumann Solver */
457: PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);
459: /*
460: Second balancing step.
461: */
462: {
463: PetscBool flg = PETSC_FALSE;
464: PetscOptionsGetBool(NULL,NULL,"-pc_nn_turn_off_second_balancing",&flg,NULL);
465: if (!flg) {
466: PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);
467: } else {
468: VecPointwiseMult(vec2_B,pcis->D,vec1_B);
469: VecSet(z,0.0);
470: VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
471: VecScatterEnd (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
472: }
473: }
474: return 0;
475: }
477: /* -------------------------------------------------------------------------- */
478: /*
479: PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
480: input argument u is provided), or s, as given in equations
481: (12) and (13), if the input argument u is NULL.
482: Notice that the input argument u plays the role of u_i in
483: equation (14). The equation numbers refer to [Man93].
485: Input Parameters:
486: . pcnn - NN preconditioner context.
487: . r - MPI vector of all nodes (interior and interface). It's preserved.
488: . u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.
490: Output Parameters:
491: . z - MPI vector of interior and interface nodes. Returns s or z (see description above).
492: . vec1_B - Sequential vector of local interface nodes. Workspace.
493: . vec2_B - Sequential vector of local interface nodes. Workspace.
494: . vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
495: . vec1_D - Sequential vector of local interior nodes. Workspace.
496: . vec2_D - Sequential vector of local interior nodes. Workspace.
497: . work_N - Array of all local nodes (interior and interface). Workspace.
499: */
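/*
   The coarse right-hand-side entry owned by this subdomain is the dot product
   of DZ_IN[0] with the balanced residual on the interface nodes listed in
   shared[0] (the loop over n_shared[0] below), scaled by factor_coarse_rhs;
   the coarse solution lambda then scales DZ_IN[0] before it is scattered back
   and added into z.
*/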
500: PetscErrorCode PCNNBalancing(PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
501: {
502: PetscInt k;
503: PetscScalar value;
504: PetscScalar *lambda;
505: PC_NN *pcnn = (PC_NN*)(pc->data);
506: PC_IS *pcis = (PC_IS*)(pc->data);
508: PetscLogEventBegin(PC_ApplyCoarse,pc,0,0,0);
509: if (u) {
510: if (!vec3_B) vec3_B = u;
511: VecPointwiseMult(vec1_B,pcis->D,u);
512: VecSet(z,0.0);
513: VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
514: VecScatterEnd (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
515: VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
516: VecScatterEnd (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
517: PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);
518: VecScale(vec3_B,-1.0);
519: VecCopy(r,z);
520: VecScatterBegin(pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);
521: VecScatterEnd (pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);
522: } else {
523: VecCopy(r,z);
524: }
525: VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
526: VecScatterEnd (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
527: PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);
528: for (k=0, value=0.0; k<pcis->n_shared[0]; k++) value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]];
529: value *= pcnn->factor_coarse_rhs; /* This factor is set in CreateCoarseMatrix(). */
530: {
531: PetscMPIInt rank;
532: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
533: VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);
534: /*
535: Since we are only inserting local values (one value actually) we don't need to do the
536: reduction that tells us there is no data that needs to be moved. Hence these calls are commented out:
537: VecAssemblyBegin(pcnn->coarse_b);
538: VecAssemblyEnd (pcnn->coarse_b);
539: */
540: }
541: KSPSolve(pcnn->ksp_coarse,pcnn->coarse_b,pcnn->coarse_x);
542: if (!u) VecScale(pcnn->coarse_x,-1.0);
543: VecGetArray(pcnn->coarse_x,&lambda);
544: for (k=0; k<pcis->n_shared[0]; k++) work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k];
545: VecRestoreArray(pcnn->coarse_x,&lambda);
546: PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
547: VecSet(z,0.0);
548: VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
549: VecScatterEnd (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
550: if (!u) {
551: VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
552: VecScatterEnd (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
553: PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
554: VecCopy(r,z);
555: }
556: VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
557: VecScatterEnd (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
558: PetscLogEventEnd(PC_ApplyCoarse,pc,0,0,0);
559: return 0;
560: }
562: /* ------- E N D O F T H E C O D E ------- */
563: /* */
564: /* From now on, "footnotes" (or "historical notes"). */
565: /* */
566: /* ------------------------------------------------- */
568: /* --------------------------------------------------------------------------
569: Historical note 01
570: -------------------------------------------------------------------------- */
571: /*
572: We considered the possibility of an alternative D_i that would still
573: provide a partition of unity (i.e., $ \sum_i N_i D_i N_i^T = I $).
574: The basic principle was still the pseudo-inverse of the counting
575: function; the difference was that we would not count subdomains
576: that do not contribute to the coarse space (i.e., not pure-Neumann
577: subdomains).
579: This turned out to be a bad idea: we would solve trivial Neumann
580: problems in the not pure-Neumann subdomains, since we would be scaling
581: the balanced residual by zero.
582: */
584: /* --------------------------------------------------------------------------
585: Historical note 02
586: -------------------------------------------------------------------------- */
587: /*
588: We tried an alternative coarse problem that would eliminate exactly a
589: constant error. It turned out not to improve the overall convergence.
590: */