Actual source code: nn.c
petsc-3.3-p7 2013-05-11
2: #include <../src/ksp/pc/impls/is/nn/nn.h>
4: /* -------------------------------------------------------------------------- */
5: /*
6: PCSetUp_NN - Prepares for the use of the NN preconditioner
7: by setting data structures and options.
9: Input Parameter:
10: . pc - the preconditioner context
12: Application Interface Routine: PCSetUp()
14: Notes:
15: The interface routine PCSetUp() is not usually called directly by
16: the user, but instead is called by PCApply() if necessary.
17: */
20: static PetscErrorCode PCSetUp_NN(PC pc)
21: {
23:
25: if (!pc->setupcalled) {
26: /* Set up all the "iterative substructuring" common block */
27: PCISSetUp(pc);
28: /* Create the coarse matrix. */
29: PCNNCreateCoarseMatrix(pc);
30: }
31: return(0);
32: }
34: /* -------------------------------------------------------------------------- */
35: /*
36: PCApply_NN - Applies the NN preconditioner to a vector.
38: Input Parameters:
39: . pc - the preconditioner context
40: . r - input vector (global)
42: Output Parameter:
43: . z - output vector (global)
45: Application Interface Routine: PCApply()
46: */
49: static PetscErrorCode PCApply_NN(PC pc,Vec r,Vec z)
50: {
51: PC_IS *pcis = (PC_IS*)(pc->data);
53: PetscScalar m_one = -1.0;
54: Vec w = pcis->vec1_global;
57: /*
58: Dirichlet solvers.
59: Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
60: Storing the local results at vec2_D
61: */
62: VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
63: VecScatterEnd (pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
64: KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);
65:
66: /*
67: Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
68: Storing the result in the interface portion of the global vector w.
69: */
70: MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);
71: VecScale(pcis->vec1_B,m_one);
72: VecCopy(r,w);
73: VecScatterBegin(pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);
74: VecScatterEnd (pcis->global_to_B,pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE);
76: /*
77: Apply the interface preconditioner
78: */
79: PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
80: pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);
82: /*
83: Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
84: The result is stored in vec1_D.
85: */
86: VecScatterBegin(pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
87: VecScatterEnd (pcis->global_to_B,z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
88: MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);
90: /*
91: Dirichlet solvers.
92: Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
93: $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
94: */
95: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
96: VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
97: KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);
98: VecScale(pcis->vec2_D,m_one);
99: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);
100: VecScatterEnd (pcis->global_to_D,pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE);
101: return(0);
102: }
104: /* -------------------------------------------------------------------------- */
105: /*
106: PCDestroy_NN - Destroys the private context for the NN preconditioner
107: that was created with PCCreate_NN().
109: Input Parameter:
110: . pc - the preconditioner context
112: Application Interface Routine: PCDestroy()
113: */
116: static PetscErrorCode PCDestroy_NN(PC pc)
117: {
118: PC_NN *pcnn = (PC_NN*)pc->data;
122: PCISDestroy(pc);
124: MatDestroy(&pcnn->coarse_mat);
125: VecDestroy(&pcnn->coarse_x);
126: VecDestroy(&pcnn->coarse_b);
127: KSPDestroy(&pcnn->ksp_coarse);
128: if (pcnn->DZ_IN) {
129: PetscFree(pcnn->DZ_IN[0]);
130: PetscFree(pcnn->DZ_IN);
131: }
133: /*
134: Free the private data structure that was hanging off the PC
135: */
136: PetscFree(pc->data);
137: return(0);
138: }
140: /* -------------------------------------------------------------------------- */
141: /*MC
142: PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.
144: Options Database Keys:
145: + -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
146: (this skips the first coarse grid solve in the preconditioner)
147: . -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems
148: (this skips the second coarse grid solve in the preconditioner)
149: . -pc_is_damp_fixed <fact> -
150: . -pc_is_remove_nullspace_fixed -
151: . -pc_is_set_damping_factor_floating <fact> -
152: . -pc_is_not_damp_floating -
153: + -pc_is_not_remove_nullspace_floating -
155: Level: intermediate
157: Notes: The matrix used with this preconditioner must be of type MATIS
159: Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the
160: degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
161: on the subdomains; though in our experience using approximate solvers is slower.).
163: Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
164: Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
165: Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx
167: Contributed by Paulo Goldfeld
169: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, MATIS
170: M*/
171: EXTERN_C_BEGIN
174: PetscErrorCode PCCreate_NN(PC pc)
175: {
177: PC_NN *pcnn;
180: /*
181: Creates the private data structure for this preconditioner and
182: attach it to the PC object.
183: */
184: PetscNewLog(pc,PC_NN,&pcnn);
185: pc->data = (void*)pcnn;
187: PCISCreate(pc);
188: pcnn->coarse_mat = 0;
189: pcnn->coarse_x = 0;
190: pcnn->coarse_b = 0;
191: pcnn->ksp_coarse = 0;
192: pcnn->DZ_IN = 0;
194: /*
195: Set the pointers for the functions that are provided above.
196: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
197: are called, they will automatically call these functions. Note we
198: choose not to provide a couple of these functions since they are
199: not needed.
200: */
201: pc->ops->apply = PCApply_NN;
202: pc->ops->applytranspose = 0;
203: pc->ops->setup = PCSetUp_NN;
204: pc->ops->destroy = PCDestroy_NN;
205: pc->ops->view = 0;
206: pc->ops->applyrichardson = 0;
207: pc->ops->applysymmetricleft = 0;
208: pc->ops->applysymmetricright = 0;
209: return(0);
210: }
211: EXTERN_C_END
214: /* -------------------------------------------------------------------------- */
215: /*
216: PCNNCreateCoarseMatrix -
217: */
220: PetscErrorCode PCNNCreateCoarseMatrix (PC pc)
221: {
222: MPI_Request *send_request, *recv_request;
224: PetscInt i, j, k;
225: PetscScalar* mat; /* Sub-matrix with this subdomain's contribution to the coarse matrix */
226: PetscScalar** DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */
228: /* aliasing some names */
229: PC_IS* pcis = (PC_IS*)(pc->data);
230: PC_NN* pcnn = (PC_NN*)pc->data;
231: PetscInt n_neigh = pcis->n_neigh;
232: PetscInt* neigh = pcis->neigh;
233: PetscInt* n_shared = pcis->n_shared;
234: PetscInt** shared = pcis->shared;
235: PetscScalar** DZ_IN; /* Must be initialized after memory allocation. */
238: /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
239: PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);
241: /* Allocate memory for DZ */
242: /* Notice that DZ_OUT[0] is allocated some space that is never used. */
243: /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */
244: {
245: PetscInt size_of_Z = 0;
246: PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);
247: DZ_IN = pcnn->DZ_IN;
248: PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);
249: for (i=0; i<n_neigh; i++) {
250: size_of_Z += n_shared[i];
251: }
252: PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);
253: PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);
254: }
255: for (i=1; i<n_neigh; i++) {
256: DZ_IN[i] = DZ_IN [i-1] + n_shared[i-1];
257: DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
258: }
260: /* Set the values of DZ_OUT, in order to send this info to the neighbours */
261: /* First, set the auxiliary array pcis->work_N. */
262: PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);
263: for (i=1; i<n_neigh; i++){
264: for (j=0; j<n_shared[i]; j++) {
265: DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
266: }
267: }
269: /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
270: /* Notice that send_request[] and recv_request[] could have one less element. */
271: /* We make them longer to have request[i] corresponding to neigh[i]. */
272: {
273: PetscMPIInt tag;
274: PetscObjectGetNewTag((PetscObject)pc,&tag);
275: PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);
276: recv_request = send_request + (n_neigh);
277: for (i=1; i<n_neigh; i++) {
278: MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,((PetscObject)pc)->comm,&(send_request[i]));
279: MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,((PetscObject)pc)->comm,&(recv_request[i]));
280: }
281: }
283: /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
284: for(j=0; j<n_shared[0]; j++) {
285: DZ_IN[0][j] = pcis->work_N[shared[0][j]];
286: }
288: /* Start computing with local D*Z while communication goes on. */
289: /* Apply Schur complement. The result is "stored" in vec (more */
290: /* precisely, vec points to the result, stored in pc_nn->vec1_B) */
291: /* and also scattered to pcnn->work_N. */
292: PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
293: pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
295: /* Compute the first column, while completing the receiving. */
296: for (i=0; i<n_neigh; i++) {
297: MPI_Status stat;
298: PetscMPIInt ind=0;
299: if (i>0) { MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat); ind++;}
300: mat[ind*n_neigh+0] = 0.0;
301: for (k=0; k<n_shared[ind]; k++) {
302: mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
303: }
304: }
306: /* Compute the remaining of the columns */
307: for (j=1; j<n_neigh; j++) {
308: PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
309: pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
310: for (i=0; i<n_neigh; i++) {
311: mat[i*n_neigh+j] = 0.0;
312: for (k=0; k<n_shared[i]; k++) {
313: mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
314: }
315: }
316: }
318: /* Complete the sending. */
319: if (n_neigh>1) {
320: MPI_Status *stat;
321: PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);
322: if (n_neigh-1) {MPI_Waitall(n_neigh-1,&(send_request[1]),stat);}
323: PetscFree(stat);
324: }
326: /* Free the memory for the MPI requests */
327: PetscFree(send_request);
329: /* Free the memory for DZ_OUT */
330: if (DZ_OUT) {
331: PetscFree(DZ_OUT[0]);
332: PetscFree(DZ_OUT);
333: }
335: {
336: PetscMPIInt size;
337: MPI_Comm_size(((PetscObject)pc)->comm,&size);
338: /* Create the global coarse vectors (rhs and solution). */
339: VecCreateMPI(((PetscObject)pc)->comm,1,size,&(pcnn->coarse_b));
340: VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));
341: /* Create and set the global coarse AIJ matrix. */
342: MatCreate(((PetscObject)pc)->comm,&(pcnn->coarse_mat));
343: MatSetSizes(pcnn->coarse_mat,1,1,size,size);
344: MatSetType(pcnn->coarse_mat,MATAIJ);
345: MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL);
346: MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL,1,PETSC_NULL);
347: MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);
348: MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
349: MatAssemblyEnd (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
350: }
352: {
353: PetscMPIInt rank;
354: PetscScalar one = 1.0;
355: MPI_Comm_rank(((PetscObject)pc)->comm,&rank);
356: /* "Zero out" rows of not-purely-Neumann subdomains */
357: if (pcis->pure_neumann) { /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
358: MatZeroRows(pcnn->coarse_mat,0,PETSC_NULL,one,0,0);
359: } else { /* here it DOES zero the row, since it's not a floating subdomain. */
360: PetscInt row = (PetscInt) rank;
361: MatZeroRows(pcnn->coarse_mat,1,&row,one,0,0);
362: }
363: }
365: /* Create the coarse linear solver context */
366: {
367: PC pc_ctx, inner_pc;
368: KSP inner_ksp;
370: KSPCreate(((PetscObject)pc)->comm,&pcnn->ksp_coarse);
371: PetscObjectIncrementTabLevel((PetscObject)pcnn->ksp_coarse,(PetscObject)pc,2);
372: KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);
373: KSPGetPC(pcnn->ksp_coarse,&pc_ctx);
374: PCSetType(pc_ctx,PCREDUNDANT);
375: KSPSetType(pcnn->ksp_coarse,KSPPREONLY);
376: PCRedundantGetKSP(pc_ctx,&inner_ksp);
377: KSPGetPC(inner_ksp,&inner_pc);
378: PCSetType(inner_pc,PCLU);
379: KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");
380: KSPSetFromOptions(pcnn->ksp_coarse);
381: /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */
382: KSPSetUp(pcnn->ksp_coarse);
383: }
385: /* Free the memory for mat */
386: PetscFree(mat);
388: /* for DEBUGGING, save the coarse matrix to a file. */
389: {
390: PetscBool flg = PETSC_FALSE;
391: PetscOptionsGetBool(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg,PETSC_NULL);
392: if (flg) {
393: PetscViewer viewer;
394: PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);
395: PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
396: MatView(pcnn->coarse_mat,viewer);
397: PetscViewerDestroy(&viewer);
398: }
399: }
401: /* Set the variable pcnn->factor_coarse_rhs. */
402: pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;
404: /* See historical note 02, at the bottom of this file. */
405: return(0);
406: }
408: /* -------------------------------------------------------------------------- */
409: /*
410: PCNNApplySchurToChunk -
412: Input parameters:
413: . pcnn
414: . n - size of chunk
415: . idx - indices of chunk
416: . chunk - values
418: Output parameters:
419: . array_N - result of Schur complement applied to chunk, scattered to big array
420: . vec1_B - result of Schur complement applied to chunk
421: . vec2_B - garbage (used as work space)
422: . vec1_D - garbage (used as work space)
423: . vec2_D - garbage (used as work space)
425: */
428: PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
429: {
431: PetscInt i;
432: PC_IS *pcis = (PC_IS*)(pc->data);
435: PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));
436: for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
437: PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
438: PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
439: PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);
440: return(0);
441: }
443: /* -------------------------------------------------------------------------- */
444: /*
445: PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
446: the preconditioner for the Schur complement.
448: Input parameter:
449: . r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.
451: Output parameters:
452: . z - global vector of interior and interface nodes. The values on the interface are the result of
453: the application of the interface preconditioner to the interface part of r. The values on the
454: interior nodes are garbage.
455: . work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
456: . vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
457: . vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
458: . vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
459: . vec1_D - vector of local interior nodes; returns garbage (used as work space)
460: . vec2_D - vector of local interior nodes; returns garbage (used as work space)
461: . vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
462: . vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
464: */
467: PetscErrorCode PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
468: Vec vec2_D, Vec vec1_N, Vec vec2_N)
469: {
471: PC_IS* pcis = (PC_IS*)(pc->data);
474: /*
475: First balancing step.
476: */
477: {
478: PetscBool flg = PETSC_FALSE;
479: PetscOptionsGetBool(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg,PETSC_NULL);
480: if (!flg) {
481: PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);
482: } else {
483: VecCopy(r,z);
484: }
485: }
487: /*
488: Extract the local interface part of z and scale it by D
489: */
490: VecScatterBegin(pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);
491: VecScatterEnd (pcis->global_to_B,z,vec1_B,INSERT_VALUES,SCATTER_FORWARD);
492: VecPointwiseMult(vec2_B,pcis->D,vec1_B);
494: /* Neumann Solver */
495: PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);
497: /*
498: Second balancing step.
499: */
500: {
501: PetscBool flg = PETSC_FALSE;
502: PetscOptionsGetBool(PETSC_NULL,"-pc_turn_off_second_balancing",&flg,PETSC_NULL);
503: if (!flg) {
504: PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);
505: } else {
506: VecPointwiseMult(vec2_B,pcis->D,vec1_B);
507: VecSet(z,0.0);
508: VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
509: VecScatterEnd (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
510: }
511: }
512: return(0);
513: }
515: /* -------------------------------------------------------------------------- */
516: /*
517: PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
518: input argument u is provided), or s, as given in equations
519: (12) and (13), if the input argument u is a null vector.
520: Notice that the input argument u plays the role of u_i in
521: equation (14). The equation numbers refer to [Man93].
523: Input Parameters:
524: . pcnn - NN preconditioner context.
525: . r - MPI vector of all nodes (interior and interface). It's preserved.
526: . u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.
528: Output Parameters:
529: . z - MPI vector of interior and interface nodes. Returns s or z (see description above).
530: . vec1_B - Sequential vector of local interface nodes. Workspace.
531: . vec2_B - Sequential vector of local interface nodes. Workspace.
532: . vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
533: . vec1_D - Sequential vector of local interior nodes. Workspace.
534: . vec2_D - Sequential vector of local interior nodes. Workspace.
535: . work_N - Array of all local nodes (interior and interface). Workspace.
537: */
540: PetscErrorCode PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
541: Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
542: {
544: PetscInt k;
545: PetscScalar value;
546: PetscScalar* lambda;
547: PC_NN* pcnn = (PC_NN*)(pc->data);
548: PC_IS* pcis = (PC_IS*)(pc->data);
551: PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);
552: if (u) {
553: if (!vec3_B) { vec3_B = u; }
554: VecPointwiseMult(vec1_B,pcis->D,u);
555: VecSet(z,0.0);
556: VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
557: VecScatterEnd (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
558: VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
559: VecScatterEnd (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
560: PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);
561: VecScale(vec3_B,-1.0);
562: VecCopy(r,z);
563: VecScatterBegin(pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);
564: VecScatterEnd (pcis->global_to_B,vec3_B,z,ADD_VALUES,SCATTER_REVERSE);
565: } else {
566: VecCopy(r,z);
567: }
568: VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
569: VecScatterEnd (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
570: PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);
571: for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
572: value *= pcnn->factor_coarse_rhs; /* This factor is set in CreateCoarseMatrix(). */
573: {
574: PetscMPIInt rank;
575: MPI_Comm_rank(((PetscObject)pc)->comm,&rank);
576: VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);
577: /*
578: Since we are only inserting local values (one value actually) we don't need to do the
579: reduction that tells us there is no data that needs to be moved. Hence we comment out these
580: VecAssemblyBegin(pcnn->coarse_b);
581: VecAssemblyEnd (pcnn->coarse_b);
582: */
583: }
584: KSPSolve(pcnn->ksp_coarse,pcnn->coarse_b,pcnn->coarse_x);
585: if (!u) { VecScale(pcnn->coarse_x,-1.0); }
586: VecGetArray(pcnn->coarse_x,&lambda);
587: for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
588: VecRestoreArray(pcnn->coarse_x,&lambda);
589: PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
590: VecSet(z,0.0);
591: VecScatterBegin(pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
592: VecScatterEnd (pcis->global_to_B,vec2_B,z,ADD_VALUES,SCATTER_REVERSE);
593: if (!u) {
594: VecScatterBegin(pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
595: VecScatterEnd (pcis->global_to_B,z,vec2_B,INSERT_VALUES,SCATTER_FORWARD);
596: PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
597: VecCopy(r,z);
598: }
599: VecScatterBegin(pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
600: VecScatterEnd (pcis->global_to_B,vec1_B,z,ADD_VALUES,SCATTER_REVERSE);
601: PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);
602: return(0);
603: }
609: /* ------- E N D O F T H E C O D E ------- */
610: /* */
611: /* From now on, "footnotes" (or "historical notes"). */
612: /* */
613: /* ------------------------------------------------- */
617: /* --------------------------------------------------------------------------
618: Historical note 01
619: -------------------------------------------------------------------------- */
620: /*
621: We considered the possibility of an alternative D_i that would still
622: provide a partition of unity (i.e., $ \sum_i N_i D_i N_i^T = I $).
623: The basic principle was still the pseudo-inverse of the counting
624: function; the difference was that we would not count subdomains
625: that do not contribute to the coarse space (i.e., not pure-Neumann
626: subdomains).
628: This turned out to be a bad idea: we would solve trivial Neumann
629: problems in the not pure-Neumann subdomains, since we would be scaling
630: the balanced residual by zero.
631: */
636: /* --------------------------------------------------------------------------
637: Historical note 02
638: -------------------------------------------------------------------------- */
639: /*
640: We tried an alternative coarse problem, that would eliminate exactly a
641: constant error. Turned out not to improve the overall convergence.
642: */