Actual source code: asa.c
petsc-3.4.5 2014-06-29
2: /* --------------------------------------------------------------------
4: Contributed by Arvid Bessen, Columbia University, June 2007
 6:    This file implements an ASA preconditioner in PETSc as part of PC.
8: The adaptive smoothed aggregation algorithm is described in the paper
9: "Adaptive Smoothed Aggregation (ASA)", M. Brezina, R. Falgout, S. MacLachlan,
10: T. Manteuffel, S. McCormick, and J. Ruge, SIAM Journal on Scientific Computing,
11: SISC Volume 25 Issue 6, Pages 1896-1920.
13: For an example usage of this preconditioner, see, e.g.
14: $PETSC_DIR/src/ksp/ksp/examples/tutorials/ex38.c ex39.c
15: and other files in that directory.
 17:    This code is still somewhat experimental. A number of possible improvements would be:
18: - keep vectors allocated on each level, instead of destroying them
19: (see mainly PCApplyVcycleOnLevel_ASA)
20: - in PCCreateTransferOp_ASA we get all of the submatrices at once, this could
21: be optimized by differentiating between local and global matrices
22: - the code does not handle it gracefully if there is just one level
23: - if relaxation is sufficient, exit of PCInitializationStage_ASA is not
24: completely clean
25: - default values could be more reasonable, especially for parallel solves,
26: where we need a parallel LU or similar
 27:    - the Richardson scaling parameter is chosen somewhat ad hoc and should be given a
 28:      sensible default
29: - a number of parameters for smoother (sor_omega, etc.) that we store explicitly
30: could be kept in the respective smoothers themselves
31: - some parameters have to be set via command line options, there are no direct
32: function calls available
 33:    - numerous other small issues
 35:    An example parallel run would use options such as
36: mpiexec ./program -pc_asa_coarse_pc_factor_mat_solver_package mumps -pc_asa_direct_solver 200
37: -pc_asa_max_cand_vecs 4 -pc_asa_mu_initial 50 -pc_asa_richardson_scale 1.0
38: -pc_asa_rq_improve 0.9 -asa_smoother_pc_type asm -asa_smoother_sub_pc_type sor
40: -------------------------------------------------------------------- */
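/*
   A minimal usage sketch (assumptions: a KSP object "ksp" already exists with its operators
   set, and PCASA is the registered type name of this preconditioner). The PC is selected
   either with -pc_type asa on the command line or directly in code:

     PC pc;
     KSPGetPC(ksp, &pc);
     PCSetType(pc, PCASA);
     KSPSetFromOptions(ksp);     picks up the -pc_asa_*, -asa_smoother_* and -asa_coarse_* options

   This sketch is illustrative only and is not part of the implementation below.
*/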
42: /*
43: This defines the data structures for the smoothed aggregation procedure
44: */
45: #include <../src/ksp/pc/impls/asa/asa.h>
46: #include <petscblaslapack.h>
48: /* -------------------------------------------------------------------------- */
50: /* Event logging */
52: PetscLogEvent PC_InitializationStage_ASA, PC_GeneralSetupStage_ASA;
53: PetscLogEvent PC_CreateTransferOp_ASA, PC_CreateVcycle_ASA;
54: PetscBool asa_events_registered = PETSC_FALSE;
58: /*@C
59: PCASASetTolerances - Sets the convergence thresholds for ASA algorithm
61: Collective on PC
 63:    Input Parameters:
64: + pc - the context
65: . rtol - the relative convergence tolerance
66: (relative decrease in the residual norm)
67: . abstol - the absolute convergence tolerance
68: (absolute size of the residual norm)
69: . dtol - the divergence tolerance
70: (amount residual can increase before KSPDefaultConverged()
71: concludes that the method is diverging)
72: - maxits - maximum number of iterations to use
74: Notes:
75: Use PETSC_DEFAULT to retain the default value of any of the tolerances.
77: Level: advanced
78: @*/
79: PetscErrorCode PCASASetTolerances(PC pc, PetscReal rtol, PetscReal abstol,PetscReal dtol, PetscInt maxits)
80: {
85: PetscTryMethod(pc,"PCASASetTolerances_C",(PC,PetscReal,PetscReal,PetscReal,PetscInt),(pc,rtol,abstol,dtol,maxits));
86: return(0);
87: }
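/*
   A minimal calling sketch (hypothetical caller, not part of this file): for a PC "pc" that
   has already been set to the ASA type, the convergence thresholds used while building the
   hierarchy could be tightened with

     PCASASetTolerances(pc, 1.e-10, 1.e-12, PETSC_DEFAULT, 50);

   Arguments passed as PETSC_DEFAULT leave the corresponding value unchanged, as implemented
   in PCASASetTolerances_ASA() below.
*/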
91: static PetscErrorCode PCASASetTolerances_ASA(PC pc, PetscReal rtol, PetscReal abstol,PetscReal dtol, PetscInt maxits)
92: {
93: PC_ASA *asa = (PC_ASA*) pc->data;
97: if (rtol != PETSC_DEFAULT) asa->rtol = rtol;
98: if (abstol != PETSC_DEFAULT) asa->abstol = abstol;
99: if (dtol != PETSC_DEFAULT) asa->divtol = dtol;
100: if (maxits != PETSC_DEFAULT) asa->max_it = maxits;
101: return(0);
102: }
106: /*
107: PCCreateLevel_ASA - Creates one level for the ASA algorithm
109: Input Parameters:
110: + level - current level
111: . comm - MPI communicator object
112: . next - pointer to next level
113: . prev - pointer to previous level
114: . ksptype - the KSP type for the smoothers on this level
115: - pctype - the PC type for the smoothers on this level
117: Output Parameters:
118: . new_asa_lev - the newly created level
120: .keywords: ASA, create, levels, multigrid
121: */
122: PetscErrorCode PCCreateLevel_ASA(PC_ASA_level **new_asa_lev, int level,MPI_Comm comm, PC_ASA_level *prev,PC_ASA_level *next,KSPType ksptype, PCType pctype)
123: {
125: PC_ASA_level *asa_lev;
128: PetscMalloc(sizeof(PC_ASA_level), &asa_lev);
130: asa_lev->level = level;
131: asa_lev->size = 0;
133: asa_lev->A = 0;
134: asa_lev->B = 0;
135: asa_lev->x = 0;
136: asa_lev->b = 0;
137: asa_lev->r = 0;
139: asa_lev->dm = 0;
140: asa_lev->aggnum = 0;
141: asa_lev->agg = 0;
142: asa_lev->loc_agg_dofs = 0;
143: asa_lev->agg_corr = 0;
144: asa_lev->bridge_corr = 0;
146: asa_lev->P = 0;
147: asa_lev->Pt = 0;
148: asa_lev->smP = 0;
149: asa_lev->smPt = 0;
151: asa_lev->comm = comm;
153: asa_lev->smoothd = 0;
154: asa_lev->smoothu = 0;
156: asa_lev->prev = prev;
157: asa_lev->next = next;
159: *new_asa_lev = asa_lev;
160: return(0);
161: }
165: PetscErrorCode PrintResNorm(Mat A, Vec x, Vec b, Vec r)
166: {
168: PetscBool destroyr = PETSC_FALSE;
169: PetscReal resnorm;
170: MPI_Comm Acomm;
173: if (!r) {
174: MatGetVecs(A, NULL, &r);
175: destroyr = PETSC_TRUE;
176: }
177: MatMult(A, x, r);
178: VecAYPX(r, -1.0, b);
179: VecNorm(r, NORM_2, &resnorm);
180: PetscObjectGetComm((PetscObject) A, &Acomm);
181: PetscPrintf(Acomm, "Residual norm is %f.\n", resnorm);
183: if (destroyr) {
184: VecDestroy(&r);
185: }
186: return(0);
187: }
191: PetscErrorCode PrintEnergyNormOfDiff(Mat A, Vec x, Vec y)
192: {
194: Vec vecdiff, Avecdiff;
195: PetscScalar dotprod;
196: PetscReal dotabs;
197: MPI_Comm Acomm;
200: VecDuplicate(x, &vecdiff);
201: VecWAXPY(vecdiff, -1.0, x, y);
202: MatGetVecs(A, NULL, &Avecdiff);
203: MatMult(A, vecdiff, Avecdiff);
204: VecDot(vecdiff, Avecdiff, &dotprod);
205: dotabs = PetscAbsScalar(dotprod);
206: PetscObjectGetComm((PetscObject) A, &Acomm);
 207:   PetscPrintf(Acomm, "Energy norm %f.\n", PetscSqrtReal(dotabs));
208: VecDestroy(&vecdiff);
209: VecDestroy(&Avecdiff);
210: return(0);
211: }
213: /* -------------------------------------------------------------------------- */
214: /*
215: PCDestroyLevel_ASA - Destroys one level of the ASA preconditioner
217: Input Parameter:
218: . asa_lev - pointer to level that should be destroyed
220: */
223: PetscErrorCode PCDestroyLevel_ASA(PC_ASA_level *asa_lev)
224: {
228: MatDestroy(&(asa_lev->A));
229: MatDestroy(&(asa_lev->B));
230: VecDestroy(&(asa_lev->x));
231: VecDestroy(&(asa_lev->b));
232: VecDestroy(&(asa_lev->r));
234: if (asa_lev->dm) {DMDestroy(&asa_lev->dm);}
236: MatDestroy(&(asa_lev->agg));
237: PetscFree(asa_lev->loc_agg_dofs);
238: MatDestroy(&(asa_lev->agg_corr));
239: MatDestroy(&(asa_lev->bridge_corr));
241: MatDestroy(&(asa_lev->P));
242: MatDestroy(&(asa_lev->Pt));
243: MatDestroy(&(asa_lev->smP));
244: MatDestroy(&(asa_lev->smPt));
246: if (asa_lev->smoothd != asa_lev->smoothu) {
247: if (asa_lev->smoothd) {KSPDestroy(&asa_lev->smoothd);}
248: }
249: if (asa_lev->smoothu) {KSPDestroy(&asa_lev->smoothu);}
251: PetscFree(asa_lev);
252: return(0);
253: }
255: /* -------------------------------------------------------------------------- */
256: /*
 257:    PCComputeSpectralRadius_ASA - Computes an upper bound for the spectral radius of asa_lev->A
 258:    and stores it in asa_lev->spec_rad
260: Input Parameters:
261: . asa_lev - the level we are treating
 263:    The estimate uses the bound sqrt(||A||_1 ||A||_inf) >= ||A||_2 >= rho(A)
265: */
268: PetscErrorCode PCComputeSpectralRadius_ASA(PC_ASA_level *asa_lev)
269: {
271: PetscReal norm_1, norm_inf;
274: MatNorm(asa_lev->A, NORM_1, &norm_1);
275: MatNorm(asa_lev->A, NORM_INFINITY, &norm_inf);
277: asa_lev->spec_rad = PetscSqrtReal(norm_1*norm_inf);
278: return(0);
279: }
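/*
   Background on the bound used above: for any matrix, rho(A) <= ||A||_2 and
   ||A||_2^2 = rho(A^T A) <= ||A^T A||_inf <= ||A^T||_inf ||A||_inf = ||A||_1 ||A||_inf,
   hence rho(A) <= sqrt(||A||_1 ||A||_inf). The value stored in spec_rad is therefore an
   upper bound on the spectral radius, which is all that the Richardson scaling and the
   prolongator smoothing below require.
*/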
283: PetscErrorCode PCSetRichardsonScale_ASA(KSP ksp, PetscReal spec_rad, PetscReal richardson_scale)
284: {
286: PC pc;
287: PetscBool flg;
288: PetscReal spec_rad_inv;
291: KSPSetInitialGuessNonzero(ksp, PETSC_TRUE);
292: if (richardson_scale != PETSC_DECIDE) {
293: KSPRichardsonSetScale(ksp, richardson_scale);
294: } else {
295: KSPGetPC(ksp, &pc);
296: PetscObjectTypeCompare((PetscObject)(pc), PCNONE, &flg);
297: if (flg) {
298: /* WORK: this is just an educated guess. Any number between 0 and 2/rho(A)
299: should do. asa_lev->spec_rad has to be an upper bound on rho(A). */
300: spec_rad_inv = 1.0/spec_rad;
301: KSPRichardsonSetScale(ksp, spec_rad_inv);
302: } else SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP, "Unknown PC type for smoother. Please specify scaling factor with -pc_asa_richardson_scale\n");
303: }
304: return(0);
305: }
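/*
   Background on the default scale: Richardson iteration with scale s computes
   x_{k+1} = x_k + s (b - A x_k) and, for symmetric positive definite A, reduces the error
   whenever 0 < s < 2/rho(A). Since spec_rad >= rho(A), the choice s = 1/spec_rad always lies
   in that interval, although a user-supplied -pc_asa_richardson_scale may converge faster.
*/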
309: PetscErrorCode PCSetSORomega_ASA(PC pc, PetscReal sor_omega)
310: {
314: if (sor_omega != PETSC_DECIDE) {
315: PCSORSetOmega(pc, sor_omega);
316: }
317: return(0);
318: }
321: /* -------------------------------------------------------------------------- */
322: /*
323: PCSetupSmoothersOnLevel_ASA - Creates the smoothers of the level.
324: We assume that asa_lev->A and asa_lev->spec_rad are correctly computed
326: Input Parameters:
327: + asa - the data structure for the ASA preconditioner
328: . asa_lev - the level we are treating
329: - maxits - maximum number of iterations to use
330: */
333: PetscErrorCode PCSetupSmoothersOnLevel_ASA(PC_ASA *asa, PC_ASA_level *asa_lev, PetscInt maxits)
334: {
336: PetscBool flg;
337: PC pc;
340: /* destroy old smoothers */
341: if (asa_lev->smoothu && asa_lev->smoothu != asa_lev->smoothd) {
342: KSPDestroy(&asa_lev->smoothu);
343: }
344: asa_lev->smoothu = 0;
345: if (asa_lev->smoothd) {
346: KSPDestroy(&asa_lev->smoothd);
347: }
348: asa_lev->smoothd = 0;
349: /* create smoothers */
350: KSPCreate(asa_lev->comm,&asa_lev->smoothd);
351: KSPSetType(asa_lev->smoothd, asa->ksptype_smooth);
352: KSPGetPC(asa_lev->smoothd,&pc);
353: PCSetType(pc,asa->pctype_smooth);
355: /* set up problems for smoothers */
356: KSPSetOperators(asa_lev->smoothd, asa_lev->A, asa_lev->A, DIFFERENT_NONZERO_PATTERN);
357: KSPSetTolerances(asa_lev->smoothd, asa->smoother_rtol, asa->smoother_abstol, asa->smoother_dtol, maxits);
358: PetscObjectTypeCompare((PetscObject)(asa_lev->smoothd), KSPRICHARDSON, &flg);
359: if (flg) {
360: /* special parameters for certain smoothers */
361: KSPSetInitialGuessNonzero(asa_lev->smoothd, PETSC_TRUE);
362: KSPGetPC(asa_lev->smoothd, &pc);
363: PetscObjectTypeCompare((PetscObject)pc, PCSOR, &flg);
364: if (flg) {
365: PCSetSORomega_ASA(pc, asa->sor_omega);
366: } else {
367: /* just set asa->richardson_scale to get some very basic smoother */
368: PCSetRichardsonScale_ASA(asa_lev->smoothd, asa_lev->spec_rad, asa->richardson_scale);
369: }
370: /* this would be the place to add support for other preconditioners */
371: }
372: KSPSetOptionsPrefix(asa_lev->smoothd, "asa_smoother_");
373: KSPSetFromOptions(asa_lev->smoothd);
374: /* set smoothu equal to smoothd, this could change later */
375: asa_lev->smoothu = asa_lev->smoothd;
376: return(0);
377: }
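/*
   Example of run-time configuration (hypothetical command line): because the smoother KSPs
   created above use the "asa_smoother_" options prefix, they can be changed without touching
   this code, e.g.

     -asa_smoother_ksp_type richardson -asa_smoother_pc_type sor -asa_smoother_ksp_max_it 2

   or the ASM/SOR combination suggested in the header comment of this file.
*/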
379: /* -------------------------------------------------------------------------- */
380: /*
381: PCSetupDirectSolversOnLevel_ASA - Creates the direct solvers on the coarsest level.
382: We assume that asa_lev->A and asa_lev->spec_rad are correctly computed
384: Input Parameters:
385: + asa - the data structure for the ASA preconditioner
386: . asa_lev - the level we are treating
387: - maxits - maximum number of iterations to use
388: */
391: PetscErrorCode PCSetupDirectSolversOnLevel_ASA(PC_ASA *asa, PC_ASA_level *asa_lev, PetscInt maxits)
392: {
394: PetscBool flg;
395: PetscMPIInt comm_size;
396: PC pc;
399: if (asa_lev->smoothu && asa_lev->smoothu != asa_lev->smoothd) {
400: KSPDestroy(&asa_lev->smoothu);
401: }
402: asa_lev->smoothu = 0;
403: if (asa_lev->smoothd) {
404: KSPDestroy(&asa_lev->smoothd);
405: asa_lev->smoothd = 0;
406: }
407: PetscStrcmp(asa->ksptype_direct, KSPPREONLY, &flg);
408: if (flg) {
409: PetscStrcmp(asa->pctype_direct, PCLU, &flg);
410: if (flg) {
411: MPI_Comm_size(asa_lev->comm, &comm_size);
412: if (comm_size > 1) {
413: /* the LU PC will call MatSolve, we may have to set the correct type for the matrix
414: to have support for this in parallel */
415: MatConvert(asa_lev->A, asa->coarse_mat_type, MAT_REUSE_MATRIX, &(asa_lev->A));
416: }
417: }
418: }
419: /* create new solvers */
420: KSPCreate(asa_lev->comm,&asa_lev->smoothd);
421: KSPSetType(asa_lev->smoothd, asa->ksptype_direct);
422: KSPGetPC(asa_lev->smoothd,&pc);
423: PCSetType(pc,asa->pctype_direct);
424: /* set up problems for direct solvers */
425: KSPSetOperators(asa_lev->smoothd, asa_lev->A, asa_lev->A, DIFFERENT_NONZERO_PATTERN);
426: KSPSetTolerances(asa_lev->smoothd, asa->direct_rtol, asa->direct_abstol, asa->direct_dtol, maxits);
 427:   /* user can set any coarse-solver option through the asa_coarse_ prefix, e.g. -asa_coarse_pc_type */
428: KSPSetOptionsPrefix(asa_lev->smoothd, "asa_coarse_");
429: KSPSetFromOptions(asa_lev->smoothd);
430: /* set smoothu equal to 0, not used */
431: asa_lev->smoothu = 0;
432: return(0);
433: }
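/*
   Example of run-time configuration (hypothetical command line; the option names are derived
   from the "asa_coarse_" prefix set above): a parallel direct solver could be requested with

     -asa_coarse_ksp_type preonly -asa_coarse_pc_type lu
     -asa_coarse_pc_factor_mat_solver_package mumps

   assuming PETSc was configured with MUMPS (or another parallel LU package).
*/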
435: /* -------------------------------------------------------------------------- */
436: /*
437: PCCreateAggregates_ASA - Creates the aggregates
439: Input Parameters:
440: . asa_lev - the level for which we should create the projection matrix
442: */
445: PetscErrorCode PCCreateAggregates_ASA(PC_ASA_level *asa_lev)
446: {
447: PetscInt m,n, m_loc,n_loc;
448: PetscInt m_loc_s, m_loc_e;
449: const PetscScalar one = 1.0;
450: PetscErrorCode ierr;
453: /* Create nodal aggregates A_i^l */
454: /* we use the DM grid information for that */
455: if (asa_lev->dm) {
456: /* coarsen DM and get the restriction matrix */
457: DMCoarsen(asa_lev->dm, MPI_COMM_NULL, &(asa_lev->next->dm));
458: DMCreateAggregates(asa_lev->next->dm, asa_lev->dm, &(asa_lev->agg));
459: MatGetSize(asa_lev->agg, &m, &n);
460: MatGetLocalSize(asa_lev->agg, &m_loc, &n_loc);
 461:     if (n!=asa_lev->size) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"DM aggregation matrix has incorrect size!\n");
462: asa_lev->next->size = m;
463: asa_lev->aggnum = m;
464: /* create the correlators, right now just identity matrices */
465: MatCreateAIJ(asa_lev->comm, n_loc, n_loc, n, n, 1, NULL, 1, NULL,&(asa_lev->agg_corr));
466: MatGetOwnershipRange(asa_lev->agg_corr, &m_loc_s, &m_loc_e);
467: for (m=m_loc_s; m<m_loc_e; m++) {
468: MatSetValues(asa_lev->agg_corr, 1, &m, 1, &m, &one, INSERT_VALUES);
469: }
470: MatAssemblyBegin(asa_lev->agg_corr, MAT_FINAL_ASSEMBLY);
471: MatAssemblyEnd(asa_lev->agg_corr, MAT_FINAL_ASSEMBLY);
472: /* MatShift(asa_lev->agg_corr, 1.0); */
473: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP, "Currently pure algebraic coarsening is not supported!");
474: /* somehow define the aggregates without knowing the geometry */
475: /* future WORK */
476: return(0);
477: }
479: /* -------------------------------------------------------------------------- */
480: /*
481: PCCreateTransferOp_ASA - Creates the transfer operator P_{l+1}^l for current level
483: Input Parameters:
 484: +  asa_lev - the level for which we should create the transfer operator
485: - construct_bridge - true, if we should construct a bridge operator, false for normal prolongator
487: If we add a second, third, ... candidate vector (i.e. more than one column in B), we
488: have to relate the additional dimensions to the original aggregates. This is done through
489: the "aggregate correlators" agg_corr and bridge_corr.
490: The aggregate that is used in the construction is then given by
491: asa_lev->agg * asa_lev->agg_corr
492: for the regular prolongator construction and
493: asa_lev->agg * asa_lev->bridge_corr
494: for the bridging prolongator constructions.
495: */
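/*
   Illustration of the correlator idea (hypothetical sizes): the correlators translate
   aggregates of nodes into aggregates of dofs. Suppose aggregate 0 consists of the two nodes
   0 and 1, and two candidate vectors have been kept so far, so node 0 owns dofs {0,1} and
   node 1 owns dofs {2,3}. Then row 0 of agg has nonzeros in columns 0 and 1, rows 0 and 1 of
   agg_corr have nonzeros in columns {0,1} and {2,3} respectively, and row 0 of the product
   agg * agg_corr has nonzeros in columns 0..3, i.e. exactly the rows of B that belong to this
   aggregate and are extracted below with MatGetSubMatrices().
*/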
498: PetscErrorCode PCCreateTransferOp_ASA(PC_ASA_level *asa_lev, PetscBool construct_bridge)
499: {
502: const PetscReal Ca = 1e-3;
503: PetscReal cutoff;
504: PetscInt nodes_on_lev;
506: Mat logical_agg;
507: PetscInt mat_agg_loc_start, mat_agg_loc_end, mat_agg_loc_size;
508: PetscInt a;
509: const PetscInt *agg = 0;
510: PetscInt **agg_arr = 0;
511: IS *idxm_is_B_arr = 0;
512: PetscInt *idxn_B = 0;
513: IS idxn_is_B, *idxn_is_B_arr = 0;
514: Mat *b_submat_arr = 0;
515: PetscScalar *b_submat = 0, *b_submat_tp = 0;
516: PetscInt *idxm = 0, *idxn = 0;
517: PetscInt cand_vecs_num;
518: PetscInt *cand_vec_length = 0;
519: PetscInt max_cand_vec_length = 0;
520: PetscScalar **b_orth_arr = 0;
521: PetscInt i,j;
522: PetscScalar *tau = 0, *work = 0;
523: PetscBLASInt info,b1,b2;
524: PetscInt max_cand_vecs_to_add;
525: PetscInt *new_loc_agg_dofs = 0;
526: PetscInt total_loc_cols = 0;
527: PetscReal norm;
528: PetscInt a_loc_m, a_loc_n;
529: PetscInt mat_loc_col_start, mat_loc_col_end, mat_loc_col_size;
530: PetscInt loc_agg_dofs_sum;
531: PetscInt row, col;
532: PetscScalar val;
533: PetscMPIInt comm_size, comm_rank;
534: PetscInt *loc_cols = 0;
537: PetscLogEventBegin(PC_CreateTransferOp_ASA,0,0,0,0);
539: MatGetSize(asa_lev->B, &nodes_on_lev, NULL);
 541:   /* If we add another candidate vector, we want to be able to judge how much the new candidate
542: improves our current projection operators and whether it is worth adding it.
543: This is the precomputation necessary for implementing Notes (4.1) to (4.7).
544: We require that all candidate vectors x stored in B are normalized such that
545: <A x, x> = 1 and we thus do not have to compute this.
546: For each aggregate A we can now test condition (4.5) and (4.6) by computing
547: || quantity to check ||_{A}^2 <= cutoff * card(A)/N_l */
548: cutoff = Ca/(asa_lev->spec_rad);
550: /* compute logical aggregates by using the correlators */
551: if (construct_bridge) {
552: /* construct bridging operator */
553: MatMatMult(asa_lev->agg, asa_lev->bridge_corr, MAT_INITIAL_MATRIX, 1.0, &logical_agg);
554: } else {
555: /* construct "regular" prolongator */
556: MatMatMult(asa_lev->agg, asa_lev->agg_corr, MAT_INITIAL_MATRIX, 1.0, &logical_agg);
557: }
559: /* destroy correlator matrices for next level, these will be rebuilt in this routine */
560: if (asa_lev->next) {
561: MatDestroy(&(asa_lev->next->agg_corr));
562: MatDestroy(&(asa_lev->next->bridge_corr));
563: }
565: /* find out the correct local row indices */
566: MatGetOwnershipRange(logical_agg, &mat_agg_loc_start, &mat_agg_loc_end);
568: mat_agg_loc_size = mat_agg_loc_end-mat_agg_loc_start;
570: cand_vecs_num = asa_lev->cand_vecs;
572: /* construct column indices idxn_B for reading from B */
573: PetscMalloc(sizeof(PetscInt)*(cand_vecs_num), &idxn_B);
574: for (i=0; i<cand_vecs_num; i++) idxn_B[i] = i;
575: ISCreateGeneral(asa_lev->comm, asa_lev->cand_vecs, idxn_B,PETSC_COPY_VALUES, &idxn_is_B);
576: PetscFree(idxn_B);
577: PetscMalloc(sizeof(IS)*mat_agg_loc_size, &idxn_is_B_arr);
578: for (a=0; a<mat_agg_loc_size; a++) idxn_is_B_arr[a] = idxn_is_B;
579: /* allocate storage for row indices idxm_B */
580: PetscMalloc(sizeof(IS)*mat_agg_loc_size, &idxm_is_B_arr);
582: /* Storage for the orthogonalized submatrices of B and their sizes */
583: PetscMalloc(sizeof(PetscInt)*mat_agg_loc_size, &cand_vec_length);
584: PetscMalloc(sizeof(PetscScalar*)*mat_agg_loc_size, &b_orth_arr);
585: /* Storage for the information about each aggregate */
586: PetscMalloc(sizeof(PetscInt*)*mat_agg_loc_size, &agg_arr);
587: /* Storage for the number of candidate vectors that are orthonormal and used in each submatrix */
588: PetscMalloc(sizeof(PetscInt)*mat_agg_loc_size, &new_loc_agg_dofs);
590: /* loop over local aggregates */
591: for (a=0; a<mat_agg_loc_size; a++) {
592: /* get info about current aggregate, this gives the rows we have to get from B */
593: MatGetRow(logical_agg, a+mat_agg_loc_start, &cand_vec_length[a], &agg, 0);
594: /* copy aggregate information */
595: PetscMalloc(sizeof(PetscInt)*cand_vec_length[a], &(agg_arr[a]));
596: PetscMemcpy(agg_arr[a], agg, sizeof(PetscInt)*cand_vec_length[a]);
597: /* restore row */
598: MatRestoreRow(logical_agg, a+mat_agg_loc_start, NULL, &agg, 0);
600: /* create index sets */
601: ISCreateGeneral(PETSC_COMM_SELF, cand_vec_length[a], agg_arr[a],PETSC_COPY_VALUES, &(idxm_is_B_arr[a]));
602: /* maximum candidate vector length */
603: if (cand_vec_length[a] > max_cand_vec_length) max_cand_vec_length = cand_vec_length[a];
604: }
605: /* destroy logical_agg, no longer needed */
606: MatDestroy(&logical_agg);
608: /* get the entries for aggregate from B */
609: MatGetSubMatrices(asa_lev->B, mat_agg_loc_size, idxm_is_B_arr, idxn_is_B_arr, MAT_INITIAL_MATRIX, &b_submat_arr);
611: /* clean up all the index sets */
612: for (a=0; a<mat_agg_loc_size; a++) { ISDestroy(&idxm_is_B_arr[a]); }
613: PetscFree(idxm_is_B_arr);
614: ISDestroy(&idxn_is_B);
615: PetscFree(idxn_is_B_arr);
617: /* storage for the values from each submatrix */
618: PetscMalloc(sizeof(PetscScalar)*max_cand_vec_length*cand_vecs_num, &b_submat);
619: PetscMalloc(sizeof(PetscScalar)*max_cand_vec_length*cand_vecs_num, &b_submat_tp);
620: PetscMalloc(sizeof(PetscInt)*max_cand_vec_length, &idxm);
621: for (i=0; i<max_cand_vec_length; i++) idxm[i] = i;
622: PetscMalloc(sizeof(PetscInt)*cand_vecs_num, &idxn);
623: for (i=0; i<cand_vecs_num; i++) idxn[i] = i;
624: /* work storage for QR algorithm */
625: PetscMalloc(sizeof(PetscScalar)*max_cand_vec_length, &tau);
626: PetscMalloc(sizeof(PetscScalar)*cand_vecs_num, &work);
628: /* orthogonalize all submatrices and store them in b_orth_arr */
629: for (a=0; a<mat_agg_loc_size; a++) {
630: /* Get the entries for aggregate from B. This is row ordered (although internally
631: it is column ordered and we will waste some energy transposing it).
632: WORK: use something like MatGetArray(b_submat_arr[a], &b_submat) but be really
633: careful about all the different matrix types */
634: MatGetValues(b_submat_arr[a], cand_vec_length[a], idxm, cand_vecs_num, idxn, b_submat);
636: if (construct_bridge) {
637: /* if we are constructing a bridging restriction/interpolation operator, we have
638: to use the same number of dofs as in our previous construction */
639: max_cand_vecs_to_add = asa_lev->loc_agg_dofs[a];
640: } else {
641: /* for a normal restriction/interpolation operator, we should make sure that we
642: do not create linear dependence by accident */
643: max_cand_vecs_to_add = PetscMin(cand_vec_length[a], cand_vecs_num);
644: }
646: /* We use LAPACK to compute the QR decomposition of b_submat. For LAPACK we have to
647: transpose the matrix. We might throw out some column vectors during this process.
648: We are keeping count of the number of column vectors that we use (and therefore the
649: number of dofs on the lower level) in new_loc_agg_dofs[a]. */
650: new_loc_agg_dofs[a] = 0;
651: for (j=0; j<max_cand_vecs_to_add; j++) {
652: /* check for condition (4.5) */
653: norm = 0.0;
654: for (i=0; i<cand_vec_length[a]; i++) {
655: norm += PetscRealPart(b_submat[i*cand_vecs_num+j])*PetscRealPart(b_submat[i*cand_vecs_num+j])
656: + PetscImaginaryPart(b_submat[i*cand_vecs_num+j])*PetscImaginaryPart(b_submat[i*cand_vecs_num+j]);
657: }
658: /* only add candidate vector if bigger than cutoff or first candidate */
659: if ((!j) || (norm > cutoff*((PetscReal) cand_vec_length[a])/((PetscReal) nodes_on_lev))) {
660: /* passed criterion (4.5), we have not implemented criterion (4.6) yet */
661: for (i=0; i<cand_vec_length[a]; i++) {
662: b_submat_tp[new_loc_agg_dofs[a]*cand_vec_length[a]+i] = b_submat[i*cand_vecs_num+j];
663: }
664: new_loc_agg_dofs[a]++;
665: }
666: /* #if defined(PCASA_VERBOSE) */
667: else {
668: PetscPrintf(asa_lev->comm, "Cutoff criteria invoked\n");
669: }
670: /* #endif */
671: }
673: CHKMEMQ;
674: /* orthogonalize b_submat_tp using the QR algorithm from LAPACK */
675: PetscBLASIntCast(*(cand_vec_length+a),&b1);
676: PetscBLASIntCast(*(new_loc_agg_dofs+a),&b2);
678: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
679: #if !defined(PETSC_MISSING_LAPACK_GEQRF)
680: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&b1, &b2, b_submat_tp, &b1, tau, work, &b2, &info));
681: if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB, "LAPACKgeqrf_ LAPACK routine failed");
682: #else
683: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"geqrf() - Lapack routine is unavailable\n");
684: #endif
685: #if !defined(PETSC_MISSING_LAPACK_ORGQR)
686: PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&b1, &b2, &b2, b_submat_tp, &b1, tau, work, &b2, &info));
687: #else
688: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ORGQR - Lapack routine is unavailable\nIf linking with ESSL you MUST also link with full LAPACK, for example\nuse ./configure with --with-blas-lib=libessl.a --with-lapack-lib=/usr/local/lib/liblapack.a'");
689: #endif
690: if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB, "LAPACKungqr_ LAPACK routine failed");
691: PetscFPTrapPop();
693: /* Transpose b_submat_tp and store it in b_orth_arr[a]. If we are constructing a
 694:      bridging restriction/interpolation operator, we could end up with fewer dofs than
695: we previously had. We fill those up with zeros. */
696: if (!construct_bridge) {
697: PetscMalloc(sizeof(PetscScalar)*cand_vec_length[a]*new_loc_agg_dofs[a], b_orth_arr+a);
698: for (j=0; j<new_loc_agg_dofs[a]; j++) {
699: for (i=0; i<cand_vec_length[a]; i++) {
700: b_orth_arr[a][i*new_loc_agg_dofs[a]+j] = b_submat_tp[j*cand_vec_length[a]+i];
701: }
702: }
703: } else {
704: /* bridge, might have to fill up */
705: PetscMalloc(sizeof(PetscScalar)*cand_vec_length[a]*max_cand_vecs_to_add, b_orth_arr+a);
706: for (j=0; j<new_loc_agg_dofs[a]; j++) {
707: for (i=0; i<cand_vec_length[a]; i++) {
708: b_orth_arr[a][i*max_cand_vecs_to_add+j] = b_submat_tp[j*cand_vec_length[a]+i];
709: }
710: }
711: for (j=new_loc_agg_dofs[a]; j<max_cand_vecs_to_add; j++) {
712: for (i=0; i<cand_vec_length[a]; i++) {
713: b_orth_arr[a][i*max_cand_vecs_to_add+j] = 0.0;
714: }
715: }
716: new_loc_agg_dofs[a] = max_cand_vecs_to_add;
717: }
718: /* the number of columns in asa_lev->P that are local to this process */
719: total_loc_cols += new_loc_agg_dofs[a];
720: } /* end of loop over local aggregates */
722: /* destroy the submatrices, also frees all allocated space */
723: MatDestroyMatrices(mat_agg_loc_size, &b_submat_arr);
724: /* destroy all other workspace */
725: PetscFree(b_submat);
726: PetscFree(b_submat_tp);
727: PetscFree(idxm);
728: PetscFree(idxn);
729: PetscFree(tau);
730: PetscFree(work);
732: /* destroy old matrix P, Pt */
733: MatDestroy(&(asa_lev->P));
734: MatDestroy(&(asa_lev->Pt));
736: MatGetLocalSize(asa_lev->A, &a_loc_m, &a_loc_n);
738: /* determine local range */
739: MPI_Comm_size(asa_lev->comm, &comm_size);
740: MPI_Comm_rank(asa_lev->comm, &comm_rank);
741: PetscMalloc(comm_size*sizeof(PetscInt), &loc_cols);
742: MPI_Allgather(&total_loc_cols, 1, MPIU_INT, loc_cols, 1, MPIU_INT, asa_lev->comm);
744: mat_loc_col_start = 0;
745: for (i=0;i<comm_rank;i++) mat_loc_col_start += loc_cols[i];
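       /* after the loop above i equals comm_rank, so loc_cols[i] is this process's
          own column count and mat_loc_col_end is its upper column bound */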
746: mat_loc_col_end = mat_loc_col_start + loc_cols[i];
747: mat_loc_col_size = mat_loc_col_end-mat_loc_col_start;
748: if (mat_loc_col_size != total_loc_cols) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_COR, "Local size does not match matrix size");
749: PetscFree(loc_cols);
751: /* we now have enough information to create asa_lev->P */
752: MatCreateAIJ(asa_lev->comm, a_loc_n, total_loc_cols, asa_lev->size, PETSC_DETERMINE,
753: cand_vecs_num, NULL, cand_vecs_num, NULL, &(asa_lev->P));
754: /* create asa_lev->Pt */
755: MatCreateAIJ(asa_lev->comm, total_loc_cols, a_loc_n, PETSC_DETERMINE, asa_lev->size,
756: max_cand_vec_length, NULL, max_cand_vec_length, NULL, &(asa_lev->Pt));
757: if (asa_lev->next) {
758: /* create correlator for aggregates of next level */
759: MatCreateAIJ(asa_lev->comm, mat_agg_loc_size, total_loc_cols, PETSC_DETERMINE, PETSC_DETERMINE,
760: cand_vecs_num, NULL, cand_vecs_num, NULL, &(asa_lev->next->agg_corr));
761: /* create asa_lev->next->bridge_corr matrix */
762: MatCreateAIJ(asa_lev->comm, mat_agg_loc_size, total_loc_cols, PETSC_DETERMINE, PETSC_DETERMINE,
763: cand_vecs_num, NULL, cand_vecs_num, NULL, &(asa_lev->next->bridge_corr));
764: }
766: /* this is my own hack, but it should give the columns that we should write to */
767: MatGetOwnershipRangeColumn(asa_lev->P, &mat_loc_col_start, &mat_loc_col_end);
769: mat_loc_col_size = mat_loc_col_end-mat_loc_col_start;
770: if (mat_loc_col_size != total_loc_cols) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ, "The number of local columns in asa_lev->P assigned to this processor does not match the local vector size");
772: loc_agg_dofs_sum = 0;
773: /* construct P, Pt, agg_corr, bridge_corr */
774: for (a=0; a<mat_agg_loc_size; a++) {
775: /* store b_orth_arr[a] in P */
776: for (i=0; i<cand_vec_length[a]; i++) {
777: row = agg_arr[a][i];
778: for (j=0; j<new_loc_agg_dofs[a]; j++) {
779: col = mat_loc_col_start + loc_agg_dofs_sum + j;
780: val = b_orth_arr[a][i*new_loc_agg_dofs[a] + j];
781: MatSetValues(asa_lev->P, 1, &row, 1, &col, &val, INSERT_VALUES);
782: val = PetscConj(val);
783: MatSetValues(asa_lev->Pt, 1, &col, 1, &row, &val, INSERT_VALUES);
784: }
785: }
787: /* compute aggregate correlation matrices */
788: if (asa_lev->next) {
789: row = a+mat_agg_loc_start;
790: for (i=0; i<new_loc_agg_dofs[a]; i++) {
791: col = mat_loc_col_start + loc_agg_dofs_sum + i;
792: val = 1.0;
793: MatSetValues(asa_lev->next->agg_corr, 1, &row, 1, &col, &val, INSERT_VALUES);
794: /* for the bridge operator we leave out the newest candidates, i.e.
795: we set bridge_corr to 1.0 for all columns up to asa_lev->loc_agg_dofs[a] and to
796: 0.0 between asa_lev->loc_agg_dofs[a] and new_loc_agg_dofs[a] */
797: if (!(asa_lev->loc_agg_dofs && (i >= asa_lev->loc_agg_dofs[a]))) {
798: MatSetValues(asa_lev->next->bridge_corr, 1, &row, 1, &col, &val, INSERT_VALUES);
799: }
800: }
801: }
803: /* move to next entry point col */
804: loc_agg_dofs_sum += new_loc_agg_dofs[a];
805: } /* end of loop over local aggregates */
807: MatAssemblyBegin(asa_lev->P,MAT_FINAL_ASSEMBLY);
808: MatAssemblyEnd(asa_lev->P,MAT_FINAL_ASSEMBLY);
809: MatAssemblyBegin(asa_lev->Pt,MAT_FINAL_ASSEMBLY);
810: MatAssemblyEnd(asa_lev->Pt,MAT_FINAL_ASSEMBLY);
811: if (asa_lev->next) {
812: MatAssemblyBegin(asa_lev->next->agg_corr,MAT_FINAL_ASSEMBLY);
813: MatAssemblyEnd(asa_lev->next->agg_corr,MAT_FINAL_ASSEMBLY);
814: MatAssemblyBegin(asa_lev->next->bridge_corr,MAT_FINAL_ASSEMBLY);
815: MatAssemblyEnd(asa_lev->next->bridge_corr,MAT_FINAL_ASSEMBLY);
816: }
818: /* if we are not constructing a bridging operator, switch asa_lev->loc_agg_dofs
819: and new_loc_agg_dofs */
820: if (construct_bridge) {
821: PetscFree(new_loc_agg_dofs);
822: } else {
823: if (asa_lev->loc_agg_dofs) {
824: PetscFree(asa_lev->loc_agg_dofs);
825: }
826: asa_lev->loc_agg_dofs = new_loc_agg_dofs;
827: }
829: /* clean up */
830: for (a=0; a<mat_agg_loc_size; a++) {
831: PetscFree(b_orth_arr[a]);
832: PetscFree(agg_arr[a]);
833: }
834: PetscFree(cand_vec_length);
835: PetscFree(b_orth_arr);
836: PetscFree(agg_arr);
838: PetscLogEventEnd(PC_CreateTransferOp_ASA, 0,0,0,0);
839: return(0);
840: }
842: /* -------------------------------------------------------------------------- */
843: /*
844: PCSmoothProlongator_ASA - Computes the smoothed prolongators I and It on the level
846: Input Parameters:
847: . asa_lev - the level for which the smoothed prolongator is constructed
848: */
851: PetscErrorCode PCSmoothProlongator_ASA(PC_ASA_level *asa_lev)
852: {
856: MatDestroy(&(asa_lev->smP));
857: MatDestroy(&(asa_lev->smPt));
858: /* compute prolongator I_{l+1}^l = S_l P_{l+1}^l */
859: /* step 1: compute I_{l+1}^l = A_l P_{l+1}^l */
860: MatMatMult(asa_lev->A, asa_lev->P, MAT_INITIAL_MATRIX, 1, &(asa_lev->smP));
861: MatMatMult(asa_lev->Pt, asa_lev->A, MAT_INITIAL_MATRIX, 1, &(asa_lev->smPt));
 862:   /* step 2: shift and scale to get I_{l+1}^l = P_{l+1}^l - 4/(3 rho(A_l)) A_l P_{l+1}^l */
863: MatAYPX(asa_lev->smP, -4./(3.*(asa_lev->spec_rad)), asa_lev->P, SUBSET_NONZERO_PATTERN);
864: MatAYPX(asa_lev->smPt, -4./(3.*(asa_lev->spec_rad)), asa_lev->Pt, SUBSET_NONZERO_PATTERN);
865: return(0);
866: }
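/*
   Note on the scaling used above: the two MatAYPX calls realize
   smP  = (I - 4/(3*spec_rad) A) P    and    smPt = Pt (I - 4/(3*spec_rad) A),
   i.e. one damped-Jacobi-like smoothing step applied to the tentative prolongator. The factor
   4/3 divided by an upper bound on rho(A) is the usual damping in smoothed aggregation; no
   extra D^{-1} appears here because A may already have been symmetrically scaled by its
   diagonal in PCConstructMultigrid_ASA().
*/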
869: /* -------------------------------------------------------------------------- */
870: /*
871: PCCreateVcycle_ASA - Creates the V-cycle, when aggregates are already defined
873: Input Parameters:
874: . asa - the preconditioner context
875: */
878: PetscErrorCode PCCreateVcycle_ASA(PC_ASA *asa)
879: {
881: PC_ASA_level *asa_lev, *asa_next_lev;
882: Mat AI;
885: PetscLogEventBegin(PC_CreateVcycle_ASA, 0,0,0,0);
887: if (!asa) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL, "asa pointer is NULL");
888: if (!(asa->levellist)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL, "no levels found");
889: asa_lev = asa->levellist;
890: PCComputeSpectralRadius_ASA(asa_lev);
891: PCSetupSmoothersOnLevel_ASA(asa, asa_lev, asa->nu);
893: while (asa_lev->next) {
894: asa_next_lev = asa_lev->next;
895: /* (a) aggregates are already constructed */
897: /* (b) construct B_{l+1} and P_{l+1}^l using (2.11) */
898: /* construct P_{l+1}^l */
899: PCCreateTransferOp_ASA(asa_lev, PETSC_FALSE);
901: /* construct B_{l+1} */
902: MatDestroy(&(asa_next_lev->B));
903: MatMatMult(asa_lev->Pt, asa_lev->B, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->B));
905: asa_next_lev->cand_vecs = asa_lev->cand_vecs;
907: /* (c) construct smoothed prolongator */
908: PCSmoothProlongator_ASA(asa_lev);
910: /* (d) construct coarse matrix */
911: /* Define coarse matrix A_{l+1} = (I_{l+1}^l)^T A_l I_{l+1}^l */
912: MatDestroy(&(asa_next_lev->A));
913: MatMatMult(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1.0, &AI);
914: MatMatMult(asa_lev->smPt, AI, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->A));
915: MatDestroy(&AI);
916: /* MatPtAP(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->A)); */
917: MatGetSize(asa_next_lev->A, NULL, &(asa_next_lev->size));
918: PCComputeSpectralRadius_ASA(asa_next_lev);
919: PCSetupSmoothersOnLevel_ASA(asa, asa_next_lev, asa->nu);
920: /* create corresponding vectors x_{l+1}, b_{l+1}, r_{l+1} */
921: VecDestroy(&(asa_next_lev->x));
922: VecDestroy(&(asa_next_lev->b));
923: VecDestroy(&(asa_next_lev->r));
924: MatGetVecs(asa_next_lev->A, &(asa_next_lev->x), &(asa_next_lev->b));
925: MatGetVecs(asa_next_lev->A, NULL, &(asa_next_lev->r));
927: /* go to next level */
928: asa_lev = asa_lev->next;
929: } /* end of while loop over the levels */
931: /* asa_lev now points to the coarsest level, set up direct solver there */
932: PCComputeSpectralRadius_ASA(asa_lev);
933: PCSetupDirectSolversOnLevel_ASA(asa, asa_lev, asa->nu);
935: PetscLogEventEnd(PC_CreateVcycle_ASA, 0,0,0,0);
936: return(0);
937: }
939: /* -------------------------------------------------------------------------- */
940: /*
941: PCAddCandidateToB_ASA - Inserts a candidate vector in B
943: Input Parameters:
944: + B - the matrix to insert into
945: . col_idx - the column we should insert to
946: . x - the vector to insert
947: - A - system matrix
949: Function will insert normalized x into B, such that <A x, x> = 1
950: (x itself is not changed). If B is projected down then this property
951: is kept. If <A_l x_l, x_l> = 1 and the next level is defined by
952: x_{l+1} = Pt x_l and A_{l+1} = Pt A_l P then
 953:    <A_{l+1} x_{l+1}, x_{l+1}> = <Pt A_l P Pt x_l, Pt x_l>
954: = <A_l P Pt x_l, P Pt x_l> = <A_l x_l, x_l> = 1
955: because of the definition of P in (2.11).
956: */
959: PetscErrorCode PCAddCandidateToB_ASA(Mat B, PetscInt col_idx, Vec x, Mat A)
960: {
962: Vec Ax;
963: PetscScalar dotprod;
964: PetscReal norm;
965: PetscInt i, loc_start, loc_end;
966: PetscScalar val, *vecarray;
969: MatGetVecs(A, NULL, &Ax);
970: MatMult(A, x, Ax);
971: VecDot(Ax, x, &dotprod);
972: norm = PetscSqrtReal(PetscAbsScalar(dotprod));
973: VecGetOwnershipRange(x, &loc_start, &loc_end);
974: VecGetArray(x, &vecarray);
975: for (i=loc_start; i<loc_end; i++) {
976: val = vecarray[i-loc_start]/norm;
977: MatSetValues(B, 1, &i, 1, &col_idx, &val, INSERT_VALUES);
978: }
979: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
980: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
981: VecRestoreArray(x, &vecarray);
982: VecDestroy(&Ax);
983: return(0);
984: }
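/*
   A short usage sketch (hypothetical caller, matching how this routine is used later in this
   file): the call stores x / ||x||_A, with ||x||_A = sqrt(|<A x, x>|), into column col_idx
   of B, e.g.

     PCAddCandidateToB_ASA(asa_lev->B, asa_lev->cand_vecs, asa_lev->x, asa_lev->A);

   so that every stored candidate has unit energy norm, which is what the cutoff test (4.5)
   in PCCreateTransferOp_ASA() assumes.
*/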
986: /* -------------------------------------------------------------------------- */
987: /*
 988:    PCInitializationStage_ASA - the initialization stage of the ASA algorithm. The argument x is a starting guess for a hard to approximate vector; if NULL, a random vector will be generated
989: */
992: PetscErrorCode PCInitializationStage_ASA(PC pc, Vec x)
993: {
995: PetscInt l;
996: PC_ASA *asa = (PC_ASA*)pc->data;
997: PC_ASA_level *asa_lev, *asa_next_lev;
998: PetscRandom rctx; /* random number generator context */
999: Vec ax;
1000: PetscScalar tmp;
1001: PetscReal prevnorm, norm;
1002: PetscBool skip_steps_f_i = PETSC_FALSE;
1003: PetscBool sufficiently_coarsened = PETSC_FALSE;
1004: PetscInt vec_size, vec_loc_size;
1005: PetscInt loc_vec_low, loc_vec_high;
1006: PetscInt i,j;
1007: Mat AI;
1008: Vec cand_vec, cand_vec_new;
1009: PetscBool isrichardson;
1010: PC coarse_pc;
1013: PetscLogEventBegin(PC_InitializationStage_ASA,0,0,0,0);
1014: l = 1;
1015: /* create first level */
1016: PCCreateLevel_ASA(&(asa->levellist), l, asa->comm, 0, 0, asa->ksptype_smooth, asa->pctype_smooth);
1017: asa_lev = asa->levellist;
1019: /* Set matrix */
1020: asa_lev->A = asa->A;
1021: MatGetSize(asa_lev->A, &i, &j);
1022: asa_lev->size = i;
1023: PCComputeSpectralRadius_ASA(asa_lev);
1024: PCSetupSmoothersOnLevel_ASA(asa, asa_lev, asa->mu_initial);
1026: /* Set DM */
1027: asa_lev->dm = pc->dm;
1028: PetscObjectReference((PetscObject)pc->dm);
1030: PetscPrintf(asa_lev->comm, "Initialization stage\n");
1032: if (x) {
1033: /* use starting guess */
1034: VecDestroy(&(asa_lev->x));
1035: VecDuplicate(x, &(asa_lev->x));
1036: VecCopy(x, asa_lev->x);
1037: } else {
1038: /* select random starting vector */
1039: VecDestroy(&(asa_lev->x));
1040: MatGetVecs(asa_lev->A, &(asa_lev->x), 0);
1041: PetscRandomCreate(asa_lev->comm,&rctx);
1042: PetscRandomSetFromOptions(rctx);
1043: VecSetRandom(asa_lev->x, rctx);
1044: PetscRandomDestroy(&rctx);
1045: }
1047: /* create right hand side */
1048: VecDestroy(&(asa_lev->b));
1049: MatGetVecs(asa_lev->A, &(asa_lev->b), 0);
1050: VecSet(asa_lev->b, 0.0);
1052: /* relax and check whether that's enough already */
1053: /* compute old norm */
1054: MatGetVecs(asa_lev->A, 0, &ax);
1055: MatMult(asa_lev->A, asa_lev->x, ax);
1056: VecDot(asa_lev->x, ax, &tmp);
1057: prevnorm = PetscAbsScalar(tmp);
1058: PetscPrintf(asa_lev->comm, "Residual norm of starting guess: %f\n", prevnorm);
1060: /* apply mu_initial relaxations */
1061: KSPSolve(asa_lev->smoothd, asa_lev->b, asa_lev->x);
1062: /* compute new norm */
1063: MatMult(asa_lev->A, asa_lev->x, ax);
1064: VecDot(asa_lev->x, ax, &tmp);
1065: norm = PetscAbsScalar(tmp);
1066: VecDestroy(&(ax));
1067:   PetscPrintf(asa_lev->comm, "Norm after %D relaxations: %g (starting norm %g, epsilon %g)\n", asa->mu_initial, norm, prevnorm, asa->epsilon);
1069: /* Check if it already converges by itself */
1070: if (norm/prevnorm <= PetscPowReal(asa->epsilon, (PetscReal) asa->mu_initial)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP, "Relaxation should be sufficient to treat this problem. Use relaxation or decrease epsilon with -pc_asa_epsilon");
1071: else {
1072: /* set the number of relaxations to asa->mu from asa->mu_initial */
1073: PCSetupSmoothersOnLevel_ASA(asa, asa_lev, asa->mu);
1075: /* Let's do some multigrid ! */
1076: sufficiently_coarsened = PETSC_FALSE;
1078: /* do the whole initialization stage loop */
1079: while (!sufficiently_coarsened) {
1080: PetscPrintf(asa_lev->comm, "Initialization stage: creating level %D\n", asa_lev->level+1);
1082: /* (a) Set candidate matrix B_l = x_l */
1083: /* get the correct vector sizes and data */
1084: VecGetSize(asa_lev->x, &vec_size);
1085: VecGetOwnershipRange(asa_lev->x, &loc_vec_low, &loc_vec_high);
1086: vec_loc_size = loc_vec_high - loc_vec_low;
1088: /* create matrix for candidates */
1089: MatCreateDense(asa_lev->comm, vec_loc_size, PETSC_DECIDE, vec_size, asa->max_cand_vecs, NULL, &(asa_lev->B));
1090: /* set the first column */
1091: PCAddCandidateToB_ASA(asa_lev->B, 0, asa_lev->x, asa_lev->A);
1093: asa_lev->cand_vecs = 1;
1095: /* create next level */
1096: PCCreateLevel_ASA(&(asa_lev->next), asa_lev->level+1, asa_lev->comm, asa_lev, NULL, asa->ksptype_smooth, asa->pctype_smooth);
1097: asa_next_lev = asa_lev->next;
1099: /* (b) Create nodal aggregates A_i^l */
1100: PCCreateAggregates_ASA(asa_lev);
1102:       /* (c) Define tentative prolongator P_{l+1}^l and candidate matrix B_{l+1}
1103: using P_{l+1}^l B_{l+1} = B_l and (P_{l+1}^l)^T P_{l+1}^l = I */
1104: PCCreateTransferOp_ASA(asa_lev, PETSC_FALSE);
1106: /* future WORK: set correct fill ratios for all the operations below */
1107: MatMatMult(asa_lev->Pt, asa_lev->B, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->B));
1109: asa_next_lev->cand_vecs = asa_lev->cand_vecs;
1111: /* (d) Define prolongator I_{l+1}^l = S_l P_{l+1}^l */
1112: PCSmoothProlongator_ASA(asa_lev);
1114: /* (e) Define coarse matrix A_{l+1} = (I_{l+1}^l)^T A_l I_{l+1}^l */
1115: MatMatMult(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1.0, &AI);
1116: MatMatMult(asa_lev->smPt, AI, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->A));
1117: MatDestroy(&AI);
1118: /* MatPtAP(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->A)); */
1119: MatGetSize(asa_next_lev->A, NULL, &(asa_next_lev->size));
1120: PCComputeSpectralRadius_ASA(asa_next_lev);
1121: PCSetupSmoothersOnLevel_ASA(asa, asa_next_lev, asa->mu);
1123: /* coarse enough for direct solver? */
1124: MatGetSize(asa_next_lev->A, &i, &j);
1125: if (PetscMax(i,j) <= asa->direct_solver) {
1126: PetscPrintf(asa_lev->comm, "Level %D can be treated directly.\n"
1127: "Algorithm will use %D levels.\n", asa_next_lev->level,
1128: asa_next_lev->level);
1129: break; /* go to step 5 */
1130: }
1132: if (!skip_steps_f_i) {
1133: /* (f) Set x_{l+1} = B_{l+1}, we just compute it again */
1134: VecDestroy(&(asa_next_lev->x));
1135: MatGetVecs(asa_lev->P, &(asa_next_lev->x), 0);
1136: MatMult(asa_lev->Pt, asa_lev->x, asa_next_lev->x);
1138: /* /\* (g) Make copy \hat{x}_{l+1} = x_{l+1} *\/ */
1139: /* VecDuplicate(asa_next_lev->x, &xhat); */
1140: /* VecCopy(asa_next_lev->x, xhat); */
1142: /* Create b_{l+1} */
1143: VecDestroy(&(asa_next_lev->b));
1144: MatGetVecs(asa_next_lev->A, &(asa_next_lev->b), 0);
1145: VecSet(asa_next_lev->b, 0.0);
1147: /* (h) Relax mu times on A_{l+1} x = 0 */
1148: /* compute old norm */
1149: MatGetVecs(asa_next_lev->A, 0, &ax);
1150: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1151: VecDot(asa_next_lev->x, ax, &tmp);
1152: prevnorm = PetscAbsScalar(tmp);
1153: PetscPrintf(asa_next_lev->comm, "Residual norm of starting guess on level %D: %f\n", asa_next_lev->level, prevnorm);
1154: /* apply mu relaxations: WORK, make sure that mu is set correctly */
1155: KSPSolve(asa_next_lev->smoothd, asa_next_lev->b, asa_next_lev->x);
1156: /* compute new norm */
1157: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1158: VecDot(asa_next_lev->x, ax, &tmp);
1159: norm = PetscAbsScalar(tmp);
1160: VecDestroy(&(ax));
1161: PetscPrintf(asa_next_lev->comm, "Residual norm after Richardson iteration on level %D: %f\n", asa_next_lev->level, norm);
1162: /* (i) Check if it already converges by itself */
1163: if (norm/prevnorm <= PetscPowReal(asa->epsilon, (PetscReal) asa->mu)) {
1164: /* relaxation reduces error sufficiently */
1165: skip_steps_f_i = PETSC_TRUE;
1166: }
1167: }
1168: /* (j) go to next coarser level */
1169: l++;
1170: asa_lev = asa_next_lev;
1171: }
1172: /* Step 5. */
1173: asa->levels = asa_next_lev->level; /* WORK: correct? */
1175: /* Set up direct solvers on coarsest level */
1176: if (asa_next_lev->smoothd != asa_next_lev->smoothu) {
1177: if (asa_next_lev->smoothu) { KSPDestroy(&asa_next_lev->smoothu); }
1178: }
1179: KSPSetType(asa_next_lev->smoothd, asa->ksptype_direct);
1180: PetscObjectTypeCompare((PetscObject)(asa_next_lev->smoothd), KSPRICHARDSON, &isrichardson);
1181: if (isrichardson) {
1182: KSPSetInitialGuessNonzero(asa_next_lev->smoothd, PETSC_TRUE);
1183: } else {
1184: KSPSetInitialGuessNonzero(asa_next_lev->smoothd, PETSC_FALSE);
1185: }
1186: KSPGetPC(asa_next_lev->smoothd, &coarse_pc);
1187: PCSetType(coarse_pc, asa->pctype_direct);
1189: asa_next_lev->smoothu = asa_next_lev->smoothd;
1191: PCSetupDirectSolversOnLevel_ASA(asa, asa_next_lev, asa->nu);
1193: /* update finest-level candidate matrix B_1 = I_2^1 I_3^2 ... I_{L-1}^{L-2} x_{L-1} */
1194: if (!asa_lev->prev) {
1195: /* just one relaxation level */
1196: VecDuplicate(asa_lev->x, &cand_vec);
1197: VecCopy(asa_lev->x, cand_vec);
1198: } else {
1199: /* interpolate up the chain */
1200: cand_vec = asa_lev->x;
1201: asa_lev->x = 0;
1202: while (asa_lev->prev) {
1203: /* interpolate to higher level */
1204: MatGetVecs(asa_lev->prev->smP, 0, &cand_vec_new);
1205: MatMult(asa_lev->prev->smP, cand_vec, cand_vec_new);
1206: VecDestroy(&(cand_vec));
1207: cand_vec = cand_vec_new;
1209: /* destroy all working vectors on the way */
1210: VecDestroy(&(asa_lev->x));
1211: VecDestroy(&(asa_lev->b));
1213: /* move to next higher level */
1214: asa_lev = asa_lev->prev;
1215: }
1216: }
1217: /* set the first column of B1 */
1218: PCAddCandidateToB_ASA(asa_lev->B, 0, cand_vec, asa_lev->A);
1219: VecDestroy(&(cand_vec));
1221: /* Step 6. Create V-cycle */
1222: PCCreateVcycle_ASA(asa);
1223: }
1224: PetscLogEventEnd(PC_InitializationStage_ASA,0,0,0,0);
1225: return(0);
1226: }
1228: /* -------------------------------------------------------------------------- */
1229: /*
1230: PCApplyVcycleOnLevel_ASA - Applies current V-cycle
1232: Input Parameters:
1233: + asa_lev - the current level we should recurse on
1234: - gamma - the number of recursive cycles we should run
1236: */
1239: PetscErrorCode PCApplyVcycleOnLevel_ASA(PC_ASA_level *asa_lev, PetscInt gamma)
1240: {
1242: PC_ASA_level *asa_next_lev;
1243: PetscInt g;
1246: if (!asa_lev) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL, "Level is empty in PCApplyVcycleOnLevel_ASA");
1247: asa_next_lev = asa_lev->next;
1249: if (asa_next_lev) {
1250: /* 1. Presmoothing */
1251: KSPSolve(asa_lev->smoothd, asa_lev->b, asa_lev->x);
1252: /* 2. Coarse grid corrections */
1253: /* MatGetVecs(asa_lev->A, 0, &tmp); */
1254: /* MatGetVecs(asa_lev->smP, &(asa_next_lev->b), 0); */
1255: /* MatGetVecs(asa_next_lev->A, &(asa_next_lev->x), 0); */
1256: for (g=0; g<gamma; g++) {
1257: /* (a) get coarsened b_{l+1} = (I_{l+1}^l)^T (b_l - A_l x_l) */
1258: MatMult(asa_lev->A, asa_lev->x, asa_lev->r);
1259: VecAYPX(asa_lev->r, -1.0, asa_lev->b);
1260: MatMult(asa_lev->smPt, asa_lev->r, asa_next_lev->b);
1262: /* (b) Set x_{l+1} = 0 and recurse */
1263: VecSet(asa_next_lev->x, 0.0);
1264: PCApplyVcycleOnLevel_ASA(asa_next_lev, gamma);
1266: /* (c) correct solution x_l = x_l + I_{l+1}^l x_{l+1} */
1267: MatMultAdd(asa_lev->smP, asa_next_lev->x, asa_lev->x, asa_lev->x);
1268: }
1269: /* VecDestroy(&(asa_lev->r)); */
1270: /* /\* discard x_{l+1}, b_{l+1} *\/ */
1271: /* VecDestroy(&(asa_next_lev->x)); */
1272: /* VecDestroy(&(asa_next_lev->b)); */
1274: /* 3. Postsmoothing */
1275: KSPSolve(asa_lev->smoothu, asa_lev->b, asa_lev->x);
1276: } else {
1277: /* Base case: solve directly */
1278: KSPSolve(asa_lev->smoothd, asa_lev->b, asa_lev->x);
1279: }
1280: return(0);
1281: }
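/*
   Note: gamma controls the cycle shape in the recursion above; the coarse-grid correction
   loop re-enters this routine gamma times per level, so gamma = 1 gives a V-cycle and
   gamma = 2 a W-cycle.
*/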
1284: /* -------------------------------------------------------------------------- */
1285: /*
1286:    PCGeneralSetupStage_ASA - The general setup stage of the ASA method, Algorithm
1287:    4 from the ASA paper
1289: Input Parameters:
1290: + asa - the data structure for the ASA algorithm
1291: - cand - a possible candidate vector, if NULL, will be constructed randomly
1293: Output Parameters:
1294: . cand_added - PETSC_TRUE, if new candidate vector added, PETSC_FALSE otherwise
1295: */
1298: PetscErrorCode PCGeneralSetupStage_ASA(PC_ASA *asa, Vec cand, PetscBool *cand_added)
1299: {
1301: PC_ASA_level *asa_lev, *asa_next_lev;
1303: PetscRandom rctx; /* random number generator context */
1304: PetscReal r;
1305: PetscScalar rs;
1306: PetscBool nd_fast;
1307: Vec ax;
1308: PetscScalar tmp;
1309: PetscReal norm, prevnorm = 0.0;
1310: PetscInt c;
1311: PetscInt loc_vec_low, loc_vec_high;
1312: PetscInt i;
1313: PetscBool skip_steps_d_j = PETSC_FALSE;
1314: PetscInt *idxm, *idxn;
1315: PetscScalar *v;
1316: Mat AI;
1317: Vec cand_vec, cand_vec_new;
1320: *cand_added = PETSC_FALSE;
1322: asa_lev = asa->levellist;
1323: if (asa_lev == 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL, "No levels found in PCGeneralSetupStage_ASA");
1324: asa_next_lev = asa_lev->next;
1325: if (asa_next_lev == 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL, "Just one level, not implemented yet");
1327: PetscPrintf(asa_lev->comm, "General setup stage\n");
1329: PetscLogEventBegin(PC_GeneralSetupStage_ASA,0,0,0,0);
1331: /* 1. If max. dof per node on level 2 equals K, stop */
1332: if (asa_next_lev->cand_vecs >= asa->max_dof_lev_2) {
1333: PetscPrintf(PETSC_COMM_WORLD,
1334: "Maximum dof on level 2 reached: %D\n"
1335: "Consider increasing this limit by setting it with -pc_asa_max_dof_lev_2\n",
1336: asa->max_dof_lev_2);
1337: return(0);
1338: }
1340: /* 2. Create copy of B_1 (skipped, we just replace the last column in step 8.) */
1342: if (!cand) {
1343: /* 3. Select a random x_1 */
1344: VecDestroy(&(asa_lev->x));
1345: MatGetVecs(asa_lev->A, &(asa_lev->x), 0);
1346: PetscRandomCreate(asa_lev->comm,&rctx);
1347: PetscRandomSetFromOptions(rctx);
1348: VecGetOwnershipRange(asa_lev->x, &loc_vec_low, &loc_vec_high);
1349: for (i=loc_vec_low; i<loc_vec_high; i++) {
1350: PetscRandomGetValueReal(rctx, &r);
1351: rs = r;
1352: VecSetValues(asa_lev->x, 1, &i, &rs, INSERT_VALUES);
1353: }
1354: VecAssemblyBegin(asa_lev->x);
1355: VecAssemblyEnd(asa_lev->x);
1356: PetscRandomDestroy(&rctx);
1357: } else {
1358: VecDestroy(&(asa_lev->x));
1359: VecDuplicate(cand, &(asa_lev->x));
1360: VecCopy(cand, asa_lev->x);
1361: }
1363: /* create right hand side */
1364: VecDestroy(&(asa_lev->b));
1365: MatGetVecs(asa_lev->A, &(asa_lev->b), 0);
1366: VecSet(asa_lev->b, 0.0);
1368: /* Apply mu iterations of current V-cycle */
1369: nd_fast = PETSC_FALSE;
1370: MatGetVecs(asa_lev->A, 0, &ax);
1371: for (c=0; c<asa->mu; c++) {
1372: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1374: MatMult(asa_lev->A, asa_lev->x, ax);
1375: VecDot(asa_lev->x, ax, &tmp);
1376: norm = PetscAbsScalar(tmp);
1377: if (c>0) {
1378: if (norm/prevnorm < asa->epsilon) {
1379: nd_fast = PETSC_TRUE;
1380: break;
1381: }
1382: }
1383: prevnorm = norm;
1384: }
1385: VecDestroy(&(ax));
1387: /* 4. If energy norm decreases sufficiently fast, then stop */
1388: if (nd_fast) {
1389: PetscPrintf(asa_lev->comm, "nd_fast is true\n");
1390: return(0);
1391: }
1393: /* 5. Update B_1, by adding new column x_1 */
1394: if (asa_lev->cand_vecs >= asa->max_cand_vecs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM, "Number of candidate vectors will exceed allocated storage space");
1395: else {
1396: PetscPrintf(asa_lev->comm, "Adding candidate vector %D\n", asa_lev->cand_vecs+1);
1397: }
1398: PCAddCandidateToB_ASA(asa_lev->B, asa_lev->cand_vecs, asa_lev->x, asa_lev->A);
1399: *cand_added = PETSC_TRUE;
1400: asa_lev->cand_vecs++;
1402: /* 6. loop over levels */
1403: while (asa_next_lev && asa_next_lev->next) {
1404: PetscPrintf(asa_lev->comm, "General setup stage: processing level %D\n", asa_next_lev->level);
1405: /* (a) define B_{l+1} and P_{l+1}^L */
1406: /* construct P_{l+1}^l */
1407: PCCreateTransferOp_ASA(asa_lev, PETSC_FALSE);
1409: /* construct B_{l+1} */
1410: MatDestroy(&(asa_next_lev->B));
1411: MatMatMult(asa_lev->Pt, asa_lev->B, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->B));
1412: /* do not increase asa_next_lev->cand_vecs until step (j) */
1414: /* (b) construct prolongator I_{l+1}^l = S_l P_{l+1}^l */
1415: PCSmoothProlongator_ASA(asa_lev);
1417: /* (c) construct coarse matrix A_{l+1} = (I_{l+1}^l)^T A_l I_{l+1}^l */
1418: MatDestroy(&(asa_next_lev->A));
1419: MatMatMult(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1.0, &AI);
1420: MatMatMult(asa_lev->smPt, AI, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->A));
1421: MatDestroy(&AI);
1422: /* MatPtAP(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->A)); */
1423: MatGetSize(asa_next_lev->A, NULL, &(asa_next_lev->size));
1424: PCComputeSpectralRadius_ASA(asa_next_lev);
1425: PCSetupSmoothersOnLevel_ASA(asa, asa_next_lev, asa->mu);
1427: if (!skip_steps_d_j) {
1428: /* (d) get vector x_{l+1} from last column in B_{l+1} */
1429: VecDestroy(&(asa_next_lev->x));
1430: MatGetVecs(asa_next_lev->B, 0, &(asa_next_lev->x));
1432: VecGetOwnershipRange(asa_next_lev->x, &loc_vec_low, &loc_vec_high);
1433: PetscMalloc(sizeof(PetscInt)*(loc_vec_high-loc_vec_low), &idxm);
1434: for (i=loc_vec_low; i<loc_vec_high; i++) idxm[i-loc_vec_low] = i;
1435: PetscMalloc(sizeof(PetscInt)*1, &idxn);
1436: idxn[0] = asa_next_lev->cand_vecs;
1438: PetscMalloc(sizeof(PetscScalar)*(loc_vec_high-loc_vec_low), &v);
1439: MatGetValues(asa_next_lev->B, loc_vec_high-loc_vec_low, idxm, 1, idxn, v);
1441: VecSetValues(asa_next_lev->x, loc_vec_high-loc_vec_low, idxm, v, INSERT_VALUES);
1442: VecAssemblyBegin(asa_next_lev->x);
1443: VecAssemblyEnd(asa_next_lev->x);
1445: PetscFree(v);
1446: PetscFree(idxm);
1447: PetscFree(idxn);
1449: /* (e) create bridge transfer operator P_{l+2}^{l+1}, by using the previously
1450: computed candidates */
1451: PCCreateTransferOp_ASA(asa_next_lev, PETSC_TRUE);
1453: /* (f) construct bridging prolongator I_{l+2}^{l+1} = S_{l+1} P_{l+2}^{l+1} */
1454: PCSmoothProlongator_ASA(asa_next_lev);
1456: /* (g) compute <A_{l+1} x_{l+1}, x_{l+1}> and save it */
1457: MatGetVecs(asa_next_lev->A, 0, &ax);
1458: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1459: VecDot(asa_next_lev->x, ax, &tmp);
1460: prevnorm = PetscAbsScalar(tmp);
1461: VecDestroy(&(ax));
1463: /* (h) apply mu iterations of current V-cycle */
1464: /* set asa_next_lev->b */
1465: VecDestroy(&(asa_next_lev->b));
1466: VecDestroy(&(asa_next_lev->r));
1467: MatGetVecs(asa_next_lev->A, &(asa_next_lev->b), &(asa_next_lev->r));
1468: VecSet(asa_next_lev->b, 0.0);
1469: /* apply V-cycle */
1470: for (c=0; c<asa->mu; c++) {
1471: PCApplyVcycleOnLevel_ASA(asa_next_lev, asa->gamma);
1472: }
1474: /* (i) check convergence */
1475: /* compute <A_{l+1} x_{l+1}, x_{l+1}> and save it */
1476: MatGetVecs(asa_next_lev->A, 0, &ax);
1477: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1478: VecDot(asa_next_lev->x, ax, &tmp);
1479: norm = PetscAbsScalar(tmp);
1480: VecDestroy(&(ax));
1482: if (norm/prevnorm <= PetscPowReal(asa->epsilon, (PetscReal) asa->mu)) skip_steps_d_j = PETSC_TRUE;
1484: /* (j) update candidate B_{l+1} */
1485: PCAddCandidateToB_ASA(asa_next_lev->B, asa_next_lev->cand_vecs, asa_next_lev->x, asa_next_lev->A);
1486: asa_next_lev->cand_vecs++;
1487: }
1488: /* go to next level */
1489: asa_lev = asa_lev->next;
1490: asa_next_lev = asa_next_lev->next;
1491: }
1493: /* 7. update the fine-level candidate */
1494: if (!asa_lev->prev) {
1495: /* just one coarsening level */
1496: VecDuplicate(asa_lev->x, &cand_vec);
1497: VecCopy(asa_lev->x, cand_vec);
1498: } else {
1499: cand_vec = asa_lev->x;
1500: asa_lev->x = 0;
1501: while (asa_lev->prev) {
1502: /* interpolate to higher level */
1503: MatGetVecs(asa_lev->prev->smP, 0, &cand_vec_new);
1504: MatMult(asa_lev->prev->smP, cand_vec, cand_vec_new);
1505: VecDestroy(&(cand_vec));
1506: cand_vec = cand_vec_new;
1508: /* destroy all working vectors on the way */
1509: VecDestroy(&(asa_lev->x));
1510: VecDestroy(&(asa_lev->b));
1512: /* move to next higher level */
1513: asa_lev = asa_lev->prev;
1514: }
1515: }
1516: /* 8. update B_1 by setting the last column of B_1 */
1517: PCAddCandidateToB_ASA(asa_lev->B, asa_lev->cand_vecs-1, cand_vec, asa_lev->A);
1518: VecDestroy(&(cand_vec));
1520: /* 9. create V-cycle */
1521: PCCreateVcycle_ASA(asa);
1523: PetscLogEventEnd(PC_GeneralSetupStage_ASA,0,0,0,0);
1524: return(0);
1525: }
1527: /* -------------------------------------------------------------------------- */
1528: /*
1529:    PCConstructMultigrid_ASA - creates the multigrid preconditioner; this is a fairly
1530:    involved process that runs extensive testing to compute good candidate vectors
1532: Input Parameters:
1533: . pc - the preconditioner context
1535: */
1538: PetscErrorCode PCConstructMultigrid_ASA(PC pc)
1539: {
1541: PC_ASA *asa = (PC_ASA*)pc->data;
1542: PC_ASA_level *asa_lev;
1543: PetscInt i, ls, le;
1544: PetscScalar *d;
1545: PetscBool zeroflag = PETSC_FALSE;
1546: PetscReal rnorm, rnorm_start;
1547: PetscReal rq, rq_prev;
1548: PetscScalar rq_nom, rq_denom;
1549: PetscBool cand_added;
1550: PetscRandom rctx;
1553: /* check if we should scale with diagonal */
1554: if (asa->scale_diag) {
1555: /* Get diagonal scaling factors */
1556: MatGetVecs(pc->pmat,&(asa->invsqrtdiag),0);
1557: MatGetDiagonal(pc->pmat,asa->invsqrtdiag);
1558: /* compute (inverse) sqrt of diagonal */
1559: VecGetOwnershipRange(asa->invsqrtdiag, &ls, &le);
1560: VecGetArray(asa->invsqrtdiag, &d);
1561: for (i=0; i<le-ls; i++) {
1562: if (d[i] == 0.0) {
1563: d[i] = 1.0;
1564: zeroflag = PETSC_TRUE;
1565: } else d[i] = 1./PetscSqrtReal(PetscAbsScalar(d[i]));
1566: }
1567: VecRestoreArray(asa->invsqrtdiag,&d);
1568: VecAssemblyBegin(asa->invsqrtdiag);
1569: VecAssemblyEnd(asa->invsqrtdiag);
1570: if (zeroflag) {
1571: PetscInfo(pc,"Zero detected in diagonal of matrix, using 1 at those locations\n");
1572: }
1574: /* scale the matrix and store it: D^{-1/2} A D^{-1/2} */
1575: MatDuplicate(pc->pmat, MAT_COPY_VALUES, &(asa->A)); /* probably inefficient */
1576: MatDiagonalScale(asa->A, asa->invsqrtdiag, asa->invsqrtdiag);
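    /* asa->A = D^{-1/2} A D^{-1/2} now has a unit diagonal, except where the original
       diagonal entry was zero and was replaced by 1 above */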
1577: } else {
1578: /* don't scale */
1579: asa->A = pc->pmat;
1580: }
1581: /* Initialization stage */
1582: PCInitializationStage_ASA(pc, NULL);
1584: /* get first level */
1585: asa_lev = asa->levellist;
1587: PetscRandomCreate(asa->comm,&rctx);
1588: PetscRandomSetFromOptions(rctx);
1589: VecSetRandom(asa_lev->x,rctx);
1591: /* compute starting residual */
1592: VecDestroy(&(asa_lev->r));
1593: MatGetVecs(asa_lev->A, NULL, &(asa_lev->r));
1594: MatMult(asa_lev->A, asa_lev->x, asa_lev->r);
1595: /* starting residual norm */
1596: VecNorm(asa_lev->r, NORM_2, &rnorm_start);
1597: /* compute Rayleigh quotients */
1598: VecDot(asa_lev->x, asa_lev->r, &rq_nom);
1599: VecDot(asa_lev->x, asa_lev->x, &rq_denom);
1600: rq_prev = PetscAbsScalar(rq_nom / rq_denom);
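  /* rq_prev = |<x, A x> / <x, x>| is the Rayleigh quotient of the random start vector;
     the loop below adds a candidate vector whenever a V-cycle fails to reduce this
     quotient by at least the factor rq_improve */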
1602: /* check if we have to add more candidates */
1603: for (i=0; i<asa->max_it; i++) {
1604: if (asa_lev->cand_vecs >= asa->max_cand_vecs) {
1605: /* reached limit for candidate vectors */
1606: break;
1607: }
1608: /* apply V-cycle */
1609: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1610: /* check convergence */
1611: MatMult(asa_lev->A, asa_lev->x, asa_lev->r);
1612: VecNorm(asa_lev->r, NORM_2, &rnorm);
1613: PetscPrintf(asa->comm, "After %D iterations residual norm is %f\n", i+1, rnorm);
1614: if (rnorm < rnorm_start*(asa->rtol) || rnorm < asa->abstol) {
1615: /* convergence */
1616: break;
1617: }
1618: /* compute new Rayleigh quotient */
1619: VecDot(asa_lev->x, asa_lev->r, &rq_nom);
1620: VecDot(asa_lev->x, asa_lev->x, &rq_denom);
1621: rq = PetscAbsScalar(rq_nom / rq_denom);
1622: PetscPrintf(asa->comm, "After %D iterations Rayleigh quotient of residual is %f\n", i+1, rq);
1623: /* test Rayleigh quotient decrease and add more candidate vectors if necessary */
1624: if (i && (rq > asa->rq_improve*rq_prev)) {
1625: /* improve interpolation by adding another candidate vector */
1626: PCGeneralSetupStage_ASA(asa, asa_lev->r, &cand_added);
1627: if (!cand_added) {
1628: /* either too many candidates for storage or cycle is already effective */
1629: PetscPrintf(asa->comm, "either too many candidates for storage or cycle is already effective\n");
1630: break;
1631: }
1632: VecSetRandom(asa_lev->x, rctx);
1633: rq_prev = rq*10000.; /* give the new V-cycle some grace period */
1634: } else {
1635: rq_prev = rq;
1636: }
1637: }
1639: VecDestroy(&(asa_lev->x));
1640: VecDestroy(&(asa_lev->b));
1641: PetscRandomDestroy(&rctx);
1643: asa->multigrid_constructed = PETSC_TRUE;
1644: return(0);
1645: }
1647: /* -------------------------------------------------------------------------- */
1648: /*
1649: PCApply_ASA - Applies the ASA preconditioner to a vector.
1651: Input Parameters:
1652: . pc - the preconditioner context
1653: . x - input vector
1655: Output Parameter:
1656: . y - output vector
1658: Application Interface Routine: PCApply()
1659: */
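/*
   Typical user-level usage (a sketch, not part of this file; ksp, pc, A, b, x are
   placeholders for objects created by the application):

     KSPCreate(PETSC_COMM_WORLD, &ksp);
     KSPSetOperators(ksp, A, A, DIFFERENT_NONZERO_PATTERN);
     KSPGetPC(ksp, &pc);
     PCSetType(pc, PCASA);
     PCASASetTolerances(pc, 1.e-10, 1.e-15, 1.e5, 10000);
     KSPSetFromOptions(ksp);
     KSPSolve(ksp, b, x);

   PCApply_ASA() is then called internally by the KSP iteration.
*/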
1662: PetscErrorCode PCApply_ASA(PC pc,Vec x,Vec y)
1663: {
1664: PC_ASA *asa = (PC_ASA*)pc->data;
1665: PC_ASA_level *asa_lev;
1670: if (!asa->multigrid_constructed) {
1671: PCConstructMultigrid_ASA(pc);
1672: }
1674: /* get first level */
1675: asa_lev = asa->levellist;
1677: /* set the right hand side */
1678: VecDuplicate(x, &(asa->b));
1679: VecCopy(x, asa->b);
1680: /* set starting vector */
1681: VecDestroy(&(asa->x));
1682: MatGetVecs(asa->A, &(asa->x), NULL);
1683: VecSet(asa->x, 0.0);
1685: /* set vectors */
1686: asa_lev->x = asa->x;
1687: asa_lev->b = asa->b;
1689: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1691: /* Return solution */
1692: VecCopy(asa->x, y);
1694: /* delete working vectors */
1695: VecDestroy(&(asa->x));
1696: VecDestroy(&(asa->b));
1697: asa_lev->x = NULL;
1698: asa_lev->b = NULL;
1699: return(0);
1700: }
1702: /* -------------------------------------------------------------------------- */
1703: /*
1704: PCApplyRichardson_ASA - Applies the ASA iteration to solve a linear system
1706: Input Parameters:
1707: . pc - the preconditioner context
1708: . b - the right hand side
1710: Output Parameter:
1711: . x - output vector
1713: DOES NOT WORK!!!!!
1715: */
1718: PetscErrorCode PCApplyRichardson_ASA(PC pc,Vec b,Vec x,Vec w,PetscReal rtol,PetscReal abstol, PetscReal dtol,PetscInt its,PetscBool guesszero,PetscInt *outits,PCRichardsonConvergedReason *reason)
1719: {
1720: PC_ASA *asa = (PC_ASA*)pc->data;
1721: PC_ASA_level *asa_lev;
1722: PetscInt i;
1723: PetscReal rnorm, rnorm_start;
1728: if (!asa->multigrid_constructed) {
1729: PCConstructMultigrid_ASA(pc);
1730: }
1732: /* get first level */
1733: asa_lev = asa->levellist;
1735: /* set the right hand side */
1736: VecDuplicate(b, &(asa->b));
1737: if (asa->scale_diag) {
1738: VecPointwiseMult(asa->b, asa->invsqrtdiag, b);
1739: } else {
1740: VecCopy(b, asa->b);
1741: }
1742: /* set starting vector */
1743: VecDuplicate(x, &(asa->x));
1744: VecCopy(x, asa->x);
1746: /* compute starting residual */
1747: VecDestroy(&(asa->r));
1748: MatGetVecs(asa->A, &(asa->r), NULL);
1749: MatMult(asa->A, asa->x, asa->r);
1750: VecAYPX(asa->r, -1.0, asa->b);
1751: /* starting residual norm */
1752: VecNorm(asa->r, NORM_2, &rnorm_start);
1754: /* set vectors */
1755: asa_lev->x = asa->x;
1756: asa_lev->b = asa->b;
1758: *reason = PCRICHARDSON_CONVERGED_ITS;
1759: /* **************** Full algorithm loop *********************************** */
1760: for (i=0; i<its; i++) {
1761: /* apply V-cycle */
1762: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1763: /* check convergence */
1764: MatMult(asa->A, asa->x, asa->r);
1765: VecAYPX(asa->r, -1.0, asa->b);
1766: VecNorm(asa->r, NORM_2, &rnorm);
1767: PetscPrintf(asa->comm, "After %D iterations residual norm is %f\n", i+1, rnorm);
1768: if (rnorm < rnorm_start*(rtol)) {
1769: *reason = PCRICHARDSON_CONVERGED_RTOL;
1770: break;
1771:     } else if (rnorm < abstol) {
1772: *reason = PCRICHARDSON_CONVERGED_ATOL;
1773: break;
1774: } else if (rnorm > rnorm_start*(dtol)) {
1775: *reason = PCRICHARDSON_DIVERGED_DTOL;
1776: break;
1777: }
1778: }
1779: *outits = i;
1781: /* Return solution */
1782: if (asa->scale_diag) {
1783: VecPointwiseMult(x, asa->x, asa->invsqrtdiag);
1784: } else {
1785:     VecCopy(asa->x, x);
1786: }
1788: /* delete working vectors */
1789: VecDestroy(&(asa->x));
1790: VecDestroy(&(asa->b));
1791: VecDestroy(&(asa->r));
1792: asa_lev->x = NULL;
1793: asa_lev->b = NULL;
1794: return(0);
1795: }
1797: /* -------------------------------------------------------------------------- */
1798: /*
1799: PCDestroy_ASA - Destroys the private context for the ASA preconditioner
1800: that was created with PCCreate_ASA().
1802: Input Parameter:
1803: . pc - the preconditioner context
1805: Application Interface Routine: PCDestroy()
1806: */
1809: static PetscErrorCode PCDestroy_ASA(PC pc)
1810: {
1811: PC_ASA *asa;
1812: PC_ASA_level *asa_lev;
1813: PC_ASA_level *asa_next_level;
1818: asa = (PC_ASA*)pc->data;
1819: asa_lev = asa->levellist;
1821: /* Delete top level data */
1822: PetscFree(asa->ksptype_smooth);
1823: PetscFree(asa->pctype_smooth);
1824: PetscFree(asa->ksptype_direct);
1825: PetscFree(asa->pctype_direct);
1826: PetscFree(asa->coarse_mat_type);
1828: /* this is destroyed by the levels below */
1829: /* MatDestroy(&(asa->A)); */
1830: VecDestroy(&(asa->invsqrtdiag));
1831: VecDestroy(&(asa->b));
1832: VecDestroy(&(asa->x));
1833: VecDestroy(&(asa->r));
1835: /* Destroy each of the levels */
1836: while (asa_lev) {
1837: asa_next_level = asa_lev->next;
1838: PCDestroyLevel_ASA(asa_lev);
1839: asa_lev = asa_next_level;
1840: }
1842: PetscFree(asa);
1843: return(0);
1844: }
1848: static PetscErrorCode PCSetFromOptions_ASA(PC pc)
1849: {
1850: PC_ASA *asa = (PC_ASA*)pc->data;
1851: PetscBool flg;
1853: char type[20];
1858: PetscOptionsHead("ASA options");
1859: /* convergence parameters */
1860: PetscOptionsInt("-pc_asa_nu","Number of cycles to run smoother","No manual page yet",asa->nu,&(asa->nu),&flg);
1861: PetscOptionsInt("-pc_asa_gamma","Number of cycles to run coarse grid correction","No manual page yet",asa->gamma,&(asa->gamma),&flg);
1862: PetscOptionsReal("-pc_asa_epsilon","Tolerance for the relaxation method","No manual page yet",asa->epsilon,&(asa->epsilon),&flg);
1863: PetscOptionsInt("-pc_asa_mu","Number of cycles to relax in setup stages","No manual page yet",asa->mu,&(asa->mu),&flg);
1864: PetscOptionsInt("-pc_asa_mu_initial","Number of cycles to relax for generating first candidate vector","No manual page yet",asa->mu_initial,&(asa->mu_initial),&flg);
1865: PetscOptionsInt("-pc_asa_direct_solver","For which matrix size should we use the direct solver?","No manual page yet",asa->direct_solver,&(asa->direct_solver),&flg);
1866: PetscOptionsBool("-pc_asa_scale_diag","Should we scale the matrix with the inverse of its diagonal?","No manual page yet",asa->scale_diag,&(asa->scale_diag),&flg);
1867: /* type of smoother used */
1868: PetscOptionsList("-pc_asa_smoother_ksp_type","The type of KSP to be used in the smoothers","No manual page yet",KSPList,asa->ksptype_smooth,type,20,&flg);
1869: if (flg) {
1870: PetscFree(asa->ksptype_smooth);
1871: PetscStrallocpy(type,&(asa->ksptype_smooth));
1872: }
1873: PetscOptionsList("-pc_asa_smoother_pc_type","The type of PC to be used in the smoothers","No manual page yet",PCList,asa->pctype_smooth,type,20,&flg);
1874: if (flg) {
1875: PetscFree(asa->pctype_smooth);
1876: PetscStrallocpy(type,&(asa->pctype_smooth));
1877: }
1878: PetscOptionsList("-pc_asa_direct_ksp_type","The type of KSP to be used in the direct solver","No manual page yet",KSPList,asa->ksptype_direct,type,20,&flg);
1879: if (flg) {
1880: PetscFree(asa->ksptype_direct);
1881: PetscStrallocpy(type,&(asa->ksptype_direct));
1882: }
1883: PetscOptionsList("-pc_asa_direct_pc_type","The type of PC to be used in the direct solver","No manual page yet",PCList,asa->pctype_direct,type,20,&flg);
1884: if (flg) {
1885: PetscFree(asa->pctype_direct);
1886: PetscStrallocpy(type,&(asa->pctype_direct));
1887: }
1888: /* options specific for certain smoothers */
1889: PetscOptionsReal("-pc_asa_richardson_scale","Scaling parameter for preconditioning in relaxation, if smoothing KSP is Richardson","No manual page yet",asa->richardson_scale,&(asa->richardson_scale),&flg);
1890: PetscOptionsReal("-pc_asa_sor_omega","Scaling parameter for preconditioning in relaxation, if smoothing KSP is Richardson","No manual page yet",asa->sor_omega,&(asa->sor_omega),&flg);
1891: /* options for direct solver */
1892: PetscOptionsString("-pc_asa_coarse_mat_type","The coarse level matrix type (e.g. SuperLU, MUMPS, ...)","No manual page yet",asa->coarse_mat_type, type,20,&flg);
1893: if (flg) {
1894: PetscFree(asa->coarse_mat_type);
1895: PetscStrallocpy(type,&(asa->coarse_mat_type));
1896: }
1897: /* storage allocation parameters */
1898: PetscOptionsInt("-pc_asa_max_cand_vecs","Maximum number of candidate vectors","No manual page yet",asa->max_cand_vecs,&(asa->max_cand_vecs),&flg);
1899: PetscOptionsInt("-pc_asa_max_dof_lev_2","The maximum number of degrees of freedom per node on level 2 (K in paper)","No manual page yet",asa->max_dof_lev_2,&(asa->max_dof_lev_2),&flg);
1900: /* construction parameters */
1901: PetscOptionsReal("-pc_asa_rq_improve","Threshold in RQ improvement for adding another candidate","No manual page yet",asa->rq_improve,&(asa->rq_improve),&flg);
1902: PetscOptionsTail();
1903: return(0);
1904: }
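/*
   The options above are normally supplied on the command line; as a sketch, they can
   also be set programmatically before PCSetFromOptions()/KSPSetFromOptions() is called,
   e.g. (values shown are the defaults set in PCCreate_ASA below):

     PetscOptionsSetValue("-pc_asa_max_cand_vecs", "4");
     PetscOptionsSetValue("-pc_asa_rq_improve", "0.9");
*/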
1908: static PetscErrorCode PCView_ASA(PC pc,PetscViewer viewer)
1909: {
1910: PC_ASA *asa = (PC_ASA*)pc->data;
1912: PetscBool iascii;
1913: PC_ASA_level *asa_lev = asa->levellist;
1916: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1917: if (iascii) {
1918: PetscViewerASCIIPrintf(viewer," ASA:\n");
1919: asa_lev = asa->levellist;
1920: while (asa_lev) {
1921: if (!asa_lev->next) {
1922: PetscViewerASCIIPrintf(viewer,"Coarse grid solver -- level %D -------------------------------\n",0);
1923: } else {
1924: PetscViewerASCIIPrintf(viewer,"Down solver (pre-smoother) on level ? -------------------------------\n");
1925: }
1926: PetscViewerASCIIPushTab(viewer);
1927: KSPView(asa_lev->smoothd,viewer);
1928: PetscViewerASCIIPopTab(viewer);
1929: if (asa_lev->next && asa_lev->smoothd == asa_lev->smoothu) {
1930: PetscViewerASCIIPrintf(viewer,"Up solver (post-smoother) same as down solver (pre-smoother)\n");
1931: } else if (asa_lev->next) {
1932: PetscViewerASCIIPrintf(viewer,"Up solver (post-smoother) on level ? -------------------------------\n");
1933: PetscViewerASCIIPushTab(viewer);
1934: KSPView(asa_lev->smoothu,viewer);
1935: PetscViewerASCIIPopTab(viewer);
1936: }
1937: asa_lev = asa_lev->next;
1938: }
1939: }
1940: return(0);
1941: }
1943: /* -------------------------------------------------------------------------- */
1944: /*
1945:    PCCreate_ASA - Creates an ASA preconditioner context, PC_ASA,
1946: and sets this as the private data within the generic preconditioning
1947: context, PC, that was created within PCCreate().
1949: Input Parameter:
1950: . pc - the preconditioner context
1952: Application Interface Routine: PCCreate()
1953: */
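/*
   This routine is invoked when the user calls PCSetType(pc, PCASA) (or gives -pc_type asa
   on the command line). It only fills in the function table and default parameters; the
   actual multigrid hierarchy is built lazily by PCConstructMultigrid_ASA() on first use.
*/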
1956: PETSC_EXTERN PetscErrorCode PCCreate_ASA(PC pc)
1957: {
1959: PC_ASA *asa;
1964: /*
1965: Set the pointers for the functions that are provided above.
1966: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
1967: are called, they will automatically call these functions. Note we
1968: choose not to provide a couple of these functions since they are
1969: not needed.
1970: */
1971: /* pc->ops->applytranspose = PCApply_ASA;*/
1972: pc->ops->apply = PCApply_ASA;
1973: pc->ops->applyrichardson = PCApplyRichardson_ASA;
1974: pc->ops->setup = 0;
1975: pc->ops->destroy = PCDestroy_ASA;
1976: pc->ops->setfromoptions = PCSetFromOptions_ASA;
1977: pc->ops->view = PCView_ASA;
1979:   /* Set the data pointer to 0 */
1980: pc->data = (void*)0;
1982: PetscObjectComposeFunction((PetscObject)pc,"PCASASetTolerances_C",PCASASetTolerances_ASA);
1984: /* register events */
1985: if (!asa_events_registered) {
1986: PetscLogEventRegister("PCInitializationStage_ASA", PC_CLASSID,&PC_InitializationStage_ASA);
1987: PetscLogEventRegister("PCGeneralSetupStage_ASA", PC_CLASSID,&PC_GeneralSetupStage_ASA);
1988: PetscLogEventRegister("PCCreateTransferOp_ASA", PC_CLASSID,&PC_CreateTransferOp_ASA);
1989: PetscLogEventRegister("PCCreateVcycle_ASA", PC_CLASSID,&PC_CreateVcycle_ASA);
1991: asa_events_registered = PETSC_TRUE;
1992: }
1994: /* Create new PC_ASA object */
1995: PetscNewLog(pc,PC_ASA,&asa);
1996: pc->data = (void*)asa;
1998: /* WORK: find some better initial values */
1999: asa->nu = 3;
2000: asa->gamma = 1;
2001: asa->epsilon = 1e-4;
2002: asa->mu = 3;
2003: asa->mu_initial = 20;
2004: asa->direct_solver = 100;
2005: asa->scale_diag = PETSC_TRUE;
2007: PetscStrallocpy(KSPRICHARDSON, (char**) &(asa->ksptype_smooth));
2008: PetscStrallocpy(PCSOR, (char**) &(asa->pctype_smooth));
2010: asa->smoother_rtol = 1e-10;
2011: asa->smoother_abstol = 1e-20;
2012: asa->smoother_dtol = PETSC_DEFAULT;
2014: PetscStrallocpy(KSPPREONLY, (char**) &(asa->ksptype_direct));
2015: PetscStrallocpy(PCREDUNDANT, (char**) &(asa->pctype_direct));
2017: asa->direct_rtol = 1e-10;
2018: asa->direct_abstol = 1e-20;
2019: asa->direct_dtol = PETSC_DEFAULT;
2020: asa->richardson_scale = PETSC_DECIDE;
2021: asa->sor_omega = PETSC_DECIDE;
2023: PetscStrallocpy(MATSAME, (char**) &(asa->coarse_mat_type));
2025: asa->max_cand_vecs = 4;
2026: asa->max_dof_lev_2 = 640; /* I don't think this parameter really matters, 640 should be enough for everyone! */
2028: asa->multigrid_constructed = PETSC_FALSE;
2030: asa->rtol = 1e-10;
2031: asa->abstol = 1e-15;
2032: asa->divtol = 1e5;
2033: asa->max_it = 10000;
2034: asa->rq_improve = 0.9;
2036: asa->A = 0;
2037: asa->invsqrtdiag = 0;
2038: asa->b = 0;
2039: asa->x = 0;
2040: asa->r = 0;
2042: asa->levels = 0;
2043: asa->levellist = 0;
2045: PetscObjectGetComm((PetscObject)pc,&asa->comm);
2046: return(0);
2047: }