Actual source code: asa.c
petsc-3.3-p7 2013-05-11
2: /* --------------------------------------------------------------------
4: Contributed by Arvid Bessen, Columbia University, June 2007
5:
6: This file implements a ASA preconditioner in PETSc as part of PC.
8: The adaptive smoothed aggregation algorithm is described in the paper
9: "Adaptive Smoothed Aggregation (ASA)", M. Brezina, R. Falgout, S. MacLachlan,
10: T. Manteuffel, S. McCormick, and J. Ruge, SIAM Journal on Scientific Computing,
11: SISC Volume 25 Issue 6, Pages 1896-1920.
13: For an example usage of this preconditioner, see, e.g.
14: $PETSC_DIR/src/ksp/ksp/examples/tutorials/ex38.c ex39.c
15: and other files in that directory.
17: This code is still somewhat experimental. A number of improvements would be
18: - keep vectors allocated on each level, instead of destroying them
19: (see mainly PCApplyVcycleOnLevel_ASA)
20: - in PCCreateTransferOp_ASA we get all of the submatrices at once, this could
21: be optimized by differentiating between local and global matrices
22: - the code does not handle it gracefully if there is just one level
23: - if relaxation is sufficient, exit of PCInitializationStage_ASA is not
24: completely clean
25: - default values could be more reasonable, especially for parallel solves,
26: where we need a parallel LU or similar
27: - the richardson scaling parameter is somewhat special, should be treated in a
28: good default way
29: - a number of parameters for smoother (sor_omega, etc.) that we store explicitly
30: could be kept in the respective smoothers themselves
31: - some parameters have to be set via command line options, there are no direct
32: function calls available
33: - numerous other stuff
35: Example runs in parallel would be with parameters like
36: mpiexec ./program -pc_asa_coarse_pc_factor_mat_solver_package mumps -pc_asa_direct_solver 200
37: -pc_asa_max_cand_vecs 4 -pc_asa_mu_initial 50 -pc_asa_richardson_scale 1.0
38: -pc_asa_rq_improve 0.9 -asa_smoother_pc_type asm -asa_smoother_sub_pc_type sor
40: -------------------------------------------------------------------- */
42: /*
43: This defines the data structures for the smoothed aggregation procedure
44: */
45: #include <../src/ksp/pc/impls/asa/asa.h>
46: #include <petscblaslapack.h>
48: /* -------------------------------------------------------------------------- */
50: /* Event logging */
52: PetscLogEvent PC_InitializationStage_ASA, PC_GeneralSetupStage_ASA;
53: PetscLogEvent PC_CreateTransferOp_ASA, PC_CreateVcycle_ASA;
54: PetscBool asa_events_registered = PETSC_FALSE;
58: /*@C
59: PCASASetTolerances - Sets the convergence thresholds for ASA algorithm
61: Collective on PC
63: Input Parameter:
64: + pc - the context
65: . rtol - the relative convergence tolerance
66: (relative decrease in the residual norm)
67: . abstol - the absolute convergence tolerance
68: (absolute size of the residual norm)
69: . dtol - the divergence tolerance
70: (amount residual can increase before KSPDefaultConverged()
71: concludes that the method is diverging)
72: - maxits - maximum number of iterations to use
74: Notes:
75: Use PETSC_DEFAULT to retain the default value of any of the tolerances.
77: Level: advanced
78: @*/
79: PetscErrorCode PCASASetTolerances(PC pc, PetscReal rtol, PetscReal abstol,PetscReal dtol, PetscInt maxits)
80: {
85: PetscTryMethod(pc,"PCASASetTolerances_C",(PC,PetscReal,PetscReal,PetscReal,PetscInt),(pc,rtol,abstol,dtol,maxits));
86: return(0);
87: }
89: EXTERN_C_BEGIN
92: PetscErrorCode PCASASetTolerances_ASA(PC pc, PetscReal rtol, PetscReal abstol,PetscReal dtol, PetscInt maxits)
93: {
94: PC_ASA *asa = (PC_ASA *) pc->data;
98: if (rtol != PETSC_DEFAULT) asa->rtol = rtol;
99: if (abstol != PETSC_DEFAULT) asa->abstol = abstol;
100: if (dtol != PETSC_DEFAULT) asa->divtol = dtol;
101: if (maxits != PETSC_DEFAULT) asa->max_it = maxits;
102: return(0);
103: }
104: EXTERN_C_END
108: /*
109: PCCreateLevel_ASA - Creates one level for the ASA algorithm
111: Input Parameters:
112: + level - current level
113: . comm - MPI communicator object
114: . next - pointer to next level
115: . prev - pointer to previous level
116: . ksptype - the KSP type for the smoothers on this level
117: - pctype - the PC type for the smoothers on this level
119: Output Parameters:
120: . new_asa_lev - the newly created level
122: .keywords: ASA, create, levels, multigrid
123: */
124: PetscErrorCode PCCreateLevel_ASA(PC_ASA_level **new_asa_lev, int level,MPI_Comm comm, PC_ASA_level *prev,
125: PC_ASA_level *next,KSPType ksptype, PCType pctype)
126: {
128: PC_ASA_level *asa_lev;
129:
131: PetscMalloc(sizeof(PC_ASA_level), &asa_lev);
133: asa_lev->level = level;
134: asa_lev->size = 0;
136: asa_lev->A = 0;
137: asa_lev->B = 0;
138: asa_lev->x = 0;
139: asa_lev->b = 0;
140: asa_lev->r = 0;
141:
142: asa_lev->dm = 0;
143: asa_lev->aggnum = 0;
144: asa_lev->agg = 0;
145: asa_lev->loc_agg_dofs = 0;
146: asa_lev->agg_corr = 0;
147: asa_lev->bridge_corr = 0;
148:
149: asa_lev->P = 0;
150: asa_lev->Pt = 0;
151: asa_lev->smP = 0;
152: asa_lev->smPt = 0;
154: asa_lev->comm = comm;
156: asa_lev->smoothd = 0;
157: asa_lev->smoothu = 0;
159: asa_lev->prev = prev;
160: asa_lev->next = next;
161:
162: *new_asa_lev = asa_lev;
163: return(0);
164: }
168: PetscErrorCode PrintResNorm(Mat A, Vec x, Vec b, Vec r)
169: {
171: PetscBool destroyr = PETSC_FALSE;
172: PetscReal resnorm;
173: MPI_Comm Acomm;
176: if (!r) {
177: MatGetVecs(A, PETSC_NULL, &r);
178: destroyr = PETSC_TRUE;
179: }
180: MatMult(A, x, r);
181: VecAYPX(r, -1.0, b);
182: VecNorm(r, NORM_2, &resnorm);
183: PetscObjectGetComm((PetscObject) A, &Acomm);
184: PetscPrintf(Acomm, "Residual norm is %f.\n", resnorm);
186: if (destroyr) {
187: VecDestroy(&r);
188: }
189:
190: return(0);
191: }
195: PetscErrorCode PrintEnergyNormOfDiff(Mat A, Vec x, Vec y)
196: {
198: Vec vecdiff, Avecdiff;
199: PetscScalar dotprod;
200: PetscReal dotabs;
201: MPI_Comm Acomm;
204: VecDuplicate(x, &vecdiff);
205: VecWAXPY(vecdiff, -1.0, x, y);
206: MatGetVecs(A, PETSC_NULL, &Avecdiff);
207: MatMult(A, vecdiff, Avecdiff);
208: VecDot(vecdiff, Avecdiff, &dotprod);
209: dotabs = PetscAbsScalar(dotprod);
210: PetscObjectGetComm((PetscObject) A, &Acomm);
211: PetscPrintf(Acomm, "Energy norm %f.\n", dotabs);
212: VecDestroy(&vecdiff);
213: VecDestroy(&Avecdiff);
214: return(0);
215: }
217: /* -------------------------------------------------------------------------- */
218: /*
219: PCDestroyLevel_ASA - Destroys one level of the ASA preconditioner
221: Input Parameter:
222: . asa_lev - pointer to level that should be destroyed
224: */
227: PetscErrorCode PCDestroyLevel_ASA(PC_ASA_level *asa_lev)
228: {
232: MatDestroy(&(asa_lev->A));
233: MatDestroy(&(asa_lev->B));
234: VecDestroy(&(asa_lev->x));
235: VecDestroy(&(asa_lev->b));
236: VecDestroy(&(asa_lev->r));
238: if (asa_lev->dm) {DMDestroy(&asa_lev->dm);}
240: MatDestroy(&(asa_lev->agg));
241: PetscFree(asa_lev->loc_agg_dofs);
242: MatDestroy(&(asa_lev->agg_corr));
243: MatDestroy(&(asa_lev->bridge_corr));
245: MatDestroy(&(asa_lev->P));
246: MatDestroy(&(asa_lev->Pt));
247: MatDestroy(&(asa_lev->smP));
248: MatDestroy(&(asa_lev->smPt));
250: if (asa_lev->smoothd != asa_lev->smoothu) {
251: if (asa_lev->smoothd) {KSPDestroy(&asa_lev->smoothd);}
252: }
253: if (asa_lev->smoothu) {KSPDestroy(&asa_lev->smoothu);}
255: PetscFree(asa_lev);
256: return(0);
257: }
259: /* -------------------------------------------------------------------------- */
260: /*
261: PCComputeSpectralRadius_ASA - Computes the spectral radius of asa_lev->A
262: and stores it it asa_lev->spec_rad
264: Input Parameters:
265: . asa_lev - the level we are treating
267: Compute spectral radius with sqrt(||A||_1 ||A||_inf) >= ||A||_2 >= rho(A)
269: */
272: PetscErrorCode PCComputeSpectralRadius_ASA(PC_ASA_level *asa_lev)
273: {
275: PetscReal norm_1, norm_inf;
278: MatNorm(asa_lev->A, NORM_1, &norm_1);
279: MatNorm(asa_lev->A, NORM_INFINITY, &norm_inf);
280: asa_lev->spec_rad = PetscSqrtReal(norm_1*norm_inf);
281: return(0);
282: }
286: PetscErrorCode PCSetRichardsonScale_ASA(KSP ksp, PetscReal spec_rad, PetscReal richardson_scale) {
288: PC pc;
289: PetscBool flg;
290: PetscReal spec_rad_inv;
293: KSPSetInitialGuessNonzero(ksp, PETSC_TRUE);
294: if (richardson_scale != PETSC_DECIDE) {
295: KSPRichardsonSetScale(ksp, richardson_scale);
296: } else {
297: KSPGetPC(ksp, &pc);
298: PetscObjectTypeCompare((PetscObject)(pc), PCNONE, &flg);
299: if (flg) {
300: /* WORK: this is just an educated guess. Any number between 0 and 2/rho(A)
301: should do. asa_lev->spec_rad has to be an upper bound on rho(A). */
302: spec_rad_inv = 1.0/spec_rad;
303: KSPRichardsonSetScale(ksp, spec_rad_inv);
304: } else {
305: SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP, "Unknown PC type for smoother. Please specify scaling factor with -pc_asa_richardson_scale\n");
306: }
307: }
308: return(0);
309: }
313: PetscErrorCode PCSetSORomega_ASA(PC pc, PetscReal sor_omega)
314: {
318: if (sor_omega != PETSC_DECIDE) {
319: PCSORSetOmega(pc, sor_omega);
320: }
321: return(0);
322: }
325: /* -------------------------------------------------------------------------- */
326: /*
327: PCSetupSmoothersOnLevel_ASA - Creates the smoothers of the level.
328: We assume that asa_lev->A and asa_lev->spec_rad are correctly computed
330: Input Parameters:
331: + asa - the data structure for the ASA preconditioner
332: . asa_lev - the level we are treating
333: - maxits - maximum number of iterations to use
334: */
337: PetscErrorCode PCSetupSmoothersOnLevel_ASA(PC_ASA *asa, PC_ASA_level *asa_lev, PetscInt maxits)
338: {
339: PetscErrorCode ierr;
340: PetscBool flg;
341: PC pc;
344: /* destroy old smoothers */
345: if (asa_lev->smoothu && asa_lev->smoothu != asa_lev->smoothd) {
346: KSPDestroy(&asa_lev->smoothu);
347: }
348: asa_lev->smoothu = 0;
349: if (asa_lev->smoothd) {
350: KSPDestroy(&asa_lev->smoothd);
351: }
352: asa_lev->smoothd = 0;
353: /* create smoothers */
354: KSPCreate(asa_lev->comm,&asa_lev->smoothd);
355: KSPSetType(asa_lev->smoothd, asa->ksptype_smooth);
356: KSPGetPC(asa_lev->smoothd,&pc);
357: PCSetType(pc,asa->pctype_smooth);
359: /* set up problems for smoothers */
360: KSPSetOperators(asa_lev->smoothd, asa_lev->A, asa_lev->A, DIFFERENT_NONZERO_PATTERN);
361: KSPSetTolerances(asa_lev->smoothd, asa->smoother_rtol, asa->smoother_abstol, asa->smoother_dtol, maxits);
362: PetscObjectTypeCompare((PetscObject)(asa_lev->smoothd), KSPRICHARDSON, &flg);
363: if (flg) {
364: /* special parameters for certain smoothers */
365: KSPSetInitialGuessNonzero(asa_lev->smoothd, PETSC_TRUE);
366: KSPGetPC(asa_lev->smoothd, &pc);
367: PetscObjectTypeCompare((PetscObject)pc, PCSOR, &flg);
368: if (flg) {
369: PCSetSORomega_ASA(pc, asa->sor_omega);
370: } else {
371: /* just set asa->richardson_scale to get some very basic smoother */
372: PCSetRichardsonScale_ASA(asa_lev->smoothd, asa_lev->spec_rad, asa->richardson_scale);
373: }
374: /* this would be the place to add support for other preconditioners */
375: }
376: KSPSetOptionsPrefix(asa_lev->smoothd, "asa_smoother_");
377: KSPSetFromOptions(asa_lev->smoothd);
378: /* set smoothu equal to smoothd, this could change later */
379: asa_lev->smoothu = asa_lev->smoothd;
380: return(0);
381: }
383: /* -------------------------------------------------------------------------- */
384: /*
385: PCSetupDirectSolversOnLevel_ASA - Creates the direct solvers on the coarsest level.
386: We assume that asa_lev->A and asa_lev->spec_rad are correctly computed
388: Input Parameters:
389: + asa - the data structure for the ASA preconditioner
390: . asa_lev - the level we are treating
391: - maxits - maximum number of iterations to use
392: */
395: PetscErrorCode PCSetupDirectSolversOnLevel_ASA(PC_ASA *asa, PC_ASA_level *asa_lev, PetscInt maxits)
396: {
397: PetscErrorCode ierr;
398: PetscBool flg;
399: PetscMPIInt comm_size;
400: PC pc;
403: if (asa_lev->smoothu && asa_lev->smoothu != asa_lev->smoothd) {
404: KSPDestroy(&asa_lev->smoothu);
405: }
406: asa_lev->smoothu = 0;
407: if (asa_lev->smoothd) {
408: KSPDestroy(&asa_lev->smoothd);
409: asa_lev->smoothd = 0;
410: }
411: PetscStrcmp(asa->ksptype_direct, KSPPREONLY, &flg);
412: if (flg) {
413: PetscStrcmp(asa->pctype_direct, PCLU, &flg);
414: if (flg) {
415: MPI_Comm_size(asa_lev->comm, &comm_size);
416: if (comm_size > 1) {
417: /* the LU PC will call MatSolve, we may have to set the correct type for the matrix
418: to have support for this in parallel */
419: MatConvert(asa_lev->A, asa->coarse_mat_type, MAT_REUSE_MATRIX, &(asa_lev->A));
420: }
421: }
422: }
423: /* create new solvers */
424: KSPCreate(asa_lev->comm,&asa_lev->smoothd);
425: KSPSetType(asa_lev->smoothd, asa->ksptype_direct);
426: KSPGetPC(asa_lev->smoothd,&pc);
427: PCSetType(pc,asa->pctype_direct);
428: /* set up problems for direct solvers */
429: KSPSetOperators(asa_lev->smoothd, asa_lev->A, asa_lev->A, DIFFERENT_NONZERO_PATTERN);
430: KSPSetTolerances(asa_lev->smoothd, asa->direct_rtol, asa->direct_abstol, asa->direct_dtol, maxits);
431: /* user can set any option by using -pc_asa_direct_xxx */
432: KSPSetOptionsPrefix(asa_lev->smoothd, "asa_coarse_");
433: KSPSetFromOptions(asa_lev->smoothd);
434: /* set smoothu equal to 0, not used */
435: asa_lev->smoothu = 0;
436: return(0);
437: }
439: /* -------------------------------------------------------------------------- */
440: /*
441: PCCreateAggregates_ASA - Creates the aggregates
443: Input Parameters:
444: . asa_lev - the level for which we should create the projection matrix
446: */
449: PetscErrorCode PCCreateAggregates_ASA(PC_ASA_level *asa_lev)
450: {
451: PetscInt m,n, m_loc,n_loc;
452: PetscInt m_loc_s, m_loc_e;
453: const PetscScalar one = 1.0;
454: PetscErrorCode ierr;
457: /* Create nodal aggregates A_i^l */
458: /* we use the DM grid information for that */
459: if (asa_lev->dm) {
460: /* coarsen DM and get the restriction matrix */
461: DMCoarsen(asa_lev->dm, MPI_COMM_NULL, &(asa_lev->next->dm));
462: DMCreateAggregates(asa_lev->next->dm, asa_lev->dm, &(asa_lev->agg));
463: MatGetSize(asa_lev->agg, &m, &n);
464: MatGetLocalSize(asa_lev->agg, &m_loc, &n_loc);
465: if (n!=asa_lev->size) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"DM interpolation matrix has incorrect size!\n");
466: asa_lev->next->size = m;
467: asa_lev->aggnum = m;
468: /* create the correlators, right now just identity matrices */
469: MatCreateAIJ(asa_lev->comm, n_loc, n_loc, n, n, 1, PETSC_NULL, 1, PETSC_NULL,&(asa_lev->agg_corr));
470: MatGetOwnershipRange(asa_lev->agg_corr, &m_loc_s, &m_loc_e);
471: for (m=m_loc_s; m<m_loc_e; m++) {
472: MatSetValues(asa_lev->agg_corr, 1, &m, 1, &m, &one, INSERT_VALUES);
473: }
474: MatAssemblyBegin(asa_lev->agg_corr, MAT_FINAL_ASSEMBLY);
475: MatAssemblyEnd(asa_lev->agg_corr, MAT_FINAL_ASSEMBLY);
476: /* MatShift(asa_lev->agg_corr, 1.0); */
477: } else {
478: /* somehow define the aggregates without knowing the geometry */
479: /* future WORK */
480: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP, "Currently pure algebraic coarsening is not supported!");
481: }
482: return(0);
483: }
485: /* -------------------------------------------------------------------------- */
486: /*
487: PCCreateTransferOp_ASA - Creates the transfer operator P_{l+1}^l for current level
489: Input Parameters:
490: + asa_lev - the level for which should create the transfer operator
491: - construct_bridge - true, if we should construct a bridge operator, false for normal prolongator
493: If we add a second, third, ... candidate vector (i.e. more than one column in B), we
494: have to relate the additional dimensions to the original aggregates. This is done through
495: the "aggregate correlators" agg_corr and bridge_corr.
496: The aggregate that is used in the construction is then given by
497: asa_lev->agg * asa_lev->agg_corr
498: for the regular prolongator construction and
499: asa_lev->agg * asa_lev->bridge_corr
500: for the bridging prolongator constructions.
501: */
504: PetscErrorCode PCCreateTransferOp_ASA(PC_ASA_level *asa_lev, PetscBool construct_bridge)
505: {
508: const PetscReal Ca = 1e-3;
509: PetscReal cutoff;
510: PetscInt nodes_on_lev;
512: Mat logical_agg;
513: PetscInt mat_agg_loc_start, mat_agg_loc_end, mat_agg_loc_size;
514: PetscInt a;
515: const PetscInt *agg = 0;
516: PetscInt **agg_arr = 0;
518: IS *idxm_is_B_arr = 0;
519: PetscInt *idxn_B = 0;
520: IS idxn_is_B, *idxn_is_B_arr = 0;
522: Mat *b_submat_arr = 0;
524: PetscScalar *b_submat = 0, *b_submat_tp = 0;
525: PetscInt *idxm = 0, *idxn = 0;
526: PetscInt cand_vecs_num;
527: PetscInt *cand_vec_length = 0;
528: PetscInt max_cand_vec_length = 0;
529: PetscScalar **b_orth_arr = 0;
531: PetscInt i,j;
533: PetscScalar *tau = 0, *work = 0;
534: PetscBLASInt info,b1,b2;
536: PetscInt max_cand_vecs_to_add;
537: PetscInt *new_loc_agg_dofs = 0;
539: PetscInt total_loc_cols = 0;
540: PetscReal norm;
542: PetscInt a_loc_m, a_loc_n;
543: PetscInt mat_loc_col_start, mat_loc_col_end, mat_loc_col_size;
544: PetscInt loc_agg_dofs_sum;
545: PetscInt row, col;
546: PetscScalar val;
547: PetscMPIInt comm_size, comm_rank;
548: PetscInt *loc_cols = 0;
551: PetscLogEventBegin(PC_CreateTransferOp_ASA,0,0,0,0);
553: MatGetSize(asa_lev->B, &nodes_on_lev, PETSC_NULL);
555: /* If we add another candidate vector, we want to be able to judge, how much the new candidate
556: improves our current projection operators and whether it is worth adding it.
557: This is the precomputation necessary for implementing Notes (4.1) to (4.7).
558: We require that all candidate vectors x stored in B are normalized such that
559: <A x, x> = 1 and we thus do not have to compute this.
560: For each aggregate A we can now test condition (4.5) and (4.6) by computing
561: || quantity to check ||_{A}^2 <= cutoff * card(A)/N_l */
562: cutoff = Ca/(asa_lev->spec_rad);
564: /* compute logical aggregates by using the correlators */
565: if (construct_bridge) {
566: /* construct bridging operator */
567: MatMatMult(asa_lev->agg, asa_lev->bridge_corr, MAT_INITIAL_MATRIX, 1.0, &logical_agg);
568: } else {
569: /* construct "regular" prolongator */
570: MatMatMult(asa_lev->agg, asa_lev->agg_corr, MAT_INITIAL_MATRIX, 1.0, &logical_agg);
571: }
573: /* destroy correlator matrices for next level, these will be rebuilt in this routine */
574: if (asa_lev->next) {
575: MatDestroy(&(asa_lev->next->agg_corr));
576: MatDestroy(&(asa_lev->next->bridge_corr));
577: }
579: /* find out the correct local row indices */
580: MatGetOwnershipRange(logical_agg, &mat_agg_loc_start, &mat_agg_loc_end);
581: mat_agg_loc_size = mat_agg_loc_end-mat_agg_loc_start;
582:
583: cand_vecs_num = asa_lev->cand_vecs;
585: /* construct column indices idxn_B for reading from B */
586: PetscMalloc(sizeof(PetscInt)*(cand_vecs_num), &idxn_B);
587: for (i=0; i<cand_vecs_num; i++) {
588: idxn_B[i] = i;
589: }
590: ISCreateGeneral(asa_lev->comm, asa_lev->cand_vecs, idxn_B,PETSC_COPY_VALUES, &idxn_is_B);
591: PetscFree(idxn_B);
592: PetscMalloc(sizeof(IS)*mat_agg_loc_size, &idxn_is_B_arr);
593: for (a=0; a<mat_agg_loc_size; a++) {
594: idxn_is_B_arr[a] = idxn_is_B;
595: }
596: /* allocate storage for row indices idxm_B */
597: PetscMalloc(sizeof(IS)*mat_agg_loc_size, &idxm_is_B_arr);
599: /* Storage for the orthogonalized submatrices of B and their sizes */
600: PetscMalloc(sizeof(PetscInt)*mat_agg_loc_size, &cand_vec_length);
601: PetscMalloc(sizeof(PetscScalar*)*mat_agg_loc_size, &b_orth_arr);
602: /* Storage for the information about each aggregate */
603: PetscMalloc(sizeof(PetscInt*)*mat_agg_loc_size, &agg_arr);
604: /* Storage for the number of candidate vectors that are orthonormal and used in each submatrix */
605: PetscMalloc(sizeof(PetscInt)*mat_agg_loc_size, &new_loc_agg_dofs);
607: /* loop over local aggregates */
608: for (a=0; a<mat_agg_loc_size; a++) {
609: /* get info about current aggregate, this gives the rows we have to get from B */
610: MatGetRow(logical_agg, a+mat_agg_loc_start, &cand_vec_length[a], &agg, 0);
611: /* copy aggregate information */
612: PetscMalloc(sizeof(PetscInt)*cand_vec_length[a], &(agg_arr[a]));
613: PetscMemcpy(agg_arr[a], agg, sizeof(PetscInt)*cand_vec_length[a]);
614: /* restore row */
615: MatRestoreRow(logical_agg, a+mat_agg_loc_start, &cand_vec_length[a], &agg, 0);
616:
617: /* create index sets */
618: ISCreateGeneral(PETSC_COMM_SELF, cand_vec_length[a], agg_arr[a],PETSC_COPY_VALUES, &(idxm_is_B_arr[a]));
619: /* maximum candidate vector length */
620: if (cand_vec_length[a] > max_cand_vec_length) { max_cand_vec_length = cand_vec_length[a]; }
621: }
622: /* destroy logical_agg, no longer needed */
623: MatDestroy(&logical_agg);
625: /* get the entries for aggregate from B */
626: MatGetSubMatrices(asa_lev->B, mat_agg_loc_size, idxm_is_B_arr, idxn_is_B_arr, MAT_INITIAL_MATRIX, &b_submat_arr);
627:
628: /* clean up all the index sets */
629: for (a=0; a<mat_agg_loc_size; a++) { ISDestroy(&idxm_is_B_arr[a]); }
630: PetscFree(idxm_is_B_arr);
631: ISDestroy(&idxn_is_B);
632: PetscFree(idxn_is_B_arr);
633:
634: /* storage for the values from each submatrix */
635: PetscMalloc(sizeof(PetscScalar)*max_cand_vec_length*cand_vecs_num, &b_submat);
636: PetscMalloc(sizeof(PetscScalar)*max_cand_vec_length*cand_vecs_num, &b_submat_tp);
637: PetscMalloc(sizeof(PetscInt)*max_cand_vec_length, &idxm);
638: for (i=0; i<max_cand_vec_length; i++) { idxm[i] = i; }
639: PetscMalloc(sizeof(PetscInt)*cand_vecs_num, &idxn);
640: for (i=0; i<cand_vecs_num; i++) { idxn[i] = i; }
641: /* work storage for QR algorithm */
642: PetscMalloc(sizeof(PetscScalar)*max_cand_vec_length, &tau);
643: PetscMalloc(sizeof(PetscScalar)*cand_vecs_num, &work);
645: /* orthogonalize all submatrices and store them in b_orth_arr */
646: for (a=0; a<mat_agg_loc_size; a++) {
647: /* Get the entries for aggregate from B. This is row ordered (although internally
648: it is column ordered and we will waste some energy transposing it).
649: WORK: use something like MatGetArray(b_submat_arr[a], &b_submat) but be really
650: careful about all the different matrix types */
651: MatGetValues(b_submat_arr[a], cand_vec_length[a], idxm, cand_vecs_num, idxn, b_submat);
653: if (construct_bridge) {
654: /* if we are constructing a bridging restriction/interpolation operator, we have
655: to use the same number of dofs as in our previous construction */
656: max_cand_vecs_to_add = asa_lev->loc_agg_dofs[a];
657: } else {
658: /* for a normal restriction/interpolation operator, we should make sure that we
659: do not create linear dependence by accident */
660: max_cand_vecs_to_add = PetscMin(cand_vec_length[a], cand_vecs_num);
661: }
663: /* We use LAPACK to compute the QR decomposition of b_submat. For LAPACK we have to
664: transpose the matrix. We might throw out some column vectors during this process.
665: We are keeping count of the number of column vectors that we use (and therefore the
666: number of dofs on the lower level) in new_loc_agg_dofs[a]. */
667: new_loc_agg_dofs[a] = 0;
668: for (j=0; j<max_cand_vecs_to_add; j++) {
669: /* check for condition (4.5) */
670: norm = 0.0;
671: for (i=0; i<cand_vec_length[a]; i++) {
672: norm += PetscRealPart(b_submat[i*cand_vecs_num+j])*PetscRealPart(b_submat[i*cand_vecs_num+j])
673: + PetscImaginaryPart(b_submat[i*cand_vecs_num+j])*PetscImaginaryPart(b_submat[i*cand_vecs_num+j]);
674: }
675: /* only add candidate vector if bigger than cutoff or first candidate */
676: if ((!j) || (norm > cutoff*((PetscReal) cand_vec_length[a])/((PetscReal) nodes_on_lev))) {
677: /* passed criterion (4.5), we have not implemented criterion (4.6) yet */
678: for (i=0; i<cand_vec_length[a]; i++) {
679: b_submat_tp[new_loc_agg_dofs[a]*cand_vec_length[a]+i] = b_submat[i*cand_vecs_num+j];
680: }
681: new_loc_agg_dofs[a]++;
682: }
683: /* #ifdef PCASA_VERBOSE */
684: else {
685: PetscPrintf(asa_lev->comm, "Cutoff criteria invoked\n");
686: }
687: /* #endif */
688: }
690: CHKMEMQ;
691: /* orthogonalize b_submat_tp using the QR algorithm from LAPACK */
692: b1 = PetscBLASIntCast(*(cand_vec_length+a));
693: b2 = PetscBLASIntCast(*(new_loc_agg_dofs+a));
695: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
696: #if !defined(PETSC_MISSING_LAPACK_GEQRF)
697: LAPACKgeqrf_(&b1, &b2, b_submat_tp, &b1, tau, work, &b2, &info);
698: if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB, "LAPACKgeqrf_ LAPACK routine failed");
699: #else
700: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"geqrf() - Lapack routine is unavailable\n");
701: #endif
702: #if !defined(PETSC_MISSING_LAPACK_ORGQR)
703: LAPACKungqr_(&b1, &b2, &b2, b_submat_tp, &b1, tau, work, &b2, &info);
704: #else
705: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ORGQR - Lapack routine is unavailable\nIf linking with ESSL you MUST also link with full LAPACK, for example\nuse ./configure with --with-blas-lib=libessl.a --with-lapack-lib=/usr/local/lib/liblapack.a'");
706: #endif
707: if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB, "LAPACKungqr_ LAPACK routine failed");
708: PetscFPTrapPop();
710: /* Transpose b_submat_tp and store it in b_orth_arr[a]. If we are constructing a
711: bridging restriction/interpolation operator, we could end up with less dofs than
712: we previously had. We fill those up with zeros. */
713: if (!construct_bridge) {
714: PetscMalloc(sizeof(PetscScalar)*cand_vec_length[a]*new_loc_agg_dofs[a], b_orth_arr+a);
715: for (j=0; j<new_loc_agg_dofs[a]; j++) {
716: for (i=0; i<cand_vec_length[a]; i++) {
717: b_orth_arr[a][i*new_loc_agg_dofs[a]+j] = b_submat_tp[j*cand_vec_length[a]+i];
718: }
719: }
720: } else {
721: /* bridge, might have to fill up */
722: PetscMalloc(sizeof(PetscScalar)*cand_vec_length[a]*max_cand_vecs_to_add, b_orth_arr+a);
723: for (j=0; j<new_loc_agg_dofs[a]; j++) {
724: for (i=0; i<cand_vec_length[a]; i++) {
725: b_orth_arr[a][i*max_cand_vecs_to_add+j] = b_submat_tp[j*cand_vec_length[a]+i];
726: }
727: }
728: for (j=new_loc_agg_dofs[a]; j<max_cand_vecs_to_add; j++) {
729: for (i=0; i<cand_vec_length[a]; i++) {
730: b_orth_arr[a][i*max_cand_vecs_to_add+j] = 0.0;
731: }
732: }
733: new_loc_agg_dofs[a] = max_cand_vecs_to_add;
734: }
735: /* the number of columns in asa_lev->P that are local to this process */
736: total_loc_cols += new_loc_agg_dofs[a];
737: } /* end of loop over local aggregates */
739: /* destroy the submatrices, also frees all allocated space */
740: MatDestroyMatrices(mat_agg_loc_size, &b_submat_arr);
741: /* destroy all other workspace */
742: PetscFree(b_submat);
743: PetscFree(b_submat_tp);
744: PetscFree(idxm);
745: PetscFree(idxn);
746: PetscFree(tau);
747: PetscFree(work);
749: /* destroy old matrix P, Pt */
750: MatDestroy(&(asa_lev->P));
751: MatDestroy(&(asa_lev->Pt));
753: MatGetLocalSize(asa_lev->A, &a_loc_m, &a_loc_n);
755: /* determine local range */
756: MPI_Comm_size(asa_lev->comm, &comm_size);
757: MPI_Comm_rank(asa_lev->comm, &comm_rank);
758: PetscMalloc(comm_size*sizeof(PetscInt), &loc_cols);
759: MPI_Allgather(&total_loc_cols, 1, MPIU_INT, loc_cols, 1, MPIU_INT, asa_lev->comm);
760: mat_loc_col_start = 0;
761: for (i=0;i<comm_rank;i++) {
762: mat_loc_col_start += loc_cols[i];
763: }
764: mat_loc_col_end = mat_loc_col_start + loc_cols[i];
765: mat_loc_col_size = mat_loc_col_end-mat_loc_col_start;
766: if (mat_loc_col_size != total_loc_cols) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_COR, "Local size does not match matrix size");
767: PetscFree(loc_cols);
769: /* we now have enough information to create asa_lev->P */
770: MatCreateAIJ(asa_lev->comm, a_loc_n, total_loc_cols, asa_lev->size, PETSC_DETERMINE,
771: cand_vecs_num, PETSC_NULL, cand_vecs_num, PETSC_NULL, &(asa_lev->P));
772: /* create asa_lev->Pt */
773: MatCreateAIJ(asa_lev->comm, total_loc_cols, a_loc_n, PETSC_DETERMINE, asa_lev->size,
774: max_cand_vec_length, PETSC_NULL, max_cand_vec_length, PETSC_NULL, &(asa_lev->Pt));
775: if (asa_lev->next) {
776: /* create correlator for aggregates of next level */
777: MatCreateAIJ(asa_lev->comm, mat_agg_loc_size, total_loc_cols, PETSC_DETERMINE, PETSC_DETERMINE,
778: cand_vecs_num, PETSC_NULL, cand_vecs_num, PETSC_NULL, &(asa_lev->next->agg_corr));
779: /* create asa_lev->next->bridge_corr matrix */
780: MatCreateAIJ(asa_lev->comm, mat_agg_loc_size, total_loc_cols, PETSC_DETERMINE, PETSC_DETERMINE,
781: cand_vecs_num, PETSC_NULL, cand_vecs_num, PETSC_NULL, &(asa_lev->next->bridge_corr));
782: }
784: /* this is my own hack, but it should give the columns that we should write to */
785: MatGetOwnershipRangeColumn(asa_lev->P, &mat_loc_col_start, &mat_loc_col_end);
786: mat_loc_col_size = mat_loc_col_end-mat_loc_col_start;
787: if (mat_loc_col_size != total_loc_cols) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ, "The number of local columns in asa_lev->P assigned to this processor does not match the local vector size");
789: loc_agg_dofs_sum = 0;
790: /* construct P, Pt, agg_corr, bridge_corr */
791: for (a=0; a<mat_agg_loc_size; a++) {
792: /* store b_orth_arr[a] in P */
793: for (i=0; i<cand_vec_length[a]; i++) {
794: row = agg_arr[a][i];
795: for (j=0; j<new_loc_agg_dofs[a]; j++) {
796: col = mat_loc_col_start + loc_agg_dofs_sum + j;
797: val = b_orth_arr[a][i*new_loc_agg_dofs[a] + j];
798: MatSetValues(asa_lev->P, 1, &row, 1, &col, &val, INSERT_VALUES);
799: val = PetscConj(val);
800: MatSetValues(asa_lev->Pt, 1, &col, 1, &row, &val, INSERT_VALUES);
801: }
802: }
804: /* compute aggregate correlation matrices */
805: if (asa_lev->next) {
806: row = a+mat_agg_loc_start;
807: for (i=0; i<new_loc_agg_dofs[a]; i++) {
808: col = mat_loc_col_start + loc_agg_dofs_sum + i;
809: val = 1.0;
810: MatSetValues(asa_lev->next->agg_corr, 1, &row, 1, &col, &val, INSERT_VALUES);
811: /* for the bridge operator we leave out the newest candidates, i.e.
812: we set bridge_corr to 1.0 for all columns up to asa_lev->loc_agg_dofs[a] and to
813: 0.0 between asa_lev->loc_agg_dofs[a] and new_loc_agg_dofs[a] */
814: if (!(asa_lev->loc_agg_dofs && (i >= asa_lev->loc_agg_dofs[a]))) {
815: MatSetValues(asa_lev->next->bridge_corr, 1, &row, 1, &col, &val, INSERT_VALUES);
816: }
817: }
818: }
820: /* move to next entry point col */
821: loc_agg_dofs_sum += new_loc_agg_dofs[a];
822: } /* end of loop over local aggregates */
824: MatAssemblyBegin(asa_lev->P,MAT_FINAL_ASSEMBLY);
825: MatAssemblyEnd(asa_lev->P,MAT_FINAL_ASSEMBLY);
826: MatAssemblyBegin(asa_lev->Pt,MAT_FINAL_ASSEMBLY);
827: MatAssemblyEnd(asa_lev->Pt,MAT_FINAL_ASSEMBLY);
828: if (asa_lev->next) {
829: MatAssemblyBegin(asa_lev->next->agg_corr,MAT_FINAL_ASSEMBLY);
830: MatAssemblyEnd(asa_lev->next->agg_corr,MAT_FINAL_ASSEMBLY);
831: MatAssemblyBegin(asa_lev->next->bridge_corr,MAT_FINAL_ASSEMBLY);
832: MatAssemblyEnd(asa_lev->next->bridge_corr,MAT_FINAL_ASSEMBLY);
833: }
835: /* if we are not constructing a bridging operator, switch asa_lev->loc_agg_dofs
836: and new_loc_agg_dofs */
837: if (construct_bridge) {
838: PetscFree(new_loc_agg_dofs);
839: } else {
840: if (asa_lev->loc_agg_dofs) {
841: PetscFree(asa_lev->loc_agg_dofs);
842: }
843: asa_lev->loc_agg_dofs = new_loc_agg_dofs;
844: }
846: /* clean up */
847: for (a=0; a<mat_agg_loc_size; a++) {
848: PetscFree(b_orth_arr[a]);
849: PetscFree(agg_arr[a]);
850: }
851: PetscFree(cand_vec_length);
852: PetscFree(b_orth_arr);
853: PetscFree(agg_arr);
855: PetscLogEventEnd(PC_CreateTransferOp_ASA, 0,0,0,0);
856: return(0);
857: }
859: /* -------------------------------------------------------------------------- */
860: /*
861: PCSmoothProlongator_ASA - Computes the smoothed prolongators I and It on the level
863: Input Parameters:
864: . asa_lev - the level for which the smoothed prolongator is constructed
865: */
868: PetscErrorCode PCSmoothProlongator_ASA(PC_ASA_level *asa_lev)
869: {
873: MatDestroy(&(asa_lev->smP));
874: MatDestroy(&(asa_lev->smPt));
875: /* compute prolongator I_{l+1}^l = S_l P_{l+1}^l */
876: /* step 1: compute I_{l+1}^l = A_l P_{l+1}^l */
877: MatMatMult(asa_lev->A, asa_lev->P, MAT_INITIAL_MATRIX, 1, &(asa_lev->smP));
878: MatMatMult(asa_lev->Pt, asa_lev->A, MAT_INITIAL_MATRIX, 1, &(asa_lev->smPt));
879: /* step 2: shift and scale to get I_{l+1}^l = P_{l+1}^l - 4/(3/rho) A_l P_{l+1}^l */
880: MatAYPX(asa_lev->smP, -4./(3.*(asa_lev->spec_rad)), asa_lev->P, SUBSET_NONZERO_PATTERN);
881: MatAYPX(asa_lev->smPt, -4./(3.*(asa_lev->spec_rad)), asa_lev->Pt, SUBSET_NONZERO_PATTERN);
883: return(0);
884: }
887: /* -------------------------------------------------------------------------- */
888: /*
889: PCCreateVcycle_ASA - Creates the V-cycle, when aggregates are already defined
891: Input Parameters:
892: . asa - the preconditioner context
893: */
896: PetscErrorCode PCCreateVcycle_ASA(PC_ASA *asa)
897: {
899: PC_ASA_level *asa_lev, *asa_next_lev;
900: Mat AI;
903: PetscLogEventBegin(PC_CreateVcycle_ASA, 0,0,0,0);
905: if (!asa) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL, "asa pointer is NULL");
906: if (!(asa->levellist)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL, "no levels found");
907: asa_lev = asa->levellist;
908: PCComputeSpectralRadius_ASA(asa_lev);
909: PCSetupSmoothersOnLevel_ASA(asa, asa_lev, asa->nu);
911: while(asa_lev->next) {
912: asa_next_lev = asa_lev->next;
913: /* (a) aggregates are already constructed */
915: /* (b) construct B_{l+1} and P_{l+1}^l using (2.11) */
916: /* construct P_{l+1}^l */
917: PCCreateTransferOp_ASA(asa_lev, PETSC_FALSE);
919: /* construct B_{l+1} */
920: MatDestroy(&(asa_next_lev->B));
921: MatMatMult(asa_lev->Pt, asa_lev->B, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->B));
922: asa_next_lev->cand_vecs = asa_lev->cand_vecs;
924: /* (c) construct smoothed prolongator */
925: PCSmoothProlongator_ASA(asa_lev);
926:
927: /* (d) construct coarse matrix */
928: /* Define coarse matrix A_{l+1} = (I_{l+1}^l)^T A_l I_{l+1}^l */
929: MatDestroy(&(asa_next_lev->A));
930: MatMatMult(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1.0, &AI);
931: MatMatMult(asa_lev->smPt, AI, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->A));
932: MatDestroy(&AI);
933: /* MatPtAP(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->A)); */
934: MatGetSize(asa_next_lev->A, PETSC_NULL, &(asa_next_lev->size));
935: PCComputeSpectralRadius_ASA(asa_next_lev);
936: PCSetupSmoothersOnLevel_ASA(asa, asa_next_lev, asa->nu);
937: /* create corresponding vectors x_{l+1}, b_{l+1}, r_{l+1} */
938: VecDestroy(&(asa_next_lev->x));
939: VecDestroy(&(asa_next_lev->b));
940: VecDestroy(&(asa_next_lev->r));
941: MatGetVecs(asa_next_lev->A, &(asa_next_lev->x), &(asa_next_lev->b));
942: MatGetVecs(asa_next_lev->A, PETSC_NULL, &(asa_next_lev->r));
944: /* go to next level */
945: asa_lev = asa_lev->next;
946: } /* end of while loop over the levels */
947: /* asa_lev now points to the coarsest level, set up direct solver there */
948: PCComputeSpectralRadius_ASA(asa_lev);
949: PCSetupDirectSolversOnLevel_ASA(asa, asa_lev, asa->nu);
951: PetscLogEventEnd(PC_CreateVcycle_ASA, 0,0,0,0);
952: return(0);
953: }
955: /* -------------------------------------------------------------------------- */
956: /*
957: PCAddCandidateToB_ASA - Inserts a candidate vector in B
959: Input Parameters:
960: + B - the matrix to insert into
961: . col_idx - the column we should insert to
962: . x - the vector to insert
963: - A - system matrix
965: Function will insert normalized x into B, such that <A x, x> = 1
966: (x itself is not changed). If B is projected down then this property
967: is kept. If <A_l x_l, x_l> = 1 and the next level is defined by
968: x_{l+1} = Pt x_l and A_{l+1} = Pt A_l P then
969: <A_{l+1} x_{l+1}, x_l> = <Pt A_l P Pt x_l, Pt x_l>
970: = <A_l P Pt x_l, P Pt x_l> = <A_l x_l, x_l> = 1
971: because of the definition of P in (2.11).
972: */
975: PetscErrorCode PCAddCandidateToB_ASA(Mat B, PetscInt col_idx, Vec x, Mat A)
976: {
978: Vec Ax;
979: PetscScalar dotprod;
980: PetscReal norm;
981: PetscInt i, loc_start, loc_end;
982: PetscScalar val, *vecarray;
985: MatGetVecs(A, PETSC_NULL, &Ax);
986: MatMult(A, x, Ax);
987: VecDot(Ax, x, &dotprod);
988: norm = PetscSqrtReal(PetscAbsScalar(dotprod));
989: VecGetOwnershipRange(x, &loc_start, &loc_end);
990: VecGetArray(x, &vecarray);
991: for (i=loc_start; i<loc_end; i++) {
992: val = vecarray[i-loc_start]/norm;
993: MatSetValues(B, 1, &i, 1, &col_idx, &val, INSERT_VALUES);
994: }
995: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
996: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
997: VecRestoreArray(x, &vecarray);
998: VecDestroy(&Ax);
999: return(0);
1000: }
1002: /* -------------------------------------------------------------------------- */
1003: /*
1004: - x - a starting guess for a hard to approximate vector, if PETSC_NULL, will be generated
1005: */
1008: PetscErrorCode PCInitializationStage_ASA(PC pc, Vec x)
1009: {
1011: PetscInt l;
1012: PC_ASA *asa = (PC_ASA*)pc->data;
1013: PC_ASA_level *asa_lev, *asa_next_lev;
1014: PetscRandom rctx; /* random number generator context */
1016: Vec ax;
1017: PetscScalar tmp;
1018: PetscReal prevnorm, norm;
1020: PetscBool skip_steps_f_i = PETSC_FALSE;
1021: PetscBool sufficiently_coarsened = PETSC_FALSE;
1023: PetscInt vec_size, vec_loc_size;
1024: PetscInt loc_vec_low, loc_vec_high;
1025: PetscInt i,j;
1027: /* Vec xhat = 0; */
1029: Mat AI;
1031: Vec cand_vec, cand_vec_new;
1032: PetscBool isrichardson;
1033: PC coarse_pc;
1036: PetscLogEventBegin(PC_InitializationStage_ASA,0,0,0,0);
1037: l=1;
1038: /* create first level */
1039: PCCreateLevel_ASA(&(asa->levellist), l, asa->comm, 0, 0, asa->ksptype_smooth, asa->pctype_smooth);
1040: asa_lev = asa->levellist;
1042: /* Set matrix */
1043: asa_lev->A = asa->A;
1044: MatGetSize(asa_lev->A, &i, &j);
1045: asa_lev->size = i;
1046: PCComputeSpectralRadius_ASA(asa_lev);
1047: PCSetupSmoothersOnLevel_ASA(asa, asa_lev, asa->mu_initial);
1049: /* Set DM */
1050: asa_lev->dm = pc->dm;
1051: PetscObjectReference((PetscObject)pc->dm);
1053: PetscPrintf(asa_lev->comm, "Initialization stage\n");
1055: if (x) {
1056: /* use starting guess */
1057: VecDestroy(&(asa_lev->x));
1058: VecDuplicate(x, &(asa_lev->x));
1059: VecCopy(x, asa_lev->x);
1060: } else {
1061: /* select random starting vector */
1062: VecDestroy(&(asa_lev->x));
1063: MatGetVecs(asa_lev->A, &(asa_lev->x), 0);
1064: PetscRandomCreate(asa_lev->comm,&rctx);
1065: PetscRandomSetFromOptions(rctx);
1066: VecSetRandom(asa_lev->x, rctx);
1067: PetscRandomDestroy(&rctx);
1068: }
1070: /* create right hand side */
1071: VecDestroy(&(asa_lev->b));
1072: MatGetVecs(asa_lev->A, &(asa_lev->b), 0);
1073: VecSet(asa_lev->b, 0.0);
1075: /* relax and check whether that's enough already */
1076: /* compute old norm */
1077: MatGetVecs(asa_lev->A, 0, &ax);
1078: MatMult(asa_lev->A, asa_lev->x, ax);
1079: VecDot(asa_lev->x, ax, &tmp);
1080: prevnorm = PetscAbsScalar(tmp);
1081: PetscPrintf(asa_lev->comm, "Residual norm of starting guess: %f\n", prevnorm);
1083: /* apply mu_initial relaxations */
1084: KSPSolve(asa_lev->smoothd, asa_lev->b, asa_lev->x);
1085: /* compute new norm */
1086: MatMult(asa_lev->A, asa_lev->x, ax);
1087: VecDot(asa_lev->x, ax, &tmp);
1088: norm = PetscAbsScalar(tmp);
1089: VecDestroy(&(ax));
1090: PetscPrintf(asa_lev->comm, "Residual norm of relaxation after %g %D relaxations: %g %g\n", asa->epsilon,asa->mu_initial, norm,prevnorm);
1092: /* Check if it already converges by itself */
1093: if (norm/prevnorm <= pow(asa->epsilon, (PetscReal) asa->mu_initial)) {
1094: /* converges by relaxation alone */
1095: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP, "Relaxation should be sufficient to treat this problem. "
1096: "Use relaxation or decrease epsilon with -pc_asa_epsilon");
1097: } else {
1098: /* set the number of relaxations to asa->mu from asa->mu_initial */
1099: PCSetupSmoothersOnLevel_ASA(asa, asa_lev, asa->mu);
1101: /* Let's do some multigrid ! */
1102: sufficiently_coarsened = PETSC_FALSE;
1104: /* do the whole initialization stage loop */
1105: while (!sufficiently_coarsened) {
1106: PetscPrintf(asa_lev->comm, "Initialization stage: creating level %D\n", asa_lev->level+1);
1108: /* (a) Set candidate matrix B_l = x_l */
1109: /* get the correct vector sizes and data */
1110: VecGetSize(asa_lev->x, &vec_size);
1111: VecGetOwnershipRange(asa_lev->x, &loc_vec_low, &loc_vec_high);
1112: vec_loc_size = loc_vec_high - loc_vec_low;
1114: /* create matrix for candidates */
1115: MatCreateDense(asa_lev->comm, vec_loc_size, PETSC_DECIDE, vec_size, asa->max_cand_vecs, PETSC_NULL, &(asa_lev->B));
1116: /* set the first column */
1117: PCAddCandidateToB_ASA(asa_lev->B, 0, asa_lev->x, asa_lev->A);
1118: asa_lev->cand_vecs = 1;
1120: /* create next level */
1121: PCCreateLevel_ASA(&(asa_lev->next), asa_lev->level+1, asa_lev->comm, asa_lev, PETSC_NULL, asa->ksptype_smooth, asa->pctype_smooth);
1122: asa_next_lev = asa_lev->next;
1124: /* (b) Create nodal aggregates A_i^l */
1125: PCCreateAggregates_ASA(asa_lev);
1126:
1127: /* (c) Define tentatative prolongator P_{l+1}^l and candidate matrix B_{l+1}
1128: using P_{l+1}^l B_{l+1} = B_l and (P_{l+1}^l)^T P_{l+1}^l = I */
1129: PCCreateTransferOp_ASA(asa_lev, PETSC_FALSE);
1131: /* future WORK: set correct fill ratios for all the operations below */
1132: MatMatMult(asa_lev->Pt, asa_lev->B, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->B));
1133: asa_next_lev->cand_vecs = asa_lev->cand_vecs;
1135: /* (d) Define prolongator I_{l+1}^l = S_l P_{l+1}^l */
1136: PCSmoothProlongator_ASA(asa_lev);
1138: /* (e) Define coarse matrix A_{l+1} = (I_{l+1}^l)^T A_l I_{l+1}^l */
1139: MatMatMult(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1.0, &AI);
1140: MatMatMult(asa_lev->smPt, AI, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->A));
1141: MatDestroy(&AI);
1142: /* MatPtAP(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->A)); */
1143: MatGetSize(asa_next_lev->A, PETSC_NULL, &(asa_next_lev->size));
1144: PCComputeSpectralRadius_ASA(asa_next_lev);
1145: PCSetupSmoothersOnLevel_ASA(asa, asa_next_lev, asa->mu);
1147: /* coarse enough for direct solver? */
1148: MatGetSize(asa_next_lev->A, &i, &j);
1149: if (PetscMax(i,j) <= asa->direct_solver) {
1150: PetscPrintf(asa_lev->comm, "Level %D can be treated directly.\n"
1151: "Algorithm will use %D levels.\n", asa_next_lev->level,
1152: asa_next_lev->level);
1153: break; /* go to step 5 */
1154: }
1156: if (!skip_steps_f_i) {
1157: /* (f) Set x_{l+1} = B_{l+1}, we just compute it again */
1158: VecDestroy(&(asa_next_lev->x));
1159: MatGetVecs(asa_lev->P, &(asa_next_lev->x), 0);
1160: MatMult(asa_lev->Pt, asa_lev->x, asa_next_lev->x);
1162: /* /\* (g) Make copy \hat{x}_{l+1} = x_{l+1} *\/ */
1163: /* VecDuplicate(asa_next_lev->x, &xhat); */
1164: /* VecCopy(asa_next_lev->x, xhat); */
1165:
1166: /* Create b_{l+1} */
1167: VecDestroy(&(asa_next_lev->b));
1168: MatGetVecs(asa_next_lev->A, &(asa_next_lev->b), 0);
1169: VecSet(asa_next_lev->b, 0.0);
1171: /* (h) Relax mu times on A_{l+1} x = 0 */
1172: /* compute old norm */
1173: MatGetVecs(asa_next_lev->A, 0, &ax);
1174: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1175: VecDot(asa_next_lev->x, ax, &tmp);
1176: prevnorm = PetscAbsScalar(tmp);
1177: PetscPrintf(asa_next_lev->comm, "Residual norm of starting guess on level %D: %f\n", asa_next_lev->level, prevnorm);
1178: /* apply mu relaxations: WORK, make sure that mu is set correctly */
1179: KSPSolve(asa_next_lev->smoothd, asa_next_lev->b, asa_next_lev->x);
1180: /* compute new norm */
1181: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1182: VecDot(asa_next_lev->x, ax, &tmp);
1183: norm = PetscAbsScalar(tmp);
1184: VecDestroy(&(ax));
1185: PetscPrintf(asa_next_lev->comm, "Residual norm after Richardson iteration on level %D: %f\n", asa_next_lev->level, norm);
1186: /* (i) Check if it already converges by itself */
1187: if (norm/prevnorm <= pow(asa->epsilon, (PetscReal) asa->mu)) {
1188: /* relaxation reduces error sufficiently */
1189: skip_steps_f_i = PETSC_TRUE;
1190: }
1191: }
1192: /* (j) go to next coarser level */
1193: l++;
1194: asa_lev = asa_next_lev;
1195: }
1196: /* Step 5. */
1197: asa->levels = asa_next_lev->level; /* WORK: correct? */
1199: /* Set up direct solvers on coarsest level */
1200: if (asa_next_lev->smoothd != asa_next_lev->smoothu) {
1201: if (asa_next_lev->smoothu) { KSPDestroy(&asa_next_lev->smoothu); }
1202: }
1203: KSPSetType(asa_next_lev->smoothd, asa->ksptype_direct);
1204: PetscObjectTypeCompare((PetscObject)(asa_next_lev->smoothd), KSPRICHARDSON, &isrichardson);
1205: if (isrichardson) {
1206: KSPSetInitialGuessNonzero(asa_next_lev->smoothd, PETSC_TRUE);
1207: } else {
1208: KSPSetInitialGuessNonzero(asa_next_lev->smoothd, PETSC_FALSE);
1209: }
1210: KSPGetPC(asa_next_lev->smoothd, &coarse_pc);
1211: PCSetType(coarse_pc, asa->pctype_direct);
1212: asa_next_lev->smoothu = asa_next_lev->smoothd;
1213: PCSetupDirectSolversOnLevel_ASA(asa, asa_next_lev, asa->nu);
1215: /* update finest-level candidate matrix B_1 = I_2^1 I_3^2 ... I_{L-1}^{L-2} x_{L-1} */
1216: if (!asa_lev->prev) {
1217: /* just one relaxation level */
1218: VecDuplicate(asa_lev->x, &cand_vec);
1219: VecCopy(asa_lev->x, cand_vec);
1220: } else {
1221: /* interpolate up the chain */
1222: cand_vec = asa_lev->x;
1223: asa_lev->x = 0;
1224: while(asa_lev->prev) {
1225: /* interpolate to higher level */
1226: MatGetVecs(asa_lev->prev->smP, 0, &cand_vec_new);
1227: MatMult(asa_lev->prev->smP, cand_vec, cand_vec_new);
1228: VecDestroy(&(cand_vec));
1229: cand_vec = cand_vec_new;
1230:
1231: /* destroy all working vectors on the way */
1232: VecDestroy(&(asa_lev->x));
1233: VecDestroy(&(asa_lev->b));
1235: /* move to next higher level */
1236: asa_lev = asa_lev->prev;
1237: }
1238: }
1239: /* set the first column of B1 */
1240: PCAddCandidateToB_ASA(asa_lev->B, 0, cand_vec, asa_lev->A);
1241: VecDestroy(&(cand_vec));
1243: /* Step 6. Create V-cycle */
1244: PCCreateVcycle_ASA(asa);
1245: }
1246: PetscLogEventEnd(PC_InitializationStage_ASA,0,0,0,0);
1247: return(0);
1248: }
1250: /* -------------------------------------------------------------------------- */
1251: /*
1252: PCApplyVcycleOnLevel_ASA - Applies current V-cycle
1254: Input Parameters:
1255: + asa_lev - the current level we should recurse on
1256: - gamma - the number of recursive cycles we should run
1258: */
1261: PetscErrorCode PCApplyVcycleOnLevel_ASA(PC_ASA_level *asa_lev, PetscInt gamma)
1262: {
1264: PC_ASA_level *asa_next_lev;
1265: PetscInt g;
1268: if (!asa_lev) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL, "Level is empty in PCApplyVcycleOnLevel_ASA");
1269: asa_next_lev = asa_lev->next;
1271: if (asa_next_lev) {
1272: /* 1. Presmoothing */
1273: KSPSolve(asa_lev->smoothd, asa_lev->b, asa_lev->x);
1274: /* 2. Coarse grid corrections */
1275: /* MatGetVecs(asa_lev->A, 0, &tmp); */
1276: /* MatGetVecs(asa_lev->smP, &(asa_next_lev->b), 0); */
1277: /* MatGetVecs(asa_next_lev->A, &(asa_next_lev->x), 0); */
1278: for (g=0; g<gamma; g++) {
1279: /* (a) get coarsened b_{l+1} = (I_{l+1}^l)^T (b_l - A_l x_l) */
1280: MatMult(asa_lev->A, asa_lev->x, asa_lev->r);
1281: VecAYPX(asa_lev->r, -1.0, asa_lev->b);
1282: MatMult(asa_lev->smPt, asa_lev->r, asa_next_lev->b);
1284: /* (b) Set x_{l+1} = 0 and recurse */
1285: VecSet(asa_next_lev->x, 0.0);
1286: PCApplyVcycleOnLevel_ASA(asa_next_lev, gamma);
1288: /* (c) correct solution x_l = x_l + I_{l+1}^l x_{l+1} */
1289: MatMultAdd(asa_lev->smP, asa_next_lev->x, asa_lev->x, asa_lev->x);
1290: }
1291: /* VecDestroy(&(asa_lev->r)); */
1292: /* /\* discard x_{l+1}, b_{l+1} *\/ */
1293: /* VecDestroy(&(asa_next_lev->x)); */
1294: /* VecDestroy(&(asa_next_lev->b)); */
1295:
1296: /* 3. Postsmoothing */
1297: KSPSolve(asa_lev->smoothu, asa_lev->b, asa_lev->x);
1298: } else {
1299: /* Base case: solve directly */
1300: KSPSolve(asa_lev->smoothd, asa_lev->b, asa_lev->x);
1301: }
1302: return(0);
1303: }
1306: /* -------------------------------------------------------------------------- */
1307: /*
1308: PCGeneralSetupStage_ASA - Applies the ASA preconditioner to a vector. Algorithm
1309: 4 from the ASA paper
1311: Input Parameters:
1312: + asa - the data structure for the ASA algorithm
1313: - cand - a possible candidate vector, if PETSC_NULL, will be constructed randomly
1315: Output Parameters:
1316: . cand_added - PETSC_TRUE, if new candidate vector added, PETSC_FALSE otherwise
1317: */
1320: PetscErrorCode PCGeneralSetupStage_ASA(PC_ASA *asa, Vec cand, PetscBool *cand_added)
1321: {
1323: PC_ASA_level *asa_lev, *asa_next_lev;
1325: PetscRandom rctx; /* random number generator context */
1326: PetscReal r;
1327: PetscScalar rs;
1328: PetscBool nd_fast;
1330: Vec ax;
1331: PetscScalar tmp;
1332: PetscReal norm, prevnorm = 0.0;
1333: PetscInt c;
1335: PetscInt loc_vec_low, loc_vec_high;
1336: PetscInt i;
1338: PetscBool skip_steps_d_j = PETSC_FALSE;
1340: PetscInt *idxm, *idxn;
1341: PetscScalar *v;
1343: Mat AI;
1345: Vec cand_vec, cand_vec_new;
1348: *cand_added = PETSC_FALSE;
1349:
1350: asa_lev = asa->levellist;
1351: if (asa_lev == 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL, "No levels found in PCGeneralSetupStage_ASA");
1352: asa_next_lev = asa_lev->next;
1353: if (asa_next_lev == 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL, "Just one level, not implemented yet");
1354:
1355: PetscPrintf(asa_lev->comm, "General setup stage\n");
1357: PetscLogEventBegin(PC_GeneralSetupStage_ASA,0,0,0,0);
1359: /* 1. If max. dof per node on level 2 equals K, stop */
1360: if (asa_next_lev->cand_vecs >= asa->max_dof_lev_2) {
1361: PetscPrintf(PETSC_COMM_WORLD,
1362: "Maximum dof on level 2 reached: %D\n"
1363: "Consider increasing this limit by setting it with -pc_asa_max_dof_lev_2\n",
1364: asa->max_dof_lev_2);
1365: return(0);
1366: }
1368: /* 2. Create copy of B_1 (skipped, we just replace the last column in step 8.) */
1369:
1370: if (!cand) {
1371: /* 3. Select a random x_1 */
1372: VecDestroy(&(asa_lev->x));
1373: MatGetVecs(asa_lev->A, &(asa_lev->x), 0);
1374: PetscRandomCreate(asa_lev->comm,&rctx);
1375: PetscRandomSetFromOptions(rctx);
1376: VecGetOwnershipRange(asa_lev->x, &loc_vec_low, &loc_vec_high);
1377: for (i=loc_vec_low; i<loc_vec_high; i++) {
1378: PetscRandomGetValueReal(rctx, &r);
1379: rs = r;
1380: VecSetValues(asa_lev->x, 1, &i, &rs, INSERT_VALUES);
1381: }
1382: VecAssemblyBegin(asa_lev->x);
1383: VecAssemblyEnd(asa_lev->x);
1384: PetscRandomDestroy(&rctx);
1385: } else {
1386: VecDestroy(&(asa_lev->x));
1387: VecDuplicate(cand, &(asa_lev->x));
1388: VecCopy(cand, asa_lev->x);
1389: }
1391: /* create right hand side */
1392: VecDestroy(&(asa_lev->b));
1393: MatGetVecs(asa_lev->A, &(asa_lev->b), 0);
1394: VecSet(asa_lev->b, 0.0);
1395:
1396: /* Apply mu iterations of current V-cycle */
1397: nd_fast = PETSC_FALSE;
1398: MatGetVecs(asa_lev->A, 0, &ax);
1399: for (c=0; c<asa->mu; c++) {
1400: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1401:
1402: MatMult(asa_lev->A, asa_lev->x, ax);
1403: VecDot(asa_lev->x, ax, &tmp);
1404: norm = PetscAbsScalar(tmp);
1405: if (c>0) {
1406: if (norm/prevnorm < asa->epsilon) {
1407: nd_fast = PETSC_TRUE;
1408: break;
1409: }
1410: }
1411: prevnorm = norm;
1412: }
1413: VecDestroy(&(ax));
1415: /* 4. If energy norm decreases sufficiently fast, then stop */
1416: if (nd_fast) {
1417: PetscPrintf(asa_lev->comm, "nd_fast is true\n");
1418: return(0);
1419: }
1421: /* 5. Update B_1, by adding new column x_1 */
1422: if (asa_lev->cand_vecs >= asa->max_cand_vecs) {
1423: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM, "Number of candidate vectors will exceed allocated storage space");
1424: } else {
1425: PetscPrintf(asa_lev->comm, "Adding candidate vector %D\n", asa_lev->cand_vecs+1);
1426: }
1427: PCAddCandidateToB_ASA(asa_lev->B, asa_lev->cand_vecs, asa_lev->x, asa_lev->A);
1428: *cand_added = PETSC_TRUE;
1429: asa_lev->cand_vecs++;
1431: /* 6. loop over levels */
1432: while(asa_next_lev && asa_next_lev->next) {
1433: PetscPrintf(asa_lev->comm, "General setup stage: processing level %D\n", asa_next_lev->level);
1434: /* (a) define B_{l+1} and P_{l+1}^L */
1435: /* construct P_{l+1}^l */
1436: PCCreateTransferOp_ASA(asa_lev, PETSC_FALSE);
1438: /* construct B_{l+1} */
1439: MatDestroy(&(asa_next_lev->B));
1440: MatMatMult(asa_lev->Pt, asa_lev->B, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->B));
1441: /* do not increase asa_next_lev->cand_vecs until step (j) */
1442:
1443: /* (b) construct prolongator I_{l+1}^l = S_l P_{l+1}^l */
1444: PCSmoothProlongator_ASA(asa_lev);
1445:
1446: /* (c) construct coarse matrix A_{l+1} = (I_{l+1}^l)^T A_l I_{l+1}^l */
1447: MatDestroy(&(asa_next_lev->A));
1448: MatMatMult(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1.0, &AI);
1449: MatMatMult(asa_lev->smPt, AI, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->A));
1450: MatDestroy(&AI);
1451: /* MatPtAP(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->A)); */
1452: MatGetSize(asa_next_lev->A, PETSC_NULL, &(asa_next_lev->size));
1453: PCComputeSpectralRadius_ASA(asa_next_lev);
1454: PCSetupSmoothersOnLevel_ASA(asa, asa_next_lev, asa->mu);
1456: if (! skip_steps_d_j) {
1457: /* (d) get vector x_{l+1} from last column in B_{l+1} */
1458: VecDestroy(&(asa_next_lev->x));
1459: MatGetVecs(asa_next_lev->B, 0, &(asa_next_lev->x));
1461: VecGetOwnershipRange(asa_next_lev->x, &loc_vec_low, &loc_vec_high);
1462: PetscMalloc(sizeof(PetscInt)*(loc_vec_high-loc_vec_low), &idxm);
1463: for (i=loc_vec_low; i<loc_vec_high; i++)
1464: idxm[i-loc_vec_low] = i;
1465: PetscMalloc(sizeof(PetscInt)*1, &idxn);
1466: idxn[0] = asa_next_lev->cand_vecs;
1468: PetscMalloc(sizeof(PetscScalar)*(loc_vec_high-loc_vec_low), &v);
1469: MatGetValues(asa_next_lev->B, loc_vec_high-loc_vec_low, idxm, 1, idxn, v);
1471: VecSetValues(asa_next_lev->x, loc_vec_high-loc_vec_low, idxm, v, INSERT_VALUES);
1472: VecAssemblyBegin(asa_next_lev->x);
1473: VecAssemblyEnd(asa_next_lev->x);
1475: PetscFree(v);
1476: PetscFree(idxm);
1477: PetscFree(idxn);
1478:
1479: /* (e) create bridge transfer operator P_{l+2}^{l+1}, by using the previously
1480: computed candidates */
1481: PCCreateTransferOp_ASA(asa_next_lev, PETSC_TRUE);
1483: /* (f) construct bridging prolongator I_{l+2}^{l+1} = S_{l+1} P_{l+2}^{l+1} */
1484: PCSmoothProlongator_ASA(asa_next_lev);
1486: /* (g) compute <A_{l+1} x_{l+1}, x_{l+1}> and save it */
1487: MatGetVecs(asa_next_lev->A, 0, &ax);
1488: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1489: VecDot(asa_next_lev->x, ax, &tmp);
1490: prevnorm = PetscAbsScalar(tmp);
1491: VecDestroy(&(ax));
1493: /* (h) apply mu iterations of current V-cycle */
1494: /* set asa_next_lev->b */
1495: VecDestroy(&(asa_next_lev->b));
1496: VecDestroy(&(asa_next_lev->r));
1497: MatGetVecs(asa_next_lev->A, &(asa_next_lev->b), &(asa_next_lev->r));
1498: VecSet(asa_next_lev->b, 0.0);
1499: /* apply V-cycle */
1500: for (c=0; c<asa->mu; c++) {
1501: PCApplyVcycleOnLevel_ASA(asa_next_lev, asa->gamma);
1502: }
1504: /* (i) check convergence */
1505: /* compute <A_{l+1} x_{l+1}, x_{l+1}> and save it */
1506: MatGetVecs(asa_next_lev->A, 0, &ax);
1507: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1508: VecDot(asa_next_lev->x, ax, &tmp);
1509: norm = PetscAbsScalar(tmp);
1510: VecDestroy(&(ax));
1512: if (norm/prevnorm <= pow(asa->epsilon, (PetscReal) asa->mu)) skip_steps_d_j = PETSC_TRUE;
1513:
1514: /* (j) update candidate B_{l+1} */
1515: PCAddCandidateToB_ASA(asa_next_lev->B, asa_next_lev->cand_vecs, asa_next_lev->x, asa_next_lev->A);
1516: asa_next_lev->cand_vecs++;
1517: }
1518: /* go to next level */
1519: asa_lev = asa_lev->next;
1520: asa_next_lev = asa_next_lev->next;
1521: }
1523: /* 7. update the fine-level candidate */
1524: if (! asa_lev->prev) {
1525: /* just one coarsening level */
1526: VecDuplicate(asa_lev->x, &cand_vec);
1527: VecCopy(asa_lev->x, cand_vec);
1528: } else {
1529: cand_vec = asa_lev->x;
1530: asa_lev->x = 0;
1531: while(asa_lev->prev) {
1532: /* interpolate to higher level */
1533: MatGetVecs(asa_lev->prev->smP, 0, &cand_vec_new);
1534: MatMult(asa_lev->prev->smP, cand_vec, cand_vec_new);
1535: VecDestroy(&(cand_vec));
1536: cand_vec = cand_vec_new;
1538: /* destroy all working vectors on the way */
1539: VecDestroy(&(asa_lev->x));
1540: VecDestroy(&(asa_lev->b));
1542: /* move to next higher level */
1543: asa_lev = asa_lev->prev;
1544: }
1545: }
1546: /* 8. update B_1 by setting the last column of B_1 */
1547: PCAddCandidateToB_ASA(asa_lev->B, asa_lev->cand_vecs-1, cand_vec, asa_lev->A);
1548: VecDestroy(&(cand_vec));
1550: /* 9. create V-cycle */
1551: PCCreateVcycle_ASA(asa);
1552:
1553: PetscLogEventEnd(PC_GeneralSetupStage_ASA,0,0,0,0);
1554: return(0);
1555: }
1557: /* -------------------------------------------------------------------------- */
1558: /*
1559: PCConstructMultigrid_ASA - creates the multigrid preconditionier, this is a fairly
1560: involved process, which runs extensive testing to compute good candidate vectors
1562: Input Parameters:
1563: . pc - the preconditioner context
1565: */
1568: PetscErrorCode PCConstructMultigrid_ASA(PC pc)
1569: {
1571: PC_ASA *asa = (PC_ASA*)pc->data;
1572: PC_ASA_level *asa_lev;
1573: PetscInt i, ls, le;
1574: PetscScalar *d;
1575: PetscBool zeroflag = PETSC_FALSE;
1576: PetscReal rnorm, rnorm_start;
1577: PetscReal rq, rq_prev;
1578: PetscScalar rq_nom, rq_denom;
1579: PetscBool cand_added;
1580: PetscRandom rctx;
1584: /* check if we should scale with diagonal */
1585: if (asa->scale_diag) {
1586: /* Get diagonal scaling factors */
1587: MatGetVecs(pc->pmat,&(asa->invsqrtdiag),0);
1588: MatGetDiagonal(pc->pmat,asa->invsqrtdiag);
1589: /* compute (inverse) sqrt of diagonal */
1590: VecGetOwnershipRange(asa->invsqrtdiag, &ls, &le);
1591: VecGetArray(asa->invsqrtdiag, &d);
1592: for (i=0; i<le-ls; i++) {
1593: if (d[i] == 0.0) {
1594: d[i] = 1.0;
1595: zeroflag = PETSC_TRUE;
1596: } else {
1597: d[i] = 1./PetscSqrtReal(PetscAbsScalar(d[i]));
1598: }
1599: }
1600: VecRestoreArray(asa->invsqrtdiag,&d);
1601: VecAssemblyBegin(asa->invsqrtdiag);
1602: VecAssemblyEnd(asa->invsqrtdiag);
1603: if (zeroflag) {
1604: PetscInfo(pc,"Zero detected in diagonal of matrix, using 1 at those locations\n");
1605: }
1606:
1607: /* scale the matrix and store it: D^{-1/2} A D^{-1/2} */
1608: MatDuplicate(pc->pmat, MAT_COPY_VALUES, &(asa->A)); /* probably inefficient */
1609: MatDiagonalScale(asa->A, asa->invsqrtdiag, asa->invsqrtdiag);
1610: } else {
1611: /* don't scale */
1612: asa->A = pc->pmat;
1613: }
1614: /* Initialization stage */
1615: PCInitializationStage_ASA(pc, PETSC_NULL);
1616:
1617: /* get first level */
1618: asa_lev = asa->levellist;
1620: PetscRandomCreate(asa->comm,&rctx);
1621: PetscRandomSetFromOptions(rctx);
1622: VecSetRandom(asa_lev->x,rctx);
1624: /* compute starting residual */
1625: VecDestroy(&(asa_lev->r));
1626: MatGetVecs(asa_lev->A, PETSC_NULL, &(asa_lev->r));
1627: MatMult(asa_lev->A, asa_lev->x, asa_lev->r);
1628: /* starting residual norm */
1629: VecNorm(asa_lev->r, NORM_2, &rnorm_start);
1630: /* compute Rayleigh quotients */
1631: VecDot(asa_lev->x, asa_lev->r, &rq_nom);
1632: VecDot(asa_lev->x, asa_lev->x, &rq_denom);
1633: rq_prev = PetscAbsScalar(rq_nom / rq_denom);
1635: /* check if we have to add more candidates */
1636: for (i=0; i<asa->max_it; i++) {
1637: if (asa_lev->cand_vecs >= asa->max_cand_vecs) {
1638: /* reached limit for candidate vectors */
1639: break;
1640: }
1641: /* apply V-cycle */
1642: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1643: /* check convergence */
1644: MatMult(asa_lev->A, asa_lev->x, asa_lev->r);
1645: VecNorm(asa_lev->r, NORM_2, &rnorm);
1646: PetscPrintf(asa->comm, "After %D iterations residual norm is %f\n", i+1, rnorm);
1647: if (rnorm < rnorm_start*(asa->rtol) || rnorm < asa->abstol) {
1648: /* convergence */
1649: break;
1650: }
1651: /* compute new Rayleigh quotient */
1652: VecDot(asa_lev->x, asa_lev->r, &rq_nom);
1653: VecDot(asa_lev->x, asa_lev->x, &rq_denom);
1654: rq = PetscAbsScalar(rq_nom / rq_denom);
1655: PetscPrintf(asa->comm, "After %D iterations Rayleigh quotient of residual is %f\n", i+1, rq);
1656: /* test Rayleigh quotient decrease and add more candidate vectors if necessary */
1657: if (i && (rq > asa->rq_improve*rq_prev)) {
1658: /* improve interpolation by adding another candidate vector */
1659: PCGeneralSetupStage_ASA(asa, asa_lev->r, &cand_added);
1660: if (!cand_added) {
1661: /* either too many candidates for storage or cycle is already effective */
1662: PetscPrintf(asa->comm, "either too many candidates for storage or cycle is already effective\n");
1663: break;
1664: }
1665: VecSetRandom(asa_lev->x, rctx);
1666: rq_prev = rq*10000.; /* give the new V-cycle some grace period */
1667: } else {
1668: rq_prev = rq;
1669: }
1670: }
1672: VecDestroy(&(asa_lev->x));
1673: VecDestroy(&(asa_lev->b));
1674: PetscRandomDestroy(&rctx);
1675: asa->multigrid_constructed = PETSC_TRUE;
1676: return(0);
1677: }
1679: /* -------------------------------------------------------------------------- */
1680: /*
1681: PCApply_ASA - Applies the ASA preconditioner to a vector.
1683: Input Parameters:
1684: . pc - the preconditioner context
1685: . x - input vector
1687: Output Parameter:
1688: . y - output vector
1690: Application Interface Routine: PCApply()
1691: */
1694: PetscErrorCode PCApply_ASA(PC pc,Vec x,Vec y)
1695: {
1696: PC_ASA *asa = (PC_ASA*)pc->data;
1697: PC_ASA_level *asa_lev;
1702: if (!asa->multigrid_constructed) {
1703: PCConstructMultigrid_ASA(pc);
1704: }
1706: /* get first level */
1707: asa_lev = asa->levellist;
1709: /* set the right hand side */
1710: VecDuplicate(x, &(asa->b));
1711: VecCopy(x, asa->b);
1712: /* set starting vector */
1713: VecDestroy(&(asa->x));
1714: MatGetVecs(asa->A, &(asa->x), PETSC_NULL);
1715: VecSet(asa->x, 0.0);
1716:
1717: /* set vectors */
1718: asa_lev->x = asa->x;
1719: asa_lev->b = asa->b;
1721: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1722:
1723: /* Return solution */
1724: VecCopy(asa->x, y);
1726: /* delete working vectors */
1727: VecDestroy(&(asa->x));
1728: VecDestroy(&(asa->b));
1729: asa_lev->x = PETSC_NULL;
1730: asa_lev->b = PETSC_NULL;
1732: return(0);
1733: }
1735: /* -------------------------------------------------------------------------- */
1736: /*
1737: PCApplyRichardson_ASA - Applies the ASA iteration to solve a linear system
1739: Input Parameters:
1740: . pc - the preconditioner context
1741: . b - the right hand side
1743: Output Parameter:
1744: . x - output vector
1746: DOES NOT WORK!!!!!
1748: */
1751: PetscErrorCode PCApplyRichardson_ASA(PC pc,Vec b,Vec x,Vec w,PetscReal rtol,PetscReal abstol, PetscReal dtol,PetscInt its,PetscBool guesszero,PetscInt *outits,PCRichardsonConvergedReason *reason)
1752: {
1753: PC_ASA *asa = (PC_ASA*)pc->data;
1754: PC_ASA_level *asa_lev;
1755: PetscInt i;
1756: PetscReal rnorm, rnorm_start;
1761: if (! asa->multigrid_constructed) {
1762: PCConstructMultigrid_ASA(pc);
1763: }
1765: /* get first level */
1766: asa_lev = asa->levellist;
1768: /* set the right hand side */
1769: VecDuplicate(b, &(asa->b));
1770: if (asa->scale_diag) {
1771: VecPointwiseMult(asa->b, asa->invsqrtdiag, b);
1772: } else {
1773: VecCopy(b, asa->b);
1774: }
1775: /* set starting vector */
1776: VecDuplicate(x, &(asa->x));
1777: VecCopy(x, asa->x);
1778:
1779: /* compute starting residual */
1780: VecDestroy(&(asa->r));
1781: MatGetVecs(asa->A, &(asa->r), PETSC_NULL);
1782: MatMult(asa->A, asa->x, asa->r);
1783: VecAYPX(asa->r, -1.0, asa->b);
1784: /* starting residual norm */
1785: VecNorm(asa->r, NORM_2, &rnorm_start);
1787: /* set vectors */
1788: asa_lev->x = asa->x;
1789: asa_lev->b = asa->b;
1791: *reason = PCRICHARDSON_CONVERGED_ITS;
1792: /* **************** Full algorithm loop *********************************** */
1793: for (i=0; i<its; i++) {
1794: /* apply V-cycle */
1795: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1796: /* check convergence */
1797: MatMult(asa->A, asa->x, asa->r);
1798: VecAYPX(asa->r, -1.0, asa->b);
1799: VecNorm(asa->r, NORM_2, &rnorm);
1800: PetscPrintf(asa->comm, "After %D iterations residual norm is %f\n", i+1, rnorm);
1801: if (rnorm < rnorm_start*(rtol)) {
1802: *reason = PCRICHARDSON_CONVERGED_RTOL;
1803: break;
1804: } else if (rnorm < asa->abstol) {
1805: *reason = PCRICHARDSON_CONVERGED_ATOL;
1806: break;
1807: } else if (rnorm > rnorm_start*(dtol)) {
1808: *reason = PCRICHARDSON_DIVERGED_DTOL;
1809: break;
1810: }
1811: }
1812: *outits = i;
1813:
1814: /* Return solution */
1815: if (asa->scale_diag) {
1816: VecPointwiseMult(x, asa->x, asa->invsqrtdiag);
1817: } else {
1818: VecCopy(x, asa->x);
1819: }
1821: /* delete working vectors */
1822: VecDestroy(&(asa->x));
1823: VecDestroy(&(asa->b));
1824: VecDestroy(&(asa->r));
1825: asa_lev->x = PETSC_NULL;
1826: asa_lev->b = PETSC_NULL;
1827: return(0);
1828: }
1830: /* -------------------------------------------------------------------------- */
1831: /*
1832: PCDestroy_ASA - Destroys the private context for the ASA preconditioner
1833: that was created with PCCreate_ASA().
1835: Input Parameter:
1836: . pc - the preconditioner context
1838: Application Interface Routine: PCDestroy()
1839: */
1842: static PetscErrorCode PCDestroy_ASA(PC pc)
1843: {
1844: PC_ASA *asa;
1845: PC_ASA_level *asa_lev;
1846: PC_ASA_level *asa_next_level;
1851: asa = (PC_ASA*)pc->data;
1852: asa_lev = asa->levellist;
1854: /* Delete top level data */
1855: PetscFree(asa->ksptype_smooth);
1856: PetscFree(asa->pctype_smooth);
1857: PetscFree(asa->ksptype_direct);
1858: PetscFree(asa->pctype_direct);
1859: PetscFree(asa->coarse_mat_type);
1861: /* this is destroyed by the levels below */
1862: /* MatDestroy(&(asa->A)); */
1863: VecDestroy(&(asa->invsqrtdiag));
1864: VecDestroy(&(asa->b));
1865: VecDestroy(&(asa->x));
1866: VecDestroy(&(asa->r));
1868: /* Destroy each of the levels */
1869: while(asa_lev) {
1870: asa_next_level = asa_lev->next;
1871: PCDestroyLevel_ASA(asa_lev);
1872: asa_lev = asa_next_level;
1873: }
1875: PetscFree(asa);
1876: return(0);
1877: }
1881: static PetscErrorCode PCSetFromOptions_ASA(PC pc)
1882: {
1883: PC_ASA *asa = (PC_ASA*)pc->data;
1884: PetscBool flg;
1886: char type[20];
1891: PetscOptionsHead("ASA options");
1892: /* convergence parameters */
1893: PetscOptionsInt("-pc_asa_nu","Number of cycles to run smoother","No manual page yet",asa->nu,&(asa->nu),&flg);
1894: PetscOptionsInt("-pc_asa_gamma","Number of cycles to run coarse grid correction","No manual page yet",asa->gamma,&(asa->gamma),&flg);
1895: PetscOptionsReal("-pc_asa_epsilon","Tolerance for the relaxation method","No manual page yet",asa->epsilon,&(asa->epsilon),&flg);
1896: PetscOptionsInt("-pc_asa_mu","Number of cycles to relax in setup stages","No manual page yet",asa->mu,&(asa->mu),&flg);
1897: PetscOptionsInt("-pc_asa_mu_initial","Number of cycles to relax for generating first candidate vector","No manual page yet",asa->mu_initial,&(asa->mu_initial),&flg);
1898: PetscOptionsInt("-pc_asa_direct_solver","For which matrix size should we use the direct solver?","No manual page yet",asa->direct_solver,&(asa->direct_solver),&flg);
1899: PetscOptionsBool("-pc_asa_scale_diag","Should we scale the matrix with the inverse of its diagonal?","No manual page yet",asa->scale_diag,&(asa->scale_diag),&flg);
1900: /* type of smoother used */
1901: PetscOptionsList("-pc_asa_smoother_ksp_type","The type of KSP to be used in the smoothers","No manual page yet",KSPList,asa->ksptype_smooth,type,20,&flg);
1902: if (flg) {
1903: PetscFree(asa->ksptype_smooth);
1904: PetscStrallocpy(type,&(asa->ksptype_smooth));
1905: }
1906: PetscOptionsList("-pc_asa_smoother_pc_type","The type of PC to be used in the smoothers","No manual page yet",PCList,asa->pctype_smooth,type,20,&flg);
1907: if (flg) {
1908: PetscFree(asa->pctype_smooth);
1909: PetscStrallocpy(type,&(asa->pctype_smooth));
1910: }
1911: PetscOptionsList("-pc_asa_direct_ksp_type","The type of KSP to be used in the direct solver","No manual page yet",KSPList,asa->ksptype_direct,type,20,&flg);
1912: if (flg) {
1913: PetscFree(asa->ksptype_direct);
1914: PetscStrallocpy(type,&(asa->ksptype_direct));
1915: }
1916: PetscOptionsList("-pc_asa_direct_pc_type","The type of PC to be used in the direct solver","No manual page yet",PCList,asa->pctype_direct,type,20,&flg);
1917: if (flg) {
1918: PetscFree(asa->pctype_direct);
1919: PetscStrallocpy(type,&(asa->pctype_direct));
1920: }
1921: /* options specific for certain smoothers */
1922: PetscOptionsReal("-pc_asa_richardson_scale","Scaling parameter for preconditioning in relaxation, if smoothing KSP is Richardson","No manual page yet",asa->richardson_scale,&(asa->richardson_scale),&flg);
1923: PetscOptionsReal("-pc_asa_sor_omega","Scaling parameter for preconditioning in relaxation, if smoothing KSP is Richardson","No manual page yet",asa->sor_omega,&(asa->sor_omega),&flg);
1924: /* options for direct solver */
1925: PetscOptionsString("-pc_asa_coarse_mat_type","The coarse level matrix type (e.g. SuperLU, MUMPS, ...)","No manual page yet",asa->coarse_mat_type, type,20,&flg);
1926: if (flg) {
1927: PetscFree(asa->coarse_mat_type);
1928: PetscStrallocpy(type,&(asa->coarse_mat_type));
1929: }
1930: /* storage allocation parameters */
1931: PetscOptionsInt("-pc_asa_max_cand_vecs","Maximum number of candidate vectors","No manual page yet",asa->max_cand_vecs,&(asa->max_cand_vecs),&flg);
1932: PetscOptionsInt("-pc_asa_max_dof_lev_2","The maximum number of degrees of freedom per node on level 2 (K in paper)","No manual page yet",asa->max_dof_lev_2,&(asa->max_dof_lev_2),&flg);
1933: /* construction parameters */
1934: PetscOptionsReal("-pc_asa_rq_improve","Threshold in RQ improvement for adding another candidate","No manual page yet",asa->rq_improve,&(asa->rq_improve),&flg);
1935: PetscOptionsTail();
1936: return(0);
1937: }
1941: static PetscErrorCode PCView_ASA(PC pc,PetscViewer viewer)
1942: {
1943: PC_ASA *asa = (PC_ASA*)pc->data;
1945: PetscBool iascii;
1946: PC_ASA_level *asa_lev = asa->levellist;
1949: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1950: if (iascii) {
1951: PetscViewerASCIIPrintf(viewer," ASA:\n");
1952: asa_lev = asa->levellist;
1953: while (asa_lev) {
1954: if (!asa_lev->next) {
1955: PetscViewerASCIIPrintf(viewer,"Coarse grid solver -- level %D -------------------------------\n",0);
1956: } else {
1957: PetscViewerASCIIPrintf(viewer,"Down solver (pre-smoother) on level ? -------------------------------\n");
1958: }
1959: PetscViewerASCIIPushTab(viewer);
1960: KSPView(asa_lev->smoothd,viewer);
1961: PetscViewerASCIIPopTab(viewer);
1962: if (asa_lev->next && asa_lev->smoothd == asa_lev->smoothu) {
1963: PetscViewerASCIIPrintf(viewer,"Up solver (post-smoother) same as down solver (pre-smoother)\n");
1964: } else if (asa_lev->next){
1965: PetscViewerASCIIPrintf(viewer,"Up solver (post-smoother) on level ? -------------------------------\n");
1966: PetscViewerASCIIPushTab(viewer);
1967: KSPView(asa_lev->smoothu,viewer);
1968: PetscViewerASCIIPopTab(viewer);
1969: }
1970: asa_lev = asa_lev->next;
1971: }
1972: } else {
1973: SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCASA",((PetscObject)viewer)->type_name);
1974: }
1975: return(0);
1976: }
1978: /* -------------------------------------------------------------------------- */
1979: /*
1980: PCCreate_ASA - Creates a ASA preconditioner context, PC_ASA,
1981: and sets this as the private data within the generic preconditioning
1982: context, PC, that was created within PCCreate().
1984: Input Parameter:
1985: . pc - the preconditioner context
1987: Application Interface Routine: PCCreate()
1988: */
1989: EXTERN_C_BEGIN
1992: PetscErrorCode PCCreate_ASA(PC pc)
1993: {
1995: PC_ASA *asa;
2000: /*
2001: Set the pointers for the functions that are provided above.
2002: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
2003: are called, they will automatically call these functions. Note we
2004: choose not to provide a couple of these functions since they are
2005: not needed.
2006: */
2007: pc->ops->apply = PCApply_ASA;
2008: /* pc->ops->applytranspose = PCApply_ASA;*/
2009: pc->ops->applyrichardson = PCApplyRichardson_ASA;
2010: pc->ops->setup = 0;
2011: pc->ops->destroy = PCDestroy_ASA;
2012: pc->ops->setfromoptions = PCSetFromOptions_ASA;
2013: pc->ops->view = PCView_ASA;
2015: /* Set the data to pointer to 0 */
2016: pc->data = (void*)0;
2018: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASASetTolerances_C","PCASASetTolerances_ASA",PCASASetTolerances_ASA);
2020: /* register events */
2021: if (! asa_events_registered) {
2022: PetscLogEventRegister("PCInitializationStage_ASA", PC_CLASSID,&PC_InitializationStage_ASA);
2023: PetscLogEventRegister("PCGeneralSetupStage_ASA", PC_CLASSID,&PC_GeneralSetupStage_ASA);
2024: PetscLogEventRegister("PCCreateTransferOp_ASA", PC_CLASSID,&PC_CreateTransferOp_ASA);
2025: PetscLogEventRegister("PCCreateVcycle_ASA", PC_CLASSID,&PC_CreateVcycle_ASA);
2026: asa_events_registered = PETSC_TRUE;
2027: }
2029: /* Create new PC_ASA object */
2030: PetscNewLog(pc,PC_ASA,&asa);
2031: pc->data = (void*)asa;
2033: /* WORK: find some better initial values */
2034: asa->nu = 3;
2035: asa->gamma = 1;
2036: asa->epsilon = 1e-4;
2037: asa->mu = 3;
2038: asa->mu_initial = 20;
2039: asa->direct_solver = 100;
2040: asa->scale_diag = PETSC_TRUE;
2041: PetscStrallocpy(KSPRICHARDSON, (char **) &(asa->ksptype_smooth));
2042: PetscStrallocpy(PCSOR, (char **) &(asa->pctype_smooth));
2043: asa->smoother_rtol = 1e-10;
2044: asa->smoother_abstol = 1e-20;
2045: asa->smoother_dtol = PETSC_DEFAULT;
2046: PetscStrallocpy(KSPPREONLY, (char **) &(asa->ksptype_direct));
2047: PetscStrallocpy(PCREDUNDANT, (char **) &(asa->pctype_direct));
2048: asa->direct_rtol = 1e-10;
2049: asa->direct_abstol = 1e-20;
2050: asa->direct_dtol = PETSC_DEFAULT;
2051: asa->richardson_scale = PETSC_DECIDE;
2052: asa->sor_omega = PETSC_DECIDE;
2053: PetscStrallocpy(MATSAME, (char **) &(asa->coarse_mat_type));
2055: asa->max_cand_vecs = 4;
2056: asa->max_dof_lev_2 = 640; /* I don't think this parameter really matters, 640 should be enough for everyone! */
2058: asa->multigrid_constructed = PETSC_FALSE;
2060: asa->rtol = 1e-10;
2061: asa->abstol = 1e-15;
2062: asa->divtol = 1e5;
2063: asa->max_it = 10000;
2064: asa->rq_improve = 0.9;
2065:
2066: asa->A = 0;
2067: asa->invsqrtdiag = 0;
2068: asa->b = 0;
2069: asa->x = 0;
2070: asa->r = 0;
2072: asa->levels = 0;
2073: asa->levellist = 0;
2075: asa->comm = ((PetscObject)pc)->comm;
2076: return(0);
2077: }
2078: EXTERN_C_END