Actual source code: pch2opus.c
1: #include <petsc/private/pcimpl.h>
2: #include <petsc/private/matimpl.h>
3: #include <h2opusconf.h>
5: /* Use GPU only if H2OPUS is configured for GPU */
6: #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
7: #define PETSC_H2OPUS_USE_GPU
8: #endif
10: typedef struct {
11: Mat A;
12: Mat M;
13: PetscScalar s0;
15: /* sampler for Newton-Schultz */
16: Mat S;
17: PetscInt hyperorder;
18: Vec wns[4];
19: Mat wnsmat[4];
21: /* convergence testing */
22: Mat T;
23: Vec w;
24: PetscBool testMA;
26: /* Support for PCSetCoordinates */
27: PetscInt sdim;
28: PetscInt nlocc;
29: PetscReal *coords;
31: /* Newton-Schultz customization */
32: PetscInt maxits;
33: PetscReal rtol,atol;
34: PetscBool monitor;
35: PetscBool useapproximatenorms;
36: NormType normtype;
38: /* Used when pmat != MATH2OPUS */
39: PetscReal eta;
40: PetscInt leafsize;
41: PetscInt max_rank;
42: PetscInt bs;
43: PetscReal mrtol;
45: PetscBool boundtocpu;
46: } PC_H2OPUS;
48: PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat,NormType,PetscReal*);
50: static PetscErrorCode PCReset_H2OPUS(PC pc)
51: {
52: PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
56: pch2opus->sdim = 0;
57: pch2opus->nlocc = 0;
58: PetscFree(pch2opus->coords);
59: MatDestroy(&pch2opus->A);
60: MatDestroy(&pch2opus->M);
61: MatDestroy(&pch2opus->T);
62: VecDestroy(&pch2opus->w);
63: MatDestroy(&pch2opus->S);
64: VecDestroy(&pch2opus->wns[0]);
65: VecDestroy(&pch2opus->wns[1]);
66: VecDestroy(&pch2opus->wns[2]);
67: VecDestroy(&pch2opus->wns[3]);
68: MatDestroy(&pch2opus->wnsmat[0]);
69: MatDestroy(&pch2opus->wnsmat[1]);
70: MatDestroy(&pch2opus->wnsmat[2]);
71: MatDestroy(&pch2opus->wnsmat[3]);
72: return(0);
73: }
75: static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords)
76: {
77: PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
78: PetscBool reset = PETSC_TRUE;
82: if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) {
83: PetscArraycmp(pch2opus->coords,coords,sdim*nlocc,&reset);
84: reset = (PetscBool)!reset;
85: }
86: MPIU_Allreduce(MPI_IN_PLACE,&reset,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
87: if (reset) {
88: PCReset_H2OPUS(pc);
89: PetscMalloc1(sdim*nlocc,&pch2opus->coords);
90: PetscArraycpy(pch2opus->coords,coords,sdim*nlocc);
91: pch2opus->sdim = sdim;
92: pch2opus->nlocc = nlocc;
93: }
94: return(0);
95: }
97: static PetscErrorCode PCDestroy_H2OPUS(PC pc)
98: {
102: PCReset_H2OPUS(pc);
103: PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",NULL);
104: PetscFree(pc->data);
105: return(0);
106: }
108: static PetscErrorCode PCSetFromOptions_H2OPUS(PetscOptionItems *PetscOptionsObject,PC pc)
109: {
110: PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
114: PetscOptionsHead(PetscOptionsObject,"H2OPUS options");
115: PetscOptionsInt("-pc_h2opus_maxits","Maximum number of iterations for Newton-Schultz",NULL,pch2opus->maxits,&pch2opus->maxits,NULL);
116: PetscOptionsBool("-pc_h2opus_monitor","Monitor Newton-Schultz convergence",NULL,pch2opus->monitor,&pch2opus->monitor,NULL);
117: PetscOptionsReal("-pc_h2opus_atol","Absolute tolerance",NULL,pch2opus->atol,&pch2opus->atol,NULL);
118: PetscOptionsReal("-pc_h2opus_rtol","Relative tolerance",NULL,pch2opus->rtol,&pch2opus->rtol,NULL);
119: PetscOptionsEnum("-pc_h2opus_norm_type","Norm type for convergence monitoring",NULL,NormTypes,(PetscEnum)pch2opus->normtype,(PetscEnum*)&pch2opus->normtype,NULL);
120: PetscOptionsInt("-pc_h2opus_hyperorder","Hyper power order of sampling",NULL,pch2opus->hyperorder,&pch2opus->hyperorder,NULL);
121: PetscOptionsInt("-pc_h2opus_leafsize","Leaf size when constructed from kernel",NULL,pch2opus->leafsize,&pch2opus->leafsize,NULL);
122: PetscOptionsReal("-pc_h2opus_eta","Admissibility condition tolerance",NULL,pch2opus->eta,&pch2opus->eta,NULL);
123: PetscOptionsInt("-pc_h2opus_maxrank","Maximum rank when constructed from matvecs",NULL,pch2opus->max_rank,&pch2opus->max_rank,NULL);
124: PetscOptionsInt("-pc_h2opus_samples","Number of samples to be taken concurrently when constructing from matvecs",NULL,pch2opus->bs,&pch2opus->bs,NULL);
125: PetscOptionsReal("-pc_h2opus_mrtol","Relative tolerance for construction from sampling",NULL,pch2opus->mrtol,&pch2opus->mrtol,NULL);
126: PetscOptionsTail();
127: return(0);
128: }
130: typedef struct {
131: Mat A;
132: Mat M;
133: Vec w;
134: } AAtCtx;
136: static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y)
137: {
138: AAtCtx *aat;
142: MatShellGetContext(A,(void*)&aat);
143: /* MatMultTranspose(aat->M,x,aat->w); */
144: MatMultTranspose(aat->A,x,aat->w);
145: MatMult(aat->A,aat->w,y);
146: return(0);
147: }
149: static PetscErrorCode PCH2OpusSetUpInit(PC pc)
150: {
151: PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
152: Mat A = pc->useAmat ? pc->mat : pc->pmat, AAt;
153: PetscInt M,m;
154: VecType vtype;
155: PetscReal n;
156: AAtCtx aat;
160: aat.A = A;
161: aat.M = pch2opus->M; /* unused so far */
162: MatCreateVecs(A,NULL,&aat.w);
163: MatGetSize(A,&M,NULL);
164: MatGetLocalSize(A,&m,NULL);
165: MatCreateShell(PetscObjectComm((PetscObject)A),m,m,M,M,&aat,&AAt);
166: MatBindToCPU(AAt,pch2opus->boundtocpu);
167: MatShellSetOperation(AAt,MATOP_MULT,(void (*)(void))MatMult_AAt);
168: MatShellSetOperation(AAt,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMult_AAt);
169: MatShellSetOperation(AAt,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS);
170: MatGetVecType(A,&vtype);
171: MatShellSetVecType(AAt,vtype);
172: MatNorm(AAt,NORM_1,&n);
173: pch2opus->s0 = 1./n;
174: VecDestroy(&aat.w);
175: MatDestroy(&AAt);
176: return(0);
177: }
179: static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t)
180: {
181: PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
185: if (t) {
186: MatMultTranspose(pch2opus->M,x,y);
187: } else {
188: MatMult(pch2opus->M,x,y);
189: }
190: return(0);
191: }
193: static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t)
194: {
195: PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
199: if (t) {
200: MatTransposeMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);
201: } else {
202: MatMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);
203: }
204: return(0);
205: }
207: static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y)
208: {
212: PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_FALSE);
213: return(0);
214: }
216: static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y)
217: {
221: PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_TRUE);
222: return(0);
223: }
225: static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y)
226: {
230: PCApplyKernel_H2OPUS(pc,x,y,PETSC_FALSE);
231: return(0);
232: }
234: static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y)
235: {
239: PCApplyKernel_H2OPUS(pc,x,y,PETSC_TRUE);
240: return(0);
241: }
243: /* used to test the norm of (M^-1 A - I) */
244: static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t)
245: {
246: PC pc;
247: Mat A;
248: PC_H2OPUS *pch2opus;
250: PetscBool sideleft = PETSC_TRUE;
253: MatShellGetContext(M,(void*)&pc);
254: pch2opus = (PC_H2OPUS*)pc->data;
255: if (!pch2opus->w) {
256: MatCreateVecs(pch2opus->M,&pch2opus->w,NULL);
257: }
258: A = pch2opus->A;
259: VecBindToCPU(pch2opus->w,pch2opus->boundtocpu);
260: if (t) {
261: if (sideleft) {
262: PCApplyTranspose_H2OPUS(pc,x,pch2opus->w);
263: MatMultTranspose(A,pch2opus->w,y);
264: } else {
265: MatMultTranspose(A,x,pch2opus->w);
266: PCApplyTranspose_H2OPUS(pc,pch2opus->w,y);
267: }
268: } else {
269: if (sideleft) {
270: MatMult(A,x,pch2opus->w);
271: PCApply_H2OPUS(pc,pch2opus->w,y);
272: } else {
273: PCApply_H2OPUS(pc,x,pch2opus->w);
274: MatMult(A,pch2opus->w,y);
275: }
276: }
277: if (!pch2opus->testMA) {
278: VecAXPY(y,-1.0,x);
279: }
280: return(0);
281: }
283: static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y)
284: {
288: MatMultKernel_MAmI(A,x,y,PETSC_FALSE);
289: return(0);
290: }
292: static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y)
293: {
297: MatMultKernel_MAmI(A,x,y,PETSC_TRUE);
298: return(0);
299: }
301: /* HyperPower kernel:
302: Y = R = x
303: for i = 1 . . . l - 1 do
304: R = (I - A * Xk) * R
305: Y = Y + R
306: Y = Xk * Y
307: */
308: static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t)
309: {
310: PC pc;
311: Mat A;
312: PC_H2OPUS *pch2opus;
313: PetscInt i;
317: MatShellGetContext(M,(void*)&pc);
318: pch2opus = (PC_H2OPUS*)pc->data;
319: A = pch2opus->A;
320: MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1]);
321: MatCreateVecs(pch2opus->M,pch2opus->wns[2] ? NULL : &pch2opus->wns[2],pch2opus->wns[3] ? NULL : &pch2opus->wns[3]);
322: VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu);
323: VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu);
324: VecBindToCPU(pch2opus->wns[2],pch2opus->boundtocpu);
325: VecBindToCPU(pch2opus->wns[3],pch2opus->boundtocpu);
326: VecCopy(x,pch2opus->wns[0]);
327: VecCopy(x,pch2opus->wns[3]);
328: if (t) {
329: for (i=0;i<pch2opus->hyperorder-1;i++) {
330: MatMultTranspose(A,pch2opus->wns[0],pch2opus->wns[1]);
331: PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[2]);
332: VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2]);
333: VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0]);
334: }
335: PCApplyTranspose_H2OPUS(pc,pch2opus->wns[3],y);
336: } else {
337: for (i=0;i<pch2opus->hyperorder-1;i++) {
338: PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1]);
339: MatMult(A,pch2opus->wns[1],pch2opus->wns[2]);
340: VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2]);
341: VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0]);
342: }
343: PCApply_H2OPUS(pc,pch2opus->wns[3],y);
344: }
345: return(0);
346: }
348: static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y)
349: {
353: MatMultKernel_Hyper(M,x,y,PETSC_FALSE);
354: return(0);
355: }
357: static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y)
358: {
362: MatMultKernel_Hyper(M,x,y,PETSC_TRUE);
363: return(0);
364: }
366: /* Hyper power kernel, MatMat version */
367: static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t)
368: {
369: PC pc;
370: Mat A;
371: PC_H2OPUS *pch2opus;
372: PetscInt i;
376: MatShellGetContext(M,(void*)&pc);
377: pch2opus = (PC_H2OPUS*)pc->data;
378: A = pch2opus->A;
379: if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
380: MatDestroy(&pch2opus->wnsmat[0]);
381: MatDestroy(&pch2opus->wnsmat[1]);
382: }
383: if (!pch2opus->wnsmat[0]) {
384: MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0]);
385: MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1]);
386: }
387: if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) {
388: MatDestroy(&pch2opus->wnsmat[2]);
389: MatDestroy(&pch2opus->wnsmat[3]);
390: }
391: if (!pch2opus->wnsmat[2]) {
392: MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[2]);
393: MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[3]);
394: }
395: MatCopy(X,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);
396: MatCopy(X,pch2opus->wnsmat[3],SAME_NONZERO_PATTERN);
397: if (t) {
398: for (i=0;i<pch2opus->hyperorder-1;i++) {
399: MatTransposeMatMult(A,pch2opus->wnsmat[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1]);
400: PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[2]);
401: MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN);
402: MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);
403: }
404: PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[3],Y);
405: } else {
406: for (i=0;i<pch2opus->hyperorder-1;i++) {
407: PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1]);
408: MatMatMult(A,pch2opus->wnsmat[1],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[2]);
409: MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN);
410: MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);
411: }
412: PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[3],Y);
413: }
414: return(0);
415: }
417: static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y,void *ctx)
418: {
422: MatMatMultKernel_Hyper(M,X,Y,PETSC_FALSE);
423: return(0);
424: }
426: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */
427: static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t)
428: {
429: PC pc;
430: Mat A;
431: PC_H2OPUS *pch2opus;
435: MatShellGetContext(M,(void*)&pc);
436: pch2opus = (PC_H2OPUS*)pc->data;
437: A = pch2opus->A;
438: MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1]);
439: VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu);
440: VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu);
441: if (t) {
442: PCApplyTranspose_H2OPUS(pc,x,y);
443: MatMultTranspose(A,y,pch2opus->wns[1]);
444: PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[0]);
445: VecAXPBY(y,-1.,2.,pch2opus->wns[0]);
446: } else {
447: PCApply_H2OPUS(pc,x,y);
448: MatMult(A,y,pch2opus->wns[0]);
449: PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1]);
450: VecAXPBY(y,-1.,2.,pch2opus->wns[1]);
451: }
452: return(0);
453: }
455: static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y)
456: {
460: MatMultKernel_NS(M,x,y,PETSC_FALSE);
461: return(0);
462: }
464: static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y)
465: {
469: MatMultKernel_NS(M,x,y,PETSC_TRUE);
470: return(0);
471: }
473: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */
474: static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t)
475: {
476: PC pc;
477: Mat A;
478: PC_H2OPUS *pch2opus;
482: MatShellGetContext(M,(void*)&pc);
483: pch2opus = (PC_H2OPUS*)pc->data;
484: A = pch2opus->A;
485: if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
486: MatDestroy(&pch2opus->wnsmat[0]);
487: MatDestroy(&pch2opus->wnsmat[1]);
488: }
489: if (!pch2opus->wnsmat[0]) {
490: MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0]);
491: MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1]);
492: }
493: if (t) {
494: PCApplyTransposeMat_H2OPUS(pc,X,Y);
495: MatTransposeMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1]);
496: PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[0]);
497: MatScale(Y,2.);
498: MatAXPY(Y,-1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);
499: } else {
500: PCApplyMat_H2OPUS(pc,X,Y);
501: MatMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[0]);
502: PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1]);
503: MatScale(Y,2.);
504: MatAXPY(Y,-1.,pch2opus->wnsmat[1],SAME_NONZERO_PATTERN);
505: }
506: return(0);
507: }
509: static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx)
510: {
514: MatMatMultKernel_NS(M,X,Y,PETSC_FALSE);
515: return(0);
516: }
518: static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc)
519: {
520: PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
521: Mat A = pc->useAmat ? pc->mat : pc->pmat;
525: if (!pch2opus->S) {
526: PetscInt M,N,m,n;
528: MatGetSize(A,&M,&N);
529: MatGetLocalSize(A,&m,&n);
530: MatCreateShell(PetscObjectComm((PetscObject)A),m,n,M,N,pc,&pch2opus->S);
531: MatSetBlockSizesFromMats(pch2opus->S,A,A);
532: #if defined(PETSC_H2OPUS_USE_GPU)
533: MatShellSetVecType(pch2opus->S,VECCUDA);
534: #endif
535: }
536: if (pch2opus->hyperorder >= 2) {
537: MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_Hyper);
538: MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_Hyper);
539: MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSE,MATDENSE);
540: MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSECUDA,MATDENSECUDA);
541: } else {
542: MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_NS);
543: MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_NS);
544: MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSE,MATDENSE);
545: MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSECUDA,MATDENSECUDA);
546: }
547: MatPropagateSymmetryOptions(A,pch2opus->S);
548: MatBindToCPU(pch2opus->S,pch2opus->boundtocpu);
549: /* XXX */
550: MatSetOption(pch2opus->S,MAT_SYMMETRIC,PETSC_TRUE);
551: return(0);
552: }
554: static PetscErrorCode PCSetUp_H2OPUS(PC pc)
555: {
556: PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
557: Mat A = pc->useAmat ? pc->mat : pc->pmat;
558: NormType norm = pch2opus->normtype;
559: PetscReal initerr = 0.0,err;
560: PetscReal initerrMA = 0.0,errMA;
561: PetscBool ish2opus;
565: if (!pch2opus->T) {
566: PetscInt M,N,m,n;
567: const char *prefix;
569: PCGetOptionsPrefix(pc,&prefix);
570: MatGetSize(A,&M,&N);
571: MatGetLocalSize(A,&m,&n);
572: MatCreateShell(PetscObjectComm((PetscObject)pc->pmat),m,n,M,N,pc,&pch2opus->T);
573: MatSetBlockSizesFromMats(pch2opus->T,A,A);
574: MatShellSetOperation(pch2opus->T,MATOP_MULT,(void (*)(void))MatMult_MAmI);
575: MatShellSetOperation(pch2opus->T,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_MAmI);
576: MatShellSetOperation(pch2opus->T,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS);
577: #if defined(PETSC_H2OPUS_USE_GPU)
578: MatShellSetVecType(pch2opus->T,VECCUDA);
579: #endif
580: PetscLogObjectParent((PetscObject)pc,(PetscObject)pch2opus->T);
581: MatSetOptionsPrefix(pch2opus->T,prefix);
582: MatAppendOptionsPrefix(pch2opus->T,"pc_h2opus_est_");
583: }
584: MatDestroy(&pch2opus->A);
585: PetscObjectTypeCompare((PetscObject)A,MATH2OPUS,&ish2opus);
586: if (ish2opus) {
587: PetscObjectReference((PetscObject)A);
588: pch2opus->A = A;
589: } else {
590: const char *prefix;
591: MatCreateH2OpusFromMat(A,pch2opus->sdim,pch2opus->coords,PETSC_FALSE,pch2opus->eta,pch2opus->leafsize,pch2opus->max_rank,pch2opus->bs,pch2opus->mrtol,&pch2opus->A);
592: /* XXX */
593: MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE);
594: PCGetOptionsPrefix(pc,&prefix);
595: MatSetOptionsPrefix(pch2opus->A,prefix);
596: MatAppendOptionsPrefix(pch2opus->A,"pc_h2opus_init_");
597: MatSetFromOptions(pch2opus->A);
598: MatAssemblyBegin(pch2opus->A,MAT_FINAL_ASSEMBLY);
599: MatAssemblyEnd(pch2opus->A,MAT_FINAL_ASSEMBLY);
600: /* XXX */
601: MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE);
602: }
603: #if defined(PETSC_H2OPUS_USE_GPU)
604: pch2opus->boundtocpu = pch2opus->A->boundtocpu;
605: #endif
606: MatBindToCPU(pch2opus->T,pch2opus->boundtocpu);
607: if (pch2opus->M) { /* see if we can reuse M as initial guess */
608: PetscReal reuse;
610: MatBindToCPU(pch2opus->M,pch2opus->boundtocpu);
611: MatNorm(pch2opus->T,norm,&reuse);
612: if (reuse >= 1.0) {
613: MatDestroy(&pch2opus->M);
614: }
615: }
616: if (!pch2opus->M) {
617: const char *prefix;
618: MatDuplicate(pch2opus->A,MAT_COPY_VALUES,&pch2opus->M);
619: PCGetOptionsPrefix(pc,&prefix);
620: MatSetOptionsPrefix(pch2opus->M,prefix);
621: MatAppendOptionsPrefix(pch2opus->M,"pc_h2opus_inv_");
622: MatSetFromOptions(pch2opus->M);
623: PCH2OpusSetUpInit(pc);
624: MatScale(pch2opus->M,pch2opus->s0);
625: }
626: /* A and M have the same h2 matrix structure, save on reordering routines */
627: MatH2OpusSetNativeMult(pch2opus->A,PETSC_TRUE);
628: MatH2OpusSetNativeMult(pch2opus->M,PETSC_TRUE);
629: if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) {
630: MatNorm(pch2opus->T,norm,&initerr);
631: pch2opus->testMA = PETSC_TRUE;
632: MatNorm(pch2opus->T,norm,&initerrMA);
633: pch2opus->testMA = PETSC_FALSE;
634: }
635: if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR;
636: err = initerr;
637: errMA = initerrMA;
638: if (pch2opus->monitor) {
639: PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)err,(double)(err/initerr));
640: PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A|| NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA));
641: }
642: if (initerr > pch2opus->atol && !pc->failedreason) {
643: PetscInt i;
645: PCH2OpusSetUpSampler_Private(pc);
646: for (i = 0; i < pch2opus->maxits; i++) {
647: Mat M;
648: const char* prefix;
650: MatDuplicate(pch2opus->M,MAT_SHARE_NONZERO_PATTERN,&M);
651: MatGetOptionsPrefix(pch2opus->M,&prefix);
652: MatSetOptionsPrefix(M,prefix);
653: MatH2OpusSetSamplingMat(M,pch2opus->S,PETSC_DECIDE,PETSC_DECIDE);
654: MatSetFromOptions(M);
655: MatH2OpusSetNativeMult(M,PETSC_TRUE);
656: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
657: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
659: MatDestroy(&pch2opus->M);
660: pch2opus->M = M;
661: if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) {
662: MatNorm(pch2opus->T,norm,&err);
663: pch2opus->testMA = PETSC_TRUE;
664: MatNorm(pch2opus->T,norm,&errMA);
665: pch2opus->testMA = PETSC_FALSE;
666: }
667: if (pch2opus->monitor) {
668: PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)err,(double)(err/initerr));
669: PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A|| NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA));
670: }
671: if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR;
672: if (err < pch2opus->atol || err < pch2opus->rtol*initerr || pc->failedreason) break;
673: }
674: }
675: /* cleanup setup workspace */
676: MatH2OpusSetNativeMult(pch2opus->A,PETSC_FALSE);
677: MatH2OpusSetNativeMult(pch2opus->M,PETSC_FALSE);
678: VecDestroy(&pch2opus->wns[0]);
679: VecDestroy(&pch2opus->wns[1]);
680: VecDestroy(&pch2opus->wns[2]);
681: VecDestroy(&pch2opus->wns[3]);
682: MatDestroy(&pch2opus->wnsmat[0]);
683: MatDestroy(&pch2opus->wnsmat[1]);
684: MatDestroy(&pch2opus->wnsmat[2]);
685: MatDestroy(&pch2opus->wnsmat[3]);
686: return(0);
687: }
689: static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer)
690: {
692: PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
693: PetscBool isascii;
696: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
697: if (isascii) {
698: if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) {
699: PetscViewerASCIIPrintf(viewer,"Initial approximation matrix\n");
700: PetscViewerASCIIPushTab(viewer);
701: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);
702: MatView(pch2opus->A,viewer);
703: PetscViewerPopFormat(viewer);
704: PetscViewerASCIIPopTab(viewer);
705: }
706: if (pch2opus->M) {
707: PetscViewerASCIIPrintf(viewer,"Inner matrix constructed\n");
708: PetscViewerASCIIPushTab(viewer);
709: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);
710: MatView(pch2opus->M,viewer);
711: PetscViewerPopFormat(viewer);
712: PetscViewerASCIIPopTab(viewer);
713: }
714: PetscViewerASCIIPrintf(viewer,"Initial scaling: %g\n",pch2opus->s0);
715: }
716: return(0);
717: }
719: PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc)
720: {
722: PC_H2OPUS *pch2opus;
725: PetscNewLog(pc,&pch2opus);
726: pc->data = (void*)pch2opus;
728: pch2opus->atol = 1.e-2;
729: pch2opus->rtol = 1.e-6;
730: pch2opus->maxits = 50;
731: pch2opus->hyperorder = 1; /* defaults to basic NewtonSchultz */
732: pch2opus->normtype = NORM_2;
734: /* these are needed when we are sampling the pmat */
735: pch2opus->eta = PETSC_DECIDE;
736: pch2opus->leafsize = PETSC_DECIDE;
737: pch2opus->max_rank = PETSC_DECIDE;
738: pch2opus->bs = PETSC_DECIDE;
739: pch2opus->mrtol = PETSC_DECIDE;
740: #if defined(PETSC_H2OPUS_USE_GPU)
741: pch2opus->boundtocpu = PETSC_FALSE;
742: #else
743: pch2opus->boundtocpu = PETSC_TRUE;
744: #endif
745: pc->ops->destroy = PCDestroy_H2OPUS;
746: pc->ops->setup = PCSetUp_H2OPUS;
747: pc->ops->apply = PCApply_H2OPUS;
748: pc->ops->matapply = PCApplyMat_H2OPUS;
749: pc->ops->applytranspose = PCApplyTranspose_H2OPUS;
750: pc->ops->reset = PCReset_H2OPUS;
751: pc->ops->setfromoptions = PCSetFromOptions_H2OPUS;
752: pc->ops->view = PCView_H2OPUS;
754: PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",PCSetCoordinates_H2OPUS);
755: return(0);
756: }