Actual source code: pch2opus.c

  1: #include <petsc/private/pcimpl.h>
  2: #include <petsc/private/matimpl.h>
  3: #include <h2opusconf.h>

  5: /* Use GPU only if H2OPUS is configured for GPU */
  6: #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
  7: #define PETSC_H2OPUS_USE_GPU
  8: #endif

 10: typedef struct {
 11:   Mat         A;
 12:   Mat         M;
 13:   PetscScalar s0;

 15:   /* sampler for Newton-Schultz */
 16:   Mat      S;
 17:   PetscInt hyperorder;
 18:   Vec      wns[4];
 19:   Mat      wnsmat[4];

 21:   /* convergence testing */
 22:   Mat       T;
 23:   Vec       w;
 24:   PetscBool testMA;

 26:   /* Support for PCSetCoordinates */
 27:   PetscInt  sdim;
 28:   PetscInt  nlocc;
 29:   PetscReal *coords;

 31:   /* Newton-Schultz customization */
 32:   PetscInt  maxits;
 33:   PetscReal rtol,atol;
 34:   PetscBool monitor;
 35:   PetscBool useapproximatenorms;
 36:   NormType  normtype;

 38:   /* Used when pmat != MATH2OPUS */
 39:   PetscReal eta;
 40:   PetscInt  leafsize;
 41:   PetscInt  max_rank;
 42:   PetscInt  bs;
 43:   PetscReal mrtol;

 45:   PetscBool boundtocpu;
 46: } PC_H2OPUS;

 48: PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat,NormType,PetscReal*);

 50: static PetscErrorCode PCReset_H2OPUS(PC pc)
 51: {
 52:   PC_H2OPUS      *pch2opus = (PC_H2OPUS*)pc->data;

 56:   pch2opus->sdim  = 0;
 57:   pch2opus->nlocc = 0;
 58:   PetscFree(pch2opus->coords);
 59:   MatDestroy(&pch2opus->A);
 60:   MatDestroy(&pch2opus->M);
 61:   MatDestroy(&pch2opus->T);
 62:   VecDestroy(&pch2opus->w);
 63:   MatDestroy(&pch2opus->S);
 64:   VecDestroy(&pch2opus->wns[0]);
 65:   VecDestroy(&pch2opus->wns[1]);
 66:   VecDestroy(&pch2opus->wns[2]);
 67:   VecDestroy(&pch2opus->wns[3]);
 68:   MatDestroy(&pch2opus->wnsmat[0]);
 69:   MatDestroy(&pch2opus->wnsmat[1]);
 70:   MatDestroy(&pch2opus->wnsmat[2]);
 71:   MatDestroy(&pch2opus->wnsmat[3]);
 72:   return(0);
 73: }

 75: static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords)
 76: {
 77:   PC_H2OPUS      *pch2opus = (PC_H2OPUS*)pc->data;
 78:   PetscBool      reset = PETSC_TRUE;

 82:   if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) {
 83:     PetscArraycmp(pch2opus->coords,coords,sdim*nlocc,&reset);
 84:     reset = (PetscBool)!reset;
 85:   }
 86:   MPIU_Allreduce(MPI_IN_PLACE,&reset,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
 87:   if (reset) {
 88:     PCReset_H2OPUS(pc);
 89:     PetscMalloc1(sdim*nlocc,&pch2opus->coords);
 90:     PetscArraycpy(pch2opus->coords,coords,sdim*nlocc);
 91:     pch2opus->sdim  = sdim;
 92:     pch2opus->nlocc = nlocc;
 93:   }
 94:   return(0);
 95: }

 97: static PetscErrorCode PCDestroy_H2OPUS(PC pc)
 98: {

102:   PCReset_H2OPUS(pc);
103:   PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",NULL);
104:   PetscFree(pc->data);
105:   return(0);
106: }

108: static PetscErrorCode PCSetFromOptions_H2OPUS(PetscOptionItems *PetscOptionsObject,PC pc)
109: {
110:   PC_H2OPUS      *pch2opus = (PC_H2OPUS*)pc->data;

114:   PetscOptionsHead(PetscOptionsObject,"H2OPUS options");
115:   PetscOptionsInt("-pc_h2opus_maxits","Maximum number of iterations for Newton-Schultz",NULL,pch2opus->maxits,&pch2opus->maxits,NULL);
116:   PetscOptionsBool("-pc_h2opus_monitor","Monitor Newton-Schultz convergence",NULL,pch2opus->monitor,&pch2opus->monitor,NULL);
117:   PetscOptionsReal("-pc_h2opus_atol","Absolute tolerance",NULL,pch2opus->atol,&pch2opus->atol,NULL);
118:   PetscOptionsReal("-pc_h2opus_rtol","Relative tolerance",NULL,pch2opus->rtol,&pch2opus->rtol,NULL);
119:   PetscOptionsEnum("-pc_h2opus_norm_type","Norm type for convergence monitoring",NULL,NormTypes,(PetscEnum)pch2opus->normtype,(PetscEnum*)&pch2opus->normtype,NULL);
120:   PetscOptionsInt("-pc_h2opus_hyperorder","Hyper power order of sampling",NULL,pch2opus->hyperorder,&pch2opus->hyperorder,NULL);
121:   PetscOptionsInt("-pc_h2opus_leafsize","Leaf size when constructed from kernel",NULL,pch2opus->leafsize,&pch2opus->leafsize,NULL);
122:   PetscOptionsReal("-pc_h2opus_eta","Admissibility condition tolerance",NULL,pch2opus->eta,&pch2opus->eta,NULL);
123:   PetscOptionsInt("-pc_h2opus_maxrank","Maximum rank when constructed from matvecs",NULL,pch2opus->max_rank,&pch2opus->max_rank,NULL);
124:   PetscOptionsInt("-pc_h2opus_samples","Number of samples to be taken concurrently when constructing from matvecs",NULL,pch2opus->bs,&pch2opus->bs,NULL);
125:   PetscOptionsReal("-pc_h2opus_mrtol","Relative tolerance for construction from sampling",NULL,pch2opus->mrtol,&pch2opus->mrtol,NULL);
126:   PetscOptionsTail();
127:   return(0);
128: }

130: typedef struct {
131:   Mat A;
132:   Mat M;
133:   Vec w;
134: } AAtCtx;

136: static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y)
137: {
138:   AAtCtx         *aat;

142:   MatShellGetContext(A,(void*)&aat);
143:   /* MatMultTranspose(aat->M,x,aat->w); */
144:   MatMultTranspose(aat->A,x,aat->w);
145:   MatMult(aat->A,aat->w,y);
146:   return(0);
147: }

149: static PetscErrorCode PCH2OpusSetUpInit(PC pc)
150: {
151:   PC_H2OPUS      *pch2opus = (PC_H2OPUS*)pc->data;
152:   Mat            A = pc->useAmat ? pc->mat : pc->pmat, AAt;
153:   PetscInt       M,m;
154:   VecType        vtype;
155:   PetscReal      n;
156:   AAtCtx         aat;

160:   aat.A = A;
161:   aat.M = pch2opus->M; /* unused so far */
162:   MatCreateVecs(A,NULL,&aat.w);
163:   MatGetSize(A,&M,NULL);
164:   MatGetLocalSize(A,&m,NULL);
165:   MatCreateShell(PetscObjectComm((PetscObject)A),m,m,M,M,&aat,&AAt);
166:   MatBindToCPU(AAt,pch2opus->boundtocpu);
167:   MatShellSetOperation(AAt,MATOP_MULT,(void (*)(void))MatMult_AAt);
168:   MatShellSetOperation(AAt,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMult_AAt);
169:   MatShellSetOperation(AAt,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS);
170:   MatGetVecType(A,&vtype);
171:   MatShellSetVecType(AAt,vtype);
172:   MatNorm(AAt,NORM_1,&n);
173:   pch2opus->s0 = 1./n;
174:   VecDestroy(&aat.w);
175:   MatDestroy(&AAt);
176:   return(0);
177: }

179: static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t)
180: {
181:   PC_H2OPUS      *pch2opus = (PC_H2OPUS*)pc->data;

185:   if (t) {
186:     MatMultTranspose(pch2opus->M,x,y);
187:   } else {
188:     MatMult(pch2opus->M,x,y);
189:   }
190:   return(0);
191: }

193: static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t)
194: {
195:   PC_H2OPUS      *pch2opus = (PC_H2OPUS*)pc->data;

199:   if (t) {
200:     MatTransposeMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);
201:   } else {
202:     MatMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);
203:   }
204:   return(0);
205: }

207: static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y)
208: {

212:   PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_FALSE);
213:   return(0);
214: }

216: static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y)
217: {

221:   PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_TRUE);
222:   return(0);
223: }

225: static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y)
226: {

230:   PCApplyKernel_H2OPUS(pc,x,y,PETSC_FALSE);
231:   return(0);
232: }

234: static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y)
235: {

239:   PCApplyKernel_H2OPUS(pc,x,y,PETSC_TRUE);
240:   return(0);
241: }

243: /* used to test the norm of (M^-1 A - I) */
244: static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t)
245: {
246:   PC             pc;
247:   Mat            A;
248:   PC_H2OPUS      *pch2opus;
250:   PetscBool      sideleft = PETSC_TRUE;

253:   MatShellGetContext(M,(void*)&pc);
254:   pch2opus = (PC_H2OPUS*)pc->data;
255:   if (!pch2opus->w) {
256:     MatCreateVecs(pch2opus->M,&pch2opus->w,NULL);
257:   }
258:   A = pch2opus->A;
259:   VecBindToCPU(pch2opus->w,pch2opus->boundtocpu);
260:   if (t) {
261:     if (sideleft) {
262:       PCApplyTranspose_H2OPUS(pc,x,pch2opus->w);
263:       MatMultTranspose(A,pch2opus->w,y);
264:     } else {
265:       MatMultTranspose(A,x,pch2opus->w);
266:       PCApplyTranspose_H2OPUS(pc,pch2opus->w,y);
267:     }
268:   } else {
269:     if (sideleft) {
270:       MatMult(A,x,pch2opus->w);
271:       PCApply_H2OPUS(pc,pch2opus->w,y);
272:     } else {
273:       PCApply_H2OPUS(pc,x,pch2opus->w);
274:       MatMult(A,pch2opus->w,y);
275:     }
276:   }
277:   if (!pch2opus->testMA) {
278:     VecAXPY(y,-1.0,x);
279:   }
280:   return(0);
281: }

283: static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y)
284: {

288:   MatMultKernel_MAmI(A,x,y,PETSC_FALSE);
289:   return(0);
290: }

292: static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y)
293: {

297:   MatMultKernel_MAmI(A,x,y,PETSC_TRUE);
298:   return(0);
299: }

301: /* HyperPower kernel:
302: Y = R = x
303: for i = 1 . . . l - 1 do
304:   R = (I - A * Xk) * R
305:   Y = Y + R
306: Y = Xk * Y
307: */
308: static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t)
309: {
310:   PC             pc;
311:   Mat            A;
312:   PC_H2OPUS      *pch2opus;
313:   PetscInt       i;

317:   MatShellGetContext(M,(void*)&pc);
318:   pch2opus = (PC_H2OPUS*)pc->data;
319:   A = pch2opus->A;
320:   MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1]);
321:   MatCreateVecs(pch2opus->M,pch2opus->wns[2] ? NULL : &pch2opus->wns[2],pch2opus->wns[3] ? NULL : &pch2opus->wns[3]);
322:   VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu);
323:   VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu);
324:   VecBindToCPU(pch2opus->wns[2],pch2opus->boundtocpu);
325:   VecBindToCPU(pch2opus->wns[3],pch2opus->boundtocpu);
326:   VecCopy(x,pch2opus->wns[0]);
327:   VecCopy(x,pch2opus->wns[3]);
328:   if (t) {
329:     for (i=0;i<pch2opus->hyperorder-1;i++) {
330:       MatMultTranspose(A,pch2opus->wns[0],pch2opus->wns[1]);
331:       PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[2]);
332:       VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2]);
333:       VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0]);
334:     }
335:     PCApplyTranspose_H2OPUS(pc,pch2opus->wns[3],y);
336:   } else {
337:     for (i=0;i<pch2opus->hyperorder-1;i++) {
338:       PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1]);
339:       MatMult(A,pch2opus->wns[1],pch2opus->wns[2]);
340:       VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2]);
341:       VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0]);
342:     }
343:     PCApply_H2OPUS(pc,pch2opus->wns[3],y);
344:   }
345:   return(0);
346: }

348: static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y)
349: {

353:   MatMultKernel_Hyper(M,x,y,PETSC_FALSE);
354:   return(0);
355: }

357: static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y)
358: {

362:   MatMultKernel_Hyper(M,x,y,PETSC_TRUE);
363:   return(0);
364: }

366: /* Hyper power kernel, MatMat version */
367: static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t)
368: {
369:   PC             pc;
370:   Mat            A;
371:   PC_H2OPUS      *pch2opus;
372:   PetscInt       i;

376:   MatShellGetContext(M,(void*)&pc);
377:   pch2opus = (PC_H2OPUS*)pc->data;
378:   A = pch2opus->A;
379:   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
380:     MatDestroy(&pch2opus->wnsmat[0]);
381:     MatDestroy(&pch2opus->wnsmat[1]);
382:   }
383:   if (!pch2opus->wnsmat[0]) {
384:     MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0]);
385:     MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1]);
386:   }
387:   if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) {
388:     MatDestroy(&pch2opus->wnsmat[2]);
389:     MatDestroy(&pch2opus->wnsmat[3]);
390:   }
391:   if (!pch2opus->wnsmat[2]) {
392:     MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[2]);
393:     MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[3]);
394:   }
395:   MatCopy(X,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);
396:   MatCopy(X,pch2opus->wnsmat[3],SAME_NONZERO_PATTERN);
397:   if (t) {
398:     for (i=0;i<pch2opus->hyperorder-1;i++) {
399:       MatTransposeMatMult(A,pch2opus->wnsmat[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1]);
400:       PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[2]);
401:       MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN);
402:       MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);
403:     }
404:     PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[3],Y);
405:   } else {
406:     for (i=0;i<pch2opus->hyperorder-1;i++) {
407:       PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1]);
408:       MatMatMult(A,pch2opus->wnsmat[1],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[2]);
409:       MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN);
410:       MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);
411:     }
412:     PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[3],Y);
413:   }
414:   return(0);
415: }

417: static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y,void *ctx)
418: {

422:   MatMatMultKernel_Hyper(M,X,Y,PETSC_FALSE);
423:   return(0);
424: }

426: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */
427: static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t)
428: {
429:   PC             pc;
430:   Mat            A;
431:   PC_H2OPUS      *pch2opus;

435:   MatShellGetContext(M,(void*)&pc);
436:   pch2opus = (PC_H2OPUS*)pc->data;
437:   A = pch2opus->A;
438:   MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1]);
439:   VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu);
440:   VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu);
441:   if (t) {
442:     PCApplyTranspose_H2OPUS(pc,x,y);
443:     MatMultTranspose(A,y,pch2opus->wns[1]);
444:     PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[0]);
445:     VecAXPBY(y,-1.,2.,pch2opus->wns[0]);
446:   } else {
447:     PCApply_H2OPUS(pc,x,y);
448:     MatMult(A,y,pch2opus->wns[0]);
449:     PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1]);
450:     VecAXPBY(y,-1.,2.,pch2opus->wns[1]);
451:   }
452:   return(0);
453: }

455: static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y)
456: {

460:   MatMultKernel_NS(M,x,y,PETSC_FALSE);
461:   return(0);
462: }

464: static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y)
465: {

469:   MatMultKernel_NS(M,x,y,PETSC_TRUE);
470:   return(0);
471: }

473: /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */
474: static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t)
475: {
476:   PC             pc;
477:   Mat            A;
478:   PC_H2OPUS      *pch2opus;

482:   MatShellGetContext(M,(void*)&pc);
483:   pch2opus = (PC_H2OPUS*)pc->data;
484:   A = pch2opus->A;
485:   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
486:     MatDestroy(&pch2opus->wnsmat[0]);
487:     MatDestroy(&pch2opus->wnsmat[1]);
488:   }
489:   if (!pch2opus->wnsmat[0]) {
490:     MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0]);
491:     MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1]);
492:   }
493:   if (t) {
494:     PCApplyTransposeMat_H2OPUS(pc,X,Y);
495:     MatTransposeMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1]);
496:     PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[0]);
497:     MatScale(Y,2.);
498:     MatAXPY(Y,-1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);
499:   } else {
500:     PCApplyMat_H2OPUS(pc,X,Y);
501:     MatMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[0]);
502:     PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1]);
503:     MatScale(Y,2.);
504:     MatAXPY(Y,-1.,pch2opus->wnsmat[1],SAME_NONZERO_PATTERN);
505:   }
506:   return(0);
507: }

509: static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx)
510: {

514:   MatMatMultKernel_NS(M,X,Y,PETSC_FALSE);
515:   return(0);
516: }

518: static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc)
519: {
520:   PC_H2OPUS      *pch2opus = (PC_H2OPUS*)pc->data;
521:   Mat            A = pc->useAmat ? pc->mat : pc->pmat;

525:   if (!pch2opus->S) {
526:     PetscInt M,N,m,n;

528:     MatGetSize(A,&M,&N);
529:     MatGetLocalSize(A,&m,&n);
530:     MatCreateShell(PetscObjectComm((PetscObject)A),m,n,M,N,pc,&pch2opus->S);
531:     MatSetBlockSizesFromMats(pch2opus->S,A,A);
532: #if defined(PETSC_H2OPUS_USE_GPU)
533:     MatShellSetVecType(pch2opus->S,VECCUDA);
534: #endif
535:   }
536:   if (pch2opus->hyperorder >= 2) {
537:     MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_Hyper);
538:     MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_Hyper);
539:     MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSE,MATDENSE);
540:     MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSECUDA,MATDENSECUDA);
541:   } else {
542:     MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_NS);
543:     MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_NS);
544:     MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSE,MATDENSE);
545:     MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSECUDA,MATDENSECUDA);
546:   }
547:   MatPropagateSymmetryOptions(A,pch2opus->S);
548:   MatBindToCPU(pch2opus->S,pch2opus->boundtocpu);
549:   /* XXX */
550:   MatSetOption(pch2opus->S,MAT_SYMMETRIC,PETSC_TRUE);
551:   return(0);
552: }

554: static PetscErrorCode PCSetUp_H2OPUS(PC pc)
555: {
556:   PC_H2OPUS      *pch2opus = (PC_H2OPUS*)pc->data;
557:   Mat            A = pc->useAmat ? pc->mat : pc->pmat;
558:   NormType       norm = pch2opus->normtype;
559:   PetscReal      initerr = 0.0,err;
560:   PetscReal      initerrMA = 0.0,errMA;
561:   PetscBool      ish2opus;

565:   if (!pch2opus->T) {
566:     PetscInt    M,N,m,n;
567:     const char *prefix;

569:     PCGetOptionsPrefix(pc,&prefix);
570:     MatGetSize(A,&M,&N);
571:     MatGetLocalSize(A,&m,&n);
572:     MatCreateShell(PetscObjectComm((PetscObject)pc->pmat),m,n,M,N,pc,&pch2opus->T);
573:     MatSetBlockSizesFromMats(pch2opus->T,A,A);
574:     MatShellSetOperation(pch2opus->T,MATOP_MULT,(void (*)(void))MatMult_MAmI);
575:     MatShellSetOperation(pch2opus->T,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_MAmI);
576:     MatShellSetOperation(pch2opus->T,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS);
577: #if defined(PETSC_H2OPUS_USE_GPU)
578:     MatShellSetVecType(pch2opus->T,VECCUDA);
579: #endif
580:     PetscLogObjectParent((PetscObject)pc,(PetscObject)pch2opus->T);
581:     MatSetOptionsPrefix(pch2opus->T,prefix);
582:     MatAppendOptionsPrefix(pch2opus->T,"pc_h2opus_est_");
583:   }
584:   MatDestroy(&pch2opus->A);
585:   PetscObjectTypeCompare((PetscObject)A,MATH2OPUS,&ish2opus);
586:   if (ish2opus) {
587:     PetscObjectReference((PetscObject)A);
588:     pch2opus->A = A;
589:   } else {
590:     const char *prefix;
591:     MatCreateH2OpusFromMat(A,pch2opus->sdim,pch2opus->coords,PETSC_FALSE,pch2opus->eta,pch2opus->leafsize,pch2opus->max_rank,pch2opus->bs,pch2opus->mrtol,&pch2opus->A);
592:     /* XXX */
593:     MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE);
594:     PCGetOptionsPrefix(pc,&prefix);
595:     MatSetOptionsPrefix(pch2opus->A,prefix);
596:     MatAppendOptionsPrefix(pch2opus->A,"pc_h2opus_init_");
597:     MatSetFromOptions(pch2opus->A);
598:     MatAssemblyBegin(pch2opus->A,MAT_FINAL_ASSEMBLY);
599:     MatAssemblyEnd(pch2opus->A,MAT_FINAL_ASSEMBLY);
600:     /* XXX */
601:     MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE);
602:   }
603: #if defined(PETSC_H2OPUS_USE_GPU)
604:   pch2opus->boundtocpu = pch2opus->A->boundtocpu;
605: #endif
606:   MatBindToCPU(pch2opus->T,pch2opus->boundtocpu);
607:   if (pch2opus->M) { /* see if we can reuse M as initial guess */
608:     PetscReal reuse;

610:     MatBindToCPU(pch2opus->M,pch2opus->boundtocpu);
611:     MatNorm(pch2opus->T,norm,&reuse);
612:     if (reuse >= 1.0) {
613:       MatDestroy(&pch2opus->M);
614:     }
615:   }
616:   if (!pch2opus->M) {
617:     const char *prefix;
618:     MatDuplicate(pch2opus->A,MAT_COPY_VALUES,&pch2opus->M);
619:     PCGetOptionsPrefix(pc,&prefix);
620:     MatSetOptionsPrefix(pch2opus->M,prefix);
621:     MatAppendOptionsPrefix(pch2opus->M,"pc_h2opus_inv_");
622:     MatSetFromOptions(pch2opus->M);
623:     PCH2OpusSetUpInit(pc);
624:     MatScale(pch2opus->M,pch2opus->s0);
625:   }
626:   /* A and M have the same h2 matrix structure, save on reordering routines */
627:   MatH2OpusSetNativeMult(pch2opus->A,PETSC_TRUE);
628:   MatH2OpusSetNativeMult(pch2opus->M,PETSC_TRUE);
629:   if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) {
630:     MatNorm(pch2opus->T,norm,&initerr);
631:     pch2opus->testMA = PETSC_TRUE;
632:     MatNorm(pch2opus->T,norm,&initerrMA);
633:     pch2opus->testMA = PETSC_FALSE;
634:   }
635:   if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR;
636:   err   = initerr;
637:   errMA = initerrMA;
638:   if (pch2opus->monitor) {
639:     PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)err,(double)(err/initerr));
640:     PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A||     NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA));
641:   }
642:   if (initerr > pch2opus->atol && !pc->failedreason) {
643:     PetscInt i;

645:     PCH2OpusSetUpSampler_Private(pc);
646:     for (i = 0; i < pch2opus->maxits; i++) {
647:       Mat         M;
648:       const char* prefix;

650:       MatDuplicate(pch2opus->M,MAT_SHARE_NONZERO_PATTERN,&M);
651:       MatGetOptionsPrefix(pch2opus->M,&prefix);
652:       MatSetOptionsPrefix(M,prefix);
653:       MatH2OpusSetSamplingMat(M,pch2opus->S,PETSC_DECIDE,PETSC_DECIDE);
654:       MatSetFromOptions(M);
655:       MatH2OpusSetNativeMult(M,PETSC_TRUE);
656:       MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
657:       MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);

659:       MatDestroy(&pch2opus->M);
660:       pch2opus->M = M;
661:       if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) {
662:         MatNorm(pch2opus->T,norm,&err);
663:         pch2opus->testMA = PETSC_TRUE;
664:         MatNorm(pch2opus->T,norm,&errMA);
665:         pch2opus->testMA = PETSC_FALSE;
666:       }
667:       if (pch2opus->monitor) {
668:         PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)err,(double)(err/initerr));
669:         PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A||     NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA));
670:       }
671:       if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR;
672:       if (err < pch2opus->atol || err < pch2opus->rtol*initerr || pc->failedreason) break;
673:     }
674:   }
675:   /* cleanup setup workspace */
676:   MatH2OpusSetNativeMult(pch2opus->A,PETSC_FALSE);
677:   MatH2OpusSetNativeMult(pch2opus->M,PETSC_FALSE);
678:   VecDestroy(&pch2opus->wns[0]);
679:   VecDestroy(&pch2opus->wns[1]);
680:   VecDestroy(&pch2opus->wns[2]);
681:   VecDestroy(&pch2opus->wns[3]);
682:   MatDestroy(&pch2opus->wnsmat[0]);
683:   MatDestroy(&pch2opus->wnsmat[1]);
684:   MatDestroy(&pch2opus->wnsmat[2]);
685:   MatDestroy(&pch2opus->wnsmat[3]);
686:   return(0);
687: }

689: static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer)
690: {
692:   PC_H2OPUS      *pch2opus = (PC_H2OPUS*)pc->data;
693:   PetscBool      isascii;

696:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
697:   if (isascii) {
698:     if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) {
699:       PetscViewerASCIIPrintf(viewer,"Initial approximation matrix\n");
700:       PetscViewerASCIIPushTab(viewer);
701:       PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);
702:       MatView(pch2opus->A,viewer);
703:       PetscViewerPopFormat(viewer);
704:       PetscViewerASCIIPopTab(viewer);
705:     }
706:     if (pch2opus->M) {
707:       PetscViewerASCIIPrintf(viewer,"Inner matrix constructed\n");
708:       PetscViewerASCIIPushTab(viewer);
709:       PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);
710:       MatView(pch2opus->M,viewer);
711:       PetscViewerPopFormat(viewer);
712:       PetscViewerASCIIPopTab(viewer);
713:     }
714:     PetscViewerASCIIPrintf(viewer,"Initial scaling: %g\n",pch2opus->s0);
715:   }
716:   return(0);
717: }

719: PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc)
720: {
722:   PC_H2OPUS      *pch2opus;

725:   PetscNewLog(pc,&pch2opus);
726:   pc->data = (void*)pch2opus;

728:   pch2opus->atol       = 1.e-2;
729:   pch2opus->rtol       = 1.e-6;
730:   pch2opus->maxits     = 50;
731:   pch2opus->hyperorder = 1; /* defaults to basic NewtonSchultz */
732:   pch2opus->normtype   = NORM_2;

734:   /* these are needed when we are sampling the pmat */
735:   pch2opus->eta        = PETSC_DECIDE;
736:   pch2opus->leafsize   = PETSC_DECIDE;
737:   pch2opus->max_rank   = PETSC_DECIDE;
738:   pch2opus->bs         = PETSC_DECIDE;
739:   pch2opus->mrtol      = PETSC_DECIDE;
740: #if defined(PETSC_H2OPUS_USE_GPU)
741:   pch2opus->boundtocpu = PETSC_FALSE;
742: #else
743:   pch2opus->boundtocpu = PETSC_TRUE;
744: #endif
745:   pc->ops->destroy        = PCDestroy_H2OPUS;
746:   pc->ops->setup          = PCSetUp_H2OPUS;
747:   pc->ops->apply          = PCApply_H2OPUS;
748:   pc->ops->matapply       = PCApplyMat_H2OPUS;
749:   pc->ops->applytranspose = PCApplyTranspose_H2OPUS;
750:   pc->ops->reset          = PCReset_H2OPUS;
751:   pc->ops->setfromoptions = PCSetFromOptions_H2OPUS;
752:   pc->ops->view           = PCView_H2OPUS;

754:   PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",PCSetCoordinates_H2OPUS);
755:   return(0);
756: }