Actual source code: sacusp.cu

petsc-3.7.3 2016-08-01
Report Typos and Errors
  2: /*  -------------------------------------------------------------------- */

  4: /*
  5:    Include files needed for the CUSP Smoothed Aggregation preconditioner:
  6:      pcimpl.h - private include file intended for use by all preconditioners
  7: */
  8: #define PETSC_SKIP_SPINLOCK
  9: #include <petsc/private/pcimpl.h>   /*I "petscpc.h" I*/
 10: #include <../src/mat/impls/aij/seq/aij.h>
 11: #include <cusp/monitor.h>
 12: #include <cusp/version.h>
 13: #if CUSP_VERSION >= 400
 14: #include <cusp/precond/aggregation/smoothed_aggregation.h>
 15: #define cuspsaprecond cusp::precond::aggregation::smoothed_aggregation<PetscInt,PetscScalar,cusp::device_memory>
 16: #else
 17: #include <cusp/precond/smoothed_aggregation.h>
 18: #define cuspsaprecond cusp::precond::smoothed_aggregation<PetscInt,PetscScalar,cusp::device_memory>
 19: #endif
 20: #include <../src/vec/vec/impls/dvecimpl.h>
 21: #include <../src/mat/impls/aij/seq/seqcusp/cuspmatimpl.h>
 22: #include <../src/vec/vec/impls/seq/seqcusp/cuspvecimpl.h>

 24: /*
 25:    Private context (data structure) for the SACUSP preconditioner.
 26: */
 27: typedef struct {
 28:   cuspsaprecond * SACUSP;
 29:   /*int cycles; */
 30: } PC_SACUSP;

 34: static PetscErrorCode PCSACUSPSetCycles(PC pc, int n)
 35: {
 36:   PC_SACUSP      *sac = (PC_SACUSP*)pc->data;

 39:   sac->cycles = n;
 40:   return(0);

 42:   }*/

 44: /* -------------------------------------------------------------------------- */
 45: /*
 46:    PCSetUp_SACUSP - Prepares for the use of the SACUSP preconditioner
 47:                     by setting data structures and options.

 49:    Input Parameter:
 50: .  pc - the preconditioner context

 52:    Application Interface Routine: PCSetUp()

 54:    Notes:
 55:    The interface routine PCSetUp() is not usually called directly by
 56:    the user, but instead is called by PCApply() if necessary.
 57: */
 60: static PetscErrorCode PCSetUp_SACUSP(PC pc)
 61: {
 62:   PC_SACUSP      *sa = (PC_SACUSP*)pc->data;
 63:   PetscBool      flg = PETSC_FALSE;
 65: #if !defined(PETSC_USE_COMPLEX)
 66:   // protect these in order to avoid compiler warnings. This preconditioner does
 67:   // not work for complex types.
 68:   Mat_SeqAIJCUSP *gpustruct;
 69: #endif

 72:   PetscObjectTypeCompare((PetscObject)pc->pmat,MATSEQAIJCUSP,&flg);
 73:   if (!flg) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Currently only handles CUSP matrices");
 74:   if (pc->setupcalled != 0) {
 75:     try {
 76:       delete sa->SACUSP;
 77:     } catch(char *ex) {
 78:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
 79:     }
 80:   }
 81:   try {
 82: #if defined(PETSC_USE_COMPLEX)
 83:     sa->SACUSP = 0;CHKERRQ(1); /* TODO */
 84: #else
 85:     MatCUSPCopyToGPU(pc->pmat);
 86:     gpustruct = (Mat_SeqAIJCUSP*)(pc->pmat->spptr);
 87: 
 88:     if (gpustruct->format==MAT_CUSP_ELL) {
 89:       CUSPMATRIXELL *mat = (CUSPMATRIXELL*)gpustruct->mat;
 90:       sa->SACUSP = new cuspsaprecond(*mat);
 91:     } else if (gpustruct->format==MAT_CUSP_DIA) {
 92:       CUSPMATRIXDIA *mat = (CUSPMATRIXDIA*)gpustruct->mat;
 93:       sa->SACUSP = new cuspsaprecond(*mat);
 94:     } else {
 95:       CUSPMATRIX *mat = (CUSPMATRIX*)gpustruct->mat;
 96:       sa->SACUSP = new cuspsaprecond(*mat);
 97:     }
 98: #endif

100:   } catch(char *ex) {
101:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
102:   }
103:   /*PetscOptionsInt("-pc_sacusp_cycles","Number of v-cycles to perform","PCSACUSPSetCycles",sa->cycles,
104:     &sa->cycles,NULL);*/
105:   return(0);
106: }

110: static PetscErrorCode PCApplyRichardson_SACUSP(PC pc, Vec b, Vec y, Vec w,PetscReal rtol, PetscReal abstol, PetscReal dtol, PetscInt its, PetscBool guesszero,PetscInt *outits,PCRichardsonConvergedReason *reason)
111: {
112: #if !defined(PETSC_USE_COMPLEX)
113:   // protect these in order to avoid compiler warnings. This preconditioner does
114:   // not work for complex types.
115:   PC_SACUSP *sac = (PC_SACUSP*)pc->data;
116: #endif
118:   CUSPARRAY      *barray,*yarray;

121:   /* how to incorporate dtol, guesszero, w?*/
123:   VecCUSPGetArrayRead(b,&barray);
124:   VecCUSPGetArrayReadWrite(y,&yarray);
125: #if defined(CUSP_VERSION) && CUSP_VERSION >= 500
126:   cusp::monitor<PetscReal> monitor(*barray,its,rtol,abstol);
127: #else
128:   cusp::default_monitor<PetscReal> monitor(*barray,its,rtol,abstol);
129: #endif
130: #if defined(PETSC_USE_COMPLEX)
131:   CHKERRQ(1);
132:   /* TODO */
133: #else
134:   sac->SACUSP->solve(*barray,*yarray,monitor);
135:   *outits = monitor.iteration_count();
136:   if (monitor.converged()) *reason = PCRICHARDSON_CONVERGED_RTOL; /* how to discern between converging from RTOL or ATOL?*/
137:   else *reason = PCRICHARDSON_CONVERGED_ITS;
138: #endif
139:   PetscObjectStateIncrease((PetscObject)y);
140:   VecCUSPRestoreArrayRead(b,&barray);
141:   VecCUSPRestoreArrayReadWrite(y,&yarray);
142:   return(0);
143: }

145: /* -------------------------------------------------------------------------- */
146: /*
147:    PCApply_SACUSP - Applies the SACUSP preconditioner to a vector.

149:    Input Parameters:
150: .  pc - the preconditioner context
151: .  x - input vector

153:    Output Parameter:
154: .  y - output vector

156:    Application Interface Routine: PCApply()
157:  */
160: static PetscErrorCode PCApply_SACUSP(PC pc,Vec x,Vec y)
161: {
162:   PC_SACUSP      *sac = (PC_SACUSP*)pc->data;
164:   PetscBool      flg1,flg2;
165:   CUSPARRAY      *xarray=NULL,*yarray=NULL;

168:   /*how to apply a certain fixed number of iterations?*/
169:   PetscObjectTypeCompare((PetscObject)x,VECSEQCUSP,&flg1);
170:   PetscObjectTypeCompare((PetscObject)y,VECSEQCUSP,&flg2);
171:   if (!(flg1 && flg2)) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP, "Currently only handles CUSP vectors");
172:   if (!sac->SACUSP) {
173:     PCSetUp_SACUSP(pc);
174:   }
175:   VecSet(y,0.0);
176:   VecCUSPGetArrayRead(x,&xarray);
177:   VecCUSPGetArrayWrite(y,&yarray);
178:   try {
179: #if defined(PETSC_USE_COMPLEX)

181: #else
182:     cusp::multiply(*sac->SACUSP,*xarray,*yarray);
183: #endif
184:   } catch(char * ex) {
185:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
186:   }
187:   VecCUSPRestoreArrayRead(x,&xarray);
188:   VecCUSPRestoreArrayWrite(y,&yarray);
189:   PetscObjectStateIncrease((PetscObject)y);
190:   return(0);
191: }
192: /* -------------------------------------------------------------------------- */
193: /*
194:    PCDestroy_SACUSP - Destroys the private context for the SACUSP preconditioner
195:    that was created with PCCreate_SACUSP().

197:    Input Parameter:
198: .  pc - the preconditioner context

200:    Application Interface Routine: PCDestroy()
201: */
204: static PetscErrorCode PCDestroy_SACUSP(PC pc)
205: {
206:   PC_SACUSP      *sac = (PC_SACUSP*)pc->data;

210:   if (sac->SACUSP) {
211:     try {
212:       delete sac->SACUSP;
213:     } catch(char * ex) {
214:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
215:     }
216:   }

218:   /*
219:       Free the private data structure that was hanging off the PC
220:   */
221:   PetscFree(pc->data);
222:   return(0);
223: }

227: static PetscErrorCode PCSetFromOptions_SACUSP(PetscOptionItems *PetscOptionsObject,PC pc)
228: {

232:   PetscOptionsHead(PetscOptionsObject,"SACUSP options");
233:   PetscOptionsTail();
234:   return(0);
235: }

237: /* -------------------------------------------------------------------------- */


240: /*MC
241:      PCSACUSP  - A smoothed agglomeration algorithm that runs on the Nvidia GPU.


244:     http://research.nvidia.com/sites/default/files/publications/nvr-2011-002.pdf

246:    Level: advanced

248: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC

250: M*/

254: PETSC_EXTERN PetscErrorCode PCCreate_SACUSP(PC pc)
255: {
256:   PC_SACUSP      *sac;

260:   /*
261:      Creates the private data structure for this preconditioner and
262:      attach it to the PC object.
263:   */
264:   PetscNewLog(pc,&sac);
265:   pc->data = (void*)sac;

267:   /*
268:      Initialize the pointer to zero
269:      Initialize number of v-cycles to default (1)
270:   */
271:   sac->SACUSP = 0;
272:   /*sac->cycles=1;*/


275:   /*
276:       Set the pointers for the functions that are provided above.
277:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
278:       are called, they will automatically call these functions.  Note we
279:       choose not to provide a couple of these functions since they are
280:       not needed.
281:   */
282:   pc->ops->apply               = PCApply_SACUSP;
283:   pc->ops->applytranspose      = 0;
284:   pc->ops->setup               = PCSetUp_SACUSP;
285:   pc->ops->destroy             = PCDestroy_SACUSP;
286:   pc->ops->setfromoptions      = PCSetFromOptions_SACUSP;
287:   pc->ops->view                = 0;
288:   pc->ops->applyrichardson     = PCApplyRichardson_SACUSP;
289:   pc->ops->applysymmetricleft  = 0;
290:   pc->ops->applysymmetricright = 0;
291:   return(0);
292: }