Actual source code: sacusp.cu
petsc-3.6.1 2015-08-06
2: /* -------------------------------------------------------------------- */
4: /*
5: Include files needed for the CUSP Smoothed Aggregation preconditioner:
6: pcimpl.h - private include file intended for use by all preconditioners
7: */
9: #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/
10: #include <../src/mat/impls/aij/seq/aij.h>
11: #include <cusp/monitor.h>
12: #include <cusp/version.h>
13: #if CUSP_VERSION >= 400
14: #include <cusp/precond/aggregation/smoothed_aggregation.h>
15: #define cuspsaprecond cusp::precond::aggregation::smoothed_aggregation<PetscInt,PetscScalar,cusp::device_memory>
16: #else
17: #include <cusp/precond/smoothed_aggregation.h>
18: #define cuspsaprecond cusp::precond::smoothed_aggregation<PetscInt,PetscScalar,cusp::device_memory>
19: #endif
20: #include <../src/vec/vec/impls/dvecimpl.h>
21: #include <../src/mat/impls/aij/seq/seqcusp/cuspmatimpl.h>
23: /*
24: Private context (data structure) for the SACUSP preconditioner.
25: */
26: typedef struct {
27: cuspsaprecond * SACUSP;
28: /*int cycles; */
29: } PC_SACUSP;
33: static PetscErrorCode PCSACUSPSetCycles(PC pc, int n)
34: {
35: PC_SACUSP *sac = (PC_SACUSP*)pc->data;
38: sac->cycles = n;
39: return(0);
41: }*/
43: /* -------------------------------------------------------------------------- */
44: /*
45: PCSetUp_SACUSP - Prepares for the use of the SACUSP preconditioner
46: by setting data structures and options.
48: Input Parameter:
49: . pc - the preconditioner context
51: Application Interface Routine: PCSetUp()
53: Notes:
54: The interface routine PCSetUp() is not usually called directly by
55: the user, but instead is called by PCApply() if necessary.
56: */
59: static PetscErrorCode PCSetUp_SACUSP(PC pc)
60: {
61: PC_SACUSP *sa = (PC_SACUSP*)pc->data;
62: PetscBool flg = PETSC_FALSE;
64: #if !defined(PETSC_USE_COMPLEX)
65: // protect these in order to avoid compiler warnings. This preconditioner does
66: // not work for complex types.
67: Mat_SeqAIJCUSP *gpustruct;
68: #endif
71: PetscObjectTypeCompare((PetscObject)pc->pmat,MATSEQAIJCUSP,&flg);
72: if (!flg) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Currently only handles CUSP matrices");
73: if (pc->setupcalled != 0) {
74: try {
75: delete sa->SACUSP;
76: } catch(char *ex) {
77: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
78: }
79: }
80: try {
81: #if defined(PETSC_USE_COMPLEX)
82: sa->SACUSP = 0;CHKERRQ(1); /* TODO */
83: #else
84: MatCUSPCopyToGPU(pc->pmat);
85: gpustruct = (Mat_SeqAIJCUSP*)(pc->pmat->spptr);
86:
87: if (gpustruct->format==MAT_CUSP_ELL) {
88: CUSPMATRIXELL *mat = (CUSPMATRIXELL*)gpustruct->mat;
89: sa->SACUSP = new cuspsaprecond(*mat);
90: } else if (gpustruct->format==MAT_CUSP_DIA) {
91: CUSPMATRIXDIA *mat = (CUSPMATRIXDIA*)gpustruct->mat;
92: sa->SACUSP = new cuspsaprecond(*mat);
93: } else {
94: CUSPMATRIX *mat = (CUSPMATRIX*)gpustruct->mat;
95: sa->SACUSP = new cuspsaprecond(*mat);
96: }
97: #endif
99: } catch(char *ex) {
100: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
101: }
102: /*PetscOptionsInt("-pc_sacusp_cycles","Number of v-cycles to perform","PCSACUSPSetCycles",sa->cycles,
103: &sa->cycles,NULL);*/
104: return(0);
105: }
109: static PetscErrorCode PCApplyRichardson_SACUSP(PC pc, Vec b, Vec y, Vec w,PetscReal rtol, PetscReal abstol, PetscReal dtol, PetscInt its, PetscBool guesszero,PetscInt *outits,PCRichardsonConvergedReason *reason)
110: {
111: #if !defined(PETSC_USE_COMPLEX)
112: // protect these in order to avoid compiler warnings. This preconditioner does
113: // not work for complex types.
114: PC_SACUSP *sac = (PC_SACUSP*)pc->data;
115: #endif
117: CUSPARRAY *barray,*yarray;
120: /* how to incorporate dtol, guesszero, w?*/
122: VecCUSPGetArrayRead(b,&barray);
123: VecCUSPGetArrayReadWrite(y,&yarray);
124: #if defined(CUSP_VERSION) && CUSP_VERSION >= 500
125: cusp::monitor<PetscReal> monitor(*barray,its,rtol,abstol);
126: #else
127: cusp::default_monitor<PetscReal> monitor(*barray,its,rtol,abstol);
128: #endif
129: #if defined(PETSC_USE_COMPLEX)
130: CHKERRQ(1);
131: /* TODO */
132: #else
133: sac->SACUSP->solve(*barray,*yarray,monitor);
134: *outits = monitor.iteration_count();
135: if (monitor.converged()) *reason = PCRICHARDSON_CONVERGED_RTOL; /* how to discern between converging from RTOL or ATOL?*/
136: else *reason = PCRICHARDSON_CONVERGED_ITS;
137: #endif
138: PetscObjectStateIncrease((PetscObject)y);
139: VecCUSPRestoreArrayRead(b,&barray);
140: VecCUSPRestoreArrayReadWrite(y,&yarray);
141: return(0);
142: }
144: /* -------------------------------------------------------------------------- */
145: /*
146: PCApply_SACUSP - Applies the SACUSP preconditioner to a vector.
148: Input Parameters:
149: . pc - the preconditioner context
150: . x - input vector
152: Output Parameter:
153: . y - output vector
155: Application Interface Routine: PCApply()
156: */
159: static PetscErrorCode PCApply_SACUSP(PC pc,Vec x,Vec y)
160: {
161: PC_SACUSP *sac = (PC_SACUSP*)pc->data;
163: PetscBool flg1,flg2;
164: CUSPARRAY *xarray=NULL,*yarray=NULL;
167: /*how to apply a certain fixed number of iterations?*/
168: PetscObjectTypeCompare((PetscObject)x,VECSEQCUSP,&flg1);
169: PetscObjectTypeCompare((PetscObject)y,VECSEQCUSP,&flg2);
170: if (!(flg1 && flg2)) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP, "Currently only handles CUSP vectors");
171: if (!sac->SACUSP) {
172: PCSetUp_SACUSP(pc);
173: }
174: VecSet(y,0.0);
175: VecCUSPGetArrayRead(x,&xarray);
176: VecCUSPGetArrayWrite(y,&yarray);
177: try {
178: #if defined(PETSC_USE_COMPLEX)
180: #else
181: cusp::multiply(*sac->SACUSP,*xarray,*yarray);
182: #endif
183: } catch(char * ex) {
184: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
185: }
186: VecCUSPRestoreArrayRead(x,&xarray);
187: VecCUSPRestoreArrayWrite(y,&yarray);
188: PetscObjectStateIncrease((PetscObject)y);
189: return(0);
190: }
191: /* -------------------------------------------------------------------------- */
192: /*
193: PCDestroy_SACUSP - Destroys the private context for the SACUSP preconditioner
194: that was created with PCCreate_SACUSP().
196: Input Parameter:
197: . pc - the preconditioner context
199: Application Interface Routine: PCDestroy()
200: */
203: static PetscErrorCode PCDestroy_SACUSP(PC pc)
204: {
205: PC_SACUSP *sac = (PC_SACUSP*)pc->data;
209: if (sac->SACUSP) {
210: try {
211: delete sac->SACUSP;
212: } catch(char * ex) {
213: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
214: }
215: }
217: /*
218: Free the private data structure that was hanging off the PC
219: */
220: PetscFree(pc->data);
221: return(0);
222: }
226: static PetscErrorCode PCSetFromOptions_SACUSP(PetscOptions *PetscOptionsObject,PC pc)
227: {
231: PetscOptionsHead(PetscOptionsObject,"SACUSP options");
232: PetscOptionsTail();
233: return(0);
234: }
236: /* -------------------------------------------------------------------------- */
239: /*MC
240: PCSACUSP - A smoothed agglomeration algorithm that runs on the Nvidia GPU.
243: http://research.nvidia.com/sites/default/files/publications/nvr-2011-002.pdf
245: Level: advanced
247: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC
249: M*/
253: PETSC_EXTERN PetscErrorCode PCCreate_SACUSP(PC pc)
254: {
255: PC_SACUSP *sac;
259: /*
260: Creates the private data structure for this preconditioner and
261: attach it to the PC object.
262: */
263: PetscNewLog(pc,&sac);
264: pc->data = (void*)sac;
266: /*
267: Initialize the pointer to zero
268: Initialize number of v-cycles to default (1)
269: */
270: sac->SACUSP = 0;
271: /*sac->cycles=1;*/
274: /*
275: Set the pointers for the functions that are provided above.
276: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
277: are called, they will automatically call these functions. Note we
278: choose not to provide a couple of these functions since they are
279: not needed.
280: */
281: pc->ops->apply = PCApply_SACUSP;
282: pc->ops->applytranspose = 0;
283: pc->ops->setup = PCSetUp_SACUSP;
284: pc->ops->destroy = PCDestroy_SACUSP;
285: pc->ops->setfromoptions = PCSetFromOptions_SACUSP;
286: pc->ops->view = 0;
287: pc->ops->applyrichardson = PCApplyRichardson_SACUSP;
288: pc->ops->applysymmetricleft = 0;
289: pc->ops->applysymmetricright = 0;
290: return(0);
291: }