4: #include <../src/vec/vec/impls/seq/seqcusp/cuspvecimpl.h>
6: /*for MatCreateSeqAIJCUSPFromTriple*/
7: #include <cusp/coo_matrix.h>
8: /* for everything else */
9: #include <cusp/csr_matrix.h>
10: #include <cusp/ell_matrix.h>
11: #include <cusp/dia_matrix.h>
12: #include <cusp/multiply.h>
14: /* need the thrust version */
15: #include <thrust/version.h>
17: #include <algorithm>
18: #include <vector>
19: #include <string>
20: #include <thrust/sort.h>
21: #include <thrust/fill.h>
24: /* Old way */
25: #define CUSPMATRIX cusp::csr_matrix<PetscInt,PetscScalar,cusp::device_memory> 26: #define CUSPMATRIXELL cusp::ell_matrix<PetscInt,PetscScalar,cusp::device_memory> 27: #define CUSPMATRIXDIA cusp::dia_matrix<PetscInt,PetscScalar,cusp::device_memory> 29: struct Mat_SeqAIJCUSP {
30: void *mat; /* pointer to the matrix on the GPU */
31: CUSPINTARRAYGPU *indices; /*pointer to an array containing the nonzero row indices, should usecprow be true*/
32: CUSPARRAY *tempvec; /*pointer to a workvector to which we can copy the relevant indices of a vector we want to multiply */
33: PetscInt nonzerorow; /* number of nonzero rows ... used in the flop calculations */
34: MatCUSPStorageFormat format; /* the storage format for the matrix on the device */
35: cudaStream_t stream; /* a stream for the parallel SpMV ... this is not owned and should not be deleted */
36: };
38: PETSC_INTERN PetscErrorCode MatCUSPCopyToGPU(Mat);
39: PETSC_INTERN PetscErrorCode MatCUSPCopyFromGPU(Mat, CUSPMATRIX*);
40: PETSC_INTERN PetscErrorCode MatCUSPSetStream(Mat, const cudaStream_t stream);
41: #endif