petsc-3.8.4 2018-03-24

  4:  #include <../src/vec/vec/impls/seq/seqcusp/cuspvecimpl.h>

  6: /*for MatCreateSeqAIJCUSPFromTriple*/
  7: #include <cusp/coo_matrix.h>
  8: /* for everything else */
  9: #include <cusp/csr_matrix.h>
 10: #include <cusp/ell_matrix.h>
 11: #include <cusp/dia_matrix.h>
 12: #include <cusp/multiply.h>

 14: /* need the thrust version */
 15: #include <thrust/version.h>

 17: #include <algorithm>
 18: #include <vector>
 19: #include <string>
 20: #include <thrust/sort.h>
 21: #include <thrust/fill.h>


 24: /* Old way (the original author's label; whatever replaces it is not visible in this listing).
      Shorthand names for three CUSP sparse-matrix templates, each instantiated with
      PetscInt, PetscScalar and cusp::device_memory as its template arguments:
        CUSPMATRIX    -> cusp::csr_matrix
        CUSPMATRIXELL -> cusp::ell_matrix
        CUSPMATRIXDIA -> cusp::dia_matrix */
 25: #define CUSPMATRIX cusp::csr_matrix<PetscInt,PetscScalar,cusp::device_memory>
 26: #define CUSPMATRIXELL cusp::ell_matrix<PetscInt,PetscScalar,cusp::device_memory>
 27: #define CUSPMATRIXDIA cusp::dia_matrix<PetscInt,PetscScalar,cusp::device_memory>

 29: struct Mat_SeqAIJCUSP {
     /* Bookkeeping for a matrix held on the GPU: the device matrix itself, helper arrays,
        the device storage format and the stream used for SpMV.
        NOTE(review): `mat` is stored as void*; the concrete type behind it presumably
        depends on `format` (one of the CUSPMATRIX* aliases?) -- confirm in the implementation. */
 30:   void                 *mat; /* pointer to the matrix on the GPU (type-erased) */
 31:   CUSPINTARRAYGPU      *indices; /* pointer to an array holding the indices of the nonzero rows; applies when usecprow is true (usecprow is defined outside this listing) */
 32:   CUSPARRAY            *tempvec; /* pointer to a work vector into which the relevant entries of a vector to be multiplied can be copied */
 33:   PetscInt             nonzerorow;   /* number of nonzero rows; used in the flop calculations */
 34:   MatCUSPStorageFormat format;   /* storage format of the matrix on the device */
 35:   cudaStream_t         stream;   /* stream for the parallel SpMV; not owned by this struct, so it should not be deleted through it */
 36: };

 /* Declarations only; the definitions are not part of this listing. All three return a
    PetscErrorCode. MatCUSPCopyToGPU is declared PETSC_EXTERN, the other two PETSC_INTERN.
    NOTE(review): judging by the names alone, these copy a Mat to the device, copy a device
    CSR matrix (CUSPMATRIX) back into a Mat, and set the stream associated with a Mat --
    confirm against the implementations before relying on this. */
 38: PETSC_EXTERN PetscErrorCode MatCUSPCopyToGPU(Mat);
 39: PETSC_INTERN PetscErrorCode MatCUSPCopyFromGPU(Mat, CUSPMATRIX*);
 40: PETSC_INTERN PetscErrorCode MatCUSPSetStream(Mat, const cudaStream_t stream);
 41: #endif