Actual source code: math2opussampler.hpp
1: #pragma once
3: #include <petscmat.h>
4: #include <h2opus.h>
6: class PetscMatrixSampler : public HMatrixSampler {
7: protected:
8: Mat A;
9: typedef typename VectorContainer<H2OPUS_HWTYPE_CPU, H2Opus_Real>::type HRealVector;
10: typedef typename VectorContainer<H2OPUS_HWTYPE_CPU, int>::type HIntVector;
11: HIntVector hindexmap;
12: HRealVector hbuffer_in, hbuffer_out;
13: #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
14: H2OpusDeviceVector<int> dindexmap;
15: H2OpusDeviceVector<H2Opus_Real> dbuffer_in, dbuffer_out;
16: #endif
17: bool gpusampling;
18: h2opusComputeStream_t stream;
20: private:
21: void Init();
22: void VerifyBuffers(int);
23: void PermuteBuffersIn(int, H2Opus_Real *, H2Opus_Real **, H2Opus_Real *, H2Opus_Real **);
24: void PermuteBuffersOut(int, H2Opus_Real *);
26: public:
27: PetscMatrixSampler();
28: PetscMatrixSampler(Mat);
29: ~PetscMatrixSampler();
30: void SetSamplingMat(Mat);
31: void SetIndexMap(int, int *);
32: void SetGPUSampling(bool);
33: void SetStream(h2opusComputeStream_t);
34: virtual void sample(H2Opus_Real *, H2Opus_Real *, int);
35: Mat GetSamplingMat() { return A; }
36: };
38: void PetscMatrixSampler::Init()
39: {
40: this->A = NULL;
41: this->gpusampling = false;
42: this->stream = NULL;
43: }
45: PetscMatrixSampler::PetscMatrixSampler()
46: {
47: Init();
48: }
50: PetscMatrixSampler::PetscMatrixSampler(Mat A)
51: {
52: Init();
53: SetSamplingMat(A);
54: }
56: void PetscMatrixSampler::SetSamplingMat(Mat A)
57: {
58: PetscMPIInt size = 1;
60: if (A) PetscCallVoid(static_cast<PetscErrorCode>(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)));
61: if (size > 1) PetscCallVoid(PETSC_ERR_SUP);
62: PetscCallVoid(PetscObjectReference((PetscObject)A));
63: PetscCallVoid(MatDestroy(&this->A));
64: this->A = A;
65: }
67: void PetscMatrixSampler::SetStream(h2opusComputeStream_t stream)
68: {
69: this->stream = stream;
70: }
72: void PetscMatrixSampler::SetIndexMap(int n, int *indexmap)
73: {
74: copyVector(this->hindexmap, indexmap, n, H2OPUS_HWTYPE_CPU);
75: #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
76: copyVector(this->dindexmap, indexmap, n, H2OPUS_HWTYPE_CPU);
77: #endif
78: }
80: void PetscMatrixSampler::VerifyBuffers(int nv)
81: {
82: if (this->hindexmap.size()) {
83: size_t n = this->hindexmap.size();
84: if (!this->gpusampling) {
85: if (hbuffer_in.size() < (size_t)n * nv) hbuffer_in.resize(n * nv);
86: if (hbuffer_out.size() < (size_t)n * nv) hbuffer_out.resize(n * nv);
87: } else {
88: #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
89: if (dbuffer_in.size() < (size_t)n * nv) dbuffer_in.resize(n * nv);
90: if (dbuffer_out.size() < (size_t)n * nv) dbuffer_out.resize(n * nv);
91: #endif
92: }
93: }
94: }
96: void PetscMatrixSampler::PermuteBuffersIn(int nv, H2Opus_Real *v, H2Opus_Real **w, H2Opus_Real *ov, H2Opus_Real **ow)
97: {
98: *w = v;
99: *ow = ov;
100: VerifyBuffers(nv);
101: if (this->hindexmap.size()) {
102: size_t n = this->hindexmap.size();
103: if (!this->gpusampling) {
104: permute_vectors(v, this->hbuffer_in.data(), n, nv, this->hindexmap.data(), 1, H2OPUS_HWTYPE_CPU, this->stream);
105: *w = this->hbuffer_in.data();
106: *ow = this->hbuffer_out.data();
107: } else {
108: #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
109: permute_vectors(v, this->dbuffer_in.data(), n, nv, this->dindexmap.data(), 1, H2OPUS_HWTYPE_GPU, this->stream);
110: *w = this->dbuffer_in.data();
111: *ow = this->dbuffer_out.data();
112: #endif
113: }
114: }
115: }
117: void PetscMatrixSampler::PermuteBuffersOut(int nv, H2Opus_Real *v)
118: {
119: VerifyBuffers(nv);
120: if (this->hindexmap.size()) {
121: size_t n = this->hindexmap.size();
122: if (!this->gpusampling) {
123: permute_vectors(this->hbuffer_out.data(), v, n, nv, this->hindexmap.data(), 0, H2OPUS_HWTYPE_CPU, this->stream);
124: } else {
125: #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
126: permute_vectors(this->dbuffer_out.data(), v, n, nv, this->dindexmap.data(), 0, H2OPUS_HWTYPE_GPU, this->stream);
127: #endif
128: }
129: }
130: }
132: void PetscMatrixSampler::SetGPUSampling(bool gpusampling)
133: {
134: this->gpusampling = gpusampling;
135: }
137: PetscMatrixSampler::~PetscMatrixSampler()
138: {
139: PetscCallVoid(MatDestroy(&A));
140: }
142: void PetscMatrixSampler::sample(H2Opus_Real *x, H2Opus_Real *y, int samples)
143: {
144: MPI_Comm comm = PetscObjectComm((PetscObject)this->A);
145: Mat X = NULL, Y = NULL;
146: PetscInt M, N, m, n;
147: H2Opus_Real *px, *py;
148: VecType vtype;
150: if (!this->A) PetscCallVoid(PETSC_ERR_PLIB);
151: PetscCallVoid(MatGetSize(this->A, &M, &N));
152: PetscCallVoid(MatGetVecType(this->A, &vtype));
153: PetscCallVoid(MatGetLocalSize(this->A, &m, &n));
154: PetscCallVoid(PetscObjectGetComm((PetscObject)A, &comm));
155: PermuteBuffersIn(samples, x, &px, y, &py);
156: if (!this->gpusampling) {
157: PetscCallVoid(MatCreateDense(comm, n, PETSC_DECIDE, N, samples, px, &X));
158: PetscCallVoid(MatCreateDense(comm, m, PETSC_DECIDE, M, samples, py, &Y));
159: PetscCallVoid(MatSetVecType(X, vtype));
160: PetscCallVoid(MatSetVecType(Y, vtype));
162: } else {
163: #if defined(PETSC_HAVE_CUDA)
164: PetscCallVoid(MatCreateDenseCUDA(comm, n, PETSC_DECIDE, N, samples, px, &X));
165: PetscCallVoid(MatCreateDenseCUDA(comm, m, PETSC_DECIDE, M, samples, py, &Y));
166: PetscCallVoid(MatSetVecType(X, vtype));
167: PetscCallVoid(MatSetVecType(Y, vtype));
168: #endif
169: }
170: PetscCallVoid(MatMatMult(this->A, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
171: #if defined(PETSC_HAVE_CUDA)
172: if (this->gpusampling) {
173: const PetscScalar *dummy;
174: PetscCallVoid(MatDenseCUDAGetArrayRead(Y, &dummy));
175: PetscCallVoid(MatDenseCUDARestoreArrayRead(Y, &dummy));
176: }
177: #endif
178: PermuteBuffersOut(samples, y);
179: PetscCallVoid(MatDestroy(&X));
180: PetscCallVoid(MatDestroy(&Y));
181: }