Actual source code: matmpidensecuda.cu
1: #include "../matmpidensecupm.hpp"
3: using namespace Petsc::mat::cupm;
4: using Petsc::device::cupm::DeviceType;
6: static constexpr impl::MatDense_MPI_CUPM<DeviceType::CUDA> mat_cupm{};
8: /*MC
9: MATDENSECUDA - "densecuda" - A matrix type to be used for dense matrices on GPUs.
11: This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process
12: communicator, and `MATMPIDENSECUDA` otherwise.
14: Options Database Key:
15: . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to
16: `MatSetFromOptions()`
18: Level: beginner
20: .seealso: [](ch_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`,
21: `MATMPIDENSEHIP`, `MATDENSE`
22: M*/
24: /*MC
25: MATMPIDENSECUDA - "mpidensecuda" - A matrix type to be used for distributed dense matrices on
26: GPUs.
28: Options Database Key:
29: . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to
30: `MatSetFromOptions()`
32: Level: beginner
34: .seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MATMPIDENSE`, `MATSEQDENSE`,
35: `MATSEQDENSECUDA`, `MATSEQDENSEHIP`
36: M*/
37: PETSC_INTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat A)
38: {
39: PetscFunctionBegin;
40: PetscCall(mat_cupm.Create(A));
41: PetscFunctionReturn(PETSC_SUCCESS);
42: }
44: PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat A, MatType type, MatReuse reuse, Mat *ret)
45: {
46: PetscFunctionBegin;
47: PetscCall(mat_cupm.Convert_MPIDense_MPIDenseCUPM(A, type, reuse, ret));
48: PetscFunctionReturn(PETSC_SUCCESS);
49: }
51: /*@C
52: MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.
54: Collective
56: Input Parameters:
57: + comm - MPI communicator
58: . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
59: . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
60: . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
61: . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
62: - data - optional location of GPU matrix data. Pass `NULL` to have PETSc to control matrix memory allocation.
64: Output Parameter:
65: . A - the matrix
67: Level: intermediate
69: .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
70: @*/
71: PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
72: {
73: PetscFunctionBegin;
74: PetscCall(MatCreateDenseCUPM<DeviceType::CUDA>(comm, m, n, M, N, data, A));
75: PetscFunctionReturn(PETSC_SUCCESS);
76: }
78: /*@C
79: MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
80: array provided by the user. This is useful to avoid copying an array into a matrix.
82: Not Collective
84: Input Parameters:
85: + mat - the matrix
86: - array - the array in column major order
88: Level: developer
90: Note:
91: You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is
92: responsible for freeing this array; it will not be freed when the matrix is destroyed. The
93: array must have been allocated with `cudaMalloc()`.
95: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`,
96: `MatDenseCUDAReplaceArray()`
97: @*/
98: PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
99: {
100: PetscFunctionBegin;
101: PetscCall(MatDenseCUPMPlaceArray<DeviceType::CUDA>(mat, array));
102: PetscFunctionReturn(PETSC_SUCCESS);
103: }
105: /*@C
106: MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to
107: `MatDenseCUDAPlaceArray()`
109: Not Collective
111: Input Parameter:
112: . mat - the matrix
114: Level: developer
116: Note:
117: You can only call this after a call to `MatDenseCUDAPlaceArray()`
119: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
120: @*/
121: PetscErrorCode MatDenseCUDAResetArray(Mat mat)
122: {
123: PetscFunctionBegin;
124: PetscCall(MatDenseCUPMResetArray<DeviceType::CUDA>(mat));
125: PetscFunctionReturn(PETSC_SUCCESS);
126: }
128: /*@C
129: MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix
130: with an array provided by the user. This is useful to avoid copying an array into a matrix.
132: Not Collective
134: Input Parameters:
135: + mat - the matrix
136: - array - the array in column major order
138: Level: developer
140: Note:
141: This permanently replaces the GPU array and frees the memory associated with the old GPU
142: array. The memory passed in CANNOT be freed by the user. It will be freed when the matrix is
143: destroyed. The array should respect the matrix leading dimension.
145: .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
146: @*/
147: PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
148: {
149: PetscFunctionBegin;
150: PetscCall(MatDenseCUPMReplaceArray<DeviceType::CUDA>(mat, array));
151: PetscFunctionReturn(PETSC_SUCCESS);
152: }
154: /*@C
155: MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA`
156: matrix.
158: Not Collective
160: Input Parameter:
161: . A - the matrix
163: Output Parameter:
164: . a - the GPU array in column major order
166: Level: developer
168: Notes:
169: The data on the GPU may not be updated due to operations done on the CPU. If you need updated
170: data, use `MatDenseCUDAGetArray()`.
172: The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.
174: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
175: `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`,
176: `MatDenseCUDARestoreArrayRead()`
177: @*/
178: PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
179: {
180: PetscFunctionBegin;
181: PetscCall(MatDenseCUPMGetArrayWrite<DeviceType::CUDA>(A, a));
182: PetscFunctionReturn(PETSC_SUCCESS);
183: }
185: /*@C
186: MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a
187: `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.
189: Not Collective
191: Input Parameters:
192: + A - the matrix
193: - a - the GPU array in column major order
195: Level: developer
197: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
198: `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
199: @*/
200: PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
201: {
202: PetscFunctionBegin;
203: PetscCall(MatDenseCUPMRestoreArrayWrite<DeviceType::CUDA>(A, a));
204: PetscFunctionReturn(PETSC_SUCCESS);
205: }
207: /*@C
208: MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a
209: `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when
210: no longer needed.
212: Not Collective
214: Input Parameter:
215: . A - the matrix
217: Output Parameter:
218: . a - the GPU array in column major order
220: Level: developer
222: Note:
223: Data may be copied to the GPU due to operations done on the CPU. If you need write only
224: access, use `MatDenseCUDAGetArrayWrite()`.
226: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
227: `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
228: `MatDenseCUDARestoreArrayRead()`
229: @*/
230: PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
231: {
232: PetscFunctionBegin;
233: PetscCall(MatDenseCUPMGetArrayRead<DeviceType::CUDA>(A, a));
234: PetscFunctionReturn(PETSC_SUCCESS);
235: }
237: /*@C
238: MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a
239: `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.
241: Not Collective
243: Input Parameters:
244: + A - the matrix
245: - a - the GPU array in column major order
247: Level: developer
249: Note:
250: Data can be copied to the GPU due to operations done on the CPU. If you need write only
251: access, use `MatDenseCUDAGetArrayWrite()`.
253: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
254: `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
255: @*/
256: PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
257: {
258: PetscFunctionBegin;
259: PetscCall(MatDenseCUPMRestoreArrayRead<DeviceType::CUDA>(A, a));
260: PetscFunctionReturn(PETSC_SUCCESS);
261: }
263: /*@C
264: MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The
265: array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.
267: Not Collective
269: Input Parameter:
270: . A - the matrix
272: Output Parameter:
273: . a - the GPU array in column major order
275: Level: developer
277: Note:
278: Data can be copied to the GPU due to operations done on the CPU. If you need write only
279: access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use
280: `MatDenseCUDAGetArrayRead()`.
282: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`,
283: `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
284: `MatDenseCUDARestoreArrayRead()`
285: @*/
286: PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
287: {
288: PetscFunctionBegin;
289: PetscCall(MatDenseCUPMGetArray<DeviceType::CUDA>(A, a));
290: PetscFunctionReturn(PETSC_SUCCESS);
291: }
293: /*@C
294: MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix
295: previously obtained with `MatDenseCUDAGetArray()`.
297: Not Collective
299: Level: developer
301: Input Parameters:
302: + A - the matrix
303: - a - the GPU array in column major order
305: .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`,
306: `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
307: @*/
308: PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
309: {
310: PetscFunctionBegin;
311: PetscCall(MatDenseCUPMRestoreArray<DeviceType::CUDA>(A, a));
312: PetscFunctionReturn(PETSC_SUCCESS);
313: }
315: /*@C
316: MatDenseCUDASetPreallocation - Set the device array used for storing the matrix elements of a
317: `MATDENSECUDA` matrix
319: Collective
321: Input Parameters:
322: + A - the matrix
323: - device_array - the array (or `NULL`)
325: Level: intermediate
327: .seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MatCreate()`, `MatCreateDenseCUDA()`,
328: `MatSetValues()`, `MatDenseSetLDA()`
329: @*/
330: PetscErrorCode MatDenseCUDASetPreallocation(Mat A, PetscScalar *device_array)
331: {
332: PetscFunctionBegin;
333: PetscCall(MatDenseCUPMSetPreallocation<DeviceType::CUDA>(A, device_array));
334: PetscFunctionReturn(PETSC_SUCCESS);
335: }