Actual source code: petscaijdevice.h
1: #ifndef PETSCAIJDEVICE_H
2: #define PETSCAIJDEVICE_H
4: #include <petscmat.h>
6: /* SUBMANSEC = Mat */
8: #define CSRDataStructure(datatype) \
9: PetscInt *i; \
10: PetscInt *j; \
11: datatype *a; \
12: PetscInt n; \
13: PetscInt ignorezeroentries;
15: typedef struct {
16: CSRDataStructure(PetscScalar)
17: } PetscCSRDataStructure;
19: struct _n_SplitCSRMat {
20: PetscInt cstart, cend, rstart, rend;
21: PetscCSRDataStructure diag, offdiag;
22: PetscInt *colmap;
23: PetscInt M; // number of columns for out of bounds check
24: PetscMPIInt rank;
25: PetscBool allocated_indices;
26: };
28: /* 64-bit floating-point version of atomicAdd() is only natively supported by
29: CUDA devices of compute capability 6.x and higher. See also sfcuda.cu
30: */
31: #if defined(PETSC_USE_REAL_DOUBLE) && defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600)
32: __device__ double atomicAdd(double *x, double y)
33: {
34: typedef unsigned long long int ullint;
35: double *address = x, val = y;
36: ullint *address_as_ull = (ullint *)address;
37: ullint old = *address_as_ull, assumed;
38: do {
39: assumed = old;
40: old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
41: } while (assumed != old);
42: return __longlong_as_double(old);
43: }
44: #endif
46: #if defined(KOKKOS_INLINE_FUNCTION)
47: #define PetscAtomicAdd(a, b) Kokkos::atomic_fetch_add(a, b)
48: #elif defined(__CUDA_ARCH__)
49: #if defined(PETSC_USE_COMPLEX)
50: #define PetscAtomicAdd(a, b) \
51: { \
52: PetscReal *_a = (PetscReal *)(a); \
53: PetscReal *_b = (PetscReal *)&(b); \
54: atomicAdd(&_a[0], _b[0]); \
55: atomicAdd(&_a[1], _b[1]); \
56: }
57: #else
58: #define PetscAtomicAdd(a, b) atomicAdd(a, b)
59: #endif
60: #else
61: /* TODO: support devices other than CUDA and Kokkos */
62: #define PetscAtomicAdd(a, b) *(a) += b
63: #endif
65: #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv) \
66: { \
67: inserted = 0; \
68: if (col <= lastcol1) low1 = 0; \
69: else high1 = nrow1; \
70: lastcol1 = col; \
71: while (high1 - low1 > 5) { \
72: t = (low1 + high1) / 2; \
73: if (rp1[t] > col) high1 = t; \
74: else low1 = t; \
75: } \
76: for (_i = low1; _i < high1; _i++) { \
77: if (rp1[_i] > col) break; \
78: if (rp1[_i] == col) { \
79: if (addv == ADD_VALUES) { \
80: PetscAtomicAdd(&ap1[_i], value); \
81: } else ap1[_i] = value; \
82: inserted = 1; \
83: break; \
84: } \
85: } \
86: }
88: #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv) \
89: { \
90: inserted = 0; \
91: if (col <= lastcol2) low2 = 0; \
92: else high2 = nrow2; \
93: lastcol2 = col; \
94: while (high2 - low2 > 5) { \
95: t = (low2 + high2) / 2; \
96: if (rp2[t] > col) high2 = t; \
97: else low2 = t; \
98: } \
99: for (_i = low2; _i < high2; _i++) { \
100: if (rp2[_i] > col) break; \
101: if (rp2[_i] == col) { \
102: if (addv == ADD_VALUES) { \
103: PetscAtomicAdd(&ap2[_i], value); \
104: } else ap2[_i] = value; \
105: inserted = 1; \
106: break; \
107: } \
108: } \
109: }
111: #if defined(PETSC_USE_DEBUG) && !defined(PETSC_HAVE_SYCL)
112: #define SETERR return (printf("[%d] ERROR in %s() at %s:%d: Location (%ld,%ld) not found! v=%g\n", d_mat->rank, __func__, __FILE__, __LINE__, (long int)im[i], (long int)in[j], PetscRealPart(value)), PETSC_ERR_ARG_OUTOFRANGE)
113: #else
114: #define SETERR return PETSC_ERR_ARG_OUTOFRANGE
115: #endif
117: #if defined(__CUDA_ARCH__)
118: __device__
119: #elif defined(KOKKOS_INLINE_FUNCTION)
120: KOKKOS_INLINE_FUNCTION
121: #else
122: static
123: #endif
125: /*@C
126: MatSetValuesDevice - sets a set of values into a matrix, this may be called by CUDA or KOKKOS kernels
128: Input Parameters:
129: + d_mat - an object obtained with `MatCUSPARSEGetDeviceMatWrite()` or `MatKokkosGetDeviceMatWrite()`
130: . m - the number of rows to insert or add to
131: . im - the rows to insert or add to
132: . n - number of columns to insert or add to
133: . in - the columns to insert or add to
134: . v - the values to insert or add to the matrix (treated as a by n row oriented dense array
135: - is - either `INSERT_VALUES` or `ADD_VALUES`
137: Notes:
138: Any row or column indices that are outside the bounds of the matrix on the rank are discarded
140: It is recommended that `MatSetValuesCOO()` be used instead of this routine for efficiency
142: Level: advanced
144: .seealso: `MatSetValues()`, `MatCreate()`, `MatCreateDenseCUDA()`, `MatCreateAIJCUSPARSE()`, `MatKokkosGetDeviceMatWrite()`,
145: `MatCUSPARSEGetDeviceMatWrite()`, `MatSetValuesCOO()`
146: @*/
147: PetscErrorCode
148: MatSetValuesDevice(PetscSplitCSRDataStructure d_mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
149: {
150: MatScalar value;
151: const PetscInt *rp1, *rp2 = NULL, *ai = d_mat->diag.i, *aj = d_mat->diag.j;
152: const PetscInt *bi = d_mat->offdiag.i, *bj = d_mat->offdiag.j;
153: MatScalar *ba = d_mat->offdiag.a, *aa = d_mat->diag.a;
154: PetscInt nrow1, nrow2 = 0, _i, low1, high1, low2 = 0, high2 = 0, t, lastcol1, lastcol2 = 0, inserted;
155: MatScalar *ap1, *ap2 = NULL;
156: PetscBool roworiented = PETSC_TRUE;
157: PetscInt i, j, row, col;
158: const PetscInt rstart = d_mat->rstart, rend = d_mat->rend, cstart = d_mat->cstart, cend = d_mat->cend, M = d_mat->M;
160: for (i = 0; i < m; i++) {
161: if (im[i] >= rstart && im[i] < rend) { // silently ignore off processor rows
162: row = (int)(im[i] - rstart);
163: lastcol1 = -1;
164: rp1 = aj + ai[row];
165: ap1 = aa + ai[row];
166: nrow1 = ai[row + 1] - ai[row];
167: low1 = 0;
168: high1 = nrow1;
169: if (bj) {
170: lastcol2 = -1;
171: rp2 = bj + bi[row];
172: ap2 = ba + bi[row];
173: nrow2 = bi[row + 1] - bi[row];
174: low2 = 0;
175: high2 = nrow2;
176: } else {
177: high2 = low2 = 0;
178: }
179: for (j = 0; j < n; j++) {
180: value = roworiented ? v[i * n + j] : v[i + j * m];
181: if (in[j] >= cstart && in[j] < cend) {
182: col = (in[j] - cstart);
183: MatSetValues_SeqAIJ_A_Private(row, col, value, is);
184: if (!inserted) SETERR;
185: } else if (in[j] < 0) { // silently ignore off processor rows
186: continue;
187: } else if (in[j] >= M) SETERR;
188: else {
189: col = d_mat->colmap[in[j]] - 1;
190: if (col < 0) SETERR;
191: MatSetValues_SeqAIJ_B_Private(row, col, value, is);
192: if (!inserted) SETERR;
193: }
194: }
195: }
196: }
197: return 0;
198: }
200: #undef MatSetValues_SeqAIJ_A_Private
201: #undef MatSetValues_SeqAIJ_B_Private
202: #undef SETERR
203: #undef PetscAtomicAdd
204: #undef PetscCSRDataStructure_
206: #endif