Actual source code: aijmkl.c
1: /*
2: Defines basic operations for the MATSEQAIJMKL matrix class.
3: This class is derived from the MATSEQAIJ class and retains the
4: compressed row storage (aka Yale sparse matrix format) but uses
5: sparse BLAS operations from the Intel Math Kernel Library (MKL)
6: wherever possible.
7: */
9: #include <../src/mat/impls/aij/seq/aij.h>
10: #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11: #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
12: #define MKL_ILP64
13: #endif
14: #include <mkl_spblas.h>
16: typedef struct {
17: PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
18: PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
19: PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
20: PetscObjectState state;
21: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
22: sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
23: struct matrix_descr descr;
24: #endif
25: } Mat_SeqAIJMKL;
27: extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);
29: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
30: {
31: /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
32: /* so we will ignore 'MatType type'. */
33: Mat B = *newmat;
34: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
35: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
36: #endif
38: PetscFunctionBegin;
39: if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
41: /* Reset the original function pointers. */
42: B->ops->duplicate = MatDuplicate_SeqAIJ;
43: B->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
44: B->ops->destroy = MatDestroy_SeqAIJ;
45: B->ops->mult = MatMult_SeqAIJ;
46: B->ops->multtranspose = MatMultTranspose_SeqAIJ;
47: B->ops->multadd = MatMultAdd_SeqAIJ;
48: B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
49: B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ;
50: B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
51: B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ;
52: B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
53: B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
54: B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ;
56: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));
58: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
59: /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
60: * simply involves destroying the MKL sparse matrix handle and then freeing
61: * the spptr pointer. */
62: if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;
64: if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
65: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
66: PetscCall(PetscFree(B->spptr));
68: /* Change the type of B to MATSEQAIJ. */
69: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
71: *newmat = B;
72: PetscFunctionReturn(PETSC_SUCCESS);
73: }
75: static PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
76: {
77: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
79: PetscFunctionBegin;
81: /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
82: if (aijmkl) {
83: /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
84: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
85: if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
86: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
87: PetscCall(PetscFree(A->spptr));
88: }
90: /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
91: * to destroy everything that remains. */
92: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
93: /* I don't call MatSetType(). I believe this is because that
94: * is only to be called when *building* a matrix. I could be wrong, but
95: * that is how things work for the SuperLU matrix class. */
96: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
97: PetscCall(MatDestroy_SeqAIJ(A));
98: PetscFunctionReturn(PETSC_SUCCESS);
99: }
101: /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
102: * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
103: * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
104: * handle, creates a new one, and then calls mkl_sparse_optimize().
105: * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
106: * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
107: * an unoptimized matrix handle here. */
108: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
109: {
110: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
111: /* If the MKL library does not have mkl_sparse_optimize(), then this routine
112: * does nothing. We make it callable anyway in this case because it cuts
113: * down on littering the code with #ifdefs. */
114: PetscFunctionBegin;
115: PetscFunctionReturn(PETSC_SUCCESS);
116: #else
117: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
118: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
119: PetscInt m, n;
120: MatScalar *aa;
121: PetscInt *aj, *ai;
123: PetscFunctionBegin;
124: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
125: /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
126: * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
127: * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
128: * mkl_sparse_optimize() later. */
129: if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS);
130: #endif
132: if (aijmkl->sparse_optimized) {
133: /* Matrix has been previously assembled and optimized. Must destroy old
134: * matrix handle before running the optimization step again. */
135: PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
136: }
137: aijmkl->sparse_optimized = PETSC_FALSE;
139: /* Now perform the SpMV2 setup and matrix optimization. */
140: aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
141: aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
142: aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
143: m = A->rmap->n;
144: n = A->cmap->n;
145: aj = a->j; /* aj[k] gives column index for element aa[k]. */
146: aa = a->a; /* Nonzero elements stored row-by-row. */
147: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
148: if (a->nz && aa && !A->structure_only) {
149: /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
150: * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
151: PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
152: PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
153: PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
154: if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
155: aijmkl->sparse_optimized = PETSC_TRUE;
156: PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
157: } else {
158: aijmkl->csrA = NULL;
159: }
161: PetscFunctionReturn(PETSC_SUCCESS);
162: #endif
163: }
165: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
166: /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
167: static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
168: {
169: sparse_index_base_t indexing;
170: PetscInt m, n;
171: PetscInt *aj, *ai, *dummy;
172: MatScalar *aa;
173: Mat_SeqAIJMKL *aijmkl;
175: PetscFunctionBegin;
176: if (csrA) {
177: /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
178: PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
179: PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
180: } else {
181: aj = ai = NULL;
182: aa = NULL;
183: }
185: PetscCall(MatSetType(A, MATSEQAIJ));
186: PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
187: /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
188: * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
189: * they will be destroyed when the MKL handle is destroyed.
190: * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
191: if (csrA) {
192: PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
193: } else {
194: /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
195: PetscCall(MatSetUp(A));
196: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
197: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
198: }
200: /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
201: * Now turn it into a MATSEQAIJMKL. */
202: PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
204: aijmkl = (Mat_SeqAIJMKL *)A->spptr;
205: aijmkl->csrA = csrA;
207: /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
208: * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
209: * and just need to be able to run the MKL optimization step. */
210: aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
211: aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
212: aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
213: if (csrA) {
214: PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
215: PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
216: }
217: PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
218: PetscFunctionReturn(PETSC_SUCCESS);
219: }
220: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
222: /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
223: * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
224: * MatMatMultNumeric(). */
225: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
226: static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
227: {
228: PetscInt i;
229: PetscInt nrows, ncols;
230: PetscInt nz;
231: PetscInt *ai, *aj, *dummy;
232: PetscScalar *aa;
233: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
234: sparse_index_base_t indexing;
236: PetscFunctionBegin;
237: /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
238: if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS);
240: /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
241: PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
243: /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
244: * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
245: for (i = 0; i < nrows; i++) {
246: nz = ai[i + 1] - ai[i];
247: PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
248: }
250: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
251: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
253: PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
254: /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
255: * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
256: * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
257: aijmkl->sparse_optimized = PETSC_FALSE;
258: PetscFunctionReturn(PETSC_SUCCESS);
259: }
260: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
262: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
263: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
264: {
265: PetscInt i, j, k;
266: PetscInt nrows, ncols;
267: PetscInt nz;
268: PetscInt *ai, *aj, *dummy;
269: PetscScalar *aa;
270: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
271: sparse_index_base_t indexing;
273: PetscFunctionBegin;
274: PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
276: /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
277: if (!aijmkl->csrA) {
278: PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
279: PetscFunctionReturn(PETSC_SUCCESS);
280: }
282: /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
283: PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
285: k = 0;
286: for (i = 0; i < nrows; i++) {
287: PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
288: nz = ai[i + 1] - ai[i];
289: for (j = 0; j < nz; j++) {
290: if (aa) {
291: PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g) ", aj[k], PetscRealPart(aa[k])));
292: } else {
293: PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
294: }
295: k++;
296: }
297: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
298: }
299: PetscFunctionReturn(PETSC_SUCCESS);
300: }
301: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
303: static PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
304: {
305: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
306: Mat_SeqAIJMKL *aijmkl_dest;
308: PetscFunctionBegin;
309: PetscCall(MatDuplicate_SeqAIJ(A, op, M));
310: aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
311: PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
312: aijmkl_dest->sparse_optimized = PETSC_FALSE;
313: if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
314: PetscFunctionReturn(PETSC_SUCCESS);
315: }
317: static PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
318: {
319: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
320: Mat_SeqAIJMKL *aijmkl;
322: PetscFunctionBegin;
323: if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
325: /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
326: * extra information and some different methods, call the AssemblyEnd
327: * routine for a MATSEQAIJ.
328: * I'm not sure if this is the best way to do this, but it avoids
329: * a lot of code duplication. */
330: a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
331: PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
333: /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
334: * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
335: aijmkl = (Mat_SeqAIJMKL *)A->spptr;
336: if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
338: PetscFunctionReturn(PETSC_SUCCESS);
339: }
341: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
342: static PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
343: {
344: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
345: const PetscScalar *x;
346: PetscScalar *y;
347: const MatScalar *aa;
348: PetscInt m = A->rmap->n;
349: PetscInt n = A->cmap->n;
350: PetscScalar alpha = 1.0;
351: PetscScalar beta = 0.0;
352: const PetscInt *aj, *ai;
353: char matdescra[6];
355: /* Variables not in MatMult_SeqAIJ. */
356: char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
358: PetscFunctionBegin;
359: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
360: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
361: PetscCall(VecGetArrayRead(xx, &x));
362: PetscCall(VecGetArray(yy, &y));
363: aj = a->j; /* aj[k] gives column index for element aa[k]. */
364: aa = a->a; /* Nonzero elements stored row-by-row. */
365: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
367: /* Call MKL sparse BLAS routine to do the MatMult. */
368: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
370: PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
371: PetscCall(VecRestoreArrayRead(xx, &x));
372: PetscCall(VecRestoreArray(yy, &y));
373: PetscFunctionReturn(PETSC_SUCCESS);
374: }
375: #endif
377: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
378: PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
379: {
380: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
381: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
382: const PetscScalar *x;
383: PetscScalar *y;
384: PetscObjectState state;
386: PetscFunctionBegin;
388: /* If there are no nonzero entries, zero yy and return immediately. */
389: if (!a->nz) {
390: PetscCall(VecGetArray(yy, &y));
391: PetscCall(PetscArrayzero(y, A->rmap->n));
392: PetscCall(VecRestoreArray(yy, &y));
393: PetscFunctionReturn(PETSC_SUCCESS);
394: }
396: PetscCall(VecGetArrayRead(xx, &x));
397: PetscCall(VecGetArray(yy, &y));
399: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
400: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
401: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
402: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
403: if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
405: /* Call MKL SpMV2 executor routine to do the MatMult. */
406: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
408: PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
409: PetscCall(VecRestoreArrayRead(xx, &x));
410: PetscCall(VecRestoreArray(yy, &y));
411: PetscFunctionReturn(PETSC_SUCCESS);
412: }
413: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
415: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
416: static PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
417: {
418: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
419: const PetscScalar *x;
420: PetscScalar *y;
421: const MatScalar *aa;
422: PetscInt m = A->rmap->n;
423: PetscInt n = A->cmap->n;
424: PetscScalar alpha = 1.0;
425: PetscScalar beta = 0.0;
426: const PetscInt *aj, *ai;
427: char matdescra[6];
429: /* Variables not in MatMultTranspose_SeqAIJ. */
430: char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
432: PetscFunctionBegin;
433: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
434: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
435: PetscCall(VecGetArrayRead(xx, &x));
436: PetscCall(VecGetArray(yy, &y));
437: aj = a->j; /* aj[k] gives column index for element aa[k]. */
438: aa = a->a; /* Nonzero elements stored row-by-row. */
439: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
441: /* Call MKL sparse BLAS routine to do the MatMult. */
442: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
444: PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
445: PetscCall(VecRestoreArrayRead(xx, &x));
446: PetscCall(VecRestoreArray(yy, &y));
447: PetscFunctionReturn(PETSC_SUCCESS);
448: }
449: #endif
451: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
452: PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
453: {
454: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
455: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
456: const PetscScalar *x;
457: PetscScalar *y;
458: PetscObjectState state;
460: PetscFunctionBegin;
462: /* If there are no nonzero entries, zero yy and return immediately. */
463: if (!a->nz) {
464: PetscCall(VecGetArray(yy, &y));
465: PetscCall(PetscArrayzero(y, A->cmap->n));
466: PetscCall(VecRestoreArray(yy, &y));
467: PetscFunctionReturn(PETSC_SUCCESS);
468: }
470: PetscCall(VecGetArrayRead(xx, &x));
471: PetscCall(VecGetArray(yy, &y));
473: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
474: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
475: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
476: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
477: if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
479: /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
480: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
482: PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
483: PetscCall(VecRestoreArrayRead(xx, &x));
484: PetscCall(VecRestoreArray(yy, &y));
485: PetscFunctionReturn(PETSC_SUCCESS);
486: }
487: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
489: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
490: static PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
491: {
492: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
493: const PetscScalar *x;
494: PetscScalar *y, *z;
495: const MatScalar *aa;
496: PetscInt m = A->rmap->n;
497: PetscInt n = A->cmap->n;
498: const PetscInt *aj, *ai;
499: PetscInt i;
501: /* Variables not in MatMultAdd_SeqAIJ. */
502: char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
503: PetscScalar alpha = 1.0;
504: PetscScalar beta;
505: char matdescra[6];
507: PetscFunctionBegin;
508: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
509: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
511: PetscCall(VecGetArrayRead(xx, &x));
512: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
513: aj = a->j; /* aj[k] gives column index for element aa[k]. */
514: aa = a->a; /* Nonzero elements stored row-by-row. */
515: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
517: /* Call MKL sparse BLAS routine to do the MatMult. */
518: if (zz == yy) {
519: /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
520: beta = 1.0;
521: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
522: } else {
523: /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
524: * MKL sparse BLAS does not have a MatMultAdd equivalent. */
525: beta = 0.0;
526: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
527: for (i = 0; i < m; i++) z[i] += y[i];
528: }
530: PetscCall(PetscLogFlops(2.0 * a->nz));
531: PetscCall(VecRestoreArrayRead(xx, &x));
532: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
533: PetscFunctionReturn(PETSC_SUCCESS);
534: }
535: #endif
537: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
538: PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
539: {
540: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
541: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
542: const PetscScalar *x;
543: PetscScalar *y, *z;
544: PetscInt m = A->rmap->n;
545: PetscInt i;
547: /* Variables not in MatMultAdd_SeqAIJ. */
548: PetscObjectState state;
550: PetscFunctionBegin;
552: /* If there are no nonzero entries, set zz = yy and return immediately. */
553: if (!a->nz) {
554: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
555: PetscCall(PetscArraycpy(z, y, m));
556: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
557: PetscFunctionReturn(PETSC_SUCCESS);
558: }
560: PetscCall(VecGetArrayRead(xx, &x));
561: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
563: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
564: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
565: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
566: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
567: if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
569: /* Call MKL sparse BLAS routine to do the MatMult. */
570: if (zz == yy) {
571: /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
572: * with alpha and beta both set to 1.0. */
573: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
574: } else {
575: /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
576: * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
577: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
578: for (i = 0; i < m; i++) z[i] += y[i];
579: }
581: PetscCall(PetscLogFlops(2.0 * a->nz));
582: PetscCall(VecRestoreArrayRead(xx, &x));
583: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
584: PetscFunctionReturn(PETSC_SUCCESS);
585: }
586: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
588: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
589: static PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
590: {
591: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
592: const PetscScalar *x;
593: PetscScalar *y, *z;
594: const MatScalar *aa;
595: PetscInt m = A->rmap->n;
596: PetscInt n = A->cmap->n;
597: const PetscInt *aj, *ai;
598: PetscInt i;
600: /* Variables not in MatMultTransposeAdd_SeqAIJ. */
601: char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
602: PetscScalar alpha = 1.0;
603: PetscScalar beta;
604: char matdescra[6];
606: PetscFunctionBegin;
607: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
608: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
610: PetscCall(VecGetArrayRead(xx, &x));
611: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
612: aj = a->j; /* aj[k] gives column index for element aa[k]. */
613: aa = a->a; /* Nonzero elements stored row-by-row. */
614: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
616: /* Call MKL sparse BLAS routine to do the MatMult. */
617: if (zz == yy) {
618: /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
619: beta = 1.0;
620: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
621: } else {
622: /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
623: * MKL sparse BLAS does not have a MatMultAdd equivalent. */
624: beta = 0.0;
625: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
626: for (i = 0; i < n; i++) z[i] += y[i];
627: }
629: PetscCall(PetscLogFlops(2.0 * a->nz));
630: PetscCall(VecRestoreArrayRead(xx, &x));
631: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
632: PetscFunctionReturn(PETSC_SUCCESS);
633: }
634: #endif
636: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
637: PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
638: {
639: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
640: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
641: const PetscScalar *x;
642: PetscScalar *y, *z;
643: PetscInt n = A->cmap->n;
644: PetscInt i;
645: PetscObjectState state;
647: /* Variables not in MatMultTransposeAdd_SeqAIJ. */
649: PetscFunctionBegin;
651: /* If there are no nonzero entries, set zz = yy and return immediately. */
652: if (!a->nz) {
653: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
654: PetscCall(PetscArraycpy(z, y, n));
655: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
656: PetscFunctionReturn(PETSC_SUCCESS);
657: }
659: PetscCall(VecGetArrayRead(xx, &x));
660: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
662: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
663: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
664: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
665: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
666: if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
668: /* Call MKL sparse BLAS routine to do the MatMult. */
669: if (zz == yy) {
670: /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
671: * with alpha and beta both set to 1.0. */
672: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
673: } else {
674: /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
675: * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
676: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
677: for (i = 0; i < n; i++) z[i] += y[i];
678: }
680: PetscCall(PetscLogFlops(2.0 * a->nz));
681: PetscCall(VecRestoreArrayRead(xx, &x));
682: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
683: PetscFunctionReturn(PETSC_SUCCESS);
684: }
685: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
687: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
688: static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
689: {
690: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
691: sparse_matrix_t csrA, csrB, csrC;
692: PetscInt nrows, ncols;
693: struct matrix_descr descr_type_gen;
694: PetscObjectState state;
696: PetscFunctionBegin;
697: /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
698: * not handle sparse matrices with zero rows or columns. */
699: if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
700: else nrows = A->cmap->N;
701: if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
702: else ncols = B->rmap->N;
704: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
705: if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
706: PetscCall(PetscObjectStateGet((PetscObject)B, &state));
707: if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
708: csrA = a->csrA;
709: csrB = b->csrA;
710: descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
712: if (csrA && csrB) {
713: PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
714: } else {
715: csrC = NULL;
716: }
718: PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
720: PetscFunctionReturn(PETSC_SUCCESS);
721: }
723: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
724: {
725: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
726: sparse_matrix_t csrA, csrB, csrC;
727: struct matrix_descr descr_type_gen;
728: PetscObjectState state;
730: PetscFunctionBegin;
731: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
732: if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
733: PetscCall(PetscObjectStateGet((PetscObject)B, &state));
734: if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
735: csrA = a->csrA;
736: csrB = b->csrA;
737: csrC = c->csrA;
738: descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
740: if (csrA && csrB) {
741: PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
742: } else {
743: csrC = NULL;
744: }
746: /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
747: PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
749: PetscFunctionReturn(PETSC_SUCCESS);
750: }
752: PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
753: {
754: PetscFunctionBegin;
755: PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
756: PetscFunctionReturn(PETSC_SUCCESS);
757: }
759: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
760: {
761: PetscFunctionBegin;
762: PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
763: PetscFunctionReturn(PETSC_SUCCESS);
764: }
766: PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
767: {
768: PetscFunctionBegin;
769: PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
770: PetscFunctionReturn(PETSC_SUCCESS);
771: }
773: PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
774: {
775: PetscFunctionBegin;
776: PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
777: PetscFunctionReturn(PETSC_SUCCESS);
778: }
780: PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
781: {
782: PetscFunctionBegin;
783: PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
784: PetscFunctionReturn(PETSC_SUCCESS);
785: }
787: PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
788: {
789: PetscFunctionBegin;
790: PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
791: PetscFunctionReturn(PETSC_SUCCESS);
792: }
794: static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
795: {
796: Mat_Product *product = C->product;
797: Mat A = product->A, B = product->B;
799: PetscFunctionBegin;
800: PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
801: PetscFunctionReturn(PETSC_SUCCESS);
802: }
804: static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
805: {
806: Mat_Product *product = C->product;
807: Mat A = product->A, B = product->B;
808: PetscReal fill = product->fill;
810: PetscFunctionBegin;
811: PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
812: C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
813: PetscFunctionReturn(PETSC_SUCCESS);
814: }
816: PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
817: {
818: Mat Ct;
819: Vec zeros;
820: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
821: sparse_matrix_t csrA, csrP, csrC;
822: PetscBool set, flag;
823: struct matrix_descr descr_type_sym;
824: PetscObjectState state;
826: PetscFunctionBegin;
827: PetscCall(MatIsSymmetricKnown(A, &set, &flag));
828: PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
830: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
831: if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
832: PetscCall(PetscObjectStateGet((PetscObject)P, &state));
833: if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
834: csrA = a->csrA;
835: csrP = p->csrA;
836: csrC = c->csrA;
837: descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
838: descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
839: descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
841: /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
842: PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);
844: /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
845: * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
846: * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
847: * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
848: * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
849: * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
850: * the full matrix. */
851: PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
852: PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
853: PetscCall(MatCreateVecs(C, &zeros, NULL));
854: PetscCall(VecSetFromOptions(zeros));
855: PetscCall(VecZeroEntries(zeros));
856: PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
857: PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
858: /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
859: PetscCall(MatProductCreateWithMat(A, P, NULL, C));
860: PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
861: PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
862: PetscCall(VecDestroy(&zeros));
863: PetscCall(MatDestroy(&Ct));
864: PetscFunctionReturn(PETSC_SUCCESS);
865: }
867: PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
868: {
869: Mat_Product *product = C->product;
870: Mat A = product->A, P = product->B;
871: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
872: sparse_matrix_t csrA, csrP, csrC;
873: struct matrix_descr descr_type_sym;
874: PetscObjectState state;
876: PetscFunctionBegin;
877: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
878: if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
879: PetscCall(PetscObjectStateGet((PetscObject)P, &state));
880: if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
881: csrA = a->csrA;
882: csrP = p->csrA;
883: descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
884: descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
885: descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
887: /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
888: if (csrP && csrA) {
889: PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
890: } else {
891: csrC = NULL;
892: }
894: /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
895: * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
896: * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
897: * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
898: * is guaranteed. */
899: PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));
901: C->ops->productnumeric = MatProductNumeric_PtAP;
902: PetscFunctionReturn(PETSC_SUCCESS);
903: }
905: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
906: {
907: PetscFunctionBegin;
908: C->ops->productsymbolic = MatProductSymbolic_AB;
909: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
910: PetscFunctionReturn(PETSC_SUCCESS);
911: }
913: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
914: {
915: PetscFunctionBegin;
916: C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
917: PetscFunctionReturn(PETSC_SUCCESS);
918: }
920: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
921: {
922: PetscFunctionBegin;
923: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
924: C->ops->productsymbolic = MatProductSymbolic_ABt;
925: PetscFunctionReturn(PETSC_SUCCESS);
926: }
928: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
929: {
930: Mat_Product *product = C->product;
931: Mat A = product->A;
932: PetscBool set, flag;
934: PetscFunctionBegin;
935: if (PetscDefined(USE_COMPLEX)) {
936: /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
937: * We do this in several other locations in this file. This works for the time being, but these
938: * routines are considered unsafe and may be removed from the MatProduct code in the future.
939: * TODO: Add proper MATSEQAIJMKL implementations */
940: C->ops->productsymbolic = NULL;
941: } else {
942: /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
943: PetscCall(MatIsSymmetricKnown(A, &set, &flag));
944: if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
945: else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
946: /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
947: * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
948: }
949: PetscFunctionReturn(PETSC_SUCCESS);
950: }
952: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
953: {
954: PetscFunctionBegin;
955: C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
956: PetscFunctionReturn(PETSC_SUCCESS);
957: }
959: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
960: {
961: PetscFunctionBegin;
962: C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
963: PetscFunctionReturn(PETSC_SUCCESS);
964: }
966: PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
967: {
968: Mat_Product *product = C->product;
970: PetscFunctionBegin;
971: switch (product->type) {
972: case MATPRODUCT_AB:
973: PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
974: break;
975: case MATPRODUCT_AtB:
976: PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
977: break;
978: case MATPRODUCT_ABt:
979: PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
980: break;
981: case MATPRODUCT_PtAP:
982: PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
983: break;
984: case MATPRODUCT_RARt:
985: PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
986: break;
987: case MATPRODUCT_ABC:
988: PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
989: break;
990: default:
991: break;
992: }
993: PetscFunctionReturn(PETSC_SUCCESS);
994: }
995: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
997: /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
998: * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
999: * routine, but can also be used to convert an assembled SeqAIJ matrix
1000: * into a SeqAIJMKL one. */
1001: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
1002: {
1003: Mat B = *newmat;
1004: Mat_SeqAIJMKL *aijmkl;
1005: PetscBool set;
1006: PetscBool sametype;
1008: PetscFunctionBegin;
1009: if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
1011: PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
1012: if (sametype) PetscFunctionReturn(PETSC_SUCCESS);
1014: PetscCall(PetscNew(&aijmkl));
1015: B->spptr = (void *)aijmkl;
1017: /* Set function pointers for methods that we inherit from AIJ but override.
1018: * We also parse some command line options below, since those determine some of the methods we point to. */
1019: B->ops->duplicate = MatDuplicate_SeqAIJMKL;
1020: B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
1021: B->ops->destroy = MatDestroy_SeqAIJMKL;
1023: aijmkl->sparse_optimized = PETSC_FALSE;
1024: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1025: aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
1026: #else
1027: aijmkl->no_SpMV2 = PETSC_TRUE;
1028: #endif
1029: aijmkl->eager_inspection = PETSC_FALSE;
1031: /* Parse command line options. */
1032: PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
1033: PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
1034: PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
1035: PetscOptionsEnd();
1036: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1037: if (!aijmkl->no_SpMV2) {
1038: PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"));
1039: aijmkl->no_SpMV2 = PETSC_TRUE;
1040: }
1041: #endif
1043: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1044: B->ops->mult = MatMult_SeqAIJMKL_SpMV2;
1045: B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2;
1046: B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2;
1047: B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1048: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1049: B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL;
1050: B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1051: B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1052: B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1053: B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1054: #if !defined(PETSC_USE_COMPLEX)
1055: B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1056: #else
1057: B->ops->ptapnumeric = NULL;
1058: #endif
1059: #endif
1060: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1062: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1063: /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1064: * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1065: * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1066: * versions in which the older interface has not been deprecated, we use the old interface. */
1067: if (aijmkl->no_SpMV2) {
1068: B->ops->mult = MatMult_SeqAIJMKL;
1069: B->ops->multtranspose = MatMultTranspose_SeqAIJMKL;
1070: B->ops->multadd = MatMultAdd_SeqAIJMKL;
1071: B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1072: }
1073: #endif
1075: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));
1077: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
1078: *newmat = B;
1079: PetscFunctionReturn(PETSC_SUCCESS);
1080: }
1082: /*@C
1083: MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.
1085: Collective
1087: Input Parameters:
1088: + comm - MPI communicator, set to `PETSC_COMM_SELF`
1089: . m - number of rows
1090: . n - number of columns
1091: . nz - number of nonzeros per row (same for all rows)
1092: - nnz - array containing the number of nonzeros in the various rows
1093: (possibly different for each row) or `NULL`
1095: Output Parameter:
1096: . A - the matrix
1098: Options Database Keys:
1099: + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1100: - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection,
1101: performing this step the first time the matrix is applied
1103: Level: intermediate
1105: Notes:
1106: If `nnz` is given then `nz` is ignored
1108: This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
1109: routines from Intel MKL whenever possible.
1111: If the installed version of MKL supports the "SpMV2" sparse
1112: inspector-executor routines, then those are used by default.
1114: `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
1115: (for symmetric A) operations are currently supported.
1116: MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.
1118: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1119: @*/
1120: PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
1121: {
1122: PetscFunctionBegin;
1123: PetscCall(MatCreate(comm, A));
1124: PetscCall(MatSetSizes(*A, m, n, m, n));
1125: PetscCall(MatSetType(*A, MATSEQAIJMKL));
1126: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
1127: PetscFunctionReturn(PETSC_SUCCESS);
1128: }
1130: PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1131: {
1132: PetscFunctionBegin;
1133: PetscCall(MatSetType(A, MATSEQAIJ));
1134: PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
1135: PetscFunctionReturn(PETSC_SUCCESS);
1136: }