/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/
#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
#include <mkl_spblas.h>
typedef struct {
  PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState state;            /* PetscObjectState of the matrix when the MKL handle was last built; used to detect stale handles. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t     csrA;          /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;
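/* Both option flags above are runtime-settable; see the options parsing in
 * MatConvert_SeqAIJ_SeqAIJMKL() below. As an illustrative sketch (the executable
 * name and flag combination are hypothetical):
 *
 *   ./app -mat_type aijmkl -mat_aijmkl_eager_inspection
 *
 * selects this matrix class and requests MKL inspection at assembly time. */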
extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat B = *newmat;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
#endif

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }
  /* Reset the original function pointers. */
  B->ops->duplicate        = MatDuplicate_SeqAIJ;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy          = MatDestroy_SeqAIJ;
  B->ops->mult             = MatMult_SeqAIJ;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
  B->ops->multadd          = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
  B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  if (!aijmkl->no_SpMV2) {
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
    PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);
#endif
  }
  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

  if (aijmkl->sparse_optimized) {
    sparse_status_t stat;
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscFree(B->spptr);
  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

  *newmat = B;
  return(0);
}
PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
   * spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscFree(A->spptr);
  }
  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
  /* Note that MatSetType() is not called here; it appears to be intended only
   * for use while a matrix is being built, and the SuperLU matrix class
   * handles destruction the same way. */
  MatDestroy_SeqAIJ(A);
  return(0);
}
/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  return(0);
#else
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL   *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt        m,n;
  MatScalar       *aa;
  PetscInt        *aj,*ai;
  sparse_status_t stat;
  PetscErrorCode  ierr;
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interfaces, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
   * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) return(0);
#endif
  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m = A->rmap->n;
  n = A->cmap->n;
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
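  /* As a concrete (purely illustrative) example of the CSR layout described above, the 3x3 matrix
   *     [ 1 0 2 ]
   *     [ 0 3 0 ]
   *     [ 4 0 5 ]
   * is stored with aa = {1,2,3,4,5}, aj = {0,2,1,0,2}, and ai = {0,2,3,5};
   * row k occupies positions ai[k] through ai[k+1]-1 of aa and aj. */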
  if ((a->nz!=0) && aa && !(A->structure_only)) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
    if (!aijmkl->no_SpMV2) {
      stat = mkl_sparse_optimize(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
    }
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  }

  return(0);
#endif
}
/* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
 * matrix handle.
 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
 * there is no good alternative. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            nrows, ncols;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat                 A;
  Mat_SeqAIJMKL       *aijmkl;

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  if (reuse == MAT_REUSE_MATRIX) {
    MatDestroy(mat);
  }
  MatCreate(comm,&A);
  MatSetType(A,MATSEQAIJ);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);

  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl->csrA = csrA;
  /* The code below duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
  stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
  if (!aijmkl->no_SpMV2) {
    stat = mkl_sparse_optimize(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
  }
  aijmkl->sparse_optimized = PETSC_TRUE;
  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));

  *mat = A;
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);
  }

  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  /* We mark our matrix as having a valid, optimized MKL handle.
   * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
  aijmkl->sparse_optimized = PETSC_TRUE;

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL *aijmkl;
  Mat_SeqAIJMKL *aijmkl_dest;

  MatDuplicate_SeqAIJ(A,op,M);
  aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
  PetscArraycpy(aijmkl_dest,aijmkl,1);
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  return(0);
}
PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  PetscErrorCode ierr;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL  *aijmkl;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
  MatAssemblyEnd_SeqAIJ(A, mode);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  return(0);
}
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt m = A->rmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<m; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt n = A->cmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<n; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char        transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar alpha = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the contents of yy to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<m; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char        transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar alpha = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A^T*x + beta*y here. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the contents of yy to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          n = A->cmap->n;
  PetscInt          i;
  PetscObjectState  state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  sparse_status_t stat = SPARSE_STATUS_SUCCESS;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<n; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A^T*x + beta*y here,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
{
  Mat_SeqAIJMKL       *a, *b, *c;
  sparse_matrix_t     csrA, csrB, csrC;
  PetscErrorCode      ierr;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
                         SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
                         SPARSE_STAGE_FINALIZE_MULT,&csrC);

  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
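/* The numeric-stage routine above is not called directly by users; it is reached through
 * the generic PETSc interface. A minimal sketch (variable names are illustrative):
 *
 *   MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);  ... symbolic + numeric stages ...
 *   MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);    ... re-runs the numeric stage above ...
 */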
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
{
  Mat_SeqAIJMKL       *a, *p, *c;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  /* mkl_sparse_sypr() below assumes a symmetric A, so fall back to the plain AIJ routine unless symmetry is known. */
  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || !flag) {
    MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);
    return(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  csrC = c->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
#endif
/* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat           B = *newmat;
  Mat_SeqAIJMKL *aijmkl;
  PetscBool     set;
  PetscBool     sametype;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  PetscObjectTypeCompare((PetscObject)A,type,&sametype);
  if (sametype) return(0);

  PetscNewLog(B,&aijmkl);
  B->spptr = (void*) aijmkl;
  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy     = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
  PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);
  PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);
  PetscOptionsEnd();
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  if (!aijmkl->no_SpMV2) {
    PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
# if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
  B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#  if !defined(PETSC_USE_COMPLEX)
  B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#  endif
# endif
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
   * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
   * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
   * versions in which the older interface has not been deprecated, we use the old interface. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif
  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);

  if (!aijmkl->no_SpMV2) {
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
    PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);
#endif
#endif
  }

  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);
  *newmat = B;
  return(0);
}
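/* Example (an illustrative sketch; the variable names are hypothetical): an already-assembled
 * MATSEQAIJ matrix A can be switched to this class in place with
 *
 *   MatConvert(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
 *
 * after which operations such as MatMult(A,x,y) dispatch to the MKL-backed kernels
 * installed above. */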
/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
   symmetric A) operations are currently supported.
   Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.

   Collective

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given then nz is ignored

   Level: intermediate

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQAIJMKL);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
  return(0);
}
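/* Example usage (a minimal sketch; vector setup and error checking are omitted, and the
 * 100x100 size with 5 nonzeros per row is purely illustrative):
 *
 *   Mat A;
 *   MatCreateSeqAIJMKL(PETSC_COMM_SELF,100,100,5,NULL,&A);
 *   ... fill with MatSetValues(), then ...
 *   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
 *   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
 *   MatMult(A,x,y);   ... dispatches to an MKL-based MatMult kernel ...
 *   MatDestroy(&A);
 */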
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
  return(0);
}