/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (a.k.a. Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/
#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>

/* MKL include files. */
#include <mkl_spblas.h> /* Sparse BLAS */
typedef struct {
  PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState state;            /* State of the parent Mat when the MKL handle was last built; used to detect stale handles. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;
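
/* Handle lifecycle (as implemented below): the csrA handle is (re)built by
 * MatSeqAIJMKL_create_mkl_handle(), either eagerly at assembly time or lazily
 * when an operation such as MatMult() notices that the PetscObjectState has
 * changed, and it is destroyed in MatDestroy_SeqAIJMKL() or when the matrix
 * is converted back to MATSEQAIJ. */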
extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type,
   * so we will ignore 'MatType type'. */
  Mat B = *newmat;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
#endif

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  /* Reset the original function pointers. */
  B->ops->duplicate        = MatDuplicate_SeqAIJ;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy          = MatDestroy_SeqAIJ;
  B->ops->mult             = MatMult_SeqAIJ;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
  B->ops->multadd          = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
  B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptap             = MatPtAP_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  if (!aijmkl->no_SpMV2) {
    PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
    PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);
#endif
    PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);
  }

  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

  if (aijmkl->sparse_optimized) {
    sparse_status_t stat;
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscFree(B->spptr);

  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

  *newmat = B;
  return(0);
}
PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
   * spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscFree(A->spptr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
  /* Note that we do not call MatSetType() here; it appears to be intended only
   * for use when *building* a matrix, which is also how the SuperLU matrix
   * class handles this. */
  MatDestroy_SeqAIJ(A);
  return(0);
}
/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
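/* Callers that need an up-to-date handle use the pattern seen throughout this file:
 *   PetscObjectStateGet((PetscObject)A,&state);
 *   if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);
 * so the handle is rebuilt only when the matrix has changed since the last inspection. */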
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  return(0);
#else
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL   *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt        m,n;
  MatScalar       *aa;
  PetscInt        *aj,*ai;
  sparse_status_t stat;
  PetscErrorCode  ierr;

#if !defined(PETSC_HAVE_MKL_SPARSE_SP2M)
  /* Versions of MKL that don't have mkl_sparse_sp2m() still support the old, non-inspector-executor interfaces. For these versions,
   * we simply exit. Versions that do have mkl_sparse_sp2m() (version 18, update 2 and later) have deprecated the old interfaces.
   * In that case, we must use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by
   * not calling mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) return(0);
#endif

  if (aijmkl->sparse_optimized) {
    /* The matrix has been previously assembled and optimized. We must destroy the old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m  = A->rmap->n;
  n  = A->cmap->n;
  aj = a->j; /* aj[k] gives the column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
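  /* A small illustration of this CSR layout (0-based indexing):
   *   A = [ 1 0 2 ]
   *       [ 0 3 0 ]
   * is stored as aa = {1,2,3}, aj = {0,2,1}, ai = {0,2,3}. */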
  if ((a->nz != 0) && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
    if (!aijmkl->no_SpMV2) {
      stat = mkl_sparse_optimize(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
    }
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  }

  return(0);
#endif
}
/* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
 * matrix handle.
 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
 * there is no good alternative. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            nrows,ncols;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat                 A;
  Mat_SeqAIJMKL       *aijmkl;

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  if (reuse == MAT_REUSE_MATRIX) {
    MatDestroy(mat);
  }
  MatCreate(comm,&A);
  MatSetType(A,MATSEQAIJ);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);

  aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  aijmkl->csrA = csrA;

  /* The code below duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
  stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
  if (!aijmkl->no_SpMV2) {
    stat = mkl_sparse_optimize(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
  }
  aijmkl->sparse_optimized = PETSC_TRUE;
  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));

  *mat = A;
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  aijmkl = (Mat_SeqAIJMKL*)A->spptr;

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);
  }

  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  /* We mark our matrix as having a valid, optimized MKL handle.
   * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
  aijmkl->sparse_optimized = PETSC_TRUE;

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL *aijmkl;
  Mat_SeqAIJMKL *aijmkl_dest;

  MatDuplicate_SeqAIJ(A,op,M);
  aijmkl      = (Mat_SeqAIJMKL*)A->spptr;
  aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
  PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(*M); /* Build the handle for the duplicate, not the original. */
  }
  return(0);
}
PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  PetscErrorCode ierr;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL  *aijmkl;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
  MatAssemblyEnd_SeqAIJ(A, mode);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  return(0);
}
#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */

  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j; /* aj[k] gives the column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
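  /* mkl_xcsrmv() computes y = alpha*op(A)*x + beta*y; with transa = 'n', alpha = 1.0, and
   * beta = 0.0 as set above, this is simply y = A*x. */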
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt m = A->rmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<m; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMult. */
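  /* mkl_sparse_x_mv() computes y = alpha*op(A)*x + beta*y; alpha = 1.0 and beta = 0.0 here give y = A*x. */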
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */

  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j; /* aj[k] gives the column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt n = A->cmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<n; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar alpha = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j; /* aj[k] gives the column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which computes y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the contents of yy to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<m; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which computes y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar alpha = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j; /* aj[k] gives the column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which computes y = alpha*op(A)*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the contents of yy to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          n = A->cmap->n;
  PetscInt          i;
  PetscObjectState  state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  sparse_status_t stat = SPARSE_STATUS_SUCCESS;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<n; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which computes y = alpha*op(A)*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
/* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
 * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
 * For releases of MKL prior to version 18, update 2:
 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;
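  /* mkl_sparse_spmm() performs the combined symbolic and numeric product, returning a new
   * (unoptimized) handle csrC = csrA*csrB, which we then wrap in a PETSc matrix below. */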
  stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
{
  Mat_SeqAIJMKL       *a, *b, *c;
  sparse_matrix_t     csrA, csrB, csrC;
  PetscErrorCode      ierr;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
                         SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
                         SPARSE_STAGE_FINALIZE_MULT,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");

  /* We have to update the PETSc AIJ representation for matrix C from the contents of the MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  /* Note the SPARSE_OPERATION_TRANSPOSE: this computes csrC = csrA^T * csrB. */
  stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
{
  Mat_SeqAIJMKL       *a, *p, *c;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  /* mkl_sparse_sypr() handles only the symmetric case; otherwise fall back to the SeqAIJ implementation. */
  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || !flag) {
    MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);
    return(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  csrC = c->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
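  /* mkl_sparse_sypr() with SPARSE_OPERATION_TRANSPOSE computes the triple product
   * csrC = csrP^T * csrA * csrP, treating csrA as symmetric per descr_type_sym;
   * SPARSE_STAGE_FINALIZE_MULT should recompute only the numerical values, reusing
   * the symbolic structure already present in csrC (cf. the SPARSE_STAGE_FULL_MULT
   * call in MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2() below). */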
  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");

  /* We have to update the PETSc AIJ representation for matrix C from the contents of the MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
#endif
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL       *a, *p;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  /* mkl_sparse_sypr() handles only the symmetric case; otherwise fall back to the SeqAIJ implementation. */
  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || !flag) {
    MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);
    return(0);
  }

  if (scall == MAT_REUSE_MATRIX) {
    MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);
    return(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.)
   * SPARSE_STAGE_FULL_MULT performs both the symbolic and numeric phases of csrC = csrP^T * csrA * csrP. */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);
  MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);

  return(0);
}
#endif
941: #endif
943: /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
944: * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqMKLAIJ()
945: * routine, but can also be used to convert an assembled SeqAIJ matrix
946: * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat           B = *newmat;
  Mat_SeqAIJMKL *aijmkl;
  PetscBool     set;
  PetscBool     sametype;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  PetscObjectTypeCompare((PetscObject)A,type,&sametype);
  if (sametype) return(0);

  PetscNewLog(B,&aijmkl);
  B->spptr = (void*)aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy     = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
  PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);
  PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);
  PetscOptionsEnd();
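  /* For example, running with "-mat_aijmkl_eager_inspection" makes MatAssemblyEnd_SeqAIJMKL()
   * build the optimized MKL handle immediately instead of deferring it to the first MatMult(). */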
#ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  if (!aijmkl->no_SpMV2) {
    PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
  B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
# ifdef PETSC_HAVE_MKL_SPARSE_SP2M
  B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#  ifndef PETSC_USE_COMPLEX
  B->ops->ptap             = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2;
  B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#  endif
# endif
  B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
  /* In the same release in which MKL introduced mkl_sparse_sp2m() (version 18, update 2), the old sparse BLAS interfaces were
   * marked as deprecated. If "no_SpMV2" has been specified by the user and MKL 18u2 or later is being used, we use the new
   * _SpMV2 routines (set above), but do not call mkl_sparse_optimize(), which results in the old numerical kernels (without the
   * inspector-executor model) being used. For versions in which the older interface has not been deprecated, we use the old
   * interface. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif
  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);
  if (!aijmkl->no_SpMV2) {
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
    PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);
#endif
    PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);
#endif
  }

  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);
  *newmat = B;
  return(0);
}
/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
   symmetric A) operations are currently supported.
   Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given then nz is ignored
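
   Example usage (a minimal sketch; error checking omitted):
.vb
     Mat A;
     MatCreateSeqAIJMKL(PETSC_COMM_SELF,m,n,5,NULL,&A);
     /* ... insert entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve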
   Level: intermediate

.keywords: matrix, MKL, sparse, parallel

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQAIJMKL);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
  return(0);
}
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
  return(0);
}