/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/
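
/* Example (an illustrative sketch; error checking omitted): a user can select this
 * class at runtime with -mat_type seqaijmkl, or programmatically:
 *
 *   Mat A;
 *   MatCreate(PETSC_COMM_SELF,&A);
 *   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);
 *   MatSetType(A,MATSEQAIJMKL);
 *   MatSeqAIJSetPreallocation(A,nz,NULL);
 */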
#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>

/* MKL include files. */
#include <mkl_spblas.h> /* Sparse BLAS */
typedef struct {
  PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState state;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t  csrA;             /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;
extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL matrix to its base PETSc type,
   * so we will ignore 'MatType type'. */
  Mat B = *newmat;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
#endif

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  /* Reset the original function pointers. */
  B->ops->duplicate        = MatDuplicate_SeqAIJ;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy          = MatDestroy_SeqAIJ;
  B->ops->mult             = MatMult_SeqAIJ;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
  B->ops->multadd          = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
  B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptap             = MatPtAP_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",NULL);
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  if (!aijmkl->no_SpMV2) {
    PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
    PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);
#endif
    PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);
  }

  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
  if (aijmkl->sparse_optimized) {
    sparse_status_t stat;
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscFree(B->spptr);

  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJ);

  *newmat = B;
  return(0);
}
PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
   * spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscFree(A->spptr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A,MATSEQAIJ);
  /* Note that we do not call MatSetType() here: as best we can tell, MatSetType()
   * is only meant to be called when *building* a matrix, and the SuperLU matrix
   * class handles this step the same way. */
  MatDestroy_SeqAIJ(A);
  return(0);
}
/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  return(0);
#else
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL   *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt        m,n;
  MatScalar       *aa;
  PetscInt        *aj,*ai;
  sparse_status_t stat;
  PetscErrorCode  ierr;
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interfaces, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
   * use the new inspector-executor interfaces, but we can still use the old numerical kernels by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) return(0);
#endif
  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m  = A->rmap->n;
  n  = A->cmap->n;
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
  if ((a->nz != 0) && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
    if (!aijmkl->no_SpMV2) {
      stat = mkl_sparse_optimize(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
    }
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  }

  return(0);
#endif
}
/* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
 * matrix handle.
 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
 * there is no good alternative. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            nrows,ncols;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat                 A;
  Mat_SeqAIJMKL       *aijmkl;

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  if (reuse == MAT_REUSE_MATRIX) {
    MatDestroy(mat);
  }
  MatCreate(comm,&A);
  MatSetType(A,MATSEQAIJ);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);

  aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  aijmkl->csrA = csrA;
  /* The code below duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
  stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
  if (!aijmkl->no_SpMV2) {
    stat = mkl_sparse_optimize(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
  }
  aijmkl->sparse_optimized = PETSC_TRUE;
  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));

  *mat = A;
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  aijmkl = (Mat_SeqAIJMKL*)A->spptr;

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    MatSetValues_SeqAIJ(A,1,&i,nz,aj+ai[i],aa+ai[i],INSERT_VALUES);
  }

  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  /* We mark our matrix as having a valid, optimized MKL handle.
   * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
  aijmkl->sparse_optimized = PETSC_TRUE;

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A,MatDuplicateOption op,Mat *M)
{
  Mat_SeqAIJMKL *aijmkl;
  Mat_SeqAIJMKL *aijmkl_dest;

  MatDuplicate_SeqAIJ(A,op,M);
  aijmkl      = (Mat_SeqAIJMKL*)A->spptr;
  aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
  PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  return(0);
}
PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A,MatAssemblyType mode)
{
  PetscErrorCode ierr;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL  *aijmkl;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
  MatAssemblyEnd_SeqAIJ(A,mode);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  return(0);
}
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */

  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
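  /* (Illustrative note) For a general ('g') matrix, the remaining matdescra fields (fill
   * mode, diagonal type) should not be referenced by mkl_?csrmv(), which is why they are
   * left unset here and in the related routines below. */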
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt m = A->rmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<m; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */

  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt n = A->cmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<n; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar alpha = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<m; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar alpha = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  matdescra[0] = 'g'; /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          n = A->cmap->n;
  PetscInt          i;
  PetscObjectState  state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  sparse_status_t stat = SPARSE_STATUS_SUCCESS;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<n; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
 * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
 * For releases of MKL prior to version 18, update 2:
 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
{
  Mat_SeqAIJMKL       *a, *b, *c;
  sparse_matrix_t     csrA, csrB, csrC;
  PetscErrorCode      ierr;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
                         SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
                         SPARSE_STAGE_FINALIZE_MULT,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
{
  Mat_SeqAIJMKL       *a, *p, *c;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || (set && !flag)) {
    MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);
    return(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  csrC = c->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL       *a, *p;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || (set && !flag)) {
    MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);
    return(0);
  }

  if (scall == MAT_REUSE_MATRIX) {
    MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);
    return(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);
  MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);

  return(0);
}
#endif
/* This function prototype is needed in MatConvert_SeqAIJ_SeqAIJMKL(), below. */
PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);

/* MatConvert_SeqAIJ_SeqAIJMKL() converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
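/* For example (an illustrative sketch; error checking omitted), an assembled SeqAIJ
 * matrix can be converted in place, or into a new matrix, with
 *
 *   MatConvert(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
 *   MatConvert(A,MATSEQAIJMKL,MAT_INITIAL_MATRIX,&B);
 */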
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat           B = *newmat;
  Mat_SeqAIJMKL *aijmkl;
  PetscBool     set;
  PetscBool     sametype;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  PetscObjectTypeCompare((PetscObject)A,type,&sametype);
  if (sametype) return(0);

  PetscNewLog(B,&aijmkl);
  B->spptr = (void*)aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy     = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
  PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);
  PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);
  PetscOptionsEnd();
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  if (!aijmkl->no_SpMV2) {
    PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
  B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
# if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
  B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#  if !defined(PETSC_USE_COMPLEX)
  B->ops->ptap        = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2;
  B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#  endif
# endif
  B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
   * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
   * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
   * versions in which the older interface has not been deprecated, we use the old interface. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",MatPtAP_IS_XAIJ);
  if (!aijmkl->no_SpMV2) {
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
    PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);
#endif
    PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);
#endif
  }

  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);
  *newmat = B;
  return(0);
}
/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
   symmetric A) operations are currently supported.
   Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given then nz is ignored
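
   Example usage (an illustrative sketch; error checking omitted):
.vb
     Mat A;
     MatCreateSeqAIJMKL(PETSC_COMM_SELF,m,n,5,NULL,&A);
     /* fill A with MatSetValues(), then assemble and use it */
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatMult(A,x,y);
.ve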

   Level: intermediate

.keywords: matrix, MKL, sparse, parallel

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQAIJMKL);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
  return(0);
}
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
  return(0);
}