Actual source code: aijmkl.c

petsc-3.10.5 2019-03-28
/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>

/* MKL include files. */
#include <mkl_spblas.h>  /* Sparse BLAS */
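
/* For illustration only: a minimal, hypothetical sketch (the function name
 * ExampleAIJMKLMatVec is not part of this file) of how a user might create and apply a
 * MATSEQAIJMKL matrix. All calls are standard PETSc Mat/Vec API. */
static PetscErrorCode ExampleAIJMKLMatVec(void)
{
  Mat      A;
  Vec      x,y;
  PetscInt i,n = 4;

  MatCreate(PETSC_COMM_SELF,&A);
  MatSetSizes(A,n,n,n,n);
  MatSetType(A,MATSEQAIJMKL);            /* enters MatConvert_SeqAIJ_SeqAIJMKL() below */
  MatSeqAIJSetPreallocation(A,1,NULL);   /* one nonzero per row: a diagonal matrix */
  for (i=0; i<n; i++) {
    MatSetValue(A,i,i,2.0,INSERT_VALUES);
  }
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);  /* MatAssemblyEnd_SeqAIJMKL() runs here */
  VecCreateSeq(PETSC_COMM_SELF,n,&x);
  VecDuplicate(x,&y);
  VecSet(x,1.0);
  MatMult(A,x,y);                        /* dispatches to one of the MatMult kernels below */
  VecDestroy(&x);VecDestroy(&y);MatDestroy(&A);
  return(0);
}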

typedef struct {
  PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState state;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;

extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat            B       = *newmat;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
#endif

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  /* Reset the original function pointers. */
  B->ops->duplicate        = MatDuplicate_SeqAIJ;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy          = MatDestroy_SeqAIJ;
  B->ops->mult             = MatMult_SeqAIJ;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
  B->ops->multadd          = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
  B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptap             = MatPtAP_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",NULL);
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  if (!aijmkl->no_SpMV2) {
    PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
    PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);
#endif
    PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);
  }

  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

  if (aijmkl->sparse_optimized) {
    sparse_status_t stat;
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscFree(B->spptr);

  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

  *newmat = B;
  return(0);
}
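
/* For illustration only: a hypothetical user-level sketch (ExampleConvertBackToAIJ is
 * not part of PETSc) showing how a MATSEQAIJMKL matrix is returned to its base type.
 * MatConvert() dispatches to MatConvert_SeqAIJMKL_SeqAIJ() above through the composed
 * "MatConvert_seqaijmkl_seqaij_C" function. */
static PetscErrorCode ExampleConvertBackToAIJ(Mat A)
{
  MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);
  return(0);
}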

PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
   * spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscFree(A->spptr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
  /* Note that I don't call MatSetType().  I believe this is because that
   * is only to be called when *building* a matrix.  I could be wrong, but
   * that is how things work for the SuperLU matrix class. */
  MatDestroy_SeqAIJ(A);
  return(0);
}

/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  return(0);
#else
  Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt         m,n;
  MatScalar        *aa;
  PetscInt         *aj,*ai;
  sparse_status_t  stat;
  PetscErrorCode   ierr;

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interfaces, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
   * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) return(0);
#endif

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
  m = A->rmap->n;
  n = A->cmap->n;
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
  if ((a->nz!=0) && !(A->structure_only)) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
    if (!aijmkl->no_SpMV2) {
      stat = mkl_sparse_optimize(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
    }
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  }

  return(0);
#endif
}
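
/* For illustration only: a self-contained, hypothetical sketch of the MKL
 * inspector-executor lifecycle that MatSeqAIJMKL_create_mkl_handle() drives above,
 * written with the explicit double-precision entry points (the code above uses the
 * mkl_sparse_x_* dispatch for the configured PetscScalar). Computes y = A*x for a
 * small 2x2 CSR matrix. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
static sparse_status_t ExampleMKLHandleLifecycle(void)
{
  /* A = [1 2; 0 3] in 0-based CSR form. */
  MKL_INT             ai[3] = {0,2,3};        /* ai[i+1]-ai[i] = number of nonzeros in row i */
  MKL_INT             aj[3] = {0,1,1};        /* column indices */
  double              aa[3] = {1.0,2.0,3.0};  /* nonzero values, stored row-by-row */
  double              x[2]  = {1.0,1.0},y[2];
  sparse_matrix_t     csrA;
  struct matrix_descr descr;
  sparse_status_t     stat;

  descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  stat = mkl_sparse_d_create_csr(&csrA,SPARSE_INDEX_BASE_ZERO,2,2,ai,ai+1,aj,aa);
  if (stat != SPARSE_STATUS_SUCCESS) return stat;
  stat = mkl_sparse_set_mv_hint(csrA,SPARSE_OPERATION_NON_TRANSPOSE,descr,1000);
  if (stat != SPARSE_STATUS_SUCCESS) return stat;
  stat = mkl_sparse_set_memory_hint(csrA,SPARSE_MEMORY_AGGRESSIVE);
  if (stat != SPARSE_STATUS_SUCCESS) return stat;
  stat = mkl_sparse_optimize(csrA);  /* inspector: analyze the matrix, build optimized kernels */
  if (stat != SPARSE_STATUS_SUCCESS) return stat;
  stat = mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,csrA,descr,x,0.0,y);  /* executor: y = A*x = {3,3} */
  if (stat != SPARSE_STATUS_SUCCESS) return stat;
  return mkl_sparse_destroy(csrA);   /* each handle must be destroyed exactly once */
}
#endif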

/* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
 * matrix handle.
 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
 * there is no good alternative. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            nrows, ncols;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat                 A;
  Mat_SeqAIJMKL       *aijmkl;

  /* Note: We must pass in &dummy below because MKL does not accept NULL for an output array, even one we don't actually want. */
  stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  if (reuse == MAT_REUSE_MATRIX) {
    MatDestroy(mat);
  }
  MatCreate(comm,&A);
  MatSetType(A,MATSEQAIJ);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);

  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl->csrA = csrA;

  /* The code below duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
  stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
  stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
  if (!aijmkl->no_SpMV2) {
    stat = mkl_sparse_optimize(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
  }
  aijmkl->sparse_optimized = PETSC_TRUE;
  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));

  *mat = A;
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  /* Note: We must pass in &dummy below because MKL does not accept NULL for an output array, even one we don't actually want. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);
  }

  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  /* We mark our matrix as having a valid, optimized MKL handle.
   * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
  aijmkl->sparse_optimized = PETSC_TRUE;

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL  *aijmkl;
  Mat_SeqAIJMKL  *aijmkl_dest;

  MatDuplicate_SeqAIJ(A,op,M);
  aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
  PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(*M);  /* Build the MKL handle for the duplicate, not for A. */
  }
  return(0);
}

PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  PetscErrorCode  ierr;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL   *aijmkl;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
  MatAssemblyEnd_SeqAIJ(A, mode);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  return(0);
}
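
/* For illustration only: a hypothetical sketch (ExampleEagerInspection is not part of
 * PETSc) of selecting eager inspection programmatically, so that handle creation and
 * mkl_sparse_optimize() happen in MatAssemblyEnd_SeqAIJMKL() above rather than being
 * deferred to the first MatMult(). The same effect is available on the command line
 * via -mat_aijmkl_eager_inspection. */
static PetscErrorCode ExampleEagerInspection(Mat A)
{
  /* The option must be in the options database before MatSetType(), which is where it is read. */
  PetscOptionsSetValue(NULL,"-mat_aijmkl_eager_inspection","1");
  MatSetType(A,MATSEQAIJMKL);
  return(0);
}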

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif
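
/* For illustration only: a plain-C reference version (ExampleCSRMatVec is hypothetical)
 * of the sparse matrix-vector product that mkl_xcsrmv() performs above with matdescra
 * 'g'/'c' (general matrix, 0-based indexing), alpha=1 and beta=0. It also shows where
 * the flop count 2*nz - nonzerorowcnt comes from: each nonzero costs one multiply and
 * one add, minus one add per nonempty row because the accumulator starts at zero. */
static void ExampleCSRMatVec(PetscInt m,const PetscInt *ai,const PetscInt *aj,const MatScalar *aa,const PetscScalar *x,PetscScalar *y)
{
  PetscInt i,k;
  for (i=0; i<m; i++) {
    PetscScalar sum = 0.0;
    for (k=ai[i]; k<ai[i+1]; k++) sum += aa[k]*x[aj[k]];  /* dot product of row i with x */
    y[i] = sum;
  }
}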

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt m=A->rmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<m; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt n=A->cmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<n; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add yy to the result;
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<m; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char              transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add yy to the result;
     * MKL sparse BLAS does not have a MatMultTransposeAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          n=A->cmap->n;
  PetscInt          i;
  PetscObjectState  state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<n; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultTransposeAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
 * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
 * For releases of MKL prior to version 18, update 2:
 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
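
/* For illustration only: a hypothetical user-level sketch (ExampleMatMatMult is not
 * part of PETSc) of how the SpMV2 matrix-matrix multiply above is reached. When A and B
 * are both MATSEQAIJMKL and SpMV2 is enabled, MatMatMult() with MAT_INITIAL_MATRIX
 * dispatches to MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(). */
static PetscErrorCode ExampleMatMatMult(Mat A,Mat B)
{
  Mat C;
  MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);
  MatDestroy(&C);
  return(0);
}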

#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
{
  Mat_SeqAIJMKL       *a, *b, *c;
  sparse_matrix_t     csrA, csrB, csrC;
  PetscErrorCode      ierr;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
                         SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
                         SPARSE_STAGE_FINALIZE_MULT,&csrC);

  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
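
/* For illustration only: a sketch (assuming the documented mkl_sparse_sp2m() staging
 * requests; ExampleSp2mStages is hypothetical) of the symbolic/numeric split that the
 * numeric routine above relies on: SPARSE_STAGE_NNZ_COUNT computes the sparsity
 * structure of C = A*B once, and SPARSE_STAGE_FINALIZE_MULT later fills in the values,
 * reusing the same csrC handle as A's and B's values change. */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static sparse_status_t ExampleSp2mStages(sparse_matrix_t csrA,sparse_matrix_t csrB,sparse_matrix_t *csrC)
{
  struct matrix_descr descr_type_gen;
  sparse_status_t     stat;

  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
  /* Symbolic stage: determine the nonzero structure of C = A*B. */
  stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
                         SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
                         SPARSE_STAGE_NNZ_COUNT,csrC);
  if (stat != SPARSE_STATUS_SUCCESS) return stat;
  /* Numeric stage: fill in the values; may be repeated for new values of A and B. */
  return mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
                         SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
                         SPARSE_STAGE_FINALIZE_MULT,csrC);
}
#endif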

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
{
  Mat_SeqAIJMKL       *a, *p, *c;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || !flag) {
    MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);
    return(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  csrC = c->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL       *a, *p;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || !flag) {
    MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);
    return(0);
  }

  if (scall == MAT_REUSE_MATRIX) {
    MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);
    return(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);
  MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);

  return(0);
}
#endif
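
/* For illustration only: a hypothetical sketch (ExamplePtAP is not part of PETSc) of
 * reaching the MKL PtAP path above. MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2() falls back to
 * the SeqAIJ implementation unless A is known to be symmetric, so the symmetry flag
 * must be set first. */
static PetscErrorCode ExamplePtAP(Mat A,Mat P)
{
  Mat C;
  MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);  /* makes MatIsSymmetricKnown() return set = flag = PETSC_TRUE */
  MatPtAP(A,P,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);
  MatDestroy(&C);
  return(0);
}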

/* This function prototype is needed in MatConvert_SeqAIJ_SeqAIJMKL(), below. */
PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);

/* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B = *newmat;
  Mat_SeqAIJMKL  *aijmkl;
  PetscBool      set;
  PetscBool      sametype;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  PetscObjectTypeCompare((PetscObject)A,type,&sametype);
  if (sametype) return(0);

  PetscNewLog(B,&aijmkl);
  B->spptr = (void*) aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy          = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
  PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);
  PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);
  PetscOptionsEnd();
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  if (!aijmkl->no_SpMV2) {
    PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
  B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
# if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
  B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#   if !defined(PETSC_USE_COMPLEX)
  B->ops->ptap             = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2;
  B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#   endif
# endif
  B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
   * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
   * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
   * versions in which the older interface has not been deprecated, we use the old interface. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",MatPtAP_IS_XAIJ);
  if (!aijmkl->no_SpMV2) {
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
    PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);
#endif
    PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);
#endif
  }

  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);
  *newmat = B;
  return(0);
}
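
/* For illustration only: a hypothetical sketch (ExampleUpgradeToAIJMKL is not part of
 * PETSc) of upgrading an already assembled MATSEQAIJ matrix in place, which enters
 * MatConvert_SeqAIJ_SeqAIJMKL() above. */
static PetscErrorCode ExampleUpgradeToAIJMKL(Mat A)
{
  MatConvert(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
  return(0);
}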

/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
   symmetric A) operations are currently supported.
   Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given then nz is ignored

   Level: intermediate

.keywords: matrix, MKL, sparse, parallel

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQAIJMKL);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
  return(0);
}
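
/* For illustration only: a hypothetical sketch (ExampleCreateSeqAIJMKL is not part of
 * PETSc) of the convenience constructor above, preallocating five nonzeros per row for
 * a small sequential matrix. */
static PetscErrorCode ExampleCreateSeqAIJMKL(PetscInt n,Mat *A)
{
  MatCreateSeqAIJMKL(PETSC_COMM_SELF,n,n,5,NULL,A);
  return(0);
}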

PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
  return(0);
}