Actual source code: aijmkl.c

petsc-3.9.4 2018-09-11
/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/
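
/* Typical usage, as a minimal sketch (not part of this file; error checking
 * elided for brevity, and n/nz/x/y are assumed to be defined): an application
 * can either call MatCreateSeqAIJMKL() directly or select the type at runtime
 * with -mat_type aijmkl.
 *
 *   Mat A;
 *   MatCreate(PETSC_COMM_SELF,&A);
 *   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);
 *   MatSetType(A,MATSEQAIJMKL);
 *   MatSeqAIJSetPreallocation(A,nz,NULL);
 *   ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual ...
 *   MatMult(A,x,y);  -- dispatches to the MKL-backed kernels defined below
 */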

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>

/* MKL include files. */
#include <mkl_spblas.h>  /* Sparse BLAS */

typedef struct {
  PetscBool           no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState    state;            /* State of the matrix when the MKL handle was last created; used to detect stale handles. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  sparse_matrix_t     csrA;             /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;

extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  PetscErrorCode ierr;
  Mat            B       = *newmat;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }

  /* Reset the original function pointers. */
  B->ops->duplicate        = MatDuplicate_SeqAIJ;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy          = MatDestroy_SeqAIJ;
  B->ops->mult             = MatMult_SeqAIJ;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
  B->ops->multadd          = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
  B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptap             = MatPtAP_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  if (!aijmkl->no_SpMV2) {
    ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
    ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
#endif
    ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
  }

  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

  if (aijmkl->sparse_optimized) {
    sparse_status_t stat;
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  ierr = PetscFree(B->spptr);CHKERRQ(ierr);

  /* Change the type of B to MATSEQAIJ. */
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJ);CHKERRQ(ierr);

  *newmat = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  PetscErrorCode ierr;
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;

  PetscFunctionBegin;
  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
   * spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    ierr = PetscFree(A->spptr);CHKERRQ(ierr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQAIJ);CHKERRQ(ierr);
  /* Note that we do not call MatSetType() here; that routine appears to be
   * intended only for *building* a matrix, and this mirrors how the SuperLU
   * matrix class handles destruction. */
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#else
  Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt         m,n;
  MatScalar        *aa;
  PetscInt         *aj,*ai;
  sparse_status_t  stat;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
#if !defined(PETSC_HAVE_MKL_SPARSE_SP2M)
  /* Versions of MKL that don't have mkl_sparse_sp2m() still support the old, non-inspector-executor interfaces. For these versions,
   * we simply exit. Versions that do have mkl_sparse_sp2m() (version 18, update 2 and later) have deprecated the old interfaces.
   * In this case, we must use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by
   * not calling mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
#endif

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m  = A->rmap->n;
  n  = A->cmap->n;
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
  if (a->nz && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
    if (!aijmkl->no_SpMV2) {
      stat = mkl_sparse_optimize(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
    }
    aijmkl->sparse_optimized = PETSC_TRUE;
    ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
#endif
}
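
/* Callers below keep the MKL handle in sync with the PETSc matrix by comparing
 * the cached object state; the recurring pattern (shown here as a sketch of
 * what the MatMult variants below do) is:
 *
 *   PetscObjectState state;
 *   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
 *   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
 *     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
 *   }
 */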

/* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
 * matrix handle.
 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
 * there is no good alternative. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            nrows,ncols;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat                 A;
  Mat_SeqAIJMKL       *aijmkl;

  PetscFunctionBegin;
  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatDestroy(mat);CHKERRQ(ierr);
  }
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);

  aijmkl       = (Mat_SeqAIJMKL*)A->spptr;
  aijmkl->csrA = csrA;

  /* The below code duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
  stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
  if (!aijmkl->no_SpMV2) {
    stat = mkl_sparse_optimize(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
  }
  aijmkl->sparse_optimized = PETSC_TRUE;
  ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);

  *mat = A;
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  PetscFunctionBegin;
  aijmkl = (Mat_SeqAIJMKL*)A->spptr;

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz   = ai[i+1] - ai[i];
    ierr = MatSetValues_SeqAIJ(A,1,&i,nz,aj+ai[i],aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
  /* We mark our matrix as having a valid, optimized MKL handle.
   * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
  aijmkl->sparse_optimized = PETSC_TRUE;

  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A,MatDuplicateOption op,Mat *M)
{
  PetscErrorCode ierr;
  Mat_SeqAIJMKL  *aijmkl;
  Mat_SeqAIJMKL  *aijmkl_dest;

  PetscFunctionBegin;
  ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
  aijmkl      = (Mat_SeqAIJMKL*)A->spptr;
  aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
  ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) {
    ierr = MatSeqAIJMKL_create_mkl_handle(*M);CHKERRQ(ierr);  /* Build the handle for the duplicate, not the original. */
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  PetscErrorCode  ierr;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL   *aijmkl;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
  ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  if (aijmkl->eager_inspection) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
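
/* The inspection strategy can be selected at runtime through the options
 * database (the option names are registered in MatConvert_SeqAIJ_SeqAIJMKL()
 * below); for example, a hypothetical run
 *
 *   ./myapp -mat_type aijmkl -mat_aijmkl_eager_inspection
 *
 * makes MatAssemblyEnd() build the optimized MKL handle immediately rather
 * than deferring it to the first MatMult()-like operation. */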

#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */

  PetscFunctionBegin;
  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult; it computes y = alpha*op(A)*x + beta*y. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  PetscFunctionBegin;
  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt m = A->rmap->n;
    ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      y[i] = 0.0;
    }
    ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char              transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */

  PetscFunctionBegin;
  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat;
  PetscObjectState  state;

  PetscFunctionBegin;
  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt n = A->cmap->n;
    ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      y[i] = 0.0;
    }
    ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  PetscFunctionBegin;
  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the contents of yy to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  PetscFunctionBegin;
  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      z[i] = y[i];
    }
    ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char              transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  PetscFunctionBegin;
  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A^T*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the contents of yy to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          n = A->cmap->n;
  PetscInt          i;
  PetscObjectState  state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;

  PetscFunctionBegin;
  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      z[i] = y[i];
    }
    ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*op(A)*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
/* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
 * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
 * For releases of MKL prior to version 18, update 2:
 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  PetscFunctionBegin;
  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!a->sparse_optimized || a->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
  if (!b->sparse_optimized || b->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(B);CHKERRQ(ierr);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
{
  Mat_SeqAIJMKL       *a, *b, *c;
  sparse_matrix_t     csrA, csrB, csrC;
  PetscErrorCode      ierr;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  PetscFunctionBegin;
  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!a->sparse_optimized || a->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
  if (!b->sparse_optimized || b->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(B);CHKERRQ(ierr);
  }
  csrA = a->csrA;
  csrB = b->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
                         SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
                         SPARSE_STAGE_FINALIZE_MULT,&csrC);

  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M */

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  PetscFunctionBegin;
  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!a->sparse_optimized || a->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
  if (!b->sparse_optimized || b->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(B);CHKERRQ(ierr);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
{
  Mat_SeqAIJMKL       *a, *p, *c;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  /* mkl_sparse_sypr() requires a symmetric A; fall back to the SeqAIJ routine unless A is known symmetric. */
  ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr);
  if (!set || !flag) {
    ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!a->sparse_optimized || a->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
  if (!p->sparse_optimized || p->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(P);CHKERRQ(ierr);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  csrC = c->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL       *a, *p;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr);
  if (!set || !flag) {
    ierr = MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if (scall == MAT_REUSE_MATRIX) {
    ierr = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!a->sparse_optimized || a->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
  if (!p->sparse_optimized || p->state != state) {
    ierr = MatSeqAIJMKL_create_mkl_handle(P);CHKERRQ(ierr);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr");

  ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
  ierr = MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
#endif

/* MatConvert_SeqAIJ_SeqAIJMKL() converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            B = *newmat;
  Mat_SeqAIJMKL  *aijmkl;
  PetscBool      set;
  PetscBool      sametype;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }

  ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
  if (sametype) PetscFunctionReturn(0);

  ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
  B->spptr = (void*)aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy     = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of the MKL SpMV2 inspector-executor routines","None",aijmkl->no_SpMV2,&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run the MKL \"inspection\" phase at assembly time instead of deferring it","None",aijmkl->eager_inspection,&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
#ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  if (!aijmkl->no_SpMV2) {
    ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");CHKERRQ(ierr);
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
  B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
# ifdef PETSC_HAVE_MKL_SPARSE_SP2M
  B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#   ifndef PETSC_USE_COMPLEX
  B->ops->ptap             = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2;
  B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#   endif
# endif
  B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifndef PETSC_HAVE_MKL_SPARSE_SP2M
  /* In the same release in which MKL introduced mkl_sparse_sp2m() (version 18, update 2), the old sparse BLAS interfaces were
   * marked as deprecated. If "no_SpMV2" has been specified by the user and MKL 18u2 or later is being used, we use the new
   * _SpMV2 routines (set above), but do not call mkl_sparse_optimize(), which results in the old numerical kernels (without the
   * inspector-executor model) being used. For versions in which the older interface has not been deprecated, we use the old
   * interface. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
  if (!aijmkl->no_SpMV2) {
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
    ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
#endif
    ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
#endif
  }

  ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
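
/* An already-assembled AIJ matrix can also be converted explicitly; a minimal
 * sketch (error checking elided):
 *
 *   ierr = MatConvert(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
 */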

/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
   symmetric A) operations are currently supported.
   Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given then nz is ignored

   Level: intermediate

.keywords: matrix, MKL, sparse, parallel

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
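
/* Example call (a minimal sketch; assumes n and nz are defined, and error
 * checking is elided):
 *
 *   Mat A;
 *   ierr = MatCreateSeqAIJMKL(PETSC_COMM_SELF,n,n,nz,NULL,&A);
 */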

PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}