petsc-3.14.6 2021-03-30
  2: /*
  3:   Defines basic operations for the MATSEQAIJMKL matrix class.
  4:   This class is derived from the MATSEQAIJ class and retains the
  5:   compressed row storage (aka Yale sparse matrix format) but uses
  6:   sparse BLAS operations from the Intel Math Kernel Library (MKL)
  7:   wherever possible.
  8: */

 10: #include <../src/mat/impls/aij/seq/aij.h>
 11: #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
 12: #include <mkl_spblas.h>

 14: typedef struct {
 15:   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
 16:   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
 17:   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
 18:   PetscObjectState    state;
 19: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 20:   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
 21:   struct matrix_descr descr;
 22: #endif
 23: } Mat_SeqAIJMKL;

 25: extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);

 27: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
 28: {
 29:   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
 30:   /* so we will ignore 'MatType type'. */
 32:   Mat            B       = *newmat;
 33: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 34:   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
 35: #endif

 38:   if (reuse == MAT_INITIAL_MATRIX) {
 39:     MatDuplicate(A,MAT_COPY_VALUES,&B);
 40:   }

 42:   /* Reset the original function pointers. */
 43:   B->ops->duplicate               = MatDuplicate_SeqAIJ;
 44:   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
 45:   B->ops->destroy                 = MatDestroy_SeqAIJ;
 46:   B->ops->mult                    = MatMult_SeqAIJ;
 47:   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
 48:   B->ops->multadd                 = MatMultAdd_SeqAIJ;
 49:   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
 50:   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
 51:   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
 52:   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
 53:   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
 54:   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
 55:   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;

 57:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);

 59: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 60:   if (!aijmkl->no_SpMV2) {
 61: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
 62:     PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",NULL);
 63: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
 64:   }

 66:   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
 67:    * simply involves destroying the MKL sparse matrix handle and then freeing
 68:    * the spptr pointer. */
 69:   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

 71:   if (aijmkl->sparse_optimized) {
 72:     sparse_status_t stat;
 73:     stat = mkl_sparse_destroy(aijmkl->csrA);
 74:     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize()");
 75:   }
 76: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
 77:   PetscFree(B->spptr);

 79:   /* Change the type of B to MATSEQAIJ. */
 80:   PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

 82:   *newmat = B;
 83:   return(0);
 84: }

 86: PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
 87: {
 89:   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;


 93:   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
 94:   if (aijmkl) {
 95:     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
 96: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 97:     if (aijmkl->sparse_optimized) {
 98:       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
 99:       stat = mkl_sparse_destroy(aijmkl->csrA);
100:       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
101:     }
102: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
103:     PetscFree(A->spptr);
104:   }

106:   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
107:    * to destroy everything that remains. */
108:   PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
109:   /* Note that I don't call MatSetType().  I believe this is because that
110:    * is only to be called when *building* a matrix.  I could be wrong, but
111:    * that is how things work for the SuperLU matrix class. */
112:   MatDestroy_SeqAIJ(A);
113:   return(0);
114: }

116: /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
117:  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
118:  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
119:  * handle, creates a new one, and then calls mkl_sparse_optimize().
120:  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
121:  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
122:  * an unoptimized matrix handle here. */
123: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
124: {
125: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
126:   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
127:    * does nothing. We make it callable anyway in this case because it cuts
128:    * down on littering the code with #ifdefs. */
130:   return(0);
131: #else
132:   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
133:   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
134:   PetscInt         m,n;
135:   MatScalar        *aa;
136:   PetscInt         *aj,*ai;
137:   sparse_status_t  stat;
138:   PetscErrorCode   ierr;

141: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
142:   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
143:    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
144:    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
145:    * mkl_sparse_optimize() later. */
146:   if (aijmkl->no_SpMV2) return(0);
147: #endif

149:   if (aijmkl->sparse_optimized) {
150:     /* Matrix has been previously assembled and optimized. Must destroy old
151:      * matrix handle before running the optimization step again. */
152:     stat = mkl_sparse_destroy(aijmkl->csrA);
153:     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
154:   }
155:   aijmkl->sparse_optimized = PETSC_FALSE;

157:   /* Now perform the SpMV2 setup and matrix optimization. */
158:   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
159:   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
160:   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
161:   m = A->rmap->n;
162:   n = A->cmap->n;
163:   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
164:   aa   = a->a;  /* Nonzero elements stored row-by-row. */
165:   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
166:   if (a->nz && aa && !A->structure_only) {
167:     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
168:      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
169:     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
170:     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()");
171:     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
172:     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
173:     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
174:     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
175:     if (!aijmkl->no_SpMV2) {
176:       stat = mkl_sparse_optimize(aijmkl->csrA);
177:       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()");
178:     }
179:     aijmkl->sparse_optimized = PETSC_TRUE;
180:     PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
181:   } else {
182:     aijmkl->csrA = PETSC_NULL;
183:   }

185:   return(0);
186: #endif
187: }

189: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
190: /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
191: static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
192: {
193:   PetscErrorCode      ierr;
194:   sparse_status_t     stat;
195:   sparse_index_base_t indexing;
196:   PetscInt            m,n;
197:   PetscInt            *aj,*ai,*dummy;
198:   MatScalar           *aa;
199:   Mat_SeqAIJMKL       *aijmkl;

201:   if (csrA) {
202:     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
203:     stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
204:     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
205:     if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
206:   } else {
207:     aj = ai = PETSC_NULL;
208:     aa = PETSC_NULL;
209:   }

211:   MatSetType(A,MATSEQAIJ);
212:   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);
213:   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
214:    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
215:    * they will be destroyed when the MKL handle is destroyed.
216:    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
217:   if (csrA) {
218:     MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);
219:   } else {
220:     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
221:     MatSetUp(A);
222:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
223:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
224:   }

226:   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
227:    * Now turn it into a MATSEQAIJMKL. */
228:   MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);

230:   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
231:   aijmkl->csrA = csrA;

233:   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
234:    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
235:    * and just need to be able to run the MKL optimization step. */
236:   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
237:   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
238:   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
239:   if (csrA) {
240:     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
241:     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
242:     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
243:     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
244:   }
245:   PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
246:   return(0);
247: }
248: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

250: /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
251:  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
252:  * MatMatMultNumeric(). */
253: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
254: static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
255: {
256:   PetscInt            i;
257:   PetscInt            nrows,ncols;
258:   PetscInt            nz;
259:   PetscInt            *ai,*aj,*dummy;
260:   PetscScalar         *aa;
261:   PetscErrorCode      ierr;
262:   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
263:   sparse_status_t     stat;
264:   sparse_index_base_t indexing;

266:   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
267:   if (!aijmkl->csrA) return(0);

269:   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
270:   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
271:   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

273:   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
274:    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
275:   for (i=0; i<nrows; i++) {
276:     nz = ai[i+1] - ai[i];
277:     MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);
278:   }

280:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
281:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

283:   PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
284:   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
285:    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
286:    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
287:   aijmkl->sparse_optimized = PETSC_FALSE;
288:   return(0);
289: }
290: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

292: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
293: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
294: {
295:   PetscInt            i,j,k;
296:   PetscInt            nrows,ncols;
297:   PetscInt            nz;
298:   PetscInt            *ai,*aj,*dummy;
299:   PetscScalar         *aa;
300:   PetscErrorCode      ierr;
301:   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
302:   sparse_status_t     stat;
303:   sparse_index_base_t indexing;

305:   PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");

307:   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
308:   if (!aijmkl->csrA) {
309:     PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");
310:     return(0);
311:   }

313:   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
314:   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
315:   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

317:   k = 0;
318:   for (i=0; i<nrows; i++) {
319:     PetscViewerASCIIPrintf(viewer,"row %D: ",i);
320:     nz = ai[i+1] - ai[i];
321:     for (j=0; j<nz; j++) {
322:       if (aa) {
323:         PetscViewerASCIIPrintf(viewer,"(%D, %g)  ",aj[k],PetscRealPart(aa[k]));
324:       } else {
325:         PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);
326:       }
327:       k++;
328:     }
329:     PetscViewerASCIIPrintf(viewer,"\n");
330:   }
331:   return(0);
332: }
333: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

335: PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
336: {
338:   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
339:   Mat_SeqAIJMKL  *aijmkl_dest;

342:   MatDuplicate_SeqAIJ(A,op,M);
343:   aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
344:   PetscArraycpy(aijmkl_dest,aijmkl,1);
345:   aijmkl_dest->sparse_optimized = PETSC_FALSE;
346:   if (aijmkl->eager_inspection) {
347:     MatSeqAIJMKL_create_mkl_handle(A);
348:   }
349:   return(0);
350: }

352: PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
353: {
354:   PetscErrorCode  ierr;
355:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
356:   Mat_SeqAIJMKL   *aijmkl;

359:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

361:   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
362:    * extra information and some different methods, call the AssemblyEnd
363:    * routine for a MATSEQAIJ.
364:    * I'm not sure if this is the best way to do this, but it avoids
365:    * a lot of code duplication. */
366:   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
367:   MatAssemblyEnd_SeqAIJ(A, mode);

369:   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
370:    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
371:   aijmkl = (Mat_SeqAIJMKL*)A->spptr;
372:   if (aijmkl->eager_inspection) {
373:     MatSeqAIJMKL_create_mkl_handle(A);
374:   }

376:   return(0);
377: }

379: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
380: PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
381: {
382:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
383:   const PetscScalar *x;
384:   PetscScalar       *y;
385:   const MatScalar   *aa;
386:   PetscErrorCode    ierr;
387:   PetscInt          m = A->rmap->n;
388:   PetscInt          n = A->cmap->n;
389:   PetscScalar       alpha = 1.0;
390:   PetscScalar       beta = 0.0;
391:   const PetscInt    *aj,*ai;
392:   char              matdescra[6];


395:   /* Variables not in MatMult_SeqAIJ. */
396:   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */

399:   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
400:   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
401:   VecGetArrayRead(xx,&x);
402:   VecGetArray(yy,&y);
403:   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
404:   aa   = a->a;  /* Nonzero elements stored row-by-row. */
405:   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

407:   /* Call MKL sparse BLAS routine to do the MatMult. */
408:   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

410:   PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
411:   VecRestoreArrayRead(xx,&x);
412:   VecRestoreArray(yy,&y);
413:   return(0);
414: }
415: #endif

417: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
418: PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
419: {
420:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
421:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
422:   const PetscScalar *x;
423:   PetscScalar       *y;
424:   PetscErrorCode    ierr;
425:   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
426:   PetscObjectState  state;


430:   /* If there are no nonzero entries, zero yy and return immediately. */
431:   if (!a->nz) {
432:     PetscInt i;
433:     PetscInt m=A->rmap->n;
434:     VecGetArray(yy,&y);
435:     for (i=0; i<m; i++) {
436:       y[i] = 0.0;
437:     }
438:     VecRestoreArray(yy,&y);
439:     return(0);
440:   }

442:   VecGetArrayRead(xx,&x);
443:   VecGetArray(yy,&y);

445:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
446:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
447:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
448:   PetscObjectStateGet((PetscObject)A,&state);
449:   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
450:     MatSeqAIJMKL_create_mkl_handle(A);
451:   }

453:   /* Call MKL SpMV2 executor routine to do the MatMult. */
454:   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
455:   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");

457:   PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
458:   VecRestoreArrayRead(xx,&x);
459:   VecRestoreArray(yy,&y);
460:   return(0);
461: }
462: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

464: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
465: PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
466: {
467:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
468:   const PetscScalar *x;
469:   PetscScalar       *y;
470:   const MatScalar   *aa;
471:   PetscErrorCode    ierr;
472:   PetscInt          m = A->rmap->n;
473:   PetscInt          n = A->cmap->n;
474:   PetscScalar       alpha = 1.0;
475:   PetscScalar       beta = 0.0;
476:   const PetscInt    *aj,*ai;
477:   char              matdescra[6];

479:   /* Variables not in MatMultTranspose_SeqAIJ. */
480:   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */

483:   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
484:   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
485:   VecGetArrayRead(xx,&x);
486:   VecGetArray(yy,&y);
487:   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
488:   aa   = a->a;  /* Nonzero elements stored row-by-row. */
489:   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

491:   /* Call MKL sparse BLAS routine to do the MatMult. */
492:   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

494:   PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
495:   VecRestoreArrayRead(xx,&x);
496:   VecRestoreArray(yy,&y);
497:   return(0);
498: }
499: #endif

501: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
502: PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
503: {
504:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
505:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
506:   const PetscScalar *x;
507:   PetscScalar       *y;
508:   PetscErrorCode    ierr;
509:   sparse_status_t   stat;
510:   PetscObjectState  state;


514:   /* If there are no nonzero entries, zero yy and return immediately. */
515:   if (!a->nz) {
516:     PetscInt i;
517:     PetscInt n=A->cmap->n;
518:     VecGetArray(yy,&y);
519:     for (i=0; i<n; i++) {
520:       y[i] = 0.0;
521:     }
522:     VecRestoreArray(yy,&y);
523:     return(0);
524:   }

526:   VecGetArrayRead(xx,&x);
527:   VecGetArray(yy,&y);

529:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
530:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
531:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
532:   PetscObjectStateGet((PetscObject)A,&state);
533:   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
534:     MatSeqAIJMKL_create_mkl_handle(A);
535:   }

537:   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
538:   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
539:   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");

541:   PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
542:   VecRestoreArrayRead(xx,&x);
543:   VecRestoreArray(yy,&y);
544:   return(0);
545: }
546: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

548: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
549: PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
550: {
551:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
552:   const PetscScalar *x;
553:   PetscScalar       *y,*z;
554:   const MatScalar   *aa;
555:   PetscErrorCode    ierr;
556:   PetscInt          m = A->rmap->n;
557:   PetscInt          n = A->cmap->n;
558:   const PetscInt    *aj,*ai;
559:   PetscInt          i;

561:   /* Variables not in MatMultAdd_SeqAIJ. */
562:   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
563:   PetscScalar       alpha = 1.0;
564:   PetscScalar       beta;
565:   char              matdescra[6];

568:   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
569:   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

571:   VecGetArrayRead(xx,&x);
572:   VecGetArrayPair(yy,zz,&y,&z);
573:   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
574:   aa   = a->a;  /* Nonzero elements stored row-by-row. */
575:   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

577:   /* Call MKL sparse BLAS routine to do the MatMult. */
578:   if (zz == yy) {
579:     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
580:     beta = 1.0;
581:     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
582:   } else {
583:     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
584:      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
585:     beta = 0.0;
586:     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
587:     for (i=0; i<m; i++) {
588:       z[i] += y[i];
589:     }
590:   }

592:   PetscLogFlops(2.0*a->nz);
593:   VecRestoreArrayRead(xx,&x);
594:   VecRestoreArrayPair(yy,zz,&y,&z);
595:   return(0);
596: }
597: #endif

599: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
600: PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
601: {
602:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
603:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
604:   const PetscScalar *x;
605:   PetscScalar       *y,*z;
606:   PetscErrorCode    ierr;
607:   PetscInt          m = A->rmap->n;
608:   PetscInt          i;

610:   /* Variables not in MatMultAdd_SeqAIJ. */
611:   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
612:   PetscObjectState  state;


616:   /* If there are no nonzero entries, set zz = yy and return immediately. */
617:   if (!a->nz) {
618:     PetscInt i;
619:     VecGetArrayPair(yy,zz,&y,&z);
620:     for (i=0; i<m; i++) {
621:       z[i] = y[i];
622:     }
623:     VecRestoreArrayPair(yy,zz,&y,&z);
624:     return(0);
625:   }

627:   VecGetArrayRead(xx,&x);
628:   VecGetArrayPair(yy,zz,&y,&z);

630:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
631:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
632:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
633:   PetscObjectStateGet((PetscObject)A,&state);
634:   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
635:     MatSeqAIJMKL_create_mkl_handle(A);
636:   }

638:   /* Call MKL sparse BLAS routine to do the MatMult. */
639:   if (zz == yy) {
640:     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
641:      * with alpha and beta both set to 1.0. */
642:     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
643:     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
644:   } else {
645:     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
646:      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
647:     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
648:     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
649:     for (i=0; i<m; i++) {
650:       z[i] += y[i];
651:     }
652:   }

654:   PetscLogFlops(2.0*a->nz);
655:   VecRestoreArrayRead(xx,&x);
656:   VecRestoreArrayPair(yy,zz,&y,&z);
657:   return(0);
658: }
659: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

661: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
662: PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
663: {
664:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
665:   const PetscScalar *x;
666:   PetscScalar       *y,*z;
667:   const MatScalar   *aa;
668:   PetscErrorCode    ierr;
669:   PetscInt          m = A->rmap->n;
670:   PetscInt          n = A->cmap->n;
671:   const PetscInt    *aj,*ai;
672:   PetscInt          i;

674:   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
675:   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
676:   PetscScalar       alpha = 1.0;
677:   PetscScalar       beta;
678:   char              matdescra[6];

681:   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
682:   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

684:   VecGetArrayRead(xx,&x);
685:   VecGetArrayPair(yy,zz,&y,&z);
686:   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
687:   aa   = a->a;  /* Nonzero elements stored row-by-row. */
688:   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

690:   /* Call MKL sparse BLAS routine to do the MatMult. */
691:   if (zz == yy) {
692:     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
693:     beta = 1.0;
694:     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
695:   } else {
696:     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
697:      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
698:     beta = 0.0;
699:     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
700:     for (i=0; i<n; i++) {
701:       z[i] += y[i];
702:     }
703:   }

705:   PetscLogFlops(2.0*a->nz);
706:   VecRestoreArrayRead(xx,&x);
707:   VecRestoreArrayPair(yy,zz,&y,&z);
708:   return(0);
709: }
710: #endif

712: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
713: PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
714: {
715:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
716:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
717:   const PetscScalar *x;
718:   PetscScalar       *y,*z;
719:   PetscErrorCode    ierr;
720:   PetscInt          n = A->cmap->n;
721:   PetscInt          i;
722:   PetscObjectState  state;

724:   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
725:   sparse_status_t stat = SPARSE_STATUS_SUCCESS;


729:   /* If there are no nonzero entries, set zz = yy and return immediately. */
730:   if (!a->nz) {
731:     PetscInt i;
732:     VecGetArrayPair(yy,zz,&y,&z);
733:     for (i=0; i<n; i++) {
734:       z[i] = y[i];
735:     }
736:     VecRestoreArrayPair(yy,zz,&y,&z);
737:     return(0);
738:   }

740:   VecGetArrayRead(xx,&x);
741:   VecGetArrayPair(yy,zz,&y,&z);

743:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
744:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
745:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
746:   PetscObjectStateGet((PetscObject)A,&state);
747:   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
748:     MatSeqAIJMKL_create_mkl_handle(A);
749:   }

751:   /* Call MKL sparse BLAS routine to do the MatMult. */
752:   if (zz == yy) {
753:     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
754:      * with alpha and beta both set to 1.0. */
755:     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
756:     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
757:   } else {
758:     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
759:      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
760:     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
761:     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
762:     for (i=0; i<n; i++) {
763:       z[i] += y[i];
764:     }
765:   }

767:   PetscLogFlops(2.0*a->nz);
768:   VecRestoreArrayRead(xx,&x);
769:   VecRestoreArrayPair(yy,zz,&y,&z);
770:   return(0);
771: }
772: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

774: /* -------------------------- MatProduct code -------------------------- */
775: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
776: static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
777: {
778:   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
779:   sparse_matrix_t     csrA,csrB,csrC;
780:   PetscInt            nrows,ncols;
781:   PetscErrorCode      ierr;
782:   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
783:   struct matrix_descr descr_type_gen;
784:   PetscObjectState    state;

787:   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
788:    * not handle sparse matrices with zero rows or columns. */
789:   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
790:   else nrows = A->cmap->N;
791:   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
792:   else ncols = B->rmap->N;

794:   PetscObjectStateGet((PetscObject)A,&state);
795:   if (!a->sparse_optimized || a->state != state) {
796:     MatSeqAIJMKL_create_mkl_handle(A);
797:   }
798:   PetscObjectStateGet((PetscObject)B,&state);
799:   if (!b->sparse_optimized || b->state != state) {
800:     MatSeqAIJMKL_create_mkl_handle(B);
801:   }
802:   csrA = a->csrA;
803:   csrB = b->csrA;
804:   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

806:   if (csrA && csrB) {
807:     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
808:     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
809:   } else {
810:     csrC = PETSC_NULL;
811:   }

813:   MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);

815:   return(0);
816: }

818: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
819: {
820:   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
821:   sparse_matrix_t     csrA, csrB, csrC;
822:   PetscErrorCode      ierr;
823:   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
824:   struct matrix_descr descr_type_gen;
825:   PetscObjectState    state;

828:   PetscObjectStateGet((PetscObject)A,&state);
829:   if (!a->sparse_optimized || a->state != state) {
830:     MatSeqAIJMKL_create_mkl_handle(A);
831:   }
832:   PetscObjectStateGet((PetscObject)B,&state);
833:   if (!b->sparse_optimized || b->state != state) {
834:     MatSeqAIJMKL_create_mkl_handle(B);
835:   }
836:   csrA = a->csrA;
837:   csrB = b->csrA;
838:   csrC = c->csrA;
839:   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

841:   if (csrA && csrB) {
842:     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
843:     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
844:   } else {
845:     csrC = PETSC_NULL;
846:   }

848:   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
849:   MatSeqAIJMKL_update_from_mkl_handle(C);

851:   return(0);
852: }

854: PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
855: {

859:   MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
860:   return(0);
861: }

863: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
864: {

868:   MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
869:   return(0);
870: }

872: PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
873: {

877:   MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
878:   return(0);
879: }

881: PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
882: {

886:   MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
887:   return(0);
888: }

890: PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
891: {

895:   MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);
896:   return(0);
897: }

899: PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
900: {

904:   MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);
905:   return(0);
906: }

908: static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
909: {
911:   Mat_Product    *product = C->product;
912:   Mat            A = product->A,B = product->B;

915:   MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);
916:   return(0);
917: }

919: static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
920: {
922:   Mat_Product    *product = C->product;
923:   Mat            A = product->A,B = product->B;
924:   PetscReal      fill = product->fill;

927:   MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);
928:   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
929:   return(0);
930: }

932: PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
933: {
934:   Mat                 Ct;
935:   Vec                 zeros;
936:   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
937:   sparse_matrix_t     csrA, csrP, csrC;
938:   PetscBool           set, flag;
939:   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
940:   struct matrix_descr descr_type_sym;
941:   PetscObjectState    state;
942:   PetscErrorCode      ierr;

945:   MatIsSymmetricKnown(A,&set,&flag);
946:   if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");

948:   PetscObjectStateGet((PetscObject)A,&state);
949:   if (!a->sparse_optimized || a->state != state) {
950:     MatSeqAIJMKL_create_mkl_handle(A);
951:   }
952:   PetscObjectStateGet((PetscObject)P,&state);
953:   if (!p->sparse_optimized || p->state != state) {
954:     MatSeqAIJMKL_create_mkl_handle(P);
955:   }
956:   csrA = a->csrA;
957:   csrP = p->csrA;
958:   csrC = c->csrA;
959:   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
960:   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
961:   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

963:   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
964:   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
965:   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()");

967:   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
968:    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
969:    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
970:    * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY
971:    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
972:    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
973:    * the full matrix. */
974:   MatSeqAIJMKL_update_from_mkl_handle(C);
975:   MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);
976:   MatCreateVecs(C,&zeros,NULL);
977:   VecSetFromOptions(zeros);
978:   VecZeroEntries(zeros);
979:   MatDiagonalSet(Ct,zeros,INSERT_VALUES);
980:   MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);
981:   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
982:   MatProductCreateWithMat(A,P,PETSC_NULL,C);
983:   MatProductSetType(C,MATPRODUCT_PtAP);
984:   MatSeqAIJMKL_create_mkl_handle(C);
985:   VecDestroy(&zeros);
986:   MatDestroy(&Ct);
987:   return(0);
988: }

990: PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
991: {
992:   Mat_Product         *product = C->product;
993:   Mat                 A = product->A,P = product->B;
994:   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
995:   sparse_matrix_t     csrA,csrP,csrC;
996:   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
997:   struct matrix_descr descr_type_sym;
998:   PetscObjectState    state;
999:   PetscErrorCode      ierr;

1002:   PetscObjectStateGet((PetscObject)A,&state);
1003:   if (!a->sparse_optimized || a->state != state) {
1004:     MatSeqAIJMKL_create_mkl_handle(A);
1005:   }
1006:   PetscObjectStateGet((PetscObject)P,&state);
1007:   if (!p->sparse_optimized || p->state != state) {
1008:     MatSeqAIJMKL_create_mkl_handle(P);
1009:   }
1010:   csrA = a->csrA;
1011:   csrP = p->csrA;
1012:   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
1013:   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
1014:   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

1016:   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
1017:   if (csrP && csrA) {
1018:     stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
1019:     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()");
1020:   } else {
1021:     csrC = PETSC_NULL;
1022:   }

1024:   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
1025:    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
1026:    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
1027:    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
1028:    * is guaranteed. */
1029:   MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);

1031:   C->ops->productnumeric = MatProductNumeric_PtAP;
1032:   return(0);
1033: }

1035: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
1036: {
1038:   C->ops->productsymbolic = MatProductSymbolic_AB;
1039:   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1040:   return(0);
1041: }

1043: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
1044: {
1046:   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
1047:   return(0);
1048: }

1050: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
1051: {
1053:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
1054:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
1055:   return(0);
1056: }

1058: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
1059: {
1061:   Mat_Product    *product = C->product;
1062:   Mat            A = product->A;
1063:   PetscBool      set, flag;

1066: #if defined(PETSC_USE_COMPLEX)
1067:   /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Basic() will be used.
1068:    * We do this in several other locations in this file. This works for the time being, but the _Basic()
1069:    * routines are considered unsafe and may be removed from the MatProduct code in the future.
1070:    * TODO: Add proper MATSEQAIJMKL implementations, instead of relying on the _Basic() routines. */
1071:   C->ops->productsymbolic = NULL;
1072: #else
1073:   /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
1074:   MatIsSymmetricKnown(A,&set,&flag);
1075:   if (set && flag) {
1076:     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1077:     return(0);
1078:   } else {
1079:     C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1080:   }
1081:   /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
1082:    * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
1083: #endif
1084:   return(0);
1085: }

1087: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
1088: {
1090:   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1091:   return(0);
1092: }

1094: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
1095: {
1097:   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1098:   return(0);
1099: }

1101: PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
1102: {
1104:   Mat_Product    *product = C->product;

1107:   switch (product->type) {
1108:   case MATPRODUCT_AB:
1109:     MatProductSetFromOptions_SeqAIJMKL_AB(C);
1110:     break;
1111:   case MATPRODUCT_AtB:
1112:     MatProductSetFromOptions_SeqAIJMKL_AtB(C);
1113:     break;
1114:   case MATPRODUCT_ABt:
1115:     MatProductSetFromOptions_SeqAIJMKL_ABt(C);
1116:     break;
1117:   case MATPRODUCT_PtAP:
1118:     MatProductSetFromOptions_SeqAIJMKL_PtAP(C);
1119:     break;
1120:   case MATPRODUCT_RARt:
1121:     MatProductSetFromOptions_SeqAIJMKL_RARt(C);
1122:     break;
1123:   case MATPRODUCT_ABC:
1124:     MatProductSetFromOptions_SeqAIJMKL_ABC(C);
1125:     break;
1126:   default:
1127:     break;
1128:   }
1129:   return(0);
1130: }
1131: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1132: /* ------------------------ End MatProduct code ------------------------ */

1134: /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1135:  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
1136:  * routine, but can also be used to convert an assembled SeqAIJ matrix
1137:  * into a SeqAIJMKL one. */
1138: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
1139: {
1141:   Mat            B = *newmat;
1142:   Mat_SeqAIJMKL  *aijmkl;
1143:   PetscBool      set;
1144:   PetscBool      sametype;

1147:   if (reuse == MAT_INITIAL_MATRIX) {
1148:     MatDuplicate(A,MAT_COPY_VALUES,&B);
1149:   }

1151:   PetscObjectTypeCompare((PetscObject)A,type,&sametype);
1152:   if (sametype) return(0);

1154:   PetscNewLog(B,&aijmkl);
1155:   B->spptr = (void*) aijmkl;

1157:   /* Set function pointers for methods that we inherit from AIJ but override.
1158:    * We also parse some command line options below, since those determine some of the methods we point to. */
1159:   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
1160:   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
1161:   B->ops->destroy          = MatDestroy_SeqAIJMKL;

1163:   aijmkl->sparse_optimized = PETSC_FALSE;
1164: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1165:   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
1166: #else
1167:   aijmkl->no_SpMV2 = PETSC_TRUE;
1168: #endif
1169:   aijmkl->eager_inspection = PETSC_FALSE;

1171:   /* Parse command line options. */
1172:   PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
1173:   PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);
1174:   PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);
1175:   PetscOptionsEnd();
1176: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1177:   if (!aijmkl->no_SpMV2) {
1178:     PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
1179:     aijmkl->no_SpMV2 = PETSC_TRUE;
1180:   }
1181: #endif

1183: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1184:   B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
1185:   B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
1186:   B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
1187:   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1188: # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1189:   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1190:   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1191:   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1192:   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1193:   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1194: #   if !defined(PETSC_USE_COMPLEX)
1195:   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1196: #   else
1197:   B->ops->ptapnumeric             = NULL;
1198: #   endif
1199: # endif
1200: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

1202: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1203:   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1204:    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1205:    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1206:    * versions in which the older interface has not been deprecated, we use the old interface. */
1207:   if (aijmkl->no_SpMV2) {
1208:     B->ops->mult             = MatMult_SeqAIJMKL;
1209:     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1210:     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1211:     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1212:   }
1213: #endif

1215:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);

1217:   if (!aijmkl->no_SpMV2) {
1218: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1219: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1220:     PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",MatProductSetFromOptions_SeqAIJMKL);
1221: #endif
1222: #endif
1223:   }

1225:   PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);
1226:   *newmat = B;
1227:   return(0);
1228: }

1230: /*@C
1231:    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
1232:    This type inherits from AIJ and is largely identical, but uses sparse BLAS
1233:    routines from Intel MKL whenever possible.
1234:    If the installed version of MKL supports the "SpMV2" sparse
1235:    inspector-executor routines, then those are used by default.
1236:    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1237:    symmetric A) operations are currently supported.
1238:    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.

1240:    Collective

1242:    Input Parameters:
1243: +  comm - MPI communicator, set to PETSC_COMM_SELF
1244: .  m - number of rows
1245: .  n - number of columns
1246: .  nz - number of nonzeros per row (same for all rows)
1247: -  nnz - array containing the number of nonzeros in the various rows
1248:          (possibly different for each row) or NULL

1250:    Output Parameter:
1251: .  A - the matrix

1253:    Options Database Keys:
1254: +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1255: -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

1257:    Notes:
1258:    If nnz is given then nz is ignored

1260:    Level: intermediate

1262: .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
1263: @*/
1264: PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1265: {

1269:   MatCreate(comm,A);
1270:   MatSetSizes(*A,m,n,m,n);
1271:   MatSetType(*A,MATSEQAIJMKL);
1272:   MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
1273:   return(0);
1274: }

1276: PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1277: {

1281:   MatSetType(A,MATSEQAIJ);
1282:   MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
1283:   return(0);
1284: }