Actual source code: aijmkl.c


/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/
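/* Usage note (a sketch, not part of the original file): a matrix of this type can be
 * selected at run time with -mat_type aijmkl, or set explicitly in code. Assuming a
 * square matrix with sizes m and n and roughly 5 nonzeros per row:
 *
 *   Mat A;
 *   MatCreate(PETSC_COMM_SELF,&A);
 *   MatSetSizes(A,m,n,m,n);
 *   MatSetType(A,MATSEQAIJMKL);
 *   MatSeqAIJSetPreallocation(A,5,NULL);
 *   // ...MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() as for any AIJ matrix...
 */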

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
#include <mkl_spblas.h>

typedef struct {
  PetscBool           no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState    state;            /* State of the matrix when the MKL handle was last created; used to detect a stale handle. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;
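/* The 'state' field supports the lazy-update pattern used throughout this file: before an
 * MKL handle is used, the current object state is compared against the state recorded when
 * the handle was built, and the handle is rebuilt if the matrix has changed since then:
 *
 *   PetscObjectStateGet((PetscObject)A,&state);
 *   if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);
 */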

extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat            B       = *newmat;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
#endif

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  /* Reset the original function pointers. */
  B->ops->duplicate               = MatDuplicate_SeqAIJ;
  B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy                 = MatDestroy_SeqAIJ;
  B->ops->mult                    = MatMult_SeqAIJ;
  B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
  B->ops->multadd                 = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

  if (aijmkl->sparse_optimized) {
    sparse_status_t stat;
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
  }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscFree(B->spptr);

  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

  *newmat = B;
  return(0);
}
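/* Example (a sketch of typical usage, not code from this file): the conversion above is
 * normally reached through the generic MatConvert() interface, e.g.
 *
 *   MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);   // strip the MKL specialization in place
 */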

PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscFree(A->spptr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
  /* Note that I don't call MatSetType().  I believe this is because that
   * is only to be called when *building* a matrix.  I could be wrong, but
   * that is how things work for the SuperLU matrix class. */
  MatDestroy_SeqAIJ(A);
  return(0);
}

/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  return(0);
#else
  Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt         m,n;
  MatScalar        *aa;
  PetscInt         *aj,*ai;
  sparse_status_t  stat;
  PetscErrorCode   ierr;

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interfaces, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
   * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code path by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) return(0);
#endif

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m = A->rmap->n;
  n = A->cmap->n;
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
  if (a->nz && aa && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle in mkl_sparse_x_create_csr()");
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
    if (!aijmkl->no_SpMV2) {
      stat = mkl_sparse_optimize(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()");
    }
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  } else {
    aijmkl->csrA = PETSC_NULL;
  }

  return(0);
#endif
}
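/* For reference, the inspector-executor sequence performed above, shown as a standalone,
 * double-precision MKL sketch (assumptions: MKL_INT matches PetscInt, and zero-based CSR
 * arrays ia/ja/va for an m x n matrix already exist):
 *
 *   sparse_matrix_t     H;
 *   struct matrix_descr d = {SPARSE_MATRIX_TYPE_GENERAL, SPARSE_FILL_MODE_LOWER, SPARSE_DIAG_NON_UNIT};
 *   mkl_sparse_d_create_csr(&H,SPARSE_INDEX_BASE_ZERO,m,n,ia,ia+1,ja,va);  // inspect: wrap the CSR arrays
 *   mkl_sparse_set_mv_hint(H,SPARSE_OPERATION_NON_TRANSPOSE,d,1000);       // expect on the order of 1000 MatMults
 *   mkl_sparse_set_memory_hint(H,SPARSE_MEMORY_AGGRESSIVE);                // allow MKL to make internal copies
 *   mkl_sparse_optimize(H);                                                // executor setup
 *   // ... use H in mkl_sparse_d_mv(), etc., then mkl_sparse_destroy(H);
 */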

#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
/* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            m,n;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat_SeqAIJMKL       *aijmkl;

  if (csrA) {
    /* Note: MKL requires a valid pointer for the rows_end output array, so we must pass &dummy even though
     * we don't use that array (ai, the rows_start array, serves as the usual CSR row-pointer array). */
    stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
    if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
  } else {
    aj = ai = PETSC_NULL;
    aa = PETSC_NULL;
  }

  MatSetType(A,MATSEQAIJ);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * the arrays will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  if (csrA) {
    MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);
  } else {
    /* Since MatSeqAIJSetPreallocationCSR() does initial setup and assembly begin/end, we must do that ourselves here. */
    MatSetUp(A);
    MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  }

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);

  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl->csrA = csrA;

  /* The below code duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  if (csrA) {
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
  }
  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  /* Exit immediately if the MKL matrix handle is NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) return(0);

  /* Note: MKL requires a valid pointer for the rows_end output array, so we must pass &dummy even though we don't use it. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);
  }

  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
   * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
   * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
  aijmkl->sparse_optimized = PETSC_FALSE;
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
{
  PetscInt            i,j,k;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");

  /* Exit immediately if the MKL matrix handle is NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) {
    PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");
    return(0);
  }

  /* Note: MKL requires a valid pointer for the rows_end output array, so we must pass &dummy even though we don't use it. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  k = 0;
  for (i=0; i<nrows; i++) {
    PetscViewerASCIIPrintf(viewer,"row %D: ",i);
    nz = ai[i+1] - ai[i];
    for (j=0; j<nz; j++) {
      if (aa) {
        PetscViewerASCIIPrintf(viewer,"(%D, %g)  ",aj[k],PetscRealPart(aa[k]));
      } else {
        PetscViewerASCIIPrintf(viewer,"(%D, NULL)  ",aj[k]);
      }
      k++;
    }
    PetscViewerASCIIPrintf(viewer,"\n");
  }
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  Mat_SeqAIJMKL  *aijmkl_dest;

  MatDuplicate_SeqAIJ(A,op,M);
  aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
  PetscArraycpy(aijmkl_dest,aijmkl,1);
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  return(0);
}

PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  PetscErrorCode  ierr;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL   *aijmkl;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
  MatAssemblyEnd_SeqAIJ(A, mode);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  return(0);
}
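/* For example (a sketch of typical usage, not code from this file), eager inspection can be
 * requested on the command line, so that the MKL inspection cost is paid here at assembly
 * time instead of at the first MatMult():
 *
 *   ./myapp -mat_type aijmkl -mat_aijmkl_eager_inspection
 */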

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif
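/* Aside (an assumption for illustration, not from this file): mkl_xcsrmv() is a
 * precision-dispatching wrapper, so for PetscScalar == double the call above presumably
 * amounts to the classic MKL sparse BLAS routine
 *
 *   mkl_dcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
 *
 * which computes y = alpha*op(A)*x + beta*y using the 4-array CSR variant (ai and ai+1
 * serve as the pointerB/pointerE row-pointer arrays). */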

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt m=A->rmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<m; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt n=A->cmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<n; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add yy to the result;
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<m; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char              transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*op(A)*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add yy to the result;
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          n = A->cmap->n;
  PetscInt          i;
  PetscObjectState  state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  sparse_status_t stat = SPARSE_STATUS_SUCCESS;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<n; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

/* -------------------------- MatProduct code -------------------------- */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
{
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
  sparse_matrix_t     csrA,csrB,csrC;
  PetscInt            nrows,ncols;
  PetscErrorCode      ierr;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
   * not handle sparse matrices with zero rows or columns. */
  if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
  else nrows = A->cmap->N;
  if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
  else ncols = B->rmap->N;

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
  } else {
    csrC = PETSC_NULL;
  }

  MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);

  return(0);
}
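/* For reference, the two-stage mkl_sparse_sp2m() protocol that the symbolic/numeric pair of
 * routines here relies on (a sketch; opA/opB are sparse_operation_t values and the handles
 * csrA/csrB are assumed to already exist):
 *
 *   sparse_matrix_t     csrC;
 *   struct matrix_descr g = {SPARSE_MATRIX_TYPE_GENERAL};
 *   // Stage 1 (symbolic): compute only the nonzero structure of C = op(A)*op(B).
 *   mkl_sparse_sp2m(opA,g,csrA,opB,g,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
 *   // Stage 2 (numeric): fill in the values of C, reusing the structure from stage 1.
 *   mkl_sparse_sp2m(opA,g,csrA,opB,g,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
 */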

PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
{
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
  sparse_matrix_t     csrA, csrB, csrC;
  PetscErrorCode      ierr;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
  } else {
    csrC = PETSC_NULL;
  }

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}

PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);
  return(0);
}

static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product    *product = C->product;
  Mat            A = product->A,B = product->B;

  MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);
  return(0);
}

static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product    *product = C->product;
  Mat            A = product->A,B = product->B;
  PetscReal      fill = product->fill;

  MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);
  C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
  return(0);
}

PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
{
  Mat                 Ct;
  Vec                 zeros;
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || !flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  csrC = c->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()");

  /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
   * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
   * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
   * We have to fill in the missing portion, which we currently do below by forming the transpose and performing a MatAXPY
   * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
   * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
   * the full matrix. */
  MatSeqAIJMKL_update_from_mkl_handle(C);
  MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);
  MatCreateVecs(C,&zeros,NULL);
  VecSetFromOptions(zeros);
  VecZeroEntries(zeros);
  MatDiagonalSet(Ct,zeros,INSERT_VALUES);
  MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);
  /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
  MatProductCreateWithMat(A,P,PETSC_NULL,C);
  MatProductSetType(C,MATPRODUCT_PtAP);
  MatSeqAIJMKL_create_mkl_handle(C);
  VecDestroy(&zeros);
  MatDestroy(&Ct);
  return(0);
}

PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
{
  Mat_Product         *product = C->product;
  Mat                 A = product->A,P = product->B;
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
  sparse_matrix_t     csrA,csrP,csrC;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  if (csrP && csrA) {
    stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()");
  } else {
    csrC = PETSC_NULL;
  }

  /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
   * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
   * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
   * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
   * is guaranteed. */
  MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);

  C->ops->productnumeric = MatProductNumeric_PtAP;
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
{
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
{
  C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
{
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
{
  Mat_Product    *product = C->product;
  Mat            A = product->A;
  PetscBool      set, flag;

  if (PetscDefined(USE_COMPLEX)) {
    /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
     * We do this in several other locations in this file. This works for the time being, but these
     * routines are considered unsafe and may be removed from the MatProduct code in the future.
     * TODO: Add proper MATSEQAIJMKL implementations */
    C->ops->productsymbolic = NULL;
  } else {
    /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
    MatIsSymmetricKnown(A,&set,&flag);
    if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
    else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
    /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
     * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
  }
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
{
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
{
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  return(0);
}

PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
{
  Mat_Product    *product = C->product;

  switch (product->type) {
  case MATPRODUCT_AB:
    MatProductSetFromOptions_SeqAIJMKL_AB(C);
    break;
  case MATPRODUCT_AtB:
    MatProductSetFromOptions_SeqAIJMKL_AtB(C);
    break;
  case MATPRODUCT_ABt:
    MatProductSetFromOptions_SeqAIJMKL_ABt(C);
    break;
  case MATPRODUCT_PtAP:
    MatProductSetFromOptions_SeqAIJMKL_PtAP(C);
    break;
  case MATPRODUCT_RARt:
    MatProductSetFromOptions_SeqAIJMKL_RARt(C);
    break;
  case MATPRODUCT_ABC:
    MatProductSetFromOptions_SeqAIJMKL_ABC(C);
    break;
  default:
    break;
  }
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
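/* For reference, the MatProduct call sequence through which the routines above are reached
 * (a sketch of typical user code for C = A*B; error checking omitted):
 *
 *   Mat C;
 *   MatProductCreate(A,B,NULL,&C);
 *   MatProductSetType(C,MATPRODUCT_AB);
 *   MatProductSetFromOptions(C);   // dispatches to MatProductSetFromOptions_SeqAIJMKL()
 *   MatProductSymbolic(C);
 *   MatProductNumeric(C);          // can be called again after A or B changes values
 */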
/* ------------------------ End MatProduct code ------------------------ */

/* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B = *newmat;
  Mat_SeqAIJMKL  *aijmkl;
  PetscBool      set;
  PetscBool      sametype;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  PetscObjectTypeCompare((PetscObject)A,type,&sametype);
  if (sametype) return(0);

  PetscNewLog(B,&aijmkl);
  B->spptr = (void*) aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy          = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
  PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);
  PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);
  PetscOptionsEnd();
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  if (!aijmkl->no_SpMV2) {
    PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
# if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
#   if !defined(PETSC_USE_COMPLEX)
  B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
#   else
  B->ops->ptapnumeric             = NULL;
#   endif
# endif
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. When those interfaces are
   * deprecated in the MKL version being used, this block is compiled out: a user-specified "no_SpMV2" then keeps the
   * new SpMV2 routines set above but skips mkl_sparse_optimize(), so the old numerical kernels (without the
   * inspector-executor model) are used. For versions in which the older interface has not been deprecated,
   * "no_SpMV2" selects the old interface directly. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);

  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);
  *newmat = B;
  return(0);
}
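/* Example (a sketch of typical usage, not code from this file): an assembled MATSEQAIJ
 * matrix can be given the MKL-accelerated implementation after the fact:
 *
 *   MatConvert(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);        // convert in place
 *   // or: MatConvert(A,MATSEQAIJMKL,MAT_INITIAL_MATRIX,&B); // create a converted copy
 */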

/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
   symmetric A) operations are currently supported.
   Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.

   Collective

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given then nz is ignored
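
   Example usage (a sketch; m and n are assumed to be defined, the preallocation value 5 is
   arbitrary, error checking is omitted, and entries would be inserted with MatSetValues()
   between creation and assembly):
.vb
   Mat A;
   MatCreateSeqAIJMKL(PETSC_COMM_SELF,m,n,5,NULL,&A);
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve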

   Level: intermediate

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQAIJMKL);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
  return(0);
}

PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
  return(0);
}