Actual source code: aijmkl.c


/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
#include <mkl_spblas.h>

typedef struct {
  PetscBool           no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState    state;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;
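
/* The 'state' field caches the PetscObjectState of the matrix at the time the
 * MKL handle was last built. Before each MKL-backed operation, this file uses
 * the following check (a sketch; it appears verbatim in the routines below)
 * so that a stale handle is rebuilt whenever the PETSc-side values change:
 *
 *   PetscObjectStateGet((PetscObject)A,&state);
 *   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
 *     MatSeqAIJMKL_create_mkl_handle(A);
 *   }
 */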

extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type,
   * so we will ignore 'MatType type'. */
  Mat            B       = *newmat;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
#endif

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  /* Reset the original function pointers. */
  B->ops->duplicate               = MatDuplicate_SeqAIJ;
  B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy                 = MatDestroy_SeqAIJ;
  B->ops->mult                    = MatMult_SeqAIJ;
  B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
  B->ops->multadd                 = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

  if (aijmkl->sparse_optimized) {
    sparse_status_t stat;
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
  }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscFree(B->spptr);

  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

  *newmat = B;
  return(0);
}
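
/* A minimal usage sketch of this conversion (error checking omitted; PETSc
 * dispatches to the composed "MatConvert_seqaijmkl_seqaij_C" function above):
 *
 *   Mat B;
 *   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
 */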

PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;


  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscFree(A->spptr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
  /* Note that we do not call MatSetType() here: that routine appears to be
   * intended only for use while *building* a matrix, and this mirrors how the
   * SuperLU matrix class handles destruction. */
  MatDestroy_SeqAIJ(A);
  return(0);
}

/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  return(0);
#else
  Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt         m,n;
  MatScalar        *aa;
  PetscInt         *aj,*ai;
  sparse_status_t  stat;
  PetscErrorCode   ierr;

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interface, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interface (version 18, update 2 and later), we must
   * use the new inspector-executor interface, but we can still get the old, non-inspector-executor kernels by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) return(0);
#endif

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m = A->rmap->n;
  n = A->cmap->n;
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
  if (a->nz && aa && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines do not accept an empty matrix. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()");
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
    if (!aijmkl->no_SpMV2) {
      stat = mkl_sparse_optimize(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()");
    }
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  } else {
    aijmkl->csrA = PETSC_NULL;
  }

  return(0);
#endif
}
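
/* For reference, a sketch (in plain double precision; not additional code used
 * by PETSc) of the MKL inspector-executor sequence that the routine above
 * wraps. The mkl_sparse_x_* names used in this file are presumably
 * precision-dispatch wrappers over MKL names such as these:
 *
 *   sparse_matrix_t     handle;
 *   struct matrix_descr descr;
 *   descr.type = SPARSE_MATRIX_TYPE_GENERAL;
 *   mkl_sparse_d_create_csr(&handle,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
 *   mkl_sparse_set_mv_hint(handle,SPARSE_OPERATION_NON_TRANSPOSE,descr,1000);
 *   mkl_sparse_optimize(handle);
 *   mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,handle,descr,x,0.0,y);
 *   mkl_sparse_destroy(handle);
 */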

#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
/* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            m,n;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat_SeqAIJMKL       *aijmkl;

  if (csrA) {
    /* Note: We must pass &dummy below because MKL does not accept NULL for an output array, even one we don't want. */
    stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
    if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
  } else {
    aj = ai = PETSC_NULL;
    aa = PETSC_NULL;
  }

  MatSetType(A,MATSEQAIJ);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in the future, can we figure out good ways to deal with this?) */
  if (csrA) {
    MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);
  } else {
    /* Since MatSeqAIJSetPreallocationCSR() does the initial setup and assembly begin/end, we must do that ourselves here. */
    MatSetUp(A);
    MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  }

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);

  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl->csrA = csrA;

  /* The code below duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  if (csrA) {
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
  }
  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
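
/* A note on mkl_sparse_x_export_csr(), used above and below: MKL returns four
 * arrays describing the handle's CSR data -- rows_start, rows_end, col_indx,
 * and values. PETSc's AIJ format needs only a single row-pointer array, so
 * rows_start is received in 'ai' and rows_end goes to the unused '&dummy'
 * argument (see the notes nearby on why NULL cannot be passed instead). */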

/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  /* Exit immediately if the MKL matrix handle is NULL; this is the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) return(0);

  /* Note: We must pass &dummy below because MKL does not accept NULL for an output array, even one we don't want. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);
  }

  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
   * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
   * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
  aijmkl->sparse_optimized = PETSC_FALSE;
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
{
  PetscInt            i,j,k;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");

  /* Exit immediately if the MKL matrix handle is NULL; this is the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) {
    PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");
    return(0);
  }

  /* Note: We must pass &dummy below because MKL does not accept NULL for an output array, even one we don't want. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  k = 0;
  for (i=0; i<nrows; i++) {
    PetscViewerASCIIPrintf(viewer,"row %D: ",i);
    nz = ai[i+1] - ai[i];
    for (j=0; j<nz; j++) {
      if (aa) {
        PetscViewerASCIIPrintf(viewer,"(%D, %g)  ",aj[k],PetscRealPart(aa[k]));
      } else {
        PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);
      }
      k++;
    }
    PetscViewerASCIIPrintf(viewer,"\n");
  }
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  Mat_SeqAIJMKL  *aijmkl_dest;

  MatDuplicate_SeqAIJ(A,op,M);
  aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
  PetscArraycpy(aijmkl_dest,aijmkl,1);
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  return(0);
}

PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  PetscErrorCode  ierr;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL   *aijmkl;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
  MatAssemblyEnd_SeqAIJ(A, mode);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  return(0);
}
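
/* A usage sketch: eager inspection can be requested from the options database
 * (the option itself is registered in MatConvert_SeqAIJ_SeqAIJMKL() below), e.g.
 *
 *   ./app -mat_type seqaijmkl -mat_aijmkl_eager_inspection
 */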

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
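  /* (matdescra is the six-character descriptor array of the old MKL sparse BLAS:
   * position 0 gives the structure, 1 the fill mode, 2 the diagonal type, and 3
   * the index base; positions 1 and 2 are ignored for a general ('g') matrix,
   * and 4-5 are unused.) */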
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;


  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt m=A->rmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<m; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
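
/* mkl_sparse_x_mv() follows the standard MKL executor signature
 * mkl_sparse_?_mv(op,alpha,A,descr,x,beta,y), which computes
 * y = alpha*op(A)*x + beta*y; with alpha = 1.0 and beta = 0.0, as above,
 * this is exactly PETSc's MatMult. */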

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat;
  PetscObjectState  state;


  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt n=A->cmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<n; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add yy to the result.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;


  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<m; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv(), which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv() with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char              transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which here calculates y = alpha*A'*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add yy to the result.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          n = A->cmap->n;
  PetscInt          i;
  PetscObjectState  state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  sparse_status_t stat = SPARSE_STATUS_SUCCESS;


  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<n; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv(), which calculates y = alpha*op(A)*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv() with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

/* -------------------------- MatProduct code -------------------------- */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
{
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
  sparse_matrix_t     csrA,csrB,csrC;
  PetscInt            nrows,ncols;
  PetscErrorCode      ierr;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
   * not handle sparse matrices with zero rows or columns. */
  if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
  else nrows = A->cmap->N;
  if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
  else ncols = B->rmap->N;

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
  } else {
    csrC = PETSC_NULL;
  }

  MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);

  return(0);
}

PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
{
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
  sparse_matrix_t     csrA,csrB,csrC;
  PetscErrorCode      ierr;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
  } else {
    csrC = PETSC_NULL;
  }

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
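
/* The two _Private routines above implement MKL's two-stage sp2m pattern: a
 * symbolic stage (SPARSE_STAGE_FULL_MULT_NO_VAL) that computes only the nonzero
 * structure of C, followed by a numeric stage (SPARSE_STAGE_FINALIZE_MULT) that
 * fills in the values. In sketch form:
 *
 *   mkl_sparse_sp2m(transA,descr,csrA,transB,descr,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
 *   mkl_sparse_sp2m(transA,descr,csrA,transB,descr,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
 *
 * The thin wrappers below simply select the transpose flags for each MatProduct type. */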

PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{

  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{

  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{

  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{

  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{

  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{

  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);
  return(0);
}

static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product    *product = C->product;
  Mat            A = product->A,B = product->B;

  MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);
  return(0);
}

static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product    *product = C->product;
  Mat            A = product->A,B = product->B;
  PetscReal      fill = product->fill;

  MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);
  C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
  return(0);
}

PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
{
  Mat                 Ct;
  Vec                 zeros;
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
  sparse_matrix_t     csrA,csrP,csrC;
  PetscBool           set,flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || !flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  csrC = c->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect against this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()");

  /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
   * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
   * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
   * We have to fill in the missing portion, which we currently do below by forming the transpose and performing a MatAXPY
   * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
   * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
   * the full matrix. */
  MatSeqAIJMKL_update_from_mkl_handle(C);
  MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);
  MatCreateVecs(C,&zeros,NULL);
  VecSetFromOptions(zeros);
  VecZeroEntries(zeros);
  MatDiagonalSet(Ct,zeros,INSERT_VALUES);
  MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);
  /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
  MatProductCreateWithMat(A,P,PETSC_NULL,C);
  MatProductSetType(C,MATPRODUCT_PtAP);
  MatSeqAIJMKL_create_mkl_handle(C);
  VecDestroy(&zeros);
  MatDestroy(&Ct);
  return(0);
}
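
/* In effect, the fix-up above computes C_full = U + (U^T with its diagonal
 * zeroed), where U is the upper triangle returned by mkl_sparse_sypr(); zeroing
 * the diagonal of the transpose via MatDiagonalSet() keeps the diagonal entries
 * from being added twice by MatAXPY(). */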

PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
{
  Mat_Product         *product = C->product;
  Mat                 A = product->A,P = product->B;
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
  sparse_matrix_t     csrA,csrP,csrC;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect against this when pointers are assigned in MatConvert.) */
  if (csrP && csrA) {
    stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()");
  } else {
    csrC = PETSC_NULL;
  }

  /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
   * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
   * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
   * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
   * is guaranteed. */
  MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);

  C->ops->productnumeric = MatProductNumeric_PtAP;
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
{
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
{
  C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
{
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
{
  Mat_Product    *product = C->product;
  Mat            A = product->A;
  PetscBool      set,flag;

  if (PetscDefined(USE_COMPLEX)) {
    /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
     * We do this in several other locations in this file. This works for the time being, but these
     * routines are considered unsafe and may be removed from the MatProduct code in the future.
     * TODO: Add proper MATSEQAIJMKL implementations. */
    C->ops->productsymbolic = NULL;
  } else {
    /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
    MatIsSymmetricKnown(A,&set,&flag);
    if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
    else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
    /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
     * depending on whether the algorithm for the general case or the real symmetric one is used. */
  }
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
{
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
{
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  return(0);
}

PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
{
  Mat_Product    *product = C->product;

  switch (product->type) {
  case MATPRODUCT_AB:
    MatProductSetFromOptions_SeqAIJMKL_AB(C);
    break;
  case MATPRODUCT_AtB:
    MatProductSetFromOptions_SeqAIJMKL_AtB(C);
    break;
  case MATPRODUCT_ABt:
    MatProductSetFromOptions_SeqAIJMKL_ABt(C);
    break;
  case MATPRODUCT_PtAP:
    MatProductSetFromOptions_SeqAIJMKL_PtAP(C);
    break;
  case MATPRODUCT_RARt:
    MatProductSetFromOptions_SeqAIJMKL_RARt(C);
    break;
  case MATPRODUCT_ABC:
    MatProductSetFromOptions_SeqAIJMKL_ABC(C);
    break;
  default:
    break;
  }
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
/* ------------------------ End MatProduct code ------------------------ */

/* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B = *newmat;
  Mat_SeqAIJMKL  *aijmkl;
  PetscBool      set;
  PetscBool      sametype;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  PetscObjectTypeCompare((PetscObject)A,type,&sametype);
  if (sametype) return(0);

  PetscNewLog(B,&aijmkl);
  B->spptr = (void*) aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy          = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
  PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);
  PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);
  PetscOptionsEnd();
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  if (!aijmkl->no_SpMV2) {
    PetscInfo(B,"User requested use of MKL SpMV2 routines, but the MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
# if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
#   if !defined(PETSC_USE_COMPLEX)
  B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
#   else
  B->ops->ptapnumeric             = NULL;
#   endif
# endif
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If the user has specified "no_SpMV2"
   * and the old interfaces are deprecated in our MKL version, we keep the new _SpMV2 routines (set above) but never call
   * mkl_sparse_optimize(), so the old numerical kernels (without the inspector-executor model) are used. For MKL versions in
   * which the older interface has not been deprecated, we simply use the old interface. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);

  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);
  *newmat = B;
  return(0);
}
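
/* A minimal usage sketch for converting an existing, assembled AIJ matrix
 * (error checking omitted; A is assumed to be a valid MATSEQAIJ):
 *
 *   MatConvert(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
 *
 * Equivalently, a command-line-configured code can simply be run with
 * -mat_type seqaijmkl. */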

/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
   symmetric A) operations are currently supported.
   Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.

   Collective

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given, then nz is ignored.

   Level: intermediate

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{

  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQAIJMKL);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
  return(0);
}
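
/* A minimal usage sketch (the sizes and per-row nonzero estimate are illustrative):
 *
 *   Mat A;
 *   MatCreateSeqAIJMKL(PETSC_COMM_SELF,100,100,5,NULL,&A);
 *   (insert values with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd())
 *   MatDestroy(&A);
 */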

PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{

  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
  return(0);
}