Actual source code: aijmkl.c

/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/
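
/*
  Typical usage, as a minimal sketch (error checking omitted; m and n are
  assumed dimensions): either create the matrix directly with
  MatCreateSeqAIJMKL(), or select the type at runtime with "-mat_type aijmkl",
  or programmatically:

    MatCreate(PETSC_COMM_SELF,&A);
    MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);
    MatSetType(A,MATSEQAIJMKL);

  after which assembly and operations such as MatMult() proceed exactly as
  for MATSEQAIJ.
*/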

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
#include <mkl_spblas.h>

typedef struct {
  PetscBool           no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState    state;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t     csrA;  /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;

extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat            B       = *newmat;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
#endif

  if (reuse == MAT_INITIAL_MATRIX) MatDuplicate(A,MAT_COPY_VALUES,&B);

  /* Reset the original function pointers. */
  B->ops->duplicate               = MatDuplicate_SeqAIJ;
  B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy                 = MatDestroy_SeqAIJ;
  B->ops->mult                    = MatMult_SeqAIJ;
  B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
  B->ops->multadd                 = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

  if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscFree(B->spptr);

  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

  *newmat = B;
  return 0;
}

PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscFree(A->spptr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
  /* Note that I don't call MatSetType().  I believe this is because that
   * is only to be called when *building* a matrix.  I could be wrong, but
   * that is how things work for the SuperLU matrix class. */
  MatDestroy_SeqAIJ(A);
  return 0;
}

/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  return 0;
#else
  Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt         m,n;
  MatScalar        *aa;
  PetscInt         *aj,*ai;

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interfaces, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
   * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) return 0;
#endif

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m = A->rmap->n;
  n = A->cmap->n;
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
  if (a->nz && aa && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    PetscStackCallStandard(mkl_sparse_x_create_csr,&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (!aijmkl->no_SpMV2) {
      PetscStackCallStandard(mkl_sparse_optimize,aijmkl->csrA);
    }
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  } else {
    aijmkl->csrA = PETSC_NULL;
  }

  return 0;
#endif
}
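
/* Callers of the SpMV2 routines below lazily (re)create the optimized handle by
 * checking the object state first; a sketch of the pattern (mirroring what
 * MatMult_SeqAIJMKL_SpMV2() does):
 *
 *   PetscObjectState state;
 *   PetscObjectStateGet((PetscObject)A,&state);
 *   if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);
 */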

#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
/* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
{
  sparse_index_base_t indexing;
  PetscInt            m,n;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat_SeqAIJMKL       *aijmkl;

  if (csrA) {
    /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
    PetscStackCallStandard(mkl_sparse_x_export_csr,csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
  } else {
    aj = ai = PETSC_NULL;
    aa = PETSC_NULL;
  }

  MatSetType(A,MATSEQAIJ);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  if (csrA) {
    MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);
  } else {
    /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
    MatSetUp(A);
    MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  }

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);

  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl->csrA = csrA;

  /* The below code duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  if (csrA) {
    PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
  }
  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  sparse_index_base_t indexing;

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) return 0;

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);
  }

  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
   * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
   * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
  aijmkl->sparse_optimized = PETSC_FALSE;
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
{
  PetscInt            i,j,k;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  sparse_index_base_t indexing;

  PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) {
    PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");
    return 0;
  }

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);

  k = 0;
  for (i=0; i<nrows; i++) {
    PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ": ",i);
    nz = ai[i+1] - ai[i];
    for (j=0; j<nz; j++) {
      if (aa) {
        PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", %g)  ",aj[k],(double)PetscRealPart(aa[k]));
      } else {
        PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", NULL)",aj[k]);
      }
      k++;
    }
    PetscViewerASCIIPrintf(viewer,"\n");
  }
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  Mat_SeqAIJMKL  *aijmkl_dest;

  MatDuplicate_SeqAIJ(A,op,M);
  aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
  PetscArraycpy(aijmkl_dest,aijmkl,1);
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  return 0;
}

PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL   *aijmkl;

  if (mode == MAT_FLUSH_ASSEMBLY) return 0;

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
  MatAssemblyEnd_SeqAIJ(A, mode);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  return 0;
}

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return 0;
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    VecGetArray(yy,&y);
    PetscArrayzero(y,A->rmap->n);
    VecRestoreArray(yy,&y);
    return 0;
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return 0;
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    VecGetArray(yy,&y);
    PetscArrayzero(y,A->cmap->n);
    VecRestoreArray(yy,&y);
    return 0;
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the contents of yy to the result;
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return 0;
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscInt          m = A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  PetscObjectState  state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    PetscArraycpy(z,y,m);
    VecRestoreArrayPair(yy,zz,&y,&z);
    return 0;
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    for (i=0; i<m; i++) z[i] += y[i];
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char              transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A^T*x + beta*y here. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the contents of yy to the result;
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return 0;
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscInt          n = A->cmap->n;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  PetscObjectState  state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    PetscArraycpy(z,y,n);
    VecRestoreArrayPair(yy,zz,&y,&z);
    return 0;
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A^T*x + beta*y here,
     * with alpha and beta both set to 1.0. */
    PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    for (i=0; i<n; i++) z[i] += y[i];
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

/* -------------------------- MatProduct code -------------------------- */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
{
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
  sparse_matrix_t     csrA,csrB,csrC;
  PetscInt            nrows,ncols;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
   * not handle sparse matrices with zero rows or columns. */
  if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
  else nrows = A->cmap->N;
  if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
  else ncols = B->rmap->N;

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) MatSeqAIJMKL_create_mkl_handle(B);
  csrA = a->csrA;
  csrB = b->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
  } else {
    csrC = PETSC_NULL;
  }

  MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);

  return 0;
}

PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
{
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
  sparse_matrix_t     csrA,csrB,csrC;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) MatSeqAIJMKL_create_mkl_handle(B);
  csrA = a->csrA;
  csrB = b->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
  } else {
    csrC = PETSC_NULL;
  }

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return 0;
}

PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return 0;
}

PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return 0;
}

PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return 0;
}

PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return 0;
}

PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);
  return 0;
}

PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);
  return 0;
}

static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product    *product = C->product;
  Mat            A = product->A,B = product->B;

  MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);
  return 0;
}

static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product    *product = C->product;
  Mat            A = product->A,B = product->B;
  PetscReal      fill = product->fill;

  MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);
  C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
  return 0;
}

PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
{
  Mat                 Ct;
  Vec                 zeros;
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
  sparse_matrix_t     csrA,csrP,csrC;
  PetscBool           set,flag;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;

  MatIsSymmetricKnown(A,&set,&flag);

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) MatSeqAIJMKL_create_mkl_handle(P);
  csrA = a->csrA;
  csrP = p->csrA;
  csrC = c->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);

  /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
   * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
   * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
   * We have to fill in the missing portion, which we currently do below by forming the transpose and performing a MatAXPY
   * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
   * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
   * the full matrix. */
  MatSeqAIJMKL_update_from_mkl_handle(C);
  MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);
  MatCreateVecs(C,&zeros,NULL);
  VecSetFromOptions(zeros);
  VecZeroEntries(zeros);
  MatDiagonalSet(Ct,zeros,INSERT_VALUES);
  MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);
  /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
  MatProductCreateWithMat(A,P,PETSC_NULL,C);
  MatProductSetType(C,MATPRODUCT_PtAP);
  MatSeqAIJMKL_create_mkl_handle(C);
  VecDestroy(&zeros);
  MatDestroy(&Ct);
  return 0;
}

PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
{
  Mat_Product         *product = C->product;
  Mat                 A = product->A,P = product->B;
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
  sparse_matrix_t     csrA,csrP,csrC;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) MatSeqAIJMKL_create_mkl_handle(P);
  csrA = a->csrA;
  csrP = p->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  if (csrP && csrA) {
    PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
  } else {
    csrC = PETSC_NULL;
  }

  /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
   * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
   * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
   * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
   * is guaranteed. */
  MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);

  C->ops->productnumeric = MatProductNumeric_PtAP;
  return 0;
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
{
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  return 0;
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
{
  C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
  return 0;
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
{
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  return 0;
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
{
  Mat_Product    *product = C->product;
  Mat            A = product->A;
  PetscBool      set,flag;

  if (PetscDefined(USE_COMPLEX)) {
    /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
     * We do this in several other locations in this file. This works for the time being, but these
     * routines are considered unsafe and may be removed from the MatProduct code in the future.
     * TODO: Add proper MATSEQAIJMKL implementations */
    C->ops->productsymbolic = NULL;
  } else {
    /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
    MatIsSymmetricKnown(A,&set,&flag);
    if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
    else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
    /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
     * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
  }
  return 0;
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
{
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  return 0;
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
{
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  return 0;
}

PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
{
  Mat_Product    *product = C->product;

  switch (product->type) {
  case MATPRODUCT_AB:
    MatProductSetFromOptions_SeqAIJMKL_AB(C);
    break;
  case MATPRODUCT_AtB:
    MatProductSetFromOptions_SeqAIJMKL_AtB(C);
    break;
  case MATPRODUCT_ABt:
    MatProductSetFromOptions_SeqAIJMKL_ABt(C);
    break;
  case MATPRODUCT_PtAP:
    MatProductSetFromOptions_SeqAIJMKL_PtAP(C);
    break;
  case MATPRODUCT_RARt:
    MatProductSetFromOptions_SeqAIJMKL_RARt(C);
    break;
  case MATPRODUCT_ABC:
    MatProductSetFromOptions_SeqAIJMKL_ABC(C);
    break;
  default:
    break;
  }
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
/* ------------------------ End MatProduct code ------------------------ */

/* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B = *newmat;
  Mat_SeqAIJMKL  *aijmkl;
  PetscBool      set;
  PetscBool      sametype;

  if (reuse == MAT_INITIAL_MATRIX) MatDuplicate(A,MAT_COPY_VALUES,&B);

  PetscObjectTypeCompare((PetscObject)A,type,&sametype);
  if (sametype) return 0;

  PetscNewLog(B,&aijmkl);
  B->spptr = (void*) aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy          = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
  PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);
  PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);
  PetscOptionsEnd();
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  if (!aijmkl->no_SpMV2) {
    PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
# if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
#   if !defined(PETSC_USE_COMPLEX)
  B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
#   else
  B->ops->ptapnumeric             = NULL;
#   endif
# endif
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
   * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
   * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
   * versions in which the older interface has not been deprecated, we use the old interface. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);

  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);
  *newmat = B;
  return 0;
}

/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
   symmetric A) operations are currently supported.
   Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.

   Collective

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given then nz is ignored
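
   Example Usage:
   A minimal sketch of creating and assembling a matrix of this type (error
   checking omitted; n and the preallocation value 3 are illustrative):
.vb
   Mat A;
   MatCreateSeqAIJMKL(PETSC_COMM_SELF,n,n,3,NULL,&A);
   // ... insert entries with MatSetValues(), then assemble ...
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve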

   Level: intermediate

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQAIJMKL);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
  return 0;
}

PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
  return 0;
}