Actual source code: baijmkl.c

petsc-3.9.4 2018-09-11
Report Typos and Errors
  1: /*
  2:   Defines basic operations for the MATSEQBAIJMKL matrix class.
  3:   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL) 
  4:   wherever possible. If used MKL verion is older than 11.3 PETSc default 
  5:   code for sparse matrix operations is used.
  6: */

  8:  #include <../src/mat/impls/baij/seq/baij.h>
  9:  #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>


 12: /* MKL include files. */
 13: #include <mkl_spblas.h>  /* Sparse BLAS */

 15: #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
 16: typedef struct {
 17:   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
 18:   sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */
 19:   struct matrix_descr descr;
 20: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
 21:   PetscInt *ai1;
 22:   PetscInt *aj1;
 23: #endif
 24: } Mat_SeqBAIJMKL;

 26: PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
 27: extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType);

 29: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
 30: {
 31:   /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
 32:   /* so we will ignore 'MatType type'. */
 34:   Mat            B        = *newmat;
 35:   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;

 38:   if (reuse == MAT_INITIAL_MATRIX) {
 39:     MatDuplicate(A,MAT_COPY_VALUES,&B);
 40:   }

 42:   /* Reset the original function pointers. */
 43:   B->ops->duplicate        = MatDuplicate_SeqBAIJ;
 44:   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
 45:   B->ops->destroy          = MatDestroy_SeqBAIJ;
 46:   B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
 47:   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
 48:   B->ops->scale            = MatScale_SeqBAIJ;
 49:   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
 50:   B->ops->axpy             = MatAXPY_SeqBAIJ;
 51: 
 52:   switch (A->rmap->bs) {
 53:     case 1:
 54:       B->ops->mult    = MatMult_SeqBAIJ_1;
 55:       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
 56:       break;
 57:     case 2:
 58:       B->ops->mult    = MatMult_SeqBAIJ_2;
 59:       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
 60:       break;
 61:     case 3:
 62:       B->ops->mult    = MatMult_SeqBAIJ_3;
 63:       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
 64:       break;
 65:     case 4:
 66:       B->ops->mult    = MatMult_SeqBAIJ_4;
 67:       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
 68:       break;
 69:     case 5:
 70:       B->ops->mult    = MatMult_SeqBAIJ_5;
 71:       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
 72:       break;
 73:     case 6:
 74:       B->ops->mult    = MatMult_SeqBAIJ_6;
 75:       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
 76:       break;
 77:     case 7:
 78:       B->ops->mult    = MatMult_SeqBAIJ_7;
 79:       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
 80:       break;
 81:     case 11:
 82:       B->ops->mult    = MatMult_SeqBAIJ_11;
 83:       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
 84:       break;
 85:     case 15:
 86:       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
 87:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
 88:       break;
 89:     default:
 90:       B->ops->mult    = MatMult_SeqBAIJ_N;
 91:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
 92:       break;
 93:   }
 94:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);

 96:   /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this 
 97:    * simply involves destroying the MKL sparse matrix handle and then freeing 
 98:    * the spptr pointer. */
 99:   if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr;

101:   if (baijmkl->sparse_optimized) {
102:     sparse_status_t stat;
103:     stat = mkl_sparse_destroy(baijmkl->bsrA);
104:     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
105:   }
106: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
107:    PetscFree(baijmkl->ai1);
108:    PetscFree(baijmkl->aj1);
109: #endif  
110:   PetscFree(B->spptr);

112:   /* Change the type of B to MATSEQBAIJ. */
113:   PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);

115:   *newmat = B;
116:   return(0);
117: }

119: PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
120: {
122:   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
123: 
125:   if (baijmkl) {
126:     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
127:     if (baijmkl->sparse_optimized) {
128:       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
129:       stat = mkl_sparse_destroy(baijmkl->bsrA);
130:       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
131:     }
132: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
133:    PetscFree(baijmkl->ai1);
134:    PetscFree(baijmkl->aj1);
135: #endif    
136:     PetscFree(A->spptr);
137:   }

139:   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
140:    * to destroy everything that remains. */
141:   PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);
142:   MatDestroy_SeqBAIJ(A);
143:   return(0);
144: }

146: PETSC_INTERN PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
147: {
148:   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
149:   Mat_SeqBAIJMKL  *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
150:   PetscInt        mbs, nbs, nz, bs;
151:   MatScalar       *aa;
152:   PetscInt        *aj,*ai;
153:   sparse_status_t stat;
154: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
155:   PetscErrorCode  ierr;
156:   PetscInt        i;
157: #endif

160:   if (baijmkl->sparse_optimized) {
161:     /* Matrix has been previously assembled and optimized. Must destroy old
162:      * matrix handle before running the optimization step again. */
163: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
164:     PetscFree(baijmkl->ai1);
165:     PetscFree(baijmkl->aj1);
166: #endif     
167:     stat = mkl_sparse_destroy(baijmkl->bsrA);CHKERRMKL(stat);
168:   }
169:   baijmkl->sparse_optimized = PETSC_FALSE;

171:   /* Now perform the SpMV2 setup and matrix optimization. */
172:   baijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
173:   baijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
174:   baijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
175:   mbs = a->mbs;
176:   nbs = a->nbs;
177:   nz  = a->nz;
178:   bs  = A->rmap->bs;
179:   aa  = a->a;
180: 
181:   if ((nz!=0) & !(A->structure_only)) {
182:     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
183:      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
184: #ifdef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
185:     aj   = a->j;
186:     ai   = a->i;
187:     stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
188: #else
189:     PetscMalloc1(mbs+1,&ai);
190:     PetscMalloc1(nz,&aj);
191:     for (i=0;i<mbs+1;i++)
192:       ai[i] = a->i[i]+1;
193:     for (i=0;i<nz;i++)
194:       aj[i] = a->j[i]+1;
195:     aa   = a->a;
196:     stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
197:     baijmkl->ai1 = ai;
198:     baijmkl->aj1 = aj;
199: #endif
200:     stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);CHKERRMKL(stat);
201:     stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);CHKERRMKL(stat);
202:     stat = mkl_sparse_optimize(baijmkl->bsrA);CHKERRMKL(stat);
203:     baijmkl->sparse_optimized = PETSC_TRUE;
204:   }
205:   return(0);
206: }

208: PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
209: {
211:   Mat_SeqBAIJMKL *baijmkl;
212:   Mat_SeqBAIJMKL *baijmkl_dest;

215:   MatDuplicate_SeqBAIJ(A,op,M);
216:   baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
217:   PetscNewLog((*M),&baijmkl_dest);
218:   (*M)->spptr = (void*)baijmkl_dest;
219:   PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));
220:   baijmkl_dest->sparse_optimized = PETSC_FALSE;
221:   MatSeqBAIJMKL_create_mkl_handle(A);
222:   return(0);
223: }

225: PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
226: {
227:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
228:   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
229:   const PetscScalar  *x;
230:   PetscScalar        *y;
231:   PetscErrorCode     ierr;
232:   sparse_status_t    stat = SPARSE_STATUS_SUCCESS;

235:   /* If there are no nonzero entries, zero yy and return immediately. */
236:   if(!a->nz) {
237:     PetscInt i;
238:     PetscInt m=a->mbs*A->rmap->bs;
239:     VecGetArray(yy,&y);
240:     for (i=0; i<m; i++) {
241:       y[i] = 0.0;
242:     }
243:     VecRestoreArray(yy,&y);
244:     return(0);
245:   }

247:   VecGetArrayRead(xx,&x);
248:   VecGetArray(yy,&y);

250:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 
251:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 
252:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
253:   if (!baijmkl->sparse_optimized) {
254:     MatSeqBAIJMKL_create_mkl_handle(A);
255:   }

257:   /* Call MKL SpMV2 executor routine to do the MatMult. */
258:   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
259: 
260:   PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
261:   VecRestoreArrayRead(xx,&x);
262:   VecRestoreArray(yy,&y);
263:   return(0);
264: }

266: PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
267: {
268:   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ*)A->data;
269:   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
270:   const PetscScalar *x;
271:   PetscScalar       *y;
272:   PetscErrorCode    ierr;
273:   sparse_status_t   stat;

276:   /* If there are no nonzero entries, zero yy and return immediately. */
277:   if(!a->nz) {
278:     PetscInt i;
279:     PetscInt n=a->nbs;
280:     VecGetArray(yy,&y);
281:     for (i=0; i<n; i++) {
282:       y[i] = 0.0;
283:     }
284:     VecRestoreArray(yy,&y);
285:     return(0);
286:   }

288:   VecGetArrayRead(xx,&x);
289:   VecGetArray(yy,&y);

291:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 
292:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 
293:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
294:   if (!baijmkl->sparse_optimized) {
295:     MatSeqBAIJMKL_create_mkl_handle(A);
296:   }

298:   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
299:   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
300: 
301:   PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
302:   VecRestoreArrayRead(xx,&x);
303:   VecRestoreArray(yy,&y);
304:   return(0);
305: }

307: PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
308: {
309:   Mat_SeqBAIJ        *a       = (Mat_SeqBAIJ*)A->data;
310:   Mat_SeqBAIJMKL     *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
311:   const PetscScalar  *x;
312:   PetscScalar        *y,*z;
313:   PetscErrorCode     ierr;
314:   PetscInt           m=a->mbs*A->rmap->bs;
315:   PetscInt           i;

317:   sparse_status_t stat = SPARSE_STATUS_SUCCESS;

320:   /* If there are no nonzero entries, set zz = yy and return immediately. */
321:   if(!a->nz) {
322:     PetscInt i;
323:     VecGetArrayPair(yy,zz,&y,&z);
324:     for (i=0; i<m; i++) {
325:       z[i] = y[i];
326:     }
327:     VecRestoreArrayPair(yy,zz,&y,&z);
328:     return(0);
329:   }

331:   VecGetArrayRead(xx,&x);
332:   VecGetArrayPair(yy,zz,&y,&z);

334:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 
335:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 
336:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
337:   if (!baijmkl->sparse_optimized) {
338:     MatSeqBAIJMKL_create_mkl_handle(A);
339:   }

341:   /* Call MKL sparse BLAS routine to do the MatMult. */
342:   if (zz == yy) {
343:     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 
344:      * with alpha and beta both set to 1.0. */
345:     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
346:   } else {
347:     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 
348:      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
349:     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
350:     for (i=0; i<m; i++) {
351:       z[i] += y[i];
352:     }
353:   }

355:   PetscLogFlops(2.0*a->nz);
356:   VecRestoreArrayRead(xx,&x);
357:   VecRestoreArrayPair(yy,zz,&y,&z);
358:   return(0);
359: }

361: PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
362: {
363:   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ*)A->data;
364:   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
365:   const PetscScalar *x;
366:   PetscScalar       *y,*z;
367:   PetscErrorCode    ierr;
368:   PetscInt          n=a->nbs*A->rmap->bs;
369:   PetscInt          i;
370:   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
371:   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;

374:   /* If there are no nonzero entries, set zz = yy and return immediately. */
375:   if(!a->nz) {
376:     PetscInt i;
377:     VecGetArrayPair(yy,zz,&y,&z);
378:     for (i=0; i<n; i++) {
379:       z[i] = y[i];
380:     }
381:     VecRestoreArrayPair(yy,zz,&y,&z);
382:     return(0);
383:   }

385:   VecGetArrayRead(xx,&x);
386:   VecGetArrayPair(yy,zz,&y,&z);

388:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 
389:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 
390:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
391:   if (!baijmkl->sparse_optimized) {
392:     MatSeqBAIJMKL_create_mkl_handle(A);
393:   }

395:   /* Call MKL sparse BLAS routine to do the MatMult. */
396:   if (zz == yy) {
397:     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 
398:      * with alpha and beta both set to 1.0. */
399:     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
400:   } else {
401:     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 
402:      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
403:     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
404:     for (i=0; i<n; i++) {
405:       z[i] += y[i];
406:     }
407:   }

409:   PetscLogFlops(2.0*a->nz);
410:   VecRestoreArrayRead(xx,&x);
411:   VecRestoreArrayPair(yy,zz,&y,&z);
412:   return(0);
413: }

415: PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)
416: {

420:   MatScale_SeqBAIJ(inA,alpha);
421:   MatSeqBAIJMKL_create_mkl_handle(inA);
422:   return(0);
423: }

425: PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)
426: {

430:   MatDiagonalScale_SeqBAIJ(A,ll,rr);
431:   MatSeqBAIJMKL_create_mkl_handle(A);
432:   return(0);
433: }

435: PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
436: {

440:   MatAXPY_SeqBAIJ(Y,a,X,str);
441:   if (str == SAME_NONZERO_PATTERN) {
442:     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
443:     MatSeqBAIJMKL_create_mkl_handle(Y);
444:   }
445:   return(0);
446: }
447: /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
448:  * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLBAIJ()
449:  * routine, but can also be used to convert an assembled SeqBAIJ matrix
450:  * into a SeqBAIJMKL one. */
451: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
452: {
454:   Mat            B = *newmat;
455:   Mat_SeqBAIJMKL *baijmkl;
456:   PetscBool      sametype;

459:   if (reuse == MAT_INITIAL_MATRIX) {
460:     MatDuplicate(A,MAT_COPY_VALUES,&B);
461:   }

463:   PetscObjectTypeCompare((PetscObject)A,type,&sametype);
464:   if (sametype) return(0);

466:   PetscNewLog(B,&baijmkl);
467:   B->spptr = (void*)baijmkl;

469:   /* Set function pointers for methods that we inherit from BAIJ but override. 
470:    * We also parse some command line options below, since those determine some of the methods we point to. */
471:   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJMKL;
472: 
473:   baijmkl->sparse_optimized = PETSC_FALSE;

475:   PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);
476:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);

478:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);
479:   *newmat = B;
480:   return(0);
481: }

483: PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
484: {
485:   PetscErrorCode  ierr;

488:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);
489:   MatAssemblyEnd_SeqBAIJ(A, mode);
490:   MatSeqBAIJMKL_create_mkl_handle(A);
491:   A->ops->destroy          = MatDestroy_SeqBAIJMKL;
492:   A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
493:   A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
494:   A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
495:   A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
496:   A->ops->scale            = MatScale_SeqBAIJMKL;
497:   A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
498:   A->ops->axpy             = MatAXPY_SeqBAIJMKL;
499:   A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
500:   return(0);
501: }
502: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

504: /*@C
505:    MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL.
506:    This type inherits from BAIJ and is largely identical, but uses sparse BLAS 
507:    routines from Intel MKL whenever possible.
508:    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 
509:    operations are currently supported.
510:    If the installed version of MKL supports the "SpMV2" sparse 
511:    inspector-executor routines, then those are used by default. 
512:    Default PETSc kernels are used otherwise. 

514:    Input Parameters:
515: +  comm - MPI communicator, set to PETSC_COMM_SELF
516: .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
517:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
518: .  m - number of rows
519: .  n - number of columns
520: .  nz - number of nonzero blocks  per block row (same for all rows)
521: -  nnz - array containing the number of nonzero blocks in the various block rows
522:          (possibly different for each block row) or NULL


525:    Output Parameter:
526: .  A - the matrix

528:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
529:    MatXXXXSetPreallocation() paradgm instead of this routine directly.
530:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

532:    Options Database Keys:
533: .   -mat_no_unroll - uses code that does not unroll the loops in the
534:                      block calculations (much slower)
535: .    -mat_block_size - size of the blocks to use

537:    Level: intermediate

539:    Notes:
540:    The number of rows and columns must be divisible by blocksize.

542:    If the nnz parameter is given then the nz parameter is ignored

544:    A nonzero block is any block that as 1 or more nonzeros in it

546:    The block AIJ format is fully compatible with standard Fortran 77
547:    storage.  That is, the stored row and column indices can begin at
548:    either one (as in Fortran) or zero.  See the users' manual for details.

550:    Specify the preallocated storage with either nz or nnz (not both).
551:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
552:    allocation.  See Users-Manual: ch_mat for details.
553:    matrices.

555: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()

557: @*/
558: PetscErrorCode  MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
559: {

563:   MatCreate(comm,A);
564:   MatSetSizes(*A,m,n,m,n);
565: #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE    
566:   MatSetType(*A,MATSEQBAIJMKL);
567:   MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
568: #else
569:   PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
570:   MatSetType(*A,MATSEQBAIJ);
571:   MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);
572: #endif
573:   return(0);
574: }

576: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
577: {

581:   MatSetType(A,MATSEQBAIJ);
582: #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE  
583:   MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);
584: #else
585:   PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
586: #endif
587:   return(0);
588: }