Actual source code: baijmkl.c

petsc-3.8.4 2018-03-24
  1: /*
  2:   Defines basic operations for the MATSEQBAIJMKL matrix class.
  3:   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
  4:   wherever possible. If the MKL version in use is older than 11.3, the
  5:   default PETSc code for sparse matrix operations is used instead.
  6: */
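
/*
  A minimal usage sketch (assuming the standard PETSc create/set-type/preallocate
  flow; the sizes and preallocation value below are arbitrary example values)
  showing how an application would select this class, either programmatically or
  via -mat_type seqbaijmkl:

    Mat      A;
    PetscInt bs = 3, m = 300, n = 300;            // m and n must be divisible by bs
    MatCreate(PETSC_COMM_SELF,&A);
    MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);
    MatSetType(A,MATSEQBAIJMKL);                  // or MatSetFromOptions() with -mat_type seqbaijmkl
    MatSeqBAIJSetPreallocation(A,bs,5,NULL);      // 5 nonzero blocks per block row (example value)
    // ... MatSetValuesBlocked(), MatAssemblyBegin()/MatAssemblyEnd(), MatMult(), ...
    MatDestroy(&A);
*/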

  8:  #include <../src/mat/impls/baij/seq/baij.h>
  9:  #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>


 12: /* MKL include files. */
 13: #include <mkl_spblas.h>  /* Sparse BLAS */

 15: #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
 16: typedef struct {
 17:   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
 18:   sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */
 19:   struct matrix_descr descr;
 20: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
 21:   PetscInt *ai1;
 22:   PetscInt *aj1;
 23: #endif
 24: } Mat_SeqBAIJMKL;

 26: PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
 27: extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType);

 29: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
 30: {
 31:   /* This routine is only called to convert a MATSEQBAIJMKL to its base PETSc type, */
 32:   /* so we will ignore 'MatType type'. */
 34:   Mat            B        = *newmat;
 35:   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;

 38:   if (reuse == MAT_INITIAL_MATRIX) {
 39:     MatDuplicate(A,MAT_COPY_VALUES,&B);
 40:   }

 42:   /* Reset the original function pointers. */
 43:   B->ops->duplicate        = MatDuplicate_SeqBAIJ;
 44:   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
 45:   B->ops->destroy          = MatDestroy_SeqBAIJ;
 46:   B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
 47:   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
 48:   B->ops->scale            = MatScale_SeqBAIJ;
 49:   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
 50:   B->ops->axpy             = MatAXPY_SeqBAIJ;
 51: 
 52:   switch (A->rmap->bs) {
 53:     case 1:
 54:       B->ops->mult    = MatMult_SeqBAIJ_1;
 55:       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
 56:       break;
 57:     case 2:
 58:       B->ops->mult    = MatMult_SeqBAIJ_2;
 59:       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
 60:       break;
 61:     case 3:
 62:       B->ops->mult    = MatMult_SeqBAIJ_3;
 63:       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
 64:       break;
 65:     case 4:
 66:       B->ops->mult    = MatMult_SeqBAIJ_4;
 67:       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
 68:       break;
 69:     case 5:
 70:       B->ops->mult    = MatMult_SeqBAIJ_5;
 71:       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
 72:       break;
 73:     case 6:
 74:       B->ops->mult    = MatMult_SeqBAIJ_6;
 75:       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
 76:       break;
 77:     case 7:
 78:       B->ops->mult    = MatMult_SeqBAIJ_7;
 79:       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
 80:       break;
 81:     case 11:
 82:       B->ops->mult    = MatMult_SeqBAIJ_11;
 83:       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
 84:       break;
 85:     case 15:
 86:       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
 87:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
 88:       break;
 89:     default:
 90:       B->ops->mult    = MatMult_SeqBAIJ_N;
 91:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
 92:       break;
 93:   }
 94:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);

 96:   /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this 
 97:    * simply involves destroying the MKL sparse matrix handle and then freeing 
 98:    * the spptr pointer. */
 99:   if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr;

101:   if (baijmkl->sparse_optimized) {
102:     sparse_status_t stat;
103:     stat = mkl_sparse_destroy(baijmkl->bsrA);
104:     if (stat != SPARSE_STATUS_SUCCESS) {
105:       PetscFunctionReturn(PETSC_ERR_LIB);
106:     }
107:   }
108: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
109:    PetscFree(baijmkl->ai1);
110:    PetscFree(baijmkl->aj1);
111: #endif  
112:   PetscFree(B->spptr);

114:   /* Change the type of B to MATSEQBAIJ. */
115:   PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);

117:   *newmat = B;
118:   return(0);
119: }

121: PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
122: {
124:   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
125: 
127:   if (baijmkl) {
128:     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
129:     if (baijmkl->sparse_optimized) {
130:       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
131:       stat = mkl_sparse_destroy(baijmkl->bsrA);
132:       if (stat != SPARSE_STATUS_SUCCESS) {
133:         PetscFunctionReturn(PETSC_ERR_LIB);
134:       }
135:     }
136: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
137:    PetscFree(baijmkl->ai1);
138:    PetscFree(baijmkl->aj1);
139: #endif    
140:     PetscFree(A->spptr);
141:   }

143:   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
144:    * to destroy everything that remains. */
145:   PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);
146:   MatDestroy_SeqBAIJ(A);
147:   return(0);
148: }

150: PETSC_INTERN PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
151: {
153:   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
154:   Mat_SeqBAIJMKL  *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
155:   PetscInt        mbs, nbs, nz, bs, i;
156:   MatScalar       *aa;
157:   PetscInt        *aj,*ai;
158:   sparse_status_t stat;

161:   if (baijmkl->sparse_optimized) {
162:     /* Matrix has been previously assembled and optimized. Must destroy old
163:      * matrix handle before running the optimization step again. */
164: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
165:     PetscFree(baijmkl->ai1);
166:     PetscFree(baijmkl->aj1);
167: #endif     
168:     stat = mkl_sparse_destroy(baijmkl->bsrA);CHKERRMKL(stat);
169:   }
170:   baijmkl->sparse_optimized = PETSC_FALSE;

172:   /* Now perform the SpMV2 setup and matrix optimization. */
173:   baijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
174:   baijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
175:   baijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
176:   mbs = a->mbs;
177:   nbs = a->nbs;
178:   nz  = a->nz;
179:   bs  = A->rmap->bs;
180:   aa  = a->a;
181: 
182:   if ((nz!=0) && !(A->structure_only)) {
183:     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
184:      * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
185: #ifdef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
186:     aj   = a->j;
187:     ai   = a->i;
188:     stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
189: #else
190:     PetscMalloc1(mbs+1,&ai);
191:     PetscMalloc1(nz,&aj);
192:     for (i=0;i<mbs+1;i++)
193:       ai[i] = a->i[i]+1;
194:     for (i=0;i<nz;i++)
195:       aj[i] = a->j[i]+1;
196:     aa   = a->a;
197:     stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
198:     baijmkl->ai1 = ai;
199:     baijmkl->aj1 = aj;
200: #endif
201:     stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);CHKERRMKL(stat);
202:     stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);CHKERRMKL(stat);
203:     stat = mkl_sparse_optimize(baijmkl->bsrA);CHKERRMKL(stat);
204:     baijmkl->sparse_optimized = PETSC_TRUE;
205:   }
206:   return(0);
207: }

209: PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
210: {
212:   Mat_SeqBAIJMKL *baijmkl;
213:   Mat_SeqBAIJMKL *baijmkl_dest;

216:   MatDuplicate_SeqBAIJ(A,op,M);
217:   baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
218:   PetscNewLog((*M),&baijmkl_dest);
219:   (*M)->spptr = (void*)baijmkl_dest;
220:   PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));
221:   baijmkl_dest->sparse_optimized = PETSC_FALSE;
222:   MatSeqBAIJMKL_create_mkl_handle(A);
223:   return(0);
224: }

226: PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
227: {
228:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
229:   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
230:   const PetscScalar  *x;
231:   PetscScalar        *y;
232:   PetscErrorCode     ierr;
233:   sparse_status_t stat = SPARSE_STATUS_SUCCESS;

236:   /* If there are no nonzero entries, zero yy and return immediately. */
237:   if(!a->nz) {
238:     PetscInt i;
239:     PetscInt m=a->mbs*A->rmap->bs;
240:     VecGetArray(yy,&y);
241:     for (i=0; i<m; i++) {
242:       y[i] = 0.0;
243:     }
244:     VecRestoreArray(yy,&y);
245:     return(0);
246:   }

248:   VecGetArrayRead(xx,&x);
249:   VecGetArray(yy,&y);

251:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 
252:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 
253:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
254:   if (!baijmkl->sparse_optimized) {
255:     MatSeqBAIJMKL_create_mkl_handle(A);
256:   }

258:   /* Call MKL SpMV2 executor routine to do the MatMult. */
259:   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
260: 
261:   PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
262:   VecRestoreArrayRead(xx,&x);
263:   VecRestoreArray(yy,&y);
264:   return(0);
265: }

267: PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
268: {
269:   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ*)A->data;
270:   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
271:   const PetscScalar *x;
272:   PetscScalar       *y;
273:   PetscErrorCode    ierr;
274:   sparse_status_t   stat;

277:   /* If there are no nonzero entries, zero yy and return immediately. */
278:   if(!a->nz) {
279:     PetscInt i;
280:     PetscInt n=a->nbs*A->rmap->bs;  /* yy has length (number of block columns) * (block size) */
281:     VecGetArray(yy,&y);
282:     for (i=0; i<n; i++) {
283:       y[i] = 0.0;
284:     }
285:     VecRestoreArray(yy,&y);
286:     return(0);
287:   }

289:   VecGetArrayRead(xx,&x);
290:   VecGetArray(yy,&y);

292:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 
293:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 
294:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
295:   if (!baijmkl->sparse_optimized) {
296:     MatSeqBAIJMKL_create_mkl_handle(A);
297:   }

299:   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
300:   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
301: 
302:   PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
303:   VecRestoreArrayRead(xx,&x);
304:   VecRestoreArray(yy,&y);
305:   return(0);
306: }

308: PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
309: {
310:   Mat_SeqBAIJ        *a       = (Mat_SeqBAIJ*)A->data;
311:   Mat_SeqBAIJMKL     *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
312:   const PetscScalar  *x;
313:   PetscScalar        *y,*z;
314:   PetscErrorCode     ierr;
315:   PetscInt           m=a->mbs*A->rmap->bs;
316:   PetscInt           i;

318:   sparse_status_t stat = SPARSE_STATUS_SUCCESS;

321:   /* If there are no nonzero entries, set zz = yy and return immediately. */
322:   if(!a->nz) {
323:     PetscInt i;
324:     VecGetArrayPair(yy,zz,&y,&z);
325:     for (i=0; i<m; i++) {
326:       z[i] = y[i];
327:     }
328:     VecRestoreArrayPair(yy,zz,&y,&z);
329:     return(0);
330:   }

332:   VecGetArrayRead(xx,&x);
333:   VecGetArrayPair(yy,zz,&y,&z);

335:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 
336:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 
337:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
338:   if (!baijmkl->sparse_optimized) {
339:     MatSeqBAIJMKL_create_mkl_handle(A);
340:   }

342:   /* Call MKL sparse BLAS routine to do the MatMult. */
343:   if (zz == yy) {
344:     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 
345:      * with alpha and beta both set to 1.0. */
346:     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
347:   } else {
348:     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 
349:      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
350:     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
351:     for (i=0; i<m; i++) {
352:       z[i] += y[i];
353:     }
354:   }

356:   PetscLogFlops(2.0*a->nz);
357:   VecRestoreArrayRead(xx,&x);
358:   VecRestoreArrayPair(yy,zz,&y,&z);
359:   return(0);
360: }

362: PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
363: {
364:   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ*)A->data;
365:   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
366:   const PetscScalar *x;
367:   PetscScalar       *y,*z;
368:   PetscErrorCode    ierr;
369:   PetscInt          n=a->nbs*A->rmap->bs;
370:   PetscInt          i;
371:   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
372:   sparse_status_t stat = SPARSE_STATUS_SUCCESS;

375:   /* If there are no nonzero entries, set zz = yy and return immediately. */
376:   if(!a->nz) {
377:     PetscInt i;
378:     VecGetArrayPair(yy,zz,&y,&z);
379:     for (i=0; i<n; i++) {
380:       z[i] = y[i];
381:     }
382:     VecRestoreArrayPair(yy,zz,&y,&z);
383:     return(0);
384:   }

386:   VecGetArrayRead(xx,&x);
387:   VecGetArrayPair(yy,zz,&y,&z);

389:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 
390:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 
391:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
392:   if (!baijmkl->sparse_optimized) {
393:     MatSeqBAIJMKL_create_mkl_handle(A);
394:   }

396:   /* Call MKL sparse BLAS routine to do the MatMult. */
397:   if (zz == yy) {
398:     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 
399:      * with alpha and beta both set to 1.0. */
400:     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
401:   } else {
402:     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 
403:      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
404:     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
405:     for (i=0; i<n; i++) {
406:       z[i] += y[i];
407:     }
408:   }

410:   PetscLogFlops(2.0*a->nz);
411:   VecRestoreArrayRead(xx,&x);
412:   VecRestoreArrayPair(yy,zz,&y,&z);
413:   return(0);
414: }

416: PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)
417: {

421:   MatScale_SeqBAIJ(inA,alpha);
422:   MatSeqBAIJMKL_create_mkl_handle(inA);
423:   return(0);
424: }

426: PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)
427: {

431:   MatDiagonalScale_SeqBAIJ(A,ll,rr);
432:   MatSeqBAIJMKL_create_mkl_handle(A);
433:   return(0);
434: }

436: PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
437: {

441:   MatAXPY_SeqBAIJ(Y,a,X,str);
442:   if (str == SAME_NONZERO_PATTERN) {
443:     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
444:     MatSeqBAIJMKL_create_mkl_handle(Y);
445:   }
446:   return(0);
447: }
448: /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
449:  * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqBAIJMKL()
450:  * routine, but can also be used to convert an assembled SeqBAIJ matrix
451:  * into a SeqBAIJMKL one. */
452: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
453: {
455:   Mat            B = *newmat;
456:   Mat_SeqBAIJMKL *baijmkl;
457:   PetscBool      sametype;

460:   if (reuse == MAT_INITIAL_MATRIX) {
461:     MatDuplicate(A,MAT_COPY_VALUES,&B);
462:   }

464:   PetscObjectTypeCompare((PetscObject)A,type,&sametype);
465:   if (sametype) return(0);

467:   PetscNewLog(B,&baijmkl);
468:   B->spptr = (void*)baijmkl;

470:   /* Set function pointers for methods that we inherit from BAIJ but override. */
472:   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJMKL;
473: 
474:   baijmkl->sparse_optimized = PETSC_FALSE;

476:   PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);
477:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);

479:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);
480:   *newmat = B;
481:   return(0);
482: }
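
/*
  A minimal sketch of performing the conversion above from application code,
  assuming A is an already assembled MATSEQBAIJ matrix; it goes through the
  public MatConvert() interface rather than calling this routine directly:

    MatConvert(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);     // convert A in place
    // or keep the original and create an MKL-enabled copy:
    // Mat B;
    // MatConvert(A,MATSEQBAIJMKL,MAT_INITIAL_MATRIX,&B);
*/
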
483: PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
484: {
485:   PetscErrorCode  ierr;
487:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);
488:   MatAssemblyEnd_SeqBAIJ(A, mode);
489:   MatSeqBAIJMKL_create_mkl_handle(A);
490:   A->ops->destroy          = MatDestroy_SeqBAIJMKL;
491:   A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
492:   A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
493:   A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
494:   A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
495:   A->ops->scale            = MatScale_SeqBAIJMKL;
496:   A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
497:   A->ops->axpy             = MatAXPY_SeqBAIJMKL;
498:   A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
499:   return(0);
500: }
501: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
502: /*@C
503:    MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL.
504:    This type inherits from BAIJ and is largely identical, but uses sparse BLAS 
505:    routines from Intel MKL whenever possible.
506:    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 
507:    operations are currently supported.
508:    If the installed version of MKL supports the "SpMV2" sparse 
509:    inspector-executor routines, then those are used by default. 
510:    Default PETSc kernels are used otherwise. 

512:    Input Parameters:
513: +  comm - MPI communicator, set to PETSC_COMM_SELF
514: .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
515:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
516: .  m - number of rows
517: .  n - number of columns
518: .  nz - number of nonzero blocks  per block row (same for all rows)
519: -  nnz - array containing the number of nonzero blocks in the various block rows
520:          (possibly different for each block row) or NULL


523:    Output Parameter:
524: .  A - the matrix

526:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
527:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
528:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

530:    Options Database Keys:
531: .   -mat_no_unroll - uses code that does not unroll the loops in the
532:                      block calculations (much slower)
533: .    -mat_block_size - size of the blocks to use

535:    Level: intermediate

537:    Notes:
538:    The number of rows and columns must be divisible by blocksize.

540:    If the nnz parameter is given then the nz parameter is ignored

542:    A nonzero block is any block that has 1 or more nonzeros in it

544:    The block AIJ format is fully compatible with standard Fortran 77
545:    storage.  That is, the stored row and column indices can begin at
546:    either one (as in Fortran) or zero.  See the users' manual for details.

548:    Specify the preallocated storage with either nz or nnz (not both).
549:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
550:    allocation.  See Users-Manual: ch_mat for details.

553: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()

555: @*/
556: PetscErrorCode  MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
557: {

561:   MatCreate(comm,A);
562:   MatSetSizes(*A,m,n,m,n);
563: #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE    
564:   MatSetType(*A,MATSEQBAIJMKL);
565:   MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
566: #else
567:   PetscInfo(*A,"MKL BAIJ routines are not supported by the installed MKL version; using the default PETSc routines instead. Please use MKL version 11.3 or higher.\n");
568:   MatSetType(*A,MATSEQBAIJ);
569:   MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);
570: #endif
571:   return(0);
572: }
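
/*
  A minimal calling sketch for MatCreateSeqBAIJMKL() above, assuming 2x2 blocks
  and at most 5 nonzero blocks per block row (sizes are arbitrary example values):

    Mat      A;
    PetscInt bs = 2, m = 100, n = 100;       // m and n must be divisible by bs
    MatCreateSeqBAIJMKL(PETSC_COMM_SELF,bs,m,n,5,NULL,&A);
    // ... fill with MatSetValuesBlocked(), then MatAssemblyBegin()/MatAssemblyEnd()
    MatDestroy(&A);
*/
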
573: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
574: {

578:   MatSetType(A,MATSEQBAIJ);
579: #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE  
580:   MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);
581: #else
582:   PetscInfo(A,"MKL BAIJ routines are not supported by the installed MKL version; using the default PETSc routines instead. Please use MKL version 11.3 or higher.\n");
583: #endif
584:   return(0);
585: }