Actual source code: baijmkl.c

/*
  Defines basic operations for the MATSEQBAIJMKL matrix class.
  Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible. If the MKL version in use is older than 11.3, the
  default PETSc code for sparse matrix operations is used instead.
*/

#include <../src/mat/impls/baij/seq/baij.h>
#include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>
#include <mkl_spblas.h>

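/* Probe (once) whether the installed MKL accepts zero-based indexing when
 * creating a BSR matrix handle; some MKL versions support only one-based
 * indexing for BSR. The result is cached in a static variable. */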
static PetscBool PetscSeqBAIJSupportsZeroBased(void)
{
  static PetscBool set = PETSC_FALSE,value;
  int              n=1,ia[1],ja[1];
  float            a[1];
  sparse_status_t  status;
  sparse_matrix_t  A;

  if (!set) {
    status = mkl_sparse_s_create_bsr(&A,SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,n,n,n,ia,ia,ja,a);
    value  = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE;
    (void)   mkl_sparse_destroy(A);
    set    = PETSC_TRUE;
  }
  return value;
}

typedef struct {
  PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  sparse_matrix_t     bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
  PetscInt            *ai1; /* One-based copy of the block row pointers, used when MKL lacks zero-based BSR support. */
  PetscInt            *aj1; /* One-based copy of the block column indices, used when MKL lacks zero-based BSR support. */
} Mat_SeqBAIJMKL;

static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType);

PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat            B        = *newmat;
  Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  /* Reset the original function pointers. */
  B->ops->duplicate        = MatDuplicate_SeqBAIJ;
  B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
  B->ops->destroy          = MatDestroy_SeqBAIJ;
  B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
  B->ops->scale            = MatScale_SeqBAIJ;
  B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
  B->ops->axpy             = MatAXPY_SeqBAIJ;

  switch (A->rmap->bs) {
    case 1:
      B->ops->mult    = MatMult_SeqBAIJ_1;
      B->ops->multadd = MatMultAdd_SeqBAIJ_1;
      break;
    case 2:
      B->ops->mult    = MatMult_SeqBAIJ_2;
      B->ops->multadd = MatMultAdd_SeqBAIJ_2;
      break;
    case 3:
      B->ops->mult    = MatMult_SeqBAIJ_3;
      B->ops->multadd = MatMultAdd_SeqBAIJ_3;
      break;
    case 4:
      B->ops->mult    = MatMult_SeqBAIJ_4;
      B->ops->multadd = MatMultAdd_SeqBAIJ_4;
      break;
    case 5:
      B->ops->mult    = MatMult_SeqBAIJ_5;
      B->ops->multadd = MatMultAdd_SeqBAIJ_5;
      break;
    case 6:
      B->ops->mult    = MatMult_SeqBAIJ_6;
      B->ops->multadd = MatMultAdd_SeqBAIJ_6;
      break;
    case 7:
      B->ops->mult    = MatMult_SeqBAIJ_7;
      B->ops->multadd = MatMultAdd_SeqBAIJ_7;
      break;
    case 11:
      B->ops->mult    = MatMult_SeqBAIJ_11;
      B->ops->multadd = MatMultAdd_SeqBAIJ_11;
      break;
    case 15:
      B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
      B->ops->multadd = MatMultAdd_SeqBAIJ_N;
      break;
    default:
      B->ops->mult    = MatMult_SeqBAIJ_N;
      B->ops->multadd = MatMultAdd_SeqBAIJ_N;
      break;
  }
  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);

  /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr;

  if (baijmkl->sparse_optimized) {
    sparse_status_t stat;
    stat = mkl_sparse_destroy(baijmkl->bsrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
  PetscFree2(baijmkl->ai1,baijmkl->aj1);
  PetscFree(B->spptr);

  /* Change the type of B to MATSEQBAIJ. */
  PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);

  *newmat = B;
  return(0);
}

static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
{
  Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr;

  if (baijmkl) {
    /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
    if (baijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(baijmkl->bsrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
    }
    PetscFree2(baijmkl->ai1,baijmkl->aj1);
    PetscFree(A->spptr);
  }

  /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);
  MatDestroy_SeqBAIJ(A);
  return(0);
}

static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
{
  Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
  Mat_SeqBAIJMKL  *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
  PetscInt        mbs, nbs, nz, bs;
  MatScalar       *aa;
  PetscInt        *aj,*ai;
  sparse_status_t stat;
  PetscErrorCode  ierr;
  PetscInt        i;

  if (baijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    PetscFree2(baijmkl->ai1,baijmkl->aj1);
    stat = mkl_sparse_destroy(baijmkl->bsrA);CHKERRMKL(stat);
  }
  baijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  baijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
  baijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
  baijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
  mbs = a->mbs;
  nbs = a->nbs;
  nz  = a->nz;
  bs  = A->rmap->bs;
  aa  = a->a;

  if ((nz!=0) && !(A->structure_only)) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    if (PetscSeqBAIJSupportsZeroBased()) {
      aj   = a->j;
      ai   = a->i;
      stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
    } else {
      /* MKL does not support zero-based indexing for BSR, so build one-based copies of the index arrays. */
      PetscMalloc2(mbs+1,&ai,nz,&aj);
      for (i=0;i<mbs+1;i++) ai[i] = a->i[i]+1;
      for (i=0;i<nz;i++) aj[i] = a->j[i]+1;
      aa   = a->a;
      stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
      baijmkl->ai1 = ai;
      baijmkl->aj1 = aj;
    }
    stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);CHKERRMKL(stat);
    stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);CHKERRMKL(stat);
    stat = mkl_sparse_optimize(baijmkl->bsrA);CHKERRMKL(stat);
    baijmkl->sparse_optimized = PETSC_TRUE;
  }
  return(0);
}

static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqBAIJMKL *baijmkl;
  Mat_SeqBAIJMKL *baijmkl_dest;

  MatDuplicate_SeqBAIJ(A,op,M);
  baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
  PetscNewLog((*M),&baijmkl_dest);
  (*M)->spptr = (void*)baijmkl_dest;
  PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));
  baijmkl_dest->sparse_optimized = PETSC_FALSE;
  MatSeqBAIJMKL_create_mkl_handle(A);
  return(0);
}

static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
  Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
  const PetscScalar  *x;
  PetscScalar        *y;
  PetscErrorCode     ierr;
  sparse_status_t    stat = SPARSE_STATUS_SUCCESS;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    VecSet(yy,0.0);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  if (!baijmkl->sparse_optimized) {
    MatSeqBAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);

  PetscLogFlops(2.0*a->bs2*a->nz - a->nonzerorowcnt*A->rmap->bs);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}

static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ*)A->data;
  Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    VecSet(yy,0.0);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  if (!baijmkl->sparse_optimized) {
    MatSeqBAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);

  PetscLogFlops(2.0*a->bs2*a->nz - a->nonzerorowcnt*A->rmap->bs);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}

static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqBAIJ        *a       = (Mat_SeqBAIJ*)A->data;
  Mat_SeqBAIJMKL     *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
  const PetscScalar  *x;
  PetscScalar        *y,*z;
  PetscErrorCode     ierr;
  PetscInt           m=a->mbs*A->rmap->bs;
  PetscInt           i;
  /* Variables not in MatMultAdd_SeqBAIJ. */
  sparse_status_t    stat = SPARSE_STATUS_SUCCESS;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecCopy(yy,zz);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  if (!baijmkl->sparse_optimized) {
    MatSeqBAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->bs2*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}

static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ*)A->data;
  Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          n=a->nbs*A->rmap->bs;
  PetscInt          i;
  /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecCopy(yy,zz);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  if (!baijmkl->sparse_optimized) {
    MatSeqBAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A^T*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->bs2*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}

static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)
{
  MatScale_SeqBAIJ(inA,alpha);
  MatSeqBAIJMKL_create_mkl_handle(inA);
  return(0);
}

static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)
{
  MatDiagonalScale_SeqBAIJ(A,ll,rr);
  MatSeqBAIJMKL_create_mkl_handle(A);
  return(0);
}

static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  MatAXPY_SeqBAIJ(Y,a,X,str);
  if (str == SAME_NONZERO_PATTERN) {
    /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
    MatSeqBAIJMKL_create_mkl_handle(Y);
  }
  return(0);
}
/* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
 * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqBAIJMKL()
 * routine, but can also be used to convert an assembled SeqBAIJ matrix
 * into a SeqBAIJMKL one; see the usage sketch after this routine. */
PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B = *newmat;
  Mat_SeqBAIJMKL *baijmkl;
  PetscBool      sametype;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  PetscObjectTypeCompare((PetscObject)A,type,&sametype);
  if (sametype) return(0);

  PetscNewLog(B,&baijmkl);
  B->spptr = (void*)baijmkl;

  /* Set function pointers for methods that we inherit from BAIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJMKL;

  baijmkl->sparse_optimized = PETSC_FALSE;

  PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);
  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);

  PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);
  *newmat = B;
  return(0);
}
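
/* A usage sketch (illustrative only, not code from this file): converting an
 * existing, assembled MATSEQBAIJ matrix to MATSEQBAIJMKL from application code.
 * Error checking is omitted for brevity.
 *
 *   Mat A;                 // previously created and assembled MATSEQBAIJ matrix
 *   MatConvert(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);
 */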

static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
{
  PetscErrorCode  ierr;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);
  MatAssemblyEnd_SeqBAIJ(A, mode);
  MatSeqBAIJMKL_create_mkl_handle(A);
  A->ops->destroy          = MatDestroy_SeqBAIJMKL;
  A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
  A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
  A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
  A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
  A->ops->scale            = MatScale_SeqBAIJMKL;
  A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
  A->ops->axpy             = MatAXPY_SeqBAIJMKL;
  A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
  return(0);
}

/*@C
   MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL.
   This type inherits from BAIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
   operations are currently supported.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   Default PETSc kernels are used otherwise.

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of rows
.  n - number of columns
.  nz - number of nonzero blocks per block row (same for all rows)
-  nnz - array containing the number of nonzero blocks in the various block rows
         (possibly different for each block row) or NULL

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
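
   Example (a minimal sketch of the recommended paradigm; error checking is omitted,
   and bs, m, n, and nz are placeholder values):
.vb
   Mat A;
   MatCreate(PETSC_COMM_SELF,&A);
   MatSetSizes(A,m,n,m,n);
   MatSetType(A,MATSEQBAIJMKL);
   MatSeqBAIJSetPreallocation(A,bs,nz,NULL);
.ve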

   Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
-   -mat_block_size - size of the blocks to use

   Level: intermediate

   Notes:
   The number of rows and columns must be divisible by blocksize.

   If the nnz parameter is given then the nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The block AIJ format is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users' manual for details.

   Specify the preallocated storage with either nz or nnz (not both).
   Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
   allocation.  See Users-Manual: ch_mat for details.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()

@*/
PetscErrorCode  MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQBAIJMKL);
  MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
  return(0);
}

PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
{
  MatSetType(A,MATSEQBAIJ);
  MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);
  return(0);
}
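
/* A hedged usage note (illustrative only, not code from this file): since
 * MatCreate_SeqBAIJMKL() is the registered constructor for MATSEQBAIJMKL, the type
 * can also be selected at runtime, e.g. with -mat_type seqbaijmkl when
 * MatSetFromOptions() is used. A minimal sketch, with bs, m, n, and nz as
 * placeholder values and error checking omitted:
 *
 *   Mat A;
 *   MatCreate(PETSC_COMM_SELF,&A);
 *   MatSetSizes(A,m,n,m,n);
 *   MatSetFromOptions(A);                       // pass -mat_type seqbaijmkl on the command line
 *   MatSeqBAIJSetPreallocation(A,bs,nz,NULL);   // preallocation applies to the underlying BAIJ storage
 */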