Actual source code: baijmkl.c
1: /*
2: Defines basic operations for the MATSEQBAIJMKL matrix class.
3: Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
4: wherever possible. If used MKL verion is older than 11.3 PETSc default
5: code for sparse matrix operations is used.
6: */
8: #include <../src/mat/impls/baij/seq/baij.h>
9: #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>
10: #include <mkl_spblas.h>
12: static PetscBool PetscSeqBAIJSupportsZeroBased(void)
13: {
14: static PetscBool set = PETSC_FALSE,value;
15: int n=1,ia[1],ja[1];
16: float a[1];
17: sparse_status_t status;
18: sparse_matrix_t A;
20: if (!set) {
21: status = mkl_sparse_s_create_bsr(&A,SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,n,n,n,ia,ia,ja,a);
22: value = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE;
23: (void) mkl_sparse_destroy(A);
24: set = PETSC_TRUE;
25: }
26: return value;
27: }
29: typedef struct {
30: PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
31: sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */
32: struct matrix_descr descr;
33: PetscInt *ai1;
34: PetscInt *aj1;
35: } Mat_SeqBAIJMKL;
37: static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
38: extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType);
40: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
41: {
42: /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
43: /* so we will ignore 'MatType type'. */
45: Mat B = *newmat;
46: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
49: if (reuse == MAT_INITIAL_MATRIX) {
50: MatDuplicate(A,MAT_COPY_VALUES,&B);
51: }
53: /* Reset the original function pointers. */
54: B->ops->duplicate = MatDuplicate_SeqBAIJ;
55: B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ;
56: B->ops->destroy = MatDestroy_SeqBAIJ;
57: B->ops->multtranspose = MatMultTranspose_SeqBAIJ;
58: B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
59: B->ops->scale = MatScale_SeqBAIJ;
60: B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ;
61: B->ops->axpy = MatAXPY_SeqBAIJ;
63: switch (A->rmap->bs) {
64: case 1:
65: B->ops->mult = MatMult_SeqBAIJ_1;
66: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
67: break;
68: case 2:
69: B->ops->mult = MatMult_SeqBAIJ_2;
70: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
71: break;
72: case 3:
73: B->ops->mult = MatMult_SeqBAIJ_3;
74: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
75: break;
76: case 4:
77: B->ops->mult = MatMult_SeqBAIJ_4;
78: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
79: break;
80: case 5:
81: B->ops->mult = MatMult_SeqBAIJ_5;
82: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
83: break;
84: case 6:
85: B->ops->mult = MatMult_SeqBAIJ_6;
86: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
87: break;
88: case 7:
89: B->ops->mult = MatMult_SeqBAIJ_7;
90: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
91: break;
92: case 11:
93: B->ops->mult = MatMult_SeqBAIJ_11;
94: B->ops->multadd = MatMultAdd_SeqBAIJ_11;
95: break;
96: case 15:
97: B->ops->mult = MatMult_SeqBAIJ_15_ver1;
98: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
99: break;
100: default:
101: B->ops->mult = MatMult_SeqBAIJ_N;
102: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
103: break;
104: }
105: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);
107: /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
108: * simply involves destroying the MKL sparse matrix handle and then freeing
109: * the spptr pointer. */
110: if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr;
112: if (baijmkl->sparse_optimized) {
113: sparse_status_t stat;
114: stat = mkl_sparse_destroy(baijmkl->bsrA);
115: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
116: }
117: PetscFree2(baijmkl->ai1,baijmkl->aj1);
118: PetscFree(B->spptr);
120: /* Change the type of B to MATSEQBAIJ. */
121: PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);
123: *newmat = B;
124: return(0);
125: }
127: static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
128: {
130: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
133: if (baijmkl) {
134: /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
135: if (baijmkl->sparse_optimized) {
136: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
137: stat = mkl_sparse_destroy(baijmkl->bsrA);
138: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
139: }
140: PetscFree2(baijmkl->ai1,baijmkl->aj1);
141: PetscFree(A->spptr);
142: }
144: /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
145: * to destroy everything that remains. */
146: PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);
147: MatDestroy_SeqBAIJ(A);
148: return(0);
149: }
151: static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
152: {
153: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
154: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
155: PetscInt mbs, nbs, nz, bs;
156: MatScalar *aa;
157: PetscInt *aj,*ai;
158: sparse_status_t stat;
159: PetscErrorCode ierr;
160: PetscInt i;
163: if (baijmkl->sparse_optimized) {
164: /* Matrix has been previously assembled and optimized. Must destroy old
165: * matrix handle before running the optimization step again. */
166: PetscFree2(baijmkl->ai1,baijmkl->aj1);
167: stat = mkl_sparse_destroy(baijmkl->bsrA);CHKERRMKL(stat);
168: }
169: baijmkl->sparse_optimized = PETSC_FALSE;
171: /* Now perform the SpMV2 setup and matrix optimization. */
172: baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
173: baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
174: baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
175: mbs = a->mbs;
176: nbs = a->nbs;
177: nz = a->nz;
178: bs = A->rmap->bs;
179: aa = a->a;
181: if ((nz!=0) & !(A->structure_only)) {
182: /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
183: * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
184: if (PetscSeqBAIJSupportsZeroBased()) {
185: aj = a->j;
186: ai = a->i;
187: stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
188: } else {
189: PetscMalloc2(mbs+1,&ai,nz,&aj);
190: for (i=0;i<mbs+1;i++) ai[i] = a->i[i]+1;
191: for (i=0;i<nz;i++) aj[i] = a->j[i]+1;
192: aa = a->a;
193: stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
194: baijmkl->ai1 = ai;
195: baijmkl->aj1 = aj;
196: }
197: stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);CHKERRMKL(stat);
198: stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);CHKERRMKL(stat);
199: stat = mkl_sparse_optimize(baijmkl->bsrA);CHKERRMKL(stat);
200: baijmkl->sparse_optimized = PETSC_TRUE;
201: }
202: return(0);
203: }
205: static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
206: {
208: Mat_SeqBAIJMKL *baijmkl;
209: Mat_SeqBAIJMKL *baijmkl_dest;
212: MatDuplicate_SeqBAIJ(A,op,M);
213: baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
214: PetscNewLog((*M),&baijmkl_dest);
215: (*M)->spptr = (void*)baijmkl_dest;
216: PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));
217: baijmkl_dest->sparse_optimized = PETSC_FALSE;
218: MatSeqBAIJMKL_create_mkl_handle(A);
219: return(0);
220: }
222: static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
223: {
224: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
225: Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
226: const PetscScalar *x;
227: PetscScalar *y;
228: PetscErrorCode ierr;
229: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
232: /* If there are no nonzero entries, zero yy and return immediately. */
233: if (!a->nz) {
234: VecSet(yy,0.0);
235: return(0);
236: }
238: VecGetArrayRead(xx,&x);
239: VecGetArray(yy,&y);
241: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
242: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
243: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
244: if (!baijmkl->sparse_optimized) {
245: MatSeqBAIJMKL_create_mkl_handle(A);
246: }
248: /* Call MKL SpMV2 executor routine to do the MatMult. */
249: stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
251: PetscLogFlops(2.0*a->bs2*a->nz - a->nonzerorowcnt*A->rmap->bs);
252: VecRestoreArrayRead(xx,&x);
253: VecRestoreArray(yy,&y);
254: return(0);
255: }
257: static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
258: {
259: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
260: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
261: const PetscScalar *x;
262: PetscScalar *y;
263: PetscErrorCode ierr;
264: sparse_status_t stat;
267: /* If there are no nonzero entries, zero yy and return immediately. */
268: if (!a->nz) {
269: VecSet(yy,0.0);
270: return(0);
271: }
273: VecGetArrayRead(xx,&x);
274: VecGetArray(yy,&y);
276: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
277: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
278: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
279: if (!baijmkl->sparse_optimized) {
280: MatSeqBAIJMKL_create_mkl_handle(A);
281: }
283: /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
284: stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
286: PetscLogFlops(2.0*a->bs2*a->nz - a->nonzerorowcnt*A->rmap->bs);
287: VecRestoreArrayRead(xx,&x);
288: VecRestoreArray(yy,&y);
289: return(0);
290: }
292: static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
293: {
294: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
295: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
296: const PetscScalar *x;
297: PetscScalar *y,*z;
298: PetscErrorCode ierr;
299: PetscInt m=a->mbs*A->rmap->bs;
300: PetscInt i;
302: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
305: /* If there are no nonzero entries, set zz = yy and return immediately. */
306: if (!a->nz) {
307: VecCopy(yy,zz);
308: return(0);
309: }
311: VecGetArrayRead(xx,&x);
312: VecGetArrayPair(yy,zz,&y,&z);
314: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
315: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
316: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
317: if (!baijmkl->sparse_optimized) {
318: MatSeqBAIJMKL_create_mkl_handle(A);
319: }
321: /* Call MKL sparse BLAS routine to do the MatMult. */
322: if (zz == yy) {
323: /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
324: * with alpha and beta both set to 1.0. */
325: stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
326: } else {
327: /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
328: * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
329: stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
330: for (i=0; i<m; i++) {
331: z[i] += y[i];
332: }
333: }
335: PetscLogFlops(2.0*a->bs2*a->nz);
336: VecRestoreArrayRead(xx,&x);
337: VecRestoreArrayPair(yy,zz,&y,&z);
338: return(0);
339: }
341: static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
342: {
343: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
344: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
345: const PetscScalar *x;
346: PetscScalar *y,*z;
347: PetscErrorCode ierr;
348: PetscInt n=a->nbs*A->rmap->bs;
349: PetscInt i;
350: /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
351: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
354: /* If there are no nonzero entries, set zz = yy and return immediately. */
355: if (!a->nz) {
356: VecCopy(yy,zz);
357: return(0);
358: }
360: VecGetArrayRead(xx,&x);
361: VecGetArrayPair(yy,zz,&y,&z);
363: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
364: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
365: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
366: if (!baijmkl->sparse_optimized) {
367: MatSeqBAIJMKL_create_mkl_handle(A);
368: }
370: /* Call MKL sparse BLAS routine to do the MatMult. */
371: if (zz == yy) {
372: /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
373: * with alpha and beta both set to 1.0. */
374: stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
375: } else {
376: /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
377: * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
378: stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
379: for (i=0; i<n; i++) {
380: z[i] += y[i];
381: }
382: }
384: PetscLogFlops(2.0*a->bs2*a->nz);
385: VecRestoreArrayRead(xx,&x);
386: VecRestoreArrayPair(yy,zz,&y,&z);
387: return(0);
388: }
390: static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)
391: {
395: MatScale_SeqBAIJ(inA,alpha);
396: MatSeqBAIJMKL_create_mkl_handle(inA);
397: return(0);
398: }
400: static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)
401: {
405: MatDiagonalScale_SeqBAIJ(A,ll,rr);
406: MatSeqBAIJMKL_create_mkl_handle(A);
407: return(0);
408: }
410: static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
411: {
415: MatAXPY_SeqBAIJ(Y,a,X,str);
416: if (str == SAME_NONZERO_PATTERN) {
417: /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
418: MatSeqBAIJMKL_create_mkl_handle(Y);
419: }
420: return(0);
421: }
422: /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
423: * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ()
424: * routine, but can also be used to convert an assembled SeqBAIJ matrix
425: * into a SeqBAIJMKL one. */
426: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
427: {
429: Mat B = *newmat;
430: Mat_SeqBAIJMKL *baijmkl;
431: PetscBool sametype;
434: if (reuse == MAT_INITIAL_MATRIX) {
435: MatDuplicate(A,MAT_COPY_VALUES,&B);
436: }
438: PetscObjectTypeCompare((PetscObject)A,type,&sametype);
439: if (sametype) return(0);
441: PetscNewLog(B,&baijmkl);
442: B->spptr = (void*)baijmkl;
444: /* Set function pointers for methods that we inherit from BAIJ but override.
445: * We also parse some command line options below, since those determine some of the methods we point to. */
446: B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL;
448: baijmkl->sparse_optimized = PETSC_FALSE;
450: PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);
451: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);
453: PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);
454: *newmat = B;
455: return(0);
456: }
458: static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
459: {
460: PetscErrorCode ierr;
463: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
464: MatAssemblyEnd_SeqBAIJ(A, mode);
465: MatSeqBAIJMKL_create_mkl_handle(A);
466: A->ops->destroy = MatDestroy_SeqBAIJMKL;
467: A->ops->mult = MatMult_SeqBAIJMKL_SpMV2;
468: A->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2;
469: A->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2;
470: A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
471: A->ops->scale = MatScale_SeqBAIJMKL;
472: A->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL;
473: A->ops->axpy = MatAXPY_SeqBAIJMKL;
474: A->ops->duplicate = MatDuplicate_SeqBAIJMKL;
475: return(0);
476: }
478: /*@C
479: MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL.
480: This type inherits from BAIJ and is largely identical, but uses sparse BLAS
481: routines from Intel MKL whenever possible.
482: MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
483: operations are currently supported.
484: If the installed version of MKL supports the "SpMV2" sparse
485: inspector-executor routines, then those are used by default.
486: Default PETSc kernels are used otherwise.
488: Input Parameters:
489: + comm - MPI communicator, set to PETSC_COMM_SELF
490: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
491: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
492: . m - number of rows
493: . n - number of columns
494: . nz - number of nonzero blocks per block row (same for all rows)
495: - nnz - array containing the number of nonzero blocks in the various block rows
496: (possibly different for each block row) or NULL
499: Output Parameter:
500: . A - the matrix
502: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
503: MatXXXXSetPreallocation() paradigm instead of this routine directly.
504: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
506: Options Database Keys:
507: + -mat_no_unroll - uses code that does not unroll the loops in the
508: block calculations (much slower)
509: - -mat_block_size - size of the blocks to use
511: Level: intermediate
513: Notes:
514: The number of rows and columns must be divisible by blocksize.
516: If the nnz parameter is given then the nz parameter is ignored
518: A nonzero block is any block that as 1 or more nonzeros in it
520: The block AIJ format is fully compatible with standard Fortran 77
521: storage. That is, the stored row and column indices can begin at
522: either one (as in Fortran) or zero. See the users' manual for details.
524: Specify the preallocated storage with either nz or nnz (not both).
525: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
526: allocation. See Users-Manual: ch_mat for details.
527: matrices.
529: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
531: @*/
532: PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
533: {
537: MatCreate(comm,A);
538: MatSetSizes(*A,m,n,m,n);
539: MatSetType(*A,MATSEQBAIJMKL);
540: MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
541: return(0);
542: }
544: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
545: {
549: MatSetType(A,MATSEQBAIJ);
550: MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);
551: return(0);
552: }