1: /*
2: Defines basic operations for the MATSEQBAIJMKL matrix class.
3: Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
4: wherever possible. If used MKL verion is older than 11.3 PETSc default
5: code for sparse matrix operations is used.
6: */
8: #include <../src/mat/impls/baij/seq/baij.h> 9: #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> 12: /* MKL include files. */
13: #include <mkl_spblas.h> /* Sparse BLAS */
15: #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
16: typedef struct {
17: PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18: sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */
19: struct matrix_descr descr;
20: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
21: PetscInt *ai1;
22: PetscInt *aj1;
23: #endif
24: } Mat_SeqBAIJMKL;
26: PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
27: extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType);
29: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 30: {
31: /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
32: /* so we will ignore 'MatType type'. */
34: Mat B = *newmat;
35: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
38: if (reuse == MAT_INITIAL_MATRIX) {
39: MatDuplicate(A,MAT_COPY_VALUES,&B);
40: }
42: /* Reset the original function pointers. */
43: B->ops->duplicate = MatDuplicate_SeqBAIJ;
44: B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ;
45: B->ops->destroy = MatDestroy_SeqBAIJ;
46: B->ops->multtranspose = MatMultTranspose_SeqBAIJ;
47: B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
48: B->ops->scale = MatScale_SeqBAIJ;
49: B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ;
50: B->ops->axpy = MatAXPY_SeqBAIJ;
51: 52: switch (A->rmap->bs) {
53: case 1:
54: B->ops->mult = MatMult_SeqBAIJ_1;
55: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
56: break;
57: case 2:
58: B->ops->mult = MatMult_SeqBAIJ_2;
59: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
60: break;
61: case 3:
62: B->ops->mult = MatMult_SeqBAIJ_3;
63: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
64: break;
65: case 4:
66: B->ops->mult = MatMult_SeqBAIJ_4;
67: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
68: break;
69: case 5:
70: B->ops->mult = MatMult_SeqBAIJ_5;
71: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
72: break;
73: case 6:
74: B->ops->mult = MatMult_SeqBAIJ_6;
75: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
76: break;
77: case 7:
78: B->ops->mult = MatMult_SeqBAIJ_7;
79: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
80: break;
81: case 11:
82: B->ops->mult = MatMult_SeqBAIJ_11;
83: B->ops->multadd = MatMultAdd_SeqBAIJ_11;
84: break;
85: case 15:
86: B->ops->mult = MatMult_SeqBAIJ_15_ver1;
87: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
88: break;
89: default: 90: B->ops->mult = MatMult_SeqBAIJ_N;
91: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
92: break;
93: }
94: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);
96: /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
97: * simply involves destroying the MKL sparse matrix handle and then freeing
98: * the spptr pointer. */
99: if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr;
101: if (baijmkl->sparse_optimized) {
102: sparse_status_t stat;
103: stat = mkl_sparse_destroy(baijmkl->bsrA);
104: if (stat != SPARSE_STATUS_SUCCESS) {
105: PetscFunctionReturn(PETSC_ERR_LIB);
106: }
107: }
108: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
109: PetscFree(baijmkl->ai1);
110: PetscFree(baijmkl->aj1);
111: #endif
112: PetscFree(B->spptr);
114: /* Change the type of B to MATSEQBAIJ. */
115: PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);
117: *newmat = B;
118: return(0);
119: }
121: PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)122: {
124: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
125: 127: if (baijmkl) {
128: /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
129: if (baijmkl->sparse_optimized) {
130: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
131: stat = mkl_sparse_destroy(baijmkl->bsrA);
132: if (stat != SPARSE_STATUS_SUCCESS) {
133: PetscFunctionReturn(PETSC_ERR_LIB);
134: }
135: }
136: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
137: PetscFree(baijmkl->ai1);
138: PetscFree(baijmkl->aj1);
139: #endif
140: PetscFree(A->spptr);
141: }
143: /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
144: * to destroy everything that remains. */
145: PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);
146: MatDestroy_SeqBAIJ(A);
147: return(0);
148: }
150: PETSC_INTERN PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)151: {
153: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
154: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
155: PetscInt mbs, nbs, nz, bs, i;
156: MatScalar *aa;
157: PetscInt *aj,*ai;
158: sparse_status_t stat;
161: if (baijmkl->sparse_optimized) {
162: /* Matrix has been previously assembled and optimized. Must destroy old
163: * matrix handle before running the optimization step again. */
164: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
165: PetscFree(baijmkl->ai1);
166: PetscFree(baijmkl->aj1);
167: #endif
168: stat = mkl_sparse_destroy(baijmkl->bsrA);CHKERRMKL(stat);
169: }
170: baijmkl->sparse_optimized = PETSC_FALSE;
172: /* Now perform the SpMV2 setup and matrix optimization. */
173: baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
174: baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
175: baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
176: mbs = a->mbs;
177: nbs = a->nbs;
178: nz = a->nz;
179: bs = A->rmap->bs;
180: aa = a->a;
181: 182: if ((nz!=0) & !(A->structure_only)) {
183: /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
184: * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
185: #ifdef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
186: aj = a->j;
187: ai = a->i;
188: stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
189: #else
190: PetscMalloc1(mbs+1,&ai);
191: PetscMalloc1(nz,&aj);
192: for (i=0;i<mbs+1;i++)
193: ai[i] = a->i[i]+1;
194: for (i=0;i<nz;i++)
195: aj[i] = a->j[i]+1;
196: aa = a->a;
197: stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
198: baijmkl->ai1 = ai;
199: baijmkl->aj1 = aj;
200: #endif
201: stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);CHKERRMKL(stat);
202: stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);CHKERRMKL(stat);
203: stat = mkl_sparse_optimize(baijmkl->bsrA);CHKERRMKL(stat);
204: baijmkl->sparse_optimized = PETSC_TRUE;
205: }
206: return(0);
207: }
209: PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)210: {
212: Mat_SeqBAIJMKL *baijmkl;
213: Mat_SeqBAIJMKL *baijmkl_dest;
216: MatDuplicate_SeqBAIJ(A,op,M);
217: baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
218: PetscNewLog((*M),&baijmkl_dest);
219: (*M)->spptr = (void*)baijmkl_dest;
220: PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));
221: baijmkl_dest->sparse_optimized = PETSC_FALSE;
222: MatSeqBAIJMKL_create_mkl_handle(A);
223: return(0);
224: }
226: PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)227: {
228: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
229: Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
230: const PetscScalar *x;
231: PetscScalar *y;
232: PetscErrorCode ierr;
233: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
236: /* If there are no nonzero entries, zero yy and return immediately. */
237: if(!a->nz) {
238: PetscInt i;
239: PetscInt m=a->mbs*A->rmap->bs;
240: VecGetArray(yy,&y);
241: for (i=0; i<m; i++) {
242: y[i] = 0.0;
243: }
244: VecRestoreArray(yy,&y);
245: return(0);
246: }
248: VecGetArrayRead(xx,&x);
249: VecGetArray(yy,&y);
251: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
252: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
253: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
254: if (!baijmkl->sparse_optimized) {
255: MatSeqBAIJMKL_create_mkl_handle(A);
256: }
258: /* Call MKL SpMV2 executor routine to do the MatMult. */
259: stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
260: 261: PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
262: VecRestoreArrayRead(xx,&x);
263: VecRestoreArray(yy,&y);
264: return(0);
265: }
267: PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)268: {
269: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
270: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
271: const PetscScalar *x;
272: PetscScalar *y;
273: PetscErrorCode ierr;
274: sparse_status_t stat;
277: /* If there are no nonzero entries, zero yy and return immediately. */
278: if(!a->nz) {
279: PetscInt i;
280: PetscInt n=a->nbs;
281: VecGetArray(yy,&y);
282: for (i=0; i<n; i++) {
283: y[i] = 0.0;
284: }
285: VecRestoreArray(yy,&y);
286: return(0);
287: }
289: VecGetArrayRead(xx,&x);
290: VecGetArray(yy,&y);
292: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
293: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
294: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
295: if (!baijmkl->sparse_optimized) {
296: MatSeqBAIJMKL_create_mkl_handle(A);
297: }
299: /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
300: stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
301: 302: PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
303: VecRestoreArrayRead(xx,&x);
304: VecRestoreArray(yy,&y);
305: return(0);
306: }
308: PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)309: {
310: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
311: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
312: const PetscScalar *x;
313: PetscScalar *y,*z;
314: PetscErrorCode ierr;
315: PetscInt m=a->mbs*A->rmap->bs;
316: PetscInt i;
318: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
321: /* If there are no nonzero entries, set zz = yy and return immediately. */
322: if(!a->nz) {
323: PetscInt i;
324: VecGetArrayPair(yy,zz,&y,&z);
325: for (i=0; i<m; i++) {
326: z[i] = y[i];
327: }
328: VecRestoreArrayPair(yy,zz,&y,&z);
329: return(0);
330: }
332: VecGetArrayRead(xx,&x);
333: VecGetArrayPair(yy,zz,&y,&z);
335: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
336: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
337: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
338: if (!baijmkl->sparse_optimized) {
339: MatSeqBAIJMKL_create_mkl_handle(A);
340: }
342: /* Call MKL sparse BLAS routine to do the MatMult. */
343: if (zz == yy) {
344: /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
345: * with alpha and beta both set to 1.0. */
346: stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
347: } else {
348: /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
349: * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
350: stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
351: for (i=0; i<m; i++) {
352: z[i] += y[i];
353: }
354: }
356: PetscLogFlops(2.0*a->nz);
357: VecRestoreArrayRead(xx,&x);
358: VecRestoreArrayPair(yy,zz,&y,&z);
359: return(0);
360: }
362: PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)363: {
364: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
365: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
366: const PetscScalar *x;
367: PetscScalar *y,*z;
368: PetscErrorCode ierr;
369: PetscInt n=a->nbs*A->rmap->bs;
370: PetscInt i;
371: /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
372: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
375: /* If there are no nonzero entries, set zz = yy and return immediately. */
376: if(!a->nz) {
377: PetscInt i;
378: VecGetArrayPair(yy,zz,&y,&z);
379: for (i=0; i<n; i++) {
380: z[i] = y[i];
381: }
382: VecRestoreArrayPair(yy,zz,&y,&z);
383: return(0);
384: }
386: VecGetArrayRead(xx,&x);
387: VecGetArrayPair(yy,zz,&y,&z);
389: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
390: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
391: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
392: if (!baijmkl->sparse_optimized) {
393: MatSeqBAIJMKL_create_mkl_handle(A);
394: }
396: /* Call MKL sparse BLAS routine to do the MatMult. */
397: if (zz == yy) {
398: /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
399: * with alpha and beta both set to 1.0. */
400: stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
401: } else {
402: /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
403: * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
404: stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
405: for (i=0; i<n; i++) {
406: z[i] += y[i];
407: }
408: }
410: PetscLogFlops(2.0*a->nz);
411: VecRestoreArrayRead(xx,&x);
412: VecRestoreArrayPair(yy,zz,&y,&z);
413: return(0);
414: }
416: PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)417: {
421: MatScale_SeqBAIJ(inA,alpha);
422: MatSeqBAIJMKL_create_mkl_handle(inA);
423: return(0);
424: }
426: PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)427: {
431: MatDiagonalScale_SeqBAIJ(A,ll,rr);
432: MatSeqBAIJMKL_create_mkl_handle(A);
433: return(0);
434: }
436: PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)437: {
441: MatAXPY_SeqBAIJ(Y,a,X,str);
442: if (str == SAME_NONZERO_PATTERN) {
443: /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
444: MatSeqBAIJMKL_create_mkl_handle(Y);
445: }
446: return(0);
447: }
448: /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
449: * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ()
450: * routine, but can also be used to convert an assembled SeqBAIJ matrix
451: * into a SeqBAIJMKL one. */
452: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)453: {
455: Mat B = *newmat;
456: Mat_SeqBAIJMKL *baijmkl;
457: PetscBool sametype;
460: if (reuse == MAT_INITIAL_MATRIX) {
461: MatDuplicate(A,MAT_COPY_VALUES,&B);
462: }
464: PetscObjectTypeCompare((PetscObject)A,type,&sametype);
465: if (sametype) return(0);
467: PetscNewLog(B,&baijmkl);
468: B->spptr = (void*)baijmkl;
470: /* Set function pointers for methods that we inherit from BAIJ but override.
471: * We also parse some command line options below, since those determine some of the methods we point to. */
472: B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL;
473: 474: baijmkl->sparse_optimized = PETSC_FALSE;
476: PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);
477: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);
479: PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);
480: *newmat = B;
481: return(0);
482: }
483: PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)484: {
485: PetscErrorCode ierr;
487: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
488: MatAssemblyEnd_SeqBAIJ(A, mode);
489: MatSeqBAIJMKL_create_mkl_handle(A);
490: A->ops->destroy = MatDestroy_SeqBAIJMKL;
491: A->ops->mult = MatMult_SeqBAIJMKL_SpMV2;
492: A->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2;
493: A->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2;
494: A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
495: A->ops->scale = MatScale_SeqBAIJMKL;
496: A->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL;
497: A->ops->axpy = MatAXPY_SeqBAIJMKL;
498: A->ops->duplicate = MatDuplicate_SeqBAIJMKL;
499: return(0);
500: }
501: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
502: /*@C
503: MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL.
504: This type inherits from BAIJ and is largely identical, but uses sparse BLAS
505: routines from Intel MKL whenever possible.
506: MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd507: operations are currently supported.
508: If the installed version of MKL supports the "SpMV2" sparse
509: inspector-executor routines, then those are used by default.
510: Default PETSc kernels are used otherwise.
512: Input Parameters:
513: + comm - MPI communicator, set to PETSC_COMM_SELF514: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
515: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
516: . m - number of rows
517: . n - number of columns
518: . nz - number of nonzero blocks per block row (same for all rows)
519: - nnz - array containing the number of nonzero blocks in the various block rows
520: (possibly different for each block row) or NULL
523: Output Parameter:
524: . A - the matrix
526: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
527: MatXXXXSetPreallocation() paradgm instead of this routine directly.
528: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
530: Options Database Keys:
531: . -mat_no_unroll - uses code that does not unroll the loops in the
532: block calculations (much slower)
533: . -mat_block_size - size of the blocks to use
535: Level: intermediate
537: Notes:
538: The number of rows and columns must be divisible by blocksize.
540: If the nnz parameter is given then the nz parameter is ignored
542: A nonzero block is any block that as 1 or more nonzeros in it
544: The block AIJ format is fully compatible with standard Fortran 77
545: storage. That is, the stored row and column indices can begin at
546: either one (as in Fortran) or zero. See the users' manual for details.
548: Specify the preallocated storage with either nz or nnz (not both).
549: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
550: allocation. See Users-Manual: ch_mat for details.
551: matrices.
553: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
555: @*/
556: PetscErrorCodeMatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)557: {
561: MatCreate(comm,A);
562: MatSetSizes(*A,m,n,m,n);
563: #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
564: MatSetType(*A,MATSEQBAIJMKL);
565: MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
566: #else
567: PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
568: MatSetType(*A,MATSEQBAIJ);
569: MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);
570: #endif
571: return(0);
572: }
573: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)574: {
578: MatSetType(A,MATSEQBAIJ);
579: #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
580: MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);
581: #else
582: PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
583: #endif
584: return(0);
585: }