/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/
#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>

/* MKL include files. */
#include <mkl_spblas.h>  /* Sparse BLAS */
typedef struct {
  PetscBool           no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  sparse_matrix_t     csrA;             /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;
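/* Example (illustrative): the per-matrix MKL state above hangs off the generic
 * 'spptr' slot of the Mat object, so the routines below recover it with a cast:
 *
 *   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
 *   if (!aijmkl->sparse_optimized) MatSeqAIJMKL_create_mkl_handle(A);
 */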
extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type,
   * so we will ignore 'MatType type'. */
  Mat B = *newmat;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
#endif

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  /* Reset the original function pointers. */
  B->ops->duplicate        = MatDuplicate_SeqAIJ;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy          = MatDestroy_SeqAIJ;
  B->ops->mult             = MatMult_SeqAIJ;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
  B->ops->multadd          = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
  B->ops->scale            = MatScale_SeqAIJ;
  B->ops->diagonalscale    = MatDiagonalScale_SeqAIJ;
  B->ops->diagonalset      = MatDiagonalSet_SeqAIJ;
  B->ops->axpy             = MatAXPY_SeqAIJ;

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);

  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

  if (aijmkl->sparse_optimized) {
    sparse_status_t stat;
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) {
      PetscFunctionReturn(PETSC_ERR_LIB);
    }
  }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscFree(B->spptr);

  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

  *newmat = B;
  return(0);
}
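/* Example (illustrative): application code normally reaches the routine above
 * through the generic MatConvert() interface rather than calling it directly:
 *
 *   Mat B;                                          // a MATSEQAIJMKL matrix
 *   MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);  // B is now plain MATSEQAIJ
 */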
PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
   * spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) {
        PetscFunctionReturn(PETSC_ERR_LIB);
      }
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscFree(A->spptr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
  /* Note that we do not call MatSetType() here. As far as we know, that routine
   * is only meant to be called when *building* a matrix; this matches how the
   * SuperLU matrix class handles destruction. */
  MatDestroy_SeqAIJ(A);
  return(0);
}
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  return(0);
#else
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL   *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt        m,n;
  MatScalar       *aa;
  PetscInt        *aj,*ai;
  sparse_status_t stat;

  if (aijmkl->no_SpMV2) return(0);

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) {
      PetscFunctionReturn(PETSC_ERR_LIB);
    }
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m  = A->rmap->n;
  n  = A->cmap->n;
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
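  /* Worked example (illustrative) of the three-array CSR layout described above,
   * for the 3x3 matrix [1 0 2; 0 3 0; 4 5 6] with 0-based indexing:
   *   aa = {1,2,3,4,5,6}   nonzero values, stored row by row
   *   aj = {0,2,1,0,1,2}   column index of each entry of aa
   *   ai = {0,2,3,6}       row k occupies positions ai[k] through ai[k+1]-1
   */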
  if ((a->nz != 0) && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix.
     * Check the status after each step so that a failed handle is never passed on. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    if (stat == SPARSE_STATUS_SUCCESS) stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat == SPARSE_STATUS_SUCCESS) stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat == SPARSE_STATUS_SUCCESS) stat = mkl_sparse_optimize(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
    }
    aijmkl->sparse_optimized = PETSC_TRUE;
  }

  return(0);
#endif
}
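/* Standalone sketch (illustrative, assuming double precision and an MKL with
 * the inspector-executor API) of the life cycle managed above.
 * mkl_sparse_x_create_csr and mkl_sparse_x_mv are PETSc-side names assumed to
 * resolve to the precision-appropriate MKL routines (mkl_sparse_d_create_csr
 * and mkl_sparse_d_mv for double):
 *
 *   #include <mkl_spblas.h>
 *
 *   MKL_INT ai[] = {0,2,3};                // row pointers for a 2x3 matrix
 *   MKL_INT aj[] = {0,2,1};                // column indices
 *   double  aa[] = {1.0,2.0,3.0};          // values of [1 0 2; 0 3 0]
 *   double  x[3] = {1.0,1.0,1.0}, y[2];
 *   sparse_matrix_t     csrA;
 *   struct matrix_descr descr = {SPARSE_MATRIX_TYPE_GENERAL};
 *
 *   mkl_sparse_d_create_csr(&csrA,SPARSE_INDEX_BASE_ZERO,2,3,ai,ai+1,aj,aa);
 *   mkl_sparse_set_mv_hint(csrA,SPARSE_OPERATION_NON_TRANSPOSE,descr,1000);
 *   mkl_sparse_set_memory_hint(csrA,SPARSE_MEMORY_AGGRESSIVE);
 *   mkl_sparse_optimize(csrA);             // inspector step
 *   mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,csrA,descr,x,0.0,y);
 *   mkl_sparse_destroy(csrA);              // y is now {3.0,3.0}
 */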
PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL *aijmkl;
  Mat_SeqAIJMKL *aijmkl_dest;

  MatDuplicate_SeqAIJ(A,op,M);
  aijmkl      = (Mat_SeqAIJMKL*)A->spptr;
  aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
  PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  /* Build the MKL handle for the duplicate: the memcpy above copied A's handle
   * pointer, which the duplicate must not share. */
  MatSeqAIJMKL_create_mkl_handle(*M);
  return(0);
}
PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ. This may not be the best way to do this,
   * but it avoids a lot of code duplication. */
  a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
  MatAssemblyEnd_SeqAIJ(A, mode);

  /* Now create the MKL sparse handle (if needed; the function checks). */
  MatSeqAIJMKL_create_mkl_handle(A);

  return(0);
}
PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n';  /* Indicates to MKL that we are not computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
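/* Standalone sketch (illustrative, assuming double precision) of the classic
 * interface used above; mkl_xcsrmv is a PETSc-side name assumed to resolve to
 * the precision-appropriate routine (mkl_dcsrmv for double), computing
 * y = alpha*A*x + beta*y:
 *
 *   #include <mkl_spblas.h>
 *
 *   MKL_INT m = 2, n = 3;
 *   MKL_INT ai[] = {0,2,3}, aj[] = {0,2,1};       // CSR of [1 0 2; 0 3 0]
 *   double  aa[] = {1.0,2.0,3.0};
 *   double  x[3] = {1.0,1.0,1.0}, y[2] = {0.0,0.0};
 *   double  alpha = 1.0, beta = 0.0;
 *   char    transa = 'n', matdescra[6] = {'g',' ',' ','c'};
 *
 *   mkl_dcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
 *   // y is now {3.0,3.0}
 */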
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt m = A->rmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<m; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  if (!aijmkl->sparse_optimized) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  if (stat != SPARSE_STATUS_SUCCESS) {
    PetscFunctionReturn(PETSC_ERR_LIB);
  }
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't';  /* Indicates to MKL that we are computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  sparse_status_t   stat;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt n = A->cmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<n; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  if (!aijmkl->sparse_optimized) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  if (stat != SPARSE_STATUS_SUCCESS) {
    PetscFunctionReturn(PETSC_ERR_LIB);
  }
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char        transa = 'n';  /* Indicates to MKL that we are not computing the transpose product. */
  PetscScalar alpha = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which computes y = alpha*A*x + beta*y, with beta = 1. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta = 0, then add yy to the result;
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
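/* Example (illustrative): both aliasing cases above arise from ordinary use of
 * the public MatMultAdd() interface, which computes v3 = v2 + A*v1:
 *
 *   MatMultAdd(A,x,y,y);   // zz == yy: one mkl_xcsrmv() call with beta = 1
 *   MatMultAdd(A,x,y,z);   // zz != yy: beta = 0 product, then z[i] += y[i]
 */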
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscInt          m = A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  sparse_status_t stat = SPARSE_STATUS_SUCCESS;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<m; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  if (!aijmkl->sparse_optimized) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv(), which computes y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv() with alpha = 1.0 and beta = 0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  if (stat != SPARSE_STATUS_SUCCESS) {
    PetscFunctionReturn(PETSC_ERR_LIB);
  }
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char        transa = 't';  /* Indicates to MKL that we are computing the transpose product. */
  PetscScalar alpha = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which computes y = alpha*A'*x + beta*y, with beta = 1. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta = 0, then add yy to the result;
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscInt          n = A->cmap->n;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  sparse_status_t stat = SPARSE_STATUS_SUCCESS;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<n; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  if (!aijmkl->sparse_optimized) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv(), which computes y = alpha*A'*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv() with alpha = 1.0 and beta = 0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  if (stat != SPARSE_STATUS_SUCCESS) {
    PetscFunctionReturn(PETSC_ERR_LIB);
  }
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
{
  MatScale_SeqAIJ(inA,alpha);
  MatSeqAIJMKL_create_mkl_handle(inA);
  return(0);
}

PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr)
{
  MatDiagonalScale_SeqAIJ(A,ll,rr);
  MatSeqAIJMKL_create_mkl_handle(A);
  return(0);
}

PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is)
{
  MatDiagonalSet_SeqAIJ(Y,D,is);
  MatSeqAIJMKL_create_mkl_handle(Y);
  return(0);
}

PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  MatAXPY_SeqAIJ(Y,a,X,str);
  if (str == SAME_NONZERO_PATTERN) {
    /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force an update of the MKL matrix handle. */
    MatSeqAIJMKL_create_mkl_handle(Y);
  }
  return(0);
}
/* MatConvert_SeqAIJ_SeqAIJMKL() converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
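/* Example (illustrative): converting an existing assembled SEQAIJ matrix from
 * application code goes through the generic MatConvert() interface:
 *
 *   Mat A;                                             // assembled MATSEQAIJ
 *   MatConvert(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);  // A is now MATSEQAIJMKL
 */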
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat           B = *newmat;
  Mat_SeqAIJMKL *aijmkl;
  PetscBool     set;
  PetscBool     sametype;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  PetscObjectTypeCompare((PetscObject)A,type,&sametype);
  if (sametype) return(0);

  PetscNewLog(B,&aijmkl);
  B->spptr = (void*)aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy     = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
  PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of the SpMV2 inspector-executor routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);
  PetscOptionsEnd();
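  /* Example (illustrative; program name hypothetical): the SpMV2 code paths
   * can thus be disabled at run time with
   *   ./app -mat_type seqaijmkl -mat_aijmkl_no_spmv2 */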
#ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  if (!aijmkl->no_SpMV2) {
    PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

  if (!aijmkl->no_SpMV2) {
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
#endif
  } else {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }

  B->ops->scale         = MatScale_SeqAIJMKL;
  B->ops->diagonalscale = MatDiagonalScale_SeqAIJMKL;
  B->ops->diagonalset   = MatDiagonalSet_SeqAIJMKL;
  B->ops->axpy          = MatAXPY_SeqAIJMKL;

  PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);
  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);

  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);
  *newmat = B;
  return(0);
}
/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
   operations are currently supported.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.  -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines

   Notes:
   If nnz is given then nz is ignored

   Level: intermediate

.keywords: matrix, MKL, sparse, parallel

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
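/* Example usage (illustrative):
 *
 *   Mat A;
 *   MatCreateSeqAIJMKL(PETSC_COMM_SELF,m,n,5,NULL,&A);
 *   // ...set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd()...
 */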
PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQAIJMKL);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
  return(0);
}
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
  return(0);
}
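/* Example (illustrative): besides MatCreateSeqAIJMKL(), a matrix of this type
 * can be requested through the generic interface,
 *
 *   MatCreate(PETSC_COMM_SELF,&A);
 *   MatSetSizes(A,m,n,m,n);
 *   MatSetType(A,MATSEQAIJMKL);
 *
 * or selected from the options database with -mat_type seqaijmkl. */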