Actual source code: aijmkl.c
/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/
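
/* A minimal usage sketch: any code that creates its matrix via MatSetFromOptions()
 * can select this class at runtime with -mat_type aijmkl; the explicit MatSetType()
 * form is shown below. The sizes m and n are assumed to be defined by the caller. */
#if 0
Mat A;
MatCreate(PETSC_COMM_SELF,&A);
MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);
MatSetType(A,MATSEQAIJMKL);
MatSetUp(A);
/* ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() as for any AIJ matrix ... */
#endif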
#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
#include <mkl_spblas.h>
typedef struct {
  PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState state;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t  csrA;             /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;
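
/* Sketch of the lazy-update idiom built on the 'state' field: before each MKL
 * executor call, the kernels below compare the state recorded when the handle was
 * built with the Mat's current PetscObjectState, and rebuild the handle if the
 * values have changed. */
#if 0
PetscObjectState state;
PetscObjectStateGet((PetscObject)A,&state);
if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);
#endif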
extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type,
   * so we will ignore 'MatType type'. */
  Mat B = *newmat;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
#endif

  if (reuse == MAT_INITIAL_MATRIX) MatDuplicate(A,MAT_COPY_VALUES,&B);

  /* Reset the original function pointers. */
  B->ops->duplicate               = MatDuplicate_SeqAIJ;
  B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy                 = MatDestroy_SeqAIJ;
  B->ops->mult                    = MatMult_SeqAIJ;
  B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
  B->ops->multadd                 = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

  if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscFree(B->spptr);

  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

  *newmat = B;
  return 0;
}
PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscFree(A->spptr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
  /* Note that I don't call MatSetType(). I believe this is because that
   * is only to be called when *building* a matrix. I could be wrong, but
   * that is how things work for the SuperLU matrix class. */
  MatDestroy_SeqAIJ(A);
  return 0;
}
/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  return 0;
#else
  Mat_SeqAIJ    *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt      m,n;
  MatScalar     *aa;
  PetscInt      *aj,*ai;

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interfaces, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
   * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) return 0;
#endif

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m  = A->rmap->n;
  n  = A->cmap->n;
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
  if (a->nz && aa && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    PetscStackCallStandard(mkl_sparse_x_create_csr,&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (!aijmkl->no_SpMV2) {
      PetscStackCallStandard(mkl_sparse_optimize,aijmkl->csrA);
    }
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  } else {
    aijmkl->csrA = PETSC_NULL;
  }

  return 0;
#endif
}
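
/* A standalone sketch of the inspector-executor sequence wrapped above, assuming a
 * real, double-precision build (the mkl_sparse_x_* names used in this file presumably
 * map to MKL's mkl_sparse_d_* routines in that configuration). The 3x3 CSR matrix is
 * an arbitrary example. */
#if 0
sparse_matrix_t     handle;
struct matrix_descr descr;
MKL_INT             ia[4] = {0,2,3,5};                /* row start offsets (row i spans ia[i]..ia[i+1]-1) */
MKL_INT             ja[5] = {0,1,1,0,2};              /* column indices */
double              va[5] = {2.0,-1.0,3.0,-1.0,4.0};  /* nonzero values */
double              x[3]  = {1.0,1.0,1.0},y[3];

descr.type = SPARSE_MATRIX_TYPE_GENERAL;
mkl_sparse_d_create_csr(&handle,SPARSE_INDEX_BASE_ZERO,3,3,ia,ia+1,ja,va);
mkl_sparse_set_mv_hint(handle,SPARSE_OPERATION_NON_TRANSPOSE,descr,1000);
mkl_sparse_set_memory_hint(handle,SPARSE_MEMORY_AGGRESSIVE);
mkl_sparse_optimize(handle);  /* the "inspection" step */
mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,handle,descr,x,0.0,y);  /* the "executor" step: y = A*x */
mkl_sparse_destroy(handle);
#endif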
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
/* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
{
  sparse_index_base_t indexing;
  PetscInt            m,n;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat_SeqAIJMKL       *aijmkl;

  if (csrA) {
    /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
    PetscStackCallStandard(mkl_sparse_x_export_csr,csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
  } else {
    aj = ai = PETSC_NULL;
    aa = PETSC_NULL;
  }

  MatSetType(A,MATSEQAIJ);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in the future, can we figure out good ways to deal with this?) */
  if (csrA) {
    MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);
  } else {
    /* Since MatSeqAIJSetPreallocationCSR() does initial setup and assembly begin/end, we must do that ourselves here. */
    MatSetUp(A);
    MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  }

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);

  aijmkl       = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl->csrA = csrA;

  /* The code below duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  if (csrA) {
    PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
  }
  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  sparse_index_base_t indexing;

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) return 0;

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);
  }

  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
   * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
   * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
  aijmkl->sparse_optimized = PETSC_FALSE;
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
{
  PetscInt            i,j,k;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  sparse_index_base_t indexing;

  PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) {
    PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");
    return 0;
  }

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);

  k = 0;
  for (i=0; i<nrows; i++) {
    PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ": ",i);
    nz = ai[i+1] - ai[i];
    for (j=0; j<nz; j++) {
      if (aa) {
        PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", %g) ",aj[k],PetscRealPart(aa[k]));
      } else {
        PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", NULL) ",aj[k]);
      }
      k++;
    }
    PetscViewerASCIIPrintf(viewer,"\n");
  }
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  Mat_SeqAIJMKL *aijmkl_dest;

  MatDuplicate_SeqAIJ(A,op,M);
  aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
  PetscArraycpy(aijmkl_dest,aijmkl,1);
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  return 0;
}
PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  Mat_SeqAIJ    *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL *aijmkl;

  if (mode == MAT_FLUSH_ASSEMBLY) return 0;

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
  MatAssemblyEnd_SeqAIJ(A, mode);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  return 0;
}
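
/* A minimal sketch of requesting eager inspection, using the option registered in
 * MatConvert_SeqAIJ_SeqAIJMKL() below; with it set, the handle is (re)built here in
 * MatAssemblyEnd() instead of at the first MatMult()-like operation. */
#if 0
PetscOptionsSetValue(NULL,"-mat_aijmkl_eager_inspection","1");  /* or pass it on the command line */
#endif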
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscInt          m     = A->rmap->n;
  PetscInt          n     = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return 0;
}
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    VecGetArray(yy,&y);
    PetscArrayzero(y,A->rmap->n);
    VecRestoreArray(yy,&y);
    return 0;
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscInt          m     = A->rmap->n;
  PetscInt          n     = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return 0;
}
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    VecGetArray(yy,&y);
    PetscArrayzero(y,A->cmap->n);
    VecRestoreArray(yy,&y);
    return 0;
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char        transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar alpha  = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return 0;
}
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscInt          m = A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  PetscObjectState state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    PetscArraycpy(z,y,m);
    VecRestoreArrayPair(yy,zz,&y,&z);
    return 0;
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    for (i=0; i<m; i++) z[i] += y[i];
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char        transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar alpha  = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return 0;
}
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscInt          n = A->cmap->n;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  PetscObjectState state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    VecGetArrayPair(yy,zz,&y,&z);
    PetscArraycpy(z,y,n);
    VecRestoreArrayPair(yy,zz,&y,&z);
    return 0;
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    for (i=0; i<n; i++) z[i] += y[i];
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
/* -------------------------- MatProduct code -------------------------- */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
{
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
  sparse_matrix_t     csrA,csrB,csrC;
  PetscInt            nrows,ncols;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
   * not handle sparse matrices with zero rows or columns. */
  if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
  else nrows = A->cmap->N;
  if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
  else ncols = B->rmap->N;

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) MatSeqAIJMKL_create_mkl_handle(B);
  csrA = a->csrA;
  csrB = b->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
  } else {
    csrC = PETSC_NULL;
  }

  MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);

  return 0;
}
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
{
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
  sparse_matrix_t     csrA,csrB,csrC;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) MatSeqAIJMKL_create_mkl_handle(B);
  csrA = a->csrA;
  csrB = b->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
  } else {
    csrC = PETSC_NULL;
  }

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return 0;
}

PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return 0;
}

PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return 0;
}

PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return 0;
}

PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return 0;
}

PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);
  return 0;
}

PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);
  return 0;
}
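
/* A minimal sketch of reaching the SP2M-based products above from user code; A and B
 * are assumed to be assembled MATSEQAIJMKL matrices of compatible sizes. */
#if 0
Mat C,D;
MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);           /* dispatches to the AB symbolic/numeric pair */
MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);  /* dispatches to the AtB pair */
MatDestroy(&C);
MatDestroy(&D);
#endif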
static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;

  MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);
  return 0;
}

static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;
  PetscReal   fill = product->fill;

  MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);
  C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
  return 0;
}
PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
{
  Mat                 Ct;
  Vec                 zeros;
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
  sparse_matrix_t     csrA,csrP,csrC;
  PetscBool           set,flag;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;

  MatIsSymmetricKnown(A,&set,&flag);

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) MatSeqAIJMKL_create_mkl_handle(P);
  csrA = a->csrA;
  csrP = p->csrA;
  csrC = c->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);

  /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
   * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
   * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
   * We have to fill in the missing portion, which we currently do below by forming the transpose and performing a MatAXPY
   * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
   * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
   * the full matrix. */
  MatSeqAIJMKL_update_from_mkl_handle(C);
  MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);
  MatCreateVecs(C,&zeros,NULL);
  VecSetFromOptions(zeros);
  VecZeroEntries(zeros);
  MatDiagonalSet(Ct,zeros,INSERT_VALUES);
  MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);
  /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
  MatProductCreateWithMat(A,P,PETSC_NULL,C);
  MatProductSetType(C,MATPRODUCT_PtAP);
  MatSeqAIJMKL_create_mkl_handle(C);
  VecDestroy(&zeros);
  MatDestroy(&Ct);
  return 0;
}
PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
{
  Mat_Product         *product = C->product;
  Mat                 A = product->A,P = product->B;
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
  sparse_matrix_t     csrA,csrP,csrC;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) MatSeqAIJMKL_create_mkl_handle(A);
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) MatSeqAIJMKL_create_mkl_handle(P);
  csrA = a->csrA;
  csrP = p->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  if (csrP && csrA) {
    PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
  } else {
    csrC = PETSC_NULL;
  }

  /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
   * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
   * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
   * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
   * is guaranteed. */
  MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);

  C->ops->productnumeric = MatProductNumeric_PtAP;
  return 0;
}
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
{
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  return 0;
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
{
  C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
  return 0;
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
{
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  return 0;
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A;
  PetscBool   set,flag;

  if (PetscDefined(USE_COMPLEX)) {
    /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
     * We do this in several other locations in this file. This works for the time being, but these
     * routines are considered unsafe and may be removed from the MatProduct code in the future.
     * TODO: Add proper MATSEQAIJMKL implementations */
    C->ops->productsymbolic = NULL;
  } else {
    /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
    MatIsSymmetricKnown(A,&set,&flag);
    if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
    else C->ops->productsymbolic = NULL;  /* MatProductSymbolic_Unsafe() will be used. */
    /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
     * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
  }
  return 0;
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
{
  C->ops->productsymbolic = NULL;  /* MatProductSymbolic_Unsafe() will be used. */
  return 0;
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
{
  C->ops->productsymbolic = NULL;  /* MatProductSymbolic_Unsafe() will be used. */
  return 0;
}

PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;

  switch (product->type) {
  case MATPRODUCT_AB:
    MatProductSetFromOptions_SeqAIJMKL_AB(C);
    break;
  case MATPRODUCT_AtB:
    MatProductSetFromOptions_SeqAIJMKL_AtB(C);
    break;
  case MATPRODUCT_ABt:
    MatProductSetFromOptions_SeqAIJMKL_ABt(C);
    break;
  case MATPRODUCT_PtAP:
    MatProductSetFromOptions_SeqAIJMKL_PtAP(C);
    break;
  case MATPRODUCT_RARt:
    MatProductSetFromOptions_SeqAIJMKL_RARt(C);
    break;
  case MATPRODUCT_ABC:
    MatProductSetFromOptions_SeqAIJMKL_ABC(C);
    break;
  default:
    break;
  }
  return 0;
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
/* ------------------------ End MatProduct code ------------------------ */
/* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat           B = *newmat;
  Mat_SeqAIJMKL *aijmkl;
  PetscBool     set;
  PetscBool     sametype;

  if (reuse == MAT_INITIAL_MATRIX) MatDuplicate(A,MAT_COPY_VALUES,&B);

  PetscObjectTypeCompare((PetscObject)A,type,&sametype);
  if (sametype) return 0;

  PetscNewLog(B,&aijmkl);
  B->spptr = (void*) aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy     = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
  PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);
  PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);
  PetscOptionsEnd();
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  if (!aijmkl->no_SpMV2) {
    PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
# if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
#  if !defined(PETSC_USE_COMPLEX)
  B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
#  else
  B->ops->ptapnumeric = NULL;
#  endif
# endif
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
   * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
   * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
   * versions in which the older interface has not been deprecated, we use the old interface. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);

  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);
  *newmat = B;
  return 0;
}
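
/* A minimal sketch of reaching the conversion routine above from user code, turning
 * an already assembled MATSEQAIJ matrix A into a MATSEQAIJMKL one in place. */
#if 0
MatConvert(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
#endif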
/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
   symmetric A) operations are currently supported.
   Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.

   Collective

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given then nz is ignored

   Level: intermediate

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQAIJMKL);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
  return 0;
}
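
/* A minimal usage sketch for MatCreateSeqAIJMKL(): assemble a small tridiagonal
 * matrix and apply it once; the sizes and values are arbitrary examples. */
#if 0
Mat         A;
Vec         x,y;
PetscInt    i,n = 10;
PetscScalar v[3] = {-1.0,2.0,-1.0};

MatCreateSeqAIJMKL(PETSC_COMM_SELF,n,n,3,NULL,&A);
for (i=0; i<n; i++) {
  PetscInt          cols[3] = {i-1,i,i+1};
  PetscInt          ncols   = 3;
  const PetscInt    *cp     = cols;
  const PetscScalar *vp     = v;
  if (i == 0)   {cp++; vp++; ncols = 2;}  /* drop the subdiagonal entry in the first row */
  if (i == n-1) ncols--;                  /* drop the superdiagonal entry in the last row */
  MatSetValues(A,1,&i,ncols,cp,vp,INSERT_VALUES);
}
MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
MatCreateVecs(A,&x,&y);
VecSet(x,1.0);
MatMult(A,x,y);  /* runs one of the MKL kernels defined in this file */
MatDestroy(&A); VecDestroy(&x); VecDestroy(&y);
#endif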
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
  return 0;
}