2: /*
3: Defines basic operations for the MATSEQAIJMKL matrix class.
4: This class is derived from the MATSEQAIJ class and retains the
5: compressed row storage (aka Yale sparse matrix format) but uses
6: sparse BLAS operations from the Intel Math Kernel Library (MKL)
7: wherever possible.
8: */
10: #include <../src/mat/impls/aij/seq/aij.h> 11: #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 12: #include <mkl_spblas.h>
14: typedef struct {
15: PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
16: PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
17: PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18: PetscObjectState state;
19: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20: sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21: struct matrix_descr descr;
22: #endif
23: } Mat_SeqAIJMKL;
25: extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
27: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 28: {
29: /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30: /* so we will ignore 'MatType type'. */
32: Mat B = *newmat;
33: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
34: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
35: #endif
38: if (reuse == MAT_INITIAL_MATRIX) {
39: MatDuplicate(A,MAT_COPY_VALUES,&B);
40: }
42: /* Reset the original function pointers. */
43: B->ops->duplicate = MatDuplicate_SeqAIJ;
44: B->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
45: B->ops->destroy = MatDestroy_SeqAIJ;
46: B->ops->mult = MatMult_SeqAIJ;
47: B->ops->multtranspose = MatMultTranspose_SeqAIJ;
48: B->ops->multadd = MatMultAdd_SeqAIJ;
49: B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
50: B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ;
51: B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
52: B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53: B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
54: B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
55: B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ;
57: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);
59: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
60: if (!aijmkl->no_SpMV2) {
61: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
62: PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",NULL);
63: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
64: }
66: /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
67: * simply involves destroying the MKL sparse matrix handle and then freeing
68: * the spptr pointer. */
69: if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
71: if (aijmkl->sparse_optimized) {
72: sparse_status_t stat;
73: stat = mkl_sparse_destroy(aijmkl->csrA);
74: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize()");
75: }
76: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
77: PetscFree(B->spptr);
79: /* Change the type of B to MATSEQAIJ. */
80: PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);
82: *newmat = B;
83: return(0);
84: }
86: PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 87: {
89: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
93: /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
94: if (aijmkl) {
95: /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
96: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
97: if (aijmkl->sparse_optimized) {
98: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
99: stat = mkl_sparse_destroy(aijmkl->csrA);
100: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
101: }
102: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
103: PetscFree(A->spptr);
104: }
106: /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
107: * to destroy everything that remains. */
108: PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
109: /* Note that I don't call MatSetType(). I believe this is because that
110: * is only to be called when *building* a matrix. I could be wrong, but
111: * that is how things work for the SuperLU matrix class. */
112: MatDestroy_SeqAIJ(A);
113: return(0);
114: }
116: /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
117: * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
118: * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
119: * handle, creates a new one, and then calls mkl_sparse_optimize().
120: * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
121: * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
122: * an unoptimized matrix handle here. */
123: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)124: {
125: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
126: /* If the MKL library does not have mkl_sparse_optimize(), then this routine
127: * does nothing. We make it callable anyway in this case because it cuts
128: * down on littering the code with #ifdefs. */
130: return(0);
131: #else
132: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
133: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
134: PetscInt m,n;
135: MatScalar *aa;
136: PetscInt *aj,*ai;
137: sparse_status_t stat;
138: PetscErrorCode ierr;
141: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
142: /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
143: * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
144: * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
145: * mkl_sparse_optimize() later. */
146: if (aijmkl->no_SpMV2) return(0);
147: #endif
149: if (aijmkl->sparse_optimized) {
150: /* Matrix has been previously assembled and optimized. Must destroy old
151: * matrix handle before running the optimization step again. */
152: stat = mkl_sparse_destroy(aijmkl->csrA);
153: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
154: }
155: aijmkl->sparse_optimized = PETSC_FALSE;
157: /* Now perform the SpMV2 setup and matrix optimization. */
158: aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
159: aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
160: aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
161: m = A->rmap->n;
162: n = A->cmap->n;
163: aj = a->j; /* aj[k] gives column index for element aa[k]. */
164: aa = a->a; /* Nonzero elements stored row-by-row. */
165: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
166: if (a->nz && aa && !A->structure_only) {
167: /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
168: * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
169: stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
170: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()");
171: stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
172: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
173: stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
174: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
175: if (!aijmkl->no_SpMV2) {
176: stat = mkl_sparse_optimize(aijmkl->csrA);
177: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()");
178: }
179: aijmkl->sparse_optimized = PETSC_TRUE;
180: PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
181: } else {
182: aijmkl->csrA = PETSC_NULL;
183: }
185: return(0);
186: #endif
187: }
189: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
190: /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
191: static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)192: {
193: PetscErrorCode ierr;
194: sparse_status_t stat;
195: sparse_index_base_t indexing;
196: PetscInt m,n;
197: PetscInt *aj,*ai,*dummy;
198: MatScalar *aa;
199: Mat_SeqAIJMKL *aijmkl;
201: if (csrA) {
202: /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
203: stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
204: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
205: if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
206: } else {
207: aj = ai = PETSC_NULL;
208: aa = PETSC_NULL;
209: }
211: MatSetType(A,MATSEQAIJ);
212: MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);
213: /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
214: * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
215: * they will be destroyed when the MKL handle is destroyed.
216: * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
217: if (csrA) {
218: MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);
219: } else {
220: /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
221: MatSetUp(A);
222: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
223: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
224: }
226: /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
227: * Now turn it into a MATSEQAIJMKL. */
228: MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
230: aijmkl = (Mat_SeqAIJMKL*) A->spptr;
231: aijmkl->csrA = csrA;
233: /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
234: * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
235: * and just need to be able to run the MKL optimization step. */
236: aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
237: aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
238: aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
239: if (csrA) {
240: stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
241: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
242: stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
243: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
244: }
245: PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
246: return(0);
247: }
248: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
250: /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
251: * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
252: * MatMatMultNumeric(). */
253: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
254: static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)255: {
256: PetscInt i;
257: PetscInt nrows,ncols;
258: PetscInt nz;
259: PetscInt *ai,*aj,*dummy;
260: PetscScalar *aa;
261: PetscErrorCode ierr;
262: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
263: sparse_status_t stat;
264: sparse_index_base_t indexing;
266: /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
267: if (!aijmkl->csrA) return(0);
269: /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
270: stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
271: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
273: /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
274: * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
275: for (i=0; i<nrows; i++) {
276: nz = ai[i+1] - ai[i];
277: MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);
278: }
280: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
281: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
283: PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
284: /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
285: * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
286: * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
287: aijmkl->sparse_optimized = PETSC_FALSE;
288: return(0);
289: }
290: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
292: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
293: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)294: {
295: PetscInt i,j,k;
296: PetscInt nrows,ncols;
297: PetscInt nz;
298: PetscInt *ai,*aj,*dummy;
299: PetscScalar *aa;
300: PetscErrorCode ierr;
301: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
302: sparse_status_t stat;
303: sparse_index_base_t indexing;
305: PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");
307: /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
308: if (!aijmkl->csrA) {
309: PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");
310: return(0);
311: }
313: /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
314: stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
315: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
317: k = 0;
318: for (i=0; i<nrows; i++) {
319: PetscViewerASCIIPrintf(viewer,"row %D: ",i);
320: nz = ai[i+1] - ai[i];
321: for (j=0; j<nz; j++) {
322: if (aa) {
323: PetscViewerASCIIPrintf(viewer,"(%D, %g) ",aj[k],PetscRealPart(aa[k]));
324: } else {
325: PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);
326: }
327: k++;
328: }
329: PetscViewerASCIIPrintf(viewer,"\n");
330: }
331: return(0);
332: }
333: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
335: PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)336: {
338: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
339: Mat_SeqAIJMKL *aijmkl_dest;
342: MatDuplicate_SeqAIJ(A,op,M);
343: aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
344: PetscArraycpy(aijmkl_dest,aijmkl,1);
345: aijmkl_dest->sparse_optimized = PETSC_FALSE;
346: if (aijmkl->eager_inspection) {
347: MatSeqAIJMKL_create_mkl_handle(A);
348: }
349: return(0);
350: }
352: PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)353: {
354: PetscErrorCode ierr;
355: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
356: Mat_SeqAIJMKL *aijmkl;
359: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
361: /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
362: * extra information and some different methods, call the AssemblyEnd
363: * routine for a MATSEQAIJ.
364: * I'm not sure if this is the best way to do this, but it avoids
365: * a lot of code duplication. */
366: a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
367: MatAssemblyEnd_SeqAIJ(A, mode);
369: /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
370: * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
371: aijmkl = (Mat_SeqAIJMKL*)A->spptr;
372: if (aijmkl->eager_inspection) {
373: MatSeqAIJMKL_create_mkl_handle(A);
374: }
376: return(0);
377: }
379: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
380: PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)381: {
382: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
383: const PetscScalar *x;
384: PetscScalar *y;
385: const MatScalar *aa;
386: PetscErrorCode ierr;
387: PetscInt m = A->rmap->n;
388: PetscInt n = A->cmap->n;
389: PetscScalar alpha = 1.0;
390: PetscScalar beta = 0.0;
391: const PetscInt *aj,*ai;
392: char matdescra[6];
395: /* Variables not in MatMult_SeqAIJ. */
396: char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
399: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
400: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
401: VecGetArrayRead(xx,&x);
402: VecGetArray(yy,&y);
403: aj = a->j; /* aj[k] gives column index for element aa[k]. */
404: aa = a->a; /* Nonzero elements stored row-by-row. */
405: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
407: /* Call MKL sparse BLAS routine to do the MatMult. */
408: mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
410: PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
411: VecRestoreArrayRead(xx,&x);
412: VecRestoreArray(yy,&y);
413: return(0);
414: }
415: #endif
417: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
418: PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)419: {
420: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
421: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
422: const PetscScalar *x;
423: PetscScalar *y;
424: PetscErrorCode ierr;
425: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
426: PetscObjectState state;
430: /* If there are no nonzero entries, zero yy and return immediately. */
431: if (!a->nz) {
432: PetscInt i;
433: PetscInt m=A->rmap->n;
434: VecGetArray(yy,&y);
435: for (i=0; i<m; i++) {
436: y[i] = 0.0;
437: }
438: VecRestoreArray(yy,&y);
439: return(0);
440: }
442: VecGetArrayRead(xx,&x);
443: VecGetArray(yy,&y);
445: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
446: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
447: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
448: PetscObjectStateGet((PetscObject)A,&state);
449: if (!aijmkl->sparse_optimized || aijmkl->state != state) {
450: MatSeqAIJMKL_create_mkl_handle(A);
451: }
453: /* Call MKL SpMV2 executor routine to do the MatMult. */
454: stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
455: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
457: PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
458: VecRestoreArrayRead(xx,&x);
459: VecRestoreArray(yy,&y);
460: return(0);
461: }
462: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
464: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
465: PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)466: {
467: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
468: const PetscScalar *x;
469: PetscScalar *y;
470: const MatScalar *aa;
471: PetscErrorCode ierr;
472: PetscInt m = A->rmap->n;
473: PetscInt n = A->cmap->n;
474: PetscScalar alpha = 1.0;
475: PetscScalar beta = 0.0;
476: const PetscInt *aj,*ai;
477: char matdescra[6];
479: /* Variables not in MatMultTranspose_SeqAIJ. */
480: char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
483: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
484: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
485: VecGetArrayRead(xx,&x);
486: VecGetArray(yy,&y);
487: aj = a->j; /* aj[k] gives column index for element aa[k]. */
488: aa = a->a; /* Nonzero elements stored row-by-row. */
489: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
491: /* Call MKL sparse BLAS routine to do the MatMult. */
492: mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
494: PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
495: VecRestoreArrayRead(xx,&x);
496: VecRestoreArray(yy,&y);
497: return(0);
498: }
499: #endif
501: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
502: PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)503: {
504: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
505: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
506: const PetscScalar *x;
507: PetscScalar *y;
508: PetscErrorCode ierr;
509: sparse_status_t stat;
510: PetscObjectState state;
514: /* If there are no nonzero entries, zero yy and return immediately. */
515: if (!a->nz) {
516: PetscInt i;
517: PetscInt n=A->cmap->n;
518: VecGetArray(yy,&y);
519: for (i=0; i<n; i++) {
520: y[i] = 0.0;
521: }
522: VecRestoreArray(yy,&y);
523: return(0);
524: }
526: VecGetArrayRead(xx,&x);
527: VecGetArray(yy,&y);
529: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
530: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
531: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
532: PetscObjectStateGet((PetscObject)A,&state);
533: if (!aijmkl->sparse_optimized || aijmkl->state != state) {
534: MatSeqAIJMKL_create_mkl_handle(A);
535: }
537: /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
538: stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
539: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
541: PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
542: VecRestoreArrayRead(xx,&x);
543: VecRestoreArray(yy,&y);
544: return(0);
545: }
546: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
548: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
549: PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)550: {
551: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
552: const PetscScalar *x;
553: PetscScalar *y,*z;
554: const MatScalar *aa;
555: PetscErrorCode ierr;
556: PetscInt m = A->rmap->n;
557: PetscInt n = A->cmap->n;
558: const PetscInt *aj,*ai;
559: PetscInt i;
561: /* Variables not in MatMultAdd_SeqAIJ. */
562: char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
563: PetscScalar alpha = 1.0;
564: PetscScalar beta;
565: char matdescra[6];
568: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
569: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
571: VecGetArrayRead(xx,&x);
572: VecGetArrayPair(yy,zz,&y,&z);
573: aj = a->j; /* aj[k] gives column index for element aa[k]. */
574: aa = a->a; /* Nonzero elements stored row-by-row. */
575: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
577: /* Call MKL sparse BLAS routine to do the MatMult. */
578: if (zz == yy) {
579: /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
580: beta = 1.0;
581: mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
582: } else {
583: /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
584: * MKL sparse BLAS does not have a MatMultAdd equivalent. */
585: beta = 0.0;
586: mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
587: for (i=0; i<m; i++) {
588: z[i] += y[i];
589: }
590: }
592: PetscLogFlops(2.0*a->nz);
593: VecRestoreArrayRead(xx,&x);
594: VecRestoreArrayPair(yy,zz,&y,&z);
595: return(0);
596: }
597: #endif
599: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
600: PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)601: {
602: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
603: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
604: const PetscScalar *x;
605: PetscScalar *y,*z;
606: PetscErrorCode ierr;
607: PetscInt m = A->rmap->n;
608: PetscInt i;
610: /* Variables not in MatMultAdd_SeqAIJ. */
611: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
612: PetscObjectState state;
616: /* If there are no nonzero entries, set zz = yy and return immediately. */
617: if (!a->nz) {
618: PetscInt i;
619: VecGetArrayPair(yy,zz,&y,&z);
620: for (i=0; i<m; i++) {
621: z[i] = y[i];
622: }
623: VecRestoreArrayPair(yy,zz,&y,&z);
624: return(0);
625: }
627: VecGetArrayRead(xx,&x);
628: VecGetArrayPair(yy,zz,&y,&z);
630: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
631: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
632: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
633: PetscObjectStateGet((PetscObject)A,&state);
634: if (!aijmkl->sparse_optimized || aijmkl->state != state) {
635: MatSeqAIJMKL_create_mkl_handle(A);
636: }
638: /* Call MKL sparse BLAS routine to do the MatMult. */
639: if (zz == yy) {
640: /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
641: * with alpha and beta both set to 1.0. */
642: stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
643: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
644: } else {
645: /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
646: * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
647: stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
648: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
649: for (i=0; i<m; i++) {
650: z[i] += y[i];
651: }
652: }
654: PetscLogFlops(2.0*a->nz);
655: VecRestoreArrayRead(xx,&x);
656: VecRestoreArrayPair(yy,zz,&y,&z);
657: return(0);
658: }
659: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
661: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
662: PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)663: {
664: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
665: const PetscScalar *x;
666: PetscScalar *y,*z;
667: const MatScalar *aa;
668: PetscErrorCode ierr;
669: PetscInt m = A->rmap->n;
670: PetscInt n = A->cmap->n;
671: const PetscInt *aj,*ai;
672: PetscInt i;
674: /* Variables not in MatMultTransposeAdd_SeqAIJ. */
675: char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
676: PetscScalar alpha = 1.0;
677: PetscScalar beta;
678: char matdescra[6];
681: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
682: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
684: VecGetArrayRead(xx,&x);
685: VecGetArrayPair(yy,zz,&y,&z);
686: aj = a->j; /* aj[k] gives column index for element aa[k]. */
687: aa = a->a; /* Nonzero elements stored row-by-row. */
688: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
690: /* Call MKL sparse BLAS routine to do the MatMult. */
691: if (zz == yy) {
692: /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
693: beta = 1.0;
694: mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
695: } else {
696: /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
697: * MKL sparse BLAS does not have a MatMultAdd equivalent. */
698: beta = 0.0;
699: mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
700: for (i=0; i<n; i++) {
701: z[i] += y[i];
702: }
703: }
705: PetscLogFlops(2.0*a->nz);
706: VecRestoreArrayRead(xx,&x);
707: VecRestoreArrayPair(yy,zz,&y,&z);
708: return(0);
709: }
710: #endif
712: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
713: PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)714: {
715: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
716: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
717: const PetscScalar *x;
718: PetscScalar *y,*z;
719: PetscErrorCode ierr;
720: PetscInt n = A->cmap->n;
721: PetscInt i;
722: PetscObjectState state;
724: /* Variables not in MatMultTransposeAdd_SeqAIJ. */
725: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
729: /* If there are no nonzero entries, set zz = yy and return immediately. */
730: if (!a->nz) {
731: PetscInt i;
732: VecGetArrayPair(yy,zz,&y,&z);
733: for (i=0; i<n; i++) {
734: z[i] = y[i];
735: }
736: VecRestoreArrayPair(yy,zz,&y,&z);
737: return(0);
738: }
740: VecGetArrayRead(xx,&x);
741: VecGetArrayPair(yy,zz,&y,&z);
743: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
744: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
745: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
746: PetscObjectStateGet((PetscObject)A,&state);
747: if (!aijmkl->sparse_optimized || aijmkl->state != state) {
748: MatSeqAIJMKL_create_mkl_handle(A);
749: }
751: /* Call MKL sparse BLAS routine to do the MatMult. */
752: if (zz == yy) {
753: /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
754: * with alpha and beta both set to 1.0. */
755: stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
756: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
757: } else {
758: /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
759: * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
760: stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
761: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
762: for (i=0; i<n; i++) {
763: z[i] += y[i];
764: }
765: }
767: PetscLogFlops(2.0*a->nz);
768: VecRestoreArrayRead(xx,&x);
769: VecRestoreArrayPair(yy,zz,&y,&z);
770: return(0);
771: }
772: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
774: /* -------------------------- MatProduct code -------------------------- */
775: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
776: static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)777: {
778: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
779: sparse_matrix_t csrA,csrB,csrC;
780: PetscInt nrows,ncols;
781: PetscErrorCode ierr;
782: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
783: struct matrix_descr descr_type_gen;
784: PetscObjectState state;
787: /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
788: * not handle sparse matrices with zero rows or columns. */
789: if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
790: else nrows = A->cmap->N;
791: if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
792: else ncols = B->rmap->N;
794: PetscObjectStateGet((PetscObject)A,&state);
795: if (!a->sparse_optimized || a->state != state) {
796: MatSeqAIJMKL_create_mkl_handle(A);
797: }
798: PetscObjectStateGet((PetscObject)B,&state);
799: if (!b->sparse_optimized || b->state != state) {
800: MatSeqAIJMKL_create_mkl_handle(B);
801: }
802: csrA = a->csrA;
803: csrB = b->csrA;
804: descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
806: if (csrA && csrB) {
807: stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
808: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
809: } else {
810: csrC = PETSC_NULL;
811: }
813: MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);
815: return(0);
816: }
818: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)819: {
820: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
821: sparse_matrix_t csrA, csrB, csrC;
822: PetscErrorCode ierr;
823: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
824: struct matrix_descr descr_type_gen;
825: PetscObjectState state;
828: PetscObjectStateGet((PetscObject)A,&state);
829: if (!a->sparse_optimized || a->state != state) {
830: MatSeqAIJMKL_create_mkl_handle(A);
831: }
832: PetscObjectStateGet((PetscObject)B,&state);
833: if (!b->sparse_optimized || b->state != state) {
834: MatSeqAIJMKL_create_mkl_handle(B);
835: }
836: csrA = a->csrA;
837: csrB = b->csrA;
838: csrC = c->csrA;
839: descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
841: if (csrA && csrB) {
842: stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
843: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
844: } else {
845: csrC = PETSC_NULL;
846: }
848: /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
849: MatSeqAIJMKL_update_from_mkl_handle(C);
851: return(0);
852: }
854: PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)855: {
859: MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
860: return(0);
861: }
863: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)864: {
868: MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
869: return(0);
870: }
872: PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)873: {
877: MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
878: return(0);
879: }
881: PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)882: {
886: MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
887: return(0);
888: }
890: PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)891: {
895: MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);
896: return(0);
897: }
899: PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)900: {
904: MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);
905: return(0);
906: }
908: static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)909: {
911: Mat_Product *product = C->product;
912: Mat A = product->A,B = product->B;
915: MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);
916: return(0);
917: }
919: static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)920: {
922: Mat_Product *product = C->product;
923: Mat A = product->A,B = product->B;
924: PetscReal fill = product->fill;
927: MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);
928: C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
929: return(0);
930: }
932: PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)933: {
934: Mat Ct;
935: Vec zeros;
936: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
937: sparse_matrix_t csrA, csrP, csrC;
938: PetscBool set, flag;
939: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
940: struct matrix_descr descr_type_sym;
941: PetscObjectState state;
942: PetscErrorCode ierr;
945: MatIsSymmetricKnown(A,&set,&flag);
946: if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
948: PetscObjectStateGet((PetscObject)A,&state);
949: if (!a->sparse_optimized || a->state != state) {
950: MatSeqAIJMKL_create_mkl_handle(A);
951: }
952: PetscObjectStateGet((PetscObject)P,&state);
953: if (!p->sparse_optimized || p->state != state) {
954: MatSeqAIJMKL_create_mkl_handle(P);
955: }
956: csrA = a->csrA;
957: csrP = p->csrA;
958: csrC = c->csrA;
959: descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
960: descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
961: descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
963: /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
964: stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
965: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()");
967: /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
968: * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
969: * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
970: * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY971: * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
972: * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
973: * the full matrix. */
974: MatSeqAIJMKL_update_from_mkl_handle(C);
975: MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);
976: MatCreateVecs(C,&zeros,NULL);
977: VecSetFromOptions(zeros);
978: VecZeroEntries(zeros);
979: MatDiagonalSet(Ct,zeros,INSERT_VALUES);
980: MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);
981: /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
982: MatProductCreateWithMat(A,P,PETSC_NULL,C);
983: MatProductSetType(C,MATPRODUCT_PtAP);
984: MatSeqAIJMKL_create_mkl_handle(C);
985: VecDestroy(&zeros);
986: MatDestroy(&Ct);
987: return(0);
988: }
990: PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)991: {
992: Mat_Product *product = C->product;
993: Mat A = product->A,P = product->B;
994: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
995: sparse_matrix_t csrA,csrP,csrC;
996: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
997: struct matrix_descr descr_type_sym;
998: PetscObjectState state;
999: PetscErrorCode ierr;
1002: PetscObjectStateGet((PetscObject)A,&state);
1003: if (!a->sparse_optimized || a->state != state) {
1004: MatSeqAIJMKL_create_mkl_handle(A);
1005: }
1006: PetscObjectStateGet((PetscObject)P,&state);
1007: if (!p->sparse_optimized || p->state != state) {
1008: MatSeqAIJMKL_create_mkl_handle(P);
1009: }
1010: csrA = a->csrA;
1011: csrP = p->csrA;
1012: descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
1013: descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
1014: descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
1016: /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
1017: if (csrP && csrA) {
1018: stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
1019: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()");
1020: } else {
1021: csrC = PETSC_NULL;
1022: }
1024: /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
1025: * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
1026: * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
1027: * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
1028: * is guaranteed. */
1029: MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);
1031: C->ops->productnumeric = MatProductNumeric_PtAP;
1032: return(0);
1033: }
1035: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)1036: {
1038: C->ops->productsymbolic = MatProductSymbolic_AB;
1039: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1040: return(0);
1041: }
1043: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)1044: {
1046: C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
1047: return(0);
1048: }
1050: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)1051: {
1053: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
1054: C->ops->productsymbolic = MatProductSymbolic_ABt;
1055: return(0);
1056: }
1058: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)1059: {
1061: Mat_Product *product = C->product;
1062: Mat A = product->A;
1063: PetscBool set, flag;
1066: #if defined(PETSC_USE_COMPLEX)
1067: /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Basic() will be used.
1068: * We do this in several other locations in this file. This works for the time being, but the _Basic()
1069: * routines are considered unsafe and may be removed from the MatProduct code in the future.
1070: * TODO: Add proper MATSEQAIJMKL implementations, instead of relying on the _Basic() routines. */
1071: C->ops->productsymbolic = NULL;
1072: #else
1073: /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
1074: MatIsSymmetricKnown(A,&set,&flag);
1075: if (set && flag) {
1076: C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1077: return(0);
1078: } else {
1079: C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1080: }
1081: /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
1082: * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
1083: #endif
1084: return(0);
1085: }
1087: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)1088: {
1090: C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1091: return(0);
1092: }
1094: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)1095: {
1097: C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1098: return(0);
1099: }
1101: PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)1102: {
1104: Mat_Product *product = C->product;
1107: switch (product->type) {
1108: case MATPRODUCT_AB:
1109: MatProductSetFromOptions_SeqAIJMKL_AB(C);
1110: break;
1111: case MATPRODUCT_AtB:
1112: MatProductSetFromOptions_SeqAIJMKL_AtB(C);
1113: break;
1114: case MATPRODUCT_ABt:
1115: MatProductSetFromOptions_SeqAIJMKL_ABt(C);
1116: break;
1117: case MATPRODUCT_PtAP:
1118: MatProductSetFromOptions_SeqAIJMKL_PtAP(C);
1119: break;
1120: case MATPRODUCT_RARt:
1121: MatProductSetFromOptions_SeqAIJMKL_RARt(C);
1122: break;
1123: case MATPRODUCT_ABC:
1124: MatProductSetFromOptions_SeqAIJMKL_ABC(C);
1125: break;
1126: default:1127: break;
1128: }
1129: return(0);
1130: }
1131: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1132: /* ------------------------ End MatProduct code ------------------------ */
1134: /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1135: * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
1136: * routine, but can also be used to convert an assembled SeqAIJ matrix
1137: * into a SeqAIJMKL one. */
1138: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)1139: {
1141: Mat B = *newmat;
1142: Mat_SeqAIJMKL *aijmkl;
1143: PetscBool set;
1144: PetscBool sametype;
1147: if (reuse == MAT_INITIAL_MATRIX) {
1148: MatDuplicate(A,MAT_COPY_VALUES,&B);
1149: }
1151: PetscObjectTypeCompare((PetscObject)A,type,&sametype);
1152: if (sametype) return(0);
1154: PetscNewLog(B,&aijmkl);
1155: B->spptr = (void*) aijmkl;
1157: /* Set function pointers for methods that we inherit from AIJ but override.
1158: * We also parse some command line options below, since those determine some of the methods we point to. */
1159: B->ops->duplicate = MatDuplicate_SeqAIJMKL;
1160: B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
1161: B->ops->destroy = MatDestroy_SeqAIJMKL;
1163: aijmkl->sparse_optimized = PETSC_FALSE;
1164: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1165: aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
1166: #else
1167: aijmkl->no_SpMV2 = PETSC_TRUE;
1168: #endif
1169: aijmkl->eager_inspection = PETSC_FALSE;
1171: /* Parse command line options. */
1172: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
1173: PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);
1174: PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);
1175: PetscOptionsEnd();
1176: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1177: if (!aijmkl->no_SpMV2) {
1178: PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
1179: aijmkl->no_SpMV2 = PETSC_TRUE;
1180: }
1181: #endif
1183: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1184: B->ops->mult = MatMult_SeqAIJMKL_SpMV2;
1185: B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2;
1186: B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2;
1187: B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1188: # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1189: B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL;
1190: B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1191: B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1192: B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1193: B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1194: # if !defined(PETSC_USE_COMPLEX)
1195: B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1196: # else
1197: B->ops->ptapnumeric = NULL;
1198: # endif
1199: # endif
1200: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1202: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1203: /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1204: * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1205: * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1206: * versions in which the older interface has not been deprecated, we use the old interface. */
1207: if (aijmkl->no_SpMV2) {
1208: B->ops->mult = MatMult_SeqAIJMKL;
1209: B->ops->multtranspose = MatMultTranspose_SeqAIJMKL;
1210: B->ops->multadd = MatMultAdd_SeqAIJMKL;
1211: B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1212: }
1213: #endif
1215: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);
1217: if (!aijmkl->no_SpMV2) {
1218: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1219: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1220: PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",MatProductSetFromOptions_SeqAIJMKL);
1221: #endif
1222: #endif
1223: }
1225: PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);
1226: *newmat = B;
1227: return(0);
1228: }
1230: /*@C
1231: MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
1232: This type inherits from AIJ and is largely identical, but uses sparse BLAS
1233: routines from Intel MKL whenever possible.
1234: If the installed version of MKL supports the "SpMV2" sparse
1235: inspector-executor routines, then those are used by default.
1236: MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1237: symmetric A) operations are currently supported.
1238: Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
1240: Collective
1242: Input Parameters:
1243: + comm - MPI communicator, set to PETSC_COMM_SELF1244: . m - number of rows
1245: . n - number of columns
1246: . nz - number of nonzeros per row (same for all rows)
1247: - nnz - array containing the number of nonzeros in the various rows
1248: (possibly different for each row) or NULL
1250: Output Parameter:
1251: . A - the matrix
1253: Options Database Keys:
1254: + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1255: - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1257: Notes:
1258: If nnz is given then nz is ignored
1260: Level: intermediate
1262: .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
1263: @*/
1264: PetscErrorCodeMatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)1265: {
1269: MatCreate(comm,A);
1270: MatSetSizes(*A,m,n,m,n);
1271: MatSetType(*A,MATSEQAIJMKL);
1272: MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
1273: return(0);
1274: }
1276: PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)1277: {
1281: MatSetType(A,MATSEQAIJ);
1282: MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
1283: return(0);
1284: }