Actual source code: aijmkl.c
/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/
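
/* A minimal usage sketch (illustrative, not part of this file; assumes a
 * standard PETSc program in which PetscInitialize() has already been called).
 * A MATSEQAIJMKL matrix can be created directly, or an existing MATSEQAIJ
 * matrix can be switched to it at runtime with the option "-mat_type aijmkl":
 *
 *   Mat A;
 *   MatCreate(PETSC_COMM_SELF,&A);
 *   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,10,10);
 *   MatSetType(A,MATSEQAIJMKL);
 *   MatSetUp(A);
 *   ... set values with MatSetValues(), assemble, then use A as any AIJ matrix ...
 *   MatDestroy(&A);
 */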
#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
#include <mkl_spblas.h>

typedef struct {
  PetscBool        no_SpMV2;          /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool        eager_inspection;  /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool        sparse_optimized;  /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState state;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t  csrA;              /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;

extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat B = *newmat;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
#endif

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  /* Reset the original function pointers. */
  B->ops->duplicate               = MatDuplicate_SeqAIJ;
  B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy                 = MatDestroy_SeqAIJ;
  B->ops->mult                    = MatMult_SeqAIJ;
  B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
  B->ops->multadd                 = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

  if (aijmkl->sparse_optimized) {
    sparse_status_t stat;
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
  }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscFree(B->spptr);

  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

  *newmat = B;
  return(0);
}

PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscFree(A->spptr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
  /* Note that I don't call MatSetType(). I believe this is because that
   * is only to be called when *building* a matrix. I could be wrong, but
   * that is how things work for the SuperLU matrix class. */
  MatDestroy_SeqAIJ(A);
  return(0);
}

/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  return(0);
#else
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL   *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt        m,n;
  MatScalar       *aa;
  PetscInt        *aj,*ai;
  sparse_status_t stat;
  PetscErrorCode  ierr;

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interfaces, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
   * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) return(0);
#endif

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m = A->rmap->n;
  n = A->cmap->n;
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
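  /* A small worked CSR example (an illustration, not code from this file):
   * the 3x3 matrix
   *   [ 1 0 2 ]
   *   [ 0 3 0 ]
   *   [ 4 0 5 ]
   * is stored as
   *   aa = {1,2,3,4,5}   (values, row by row)
   *   aj = {0,2,1,0,2}   (column index of each value)
   *   ai = {0,2,3,5}     (row k occupies positions ai[k] through ai[k+1]-1)
   * MKL's 4-array CSR interface takes separate row-start and row-end arrays,
   * which is why ai and ai+1 are both passed to mkl_sparse_x_create_csr() below. */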
  if (a->nz && aa && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()");
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
    if (!aijmkl->no_SpMV2) {
      stat = mkl_sparse_optimize(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()");
    }
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  } else {
    aijmkl->csrA = PETSC_NULL;
  }

  return(0);
#endif
}

#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
/* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            m,n;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat_SeqAIJMKL       *aijmkl;

  if (csrA) {
    /* Note: MKL requires a valid pointer for the row-end output array even though we don't use it, so we pass &dummy. */
    stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
    if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
  } else {
    aj = ai = PETSC_NULL;
    aa = PETSC_NULL;
  }

  MatSetType(A,MATSEQAIJ);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  if (csrA) {
    MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);
  } else {
    /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
    MatSetUp(A);
    MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  }

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);

  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl->csrA = csrA;
  /* The below code duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  if (csrA) {
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
  }
  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) return(0);
  /* Note: MKL requires a valid pointer for the row-end output array even though we don't use it, so we pass &dummy. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);
  }

  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
   * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
   * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
  aijmkl->sparse_optimized = PETSC_FALSE;
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
{
  PetscInt            i,j,k;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) {
    PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");
    return(0);
  }
  /* Note: MKL requires a valid pointer for the row-end output array even though we don't use it, so we pass &dummy. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  k = 0;
  for (i=0; i<nrows; i++) {
    PetscViewerASCIIPrintf(viewer,"row %D: ",i);
    nz = ai[i+1] - ai[i];
    for (j=0; j<nz; j++) {
      if (aa) {
        PetscViewerASCIIPrintf(viewer,"(%D, %g) ",aj[k],PetscRealPart(aa[k]));
      } else {
        PetscViewerASCIIPrintf(viewer,"(%D, NULL) ",aj[k]);
      }
      k++;
    }
    PetscViewerASCIIPrintf(viewer,"\n");
  }
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  Mat_SeqAIJMKL *aijmkl_dest;

  MatDuplicate_SeqAIJ(A,op,M);
  aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
  PetscArraycpy(aijmkl_dest,aijmkl,1);
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  return(0);
}

PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  PetscErrorCode ierr;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL  *aijmkl;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
  MatAssemblyEnd_SeqAIJ(A, mode);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  return(0);
}

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt m=A->rmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<m; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt n=A->cmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<n; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char        transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar alpha = 1.0;
  PetscScalar beta;
  char        matdescra[6];
  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0 to compute z = A*x, then add y to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<m; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char        transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar alpha = 1.0;
  PetscScalar beta;
  char        matdescra[6];
  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which (with transa == 't') calculates y = alpha*A'*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0 to compute z = A'*x, then add y to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          n = A->cmap->n;
  PetscInt          i;
  PetscObjectState  state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  sparse_status_t stat = SPARSE_STATUS_SUCCESS;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<n; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

/* -------------------------- MatProduct code -------------------------- */
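/* The routines below use MKL's two-stage sp2m interface (a summary of the flow
 * implemented in this section, not additional MKL documentation): the symbolic
 * stage calls mkl_sparse_sp2m() with SPARSE_STAGE_FULL_MULT_NO_VAL to compute
 * only the nonzero structure of C, and the numeric stage later calls it with
 * SPARSE_STAGE_FINALIZE_MULT to fill in the values, after which the PETSc AIJ
 * representation of C is updated from the MKL handle. */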
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
{
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
  sparse_matrix_t     csrA,csrB,csrC;
  PetscInt            nrows,ncols;
  PetscErrorCode      ierr;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
   * not handle sparse matrices with zero rows or columns. */
  if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
  else nrows = A->cmap->N;
  if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
  else ncols = B->rmap->N;

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
  } else {
    csrC = PETSC_NULL;
  }

  MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);

  return(0);
}

PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
{
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
  sparse_matrix_t     csrA,csrB,csrC;
  PetscErrorCode      ierr;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
  } else {
    csrC = PETSC_NULL;
  }

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}

PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);
  return(0);
}

static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;

  MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);
  return(0);
}

static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;
  PetscReal   fill = product->fill;

  MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);
  C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
  return(0);
}

PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
{
  Mat                 Ct;
  Vec                 zeros;
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
  sparse_matrix_t     csrA,csrP,csrC;
  PetscBool           set,flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  csrC = c->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()");

  /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
   * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
   * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
   * We have to fill in the missing portion, which we currently do below by forming the transpose and performing a MatAXPY
   * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
   * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
   * the full matrix. */
  MatSeqAIJMKL_update_from_mkl_handle(C);
  MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);
  MatCreateVecs(C,&zeros,NULL);
  VecSetFromOptions(zeros);
  VecZeroEntries(zeros);
  MatDiagonalSet(Ct,zeros,INSERT_VALUES);
  MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);
  /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
  MatProductCreateWithMat(A,P,PETSC_NULL,C);
  MatProductSetType(C,MATPRODUCT_PtAP);
  MatSeqAIJMKL_create_mkl_handle(C);
  VecDestroy(&zeros);
  MatDestroy(&Ct);
  return(0);
}

PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
{
  Mat_Product         *product = C->product;
  Mat                 A = product->A,P = product->B;
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
  sparse_matrix_t     csrA,csrP,csrC;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  if (csrP && csrA) {
    stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()");
  } else {
    csrC = PETSC_NULL;
  }

  /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
   * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
   * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
   * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
   * is guaranteed. */
  MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);

  C->ops->productnumeric = MatProductNumeric_PtAP;
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
{
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
{
  C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
{
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A;
  PetscBool   set,flag;

  if (PetscDefined(USE_COMPLEX)) {
    /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
     * We do this in several other locations in this file. This works for the time being, but these
     * routines are considered unsafe and may be removed from the MatProduct code in the future.
     * TODO: Add proper MATSEQAIJMKL implementations */
    C->ops->productsymbolic = NULL;
  } else {
    /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
    MatIsSymmetricKnown(A,&set,&flag);
    if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
    else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
    /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
     * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
  }
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
{
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
{
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  return(0);
}

PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;

  switch (product->type) {
  case MATPRODUCT_AB:
    MatProductSetFromOptions_SeqAIJMKL_AB(C);
    break;
  case MATPRODUCT_AtB:
    MatProductSetFromOptions_SeqAIJMKL_AtB(C);
    break;
  case MATPRODUCT_ABt:
    MatProductSetFromOptions_SeqAIJMKL_ABt(C);
    break;
  case MATPRODUCT_PtAP:
    MatProductSetFromOptions_SeqAIJMKL_PtAP(C);
    break;
  case MATPRODUCT_RARt:
    MatProductSetFromOptions_SeqAIJMKL_RARt(C);
    break;
  case MATPRODUCT_ABC:
    MatProductSetFromOptions_SeqAIJMKL_ABC(C);
    break;
  default:
    break;
  }
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
/* ------------------------ End MatProduct code ------------------------ */

/* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat           B = *newmat;
  Mat_SeqAIJMKL *aijmkl;
  PetscBool     set;
  PetscBool     sametype;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  PetscObjectTypeCompare((PetscObject)A,type,&sametype);
  if (sametype) return(0);

  PetscNewLog(B,&aijmkl);
  B->spptr = (void*) aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy     = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
  PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);
  PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);
  PetscOptionsEnd();
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  if (!aijmkl->no_SpMV2) {
    PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
# if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
# if !defined(PETSC_USE_COMPLEX)
  B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
# else
  B->ops->ptapnumeric = NULL;
# endif
# endif
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
   * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
   * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
   * versions in which the older interface has not been deprecated, we use the old interface. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);

  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);
  *newmat = B;
  return(0);
}

/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
   symmetric A) operations are currently supported.
   Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.

   Collective

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given then nz is ignored

   Level: intermediate

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQAIJMKL);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
  return(0);
}
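
/* Example usage of MatCreateSeqAIJMKL() (a sketch; the matrix size and the
 * estimate of 3 nonzeros per row are hypothetical):
 *
 *   Mat      A;
 *   PetscInt n = 100;
 *   MatCreateSeqAIJMKL(PETSC_COMM_SELF,n,n,3,NULL,&A);
 *   ... insert values with MatSetValues(), then ...
 *   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
 *   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
 */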
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
  return(0);
}