Actual source code: aijmkl.c
2: /*
3: Defines basic operations for the MATSEQAIJMKL matrix class.
4: This class is derived from the MATSEQAIJ class and retains the
5: compressed row storage (aka Yale sparse matrix format) but uses
6: sparse BLAS operations from the Intel Math Kernel Library (MKL)
7: wherever possible.
8: */
10: #include <../src/mat/impls/aij/seq/aij.h>
11: #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12: #include <mkl_spblas.h>
14: typedef struct {
15: PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
16: PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
17: PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18: PetscObjectState state;
19: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20: sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21: struct matrix_descr descr;
22: #endif
23: } Mat_SeqAIJMKL;
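/* A minimal usage sketch (an illustration, not part of this file's API surface): an
 * application never touches Mat_SeqAIJMKL directly, but selects the type at runtime, e.g.
 *
 *   MatCreate(PETSC_COMM_SELF,&A);
 *   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);
 *   MatSetType(A,MATSEQAIJMKL);     -- or pass -mat_type seqaijmkl and call MatSetFromOptions(A)
 *   ... MatSetValues()/assembly as for any AIJ matrix ...
 *
 * All MKL-specific state above is then managed behind the usual Mat interface. */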
extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL matrix to its base PETSc type,
   * so we ignore the 'MatType type' argument. */
  Mat B = *newmat;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
#endif

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  /* Reset the original function pointers. */
  B->ops->duplicate               = MatDuplicate_SeqAIJ;
  B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy                 = MatDestroy_SeqAIJ;
  B->ops->mult                    = MatMult_SeqAIJ;
  B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
  B->ops->multadd                 = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

  if (aijmkl->sparse_optimized) {
    sparse_status_t stat;
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
  }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscFree(B->spptr);

  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

  *newmat = B;
  return(0);
}
PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscFree(A->spptr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
  /* Note that we do not call MatSetType() here: that routine is meant for use when
   * *building* a matrix, not when tearing one down. (The SuperLU matrix class handles
   * destruction the same way.) */
  MatDestroy_SeqAIJ(A);
  return(0);
}
/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
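/* Sketch of the guard pattern used throughout this file before an MKL handle is used
 * (these same lines appear verbatim in the kernels below):
 *
 *   PetscObjectStateGet((PetscObject)A,&state);
 *   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
 *     MatSeqAIJMKL_create_mkl_handle(A);
 *   }
 *
 * i.e., the handle is (re)built lazily whenever it is missing or the PETSc object state
 * shows the matrix has changed since the handle was last created. */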
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  return(0);
#else
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL   *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt        m,n;
  MatScalar       *aa;
  PetscInt        *aj,*ai;
  sparse_status_t stat;
  PetscErrorCode  ierr;

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interfaces, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
   * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) return(0);
#endif

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m = A->rmap->n;
  n = A->cmap->n;
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
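  /* A small worked example of this CSR layout (illustration only): for the 2x3 matrix
   *
   *     [ 1  0  2 ]
   *     [ 0  3  0 ]
   *
   * the three arrays are
   *     ai = {0, 2, 3}      -- m+1 entries; row k occupies positions ai[k]..ai[k+1]-1
   *     aj = {0, 2, 1}      -- column indices (0-based)
   *     aa = {1., 2., 3.}   -- the nonzero values
   * mkl_sparse_x_create_csr() below is handed ai as the row-start array and ai+1 as the
   * row-end array, which is exactly this convention. */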
  if (a->nz && aa && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()");
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
    if (!aijmkl->no_SpMV2) {
      stat = mkl_sparse_optimize(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()");
    }
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  } else {
    aijmkl->csrA = PETSC_NULL;
  }

  return(0);
#endif
}
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
/* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            m,n;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat_SeqAIJMKL       *aijmkl;

  if (csrA) {
    /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
    stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
    if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
  } else {
    aj = ai = PETSC_NULL;
    aa = PETSC_NULL;
  }

  MatSetType(A,MATSEQAIJ);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in the future, can we figure out good ways to deal with this?) */
  if (csrA) {
    MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);
  } else {
    /* Since MatSeqAIJSetPreallocationCSR does initial setup and assembly begin/end, we must do that ourselves here. */
    MatSetUp(A);
    MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  }

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);

  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl->csrA = csrA;

  /* The code below duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  if (csrA) {
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
  }
  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
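/* For context, a sketch of the two-stage sp2m sequence (see the MatProduct code later in
 * this file) that makes the update necessary:
 *
 *   symbolic: mkl_sparse_sp2m(opA,descr,csrA,opB,descr,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
 *   numeric:  mkl_sparse_sp2m(opA,descr,csrA,opB,descr,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
 *
 * After the numeric stage only the MKL handle csrC holds the new values, so they must be
 * copied back into the PETSc AIJ arrays here. */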
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) return(0);

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);
  }

  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
   * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
   * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
  aijmkl->sparse_optimized = PETSC_FALSE;
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
{
  PetscInt            i,j,k;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) {
    PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");
    return(0);
  }

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  k = 0;
  for (i=0; i<nrows; i++) {
    PetscViewerASCIIPrintf(viewer,"row %D: ",i);
    nz = ai[i+1] - ai[i];
    for (j=0; j<nz; j++) {
      if (aa) {
        PetscViewerASCIIPrintf(viewer,"(%D, %g) ",aj[k],PetscRealPart(aa[k]));
      } else {
        PetscViewerASCIIPrintf(viewer,"(%D, NULL) ",aj[k]);
      }
      k++;
    }
    PetscViewerASCIIPrintf(viewer,"\n");
  }
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  Mat_SeqAIJMKL *aijmkl_dest;

  MatDuplicate_SeqAIJ(A,op,M);
  aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
  PetscArraycpy(aijmkl_dest,aijmkl,1);
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  return(0);
}
PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  PetscErrorCode ierr;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL  *aijmkl;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ. This may not be the most elegant approach,
   * but it avoids a lot of code duplication. */
  a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
  MatAssemblyEnd_SeqAIJ(A, mode);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
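  /* Eager inspection is enabled at runtime (a command-line sketch; the option itself is
   * registered in MatConvert_SeqAIJ_SeqAIJMKL() below):
   *
   *   ./myapp -mat_type seqaijmkl -mat_aijmkl_eager_inspection
   *
   * With the flag, mkl_sparse_optimize() runs here at assembly time; without it, the first
   * MatMult()-like operation pays the one-time inspection cost instead. */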
  aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  return(0);
}
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n';   /* Used to indicate to MKL that we are not computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
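  /* For reference, a summary of the matdescra convention from MKL's sparse BLAS
   * documentation (only the two entries set above matter for a general matrix):
   * matdescra[0] selects the structure ('g' general, 's' symmetric, 't' triangular, ...),
   * matdescra[1] the fill mode ('l' lower / 'u' upper, used for symmetric or triangular
   * structures), matdescra[2] the diagonal type ('n' non-unit / 'u' unit), and
   * matdescra[3] the index base ('c' 0-based / 'f' 1-based). Entries [4] and [5] are
   * unused. */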
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt m=A->rmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<m; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't';   /* Used to indicate to MKL that we are computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt n=A->cmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<n; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char        transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar alpha = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<m; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char        transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar alpha = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv() (with transa == 't'), which calculates y = alpha*A'*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultTransposeAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          n = A->cmap->n;
  PetscInt          i;
  PetscObjectState  state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  sparse_status_t stat = SPARSE_STATUS_SUCCESS;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<n; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A'*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultTransposeAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
/* -------------------------- MatProduct code -------------------------- */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
{
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
  sparse_matrix_t     csrA,csrB,csrC;
  PetscInt            nrows,ncols;
  PetscErrorCode      ierr;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
   * not handle sparse matrices with zero rows or columns. */
  if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
  else nrows = A->cmap->N;
  if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
  else ncols = B->rmap->N;

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
  } else {
    csrC = PETSC_NULL;
  }

  MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);

  return(0);
}
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
{
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
  sparse_matrix_t     csrA, csrB, csrC;
  PetscErrorCode      ierr;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  if (csrA && csrB) {
    stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
  } else {
    csrC = PETSC_NULL;
  }

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);
  return(0);
}

PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);
  return(0);
}
static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;

  MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);
  return(0);
}

static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;
  PetscReal   fill = product->fill;

  MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);
  C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
  return(0);
}
PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
{
  Mat                 Ct;
  Vec                 zeros;
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  csrC = c->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()");

  /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
   * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
   * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
   * We have to fill in the missing portion, which we currently do below by forming the transpose and performing a MatAXPY
   * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
   * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
   * the full matrix. */
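  /* In matrix terms, the fix-up below computes (a sketch of the intent, where U is the
   * upper triangle returned by mkl_sparse_sypr(), already stored in C):
   *
   *   Ct = U^T           -- MatTranspose()
   *   diag(Ct) = 0       -- MatDiagonalSet() with a zero vector, so the diagonal is not
   *                         counted twice in the sum below
   *   C  = C + Ct        -- MatAXPY(), i.e. C = U + strict lower triangle of U^T,
   *                         the full symmetric matrix
   */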
  MatSeqAIJMKL_update_from_mkl_handle(C);
  MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);
  MatCreateVecs(C,&zeros,NULL);
  VecSetFromOptions(zeros);
  VecZeroEntries(zeros);
  MatDiagonalSet(Ct,zeros,INSERT_VALUES);
  MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);
  /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
  MatProductCreateWithMat(A,P,PETSC_NULL,C);
  MatProductSetType(C,MATPRODUCT_PtAP);
  MatSeqAIJMKL_create_mkl_handle(C);
  VecDestroy(&zeros);
  MatDestroy(&Ct);
  return(0);
}
PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
{
  Mat_Product         *product = C->product;
  Mat                 A = product->A,P = product->B;
  Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
  sparse_matrix_t     csrA,csrP,csrC;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  if (csrP && csrA) {
    stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()");
  } else {
    csrC = PETSC_NULL;
  }

  /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
   * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
   * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). We believe that
   * leaving things in this incomplete state is OK because the numeric product should follow soon after, but we are not certain
   * that this is guaranteed. */
  MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);

  C->ops->productnumeric = MatProductNumeric_PtAP;
  return(0);
}
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
{
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
{
  C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
{
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A;
  PetscBool   set, flag;

  if (PetscDefined(USE_COMPLEX)) {
    /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
     * We do this in several other locations in this file. This works for the time being, but these
     * routines are considered unsafe and may be removed from the MatProduct code in the future.
     * TODO: Add proper MATSEQAIJMKL implementations */
    C->ops->productsymbolic = NULL;
  } else {
    /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
    MatIsSymmetricKnown(A,&set,&flag);
    if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
    else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
    /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
     * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
  }
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
{
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  return(0);
}

static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
{
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  return(0);
}
PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;

  switch (product->type) {
  case MATPRODUCT_AB:
    MatProductSetFromOptions_SeqAIJMKL_AB(C);
    break;
  case MATPRODUCT_AtB:
    MatProductSetFromOptions_SeqAIJMKL_AtB(C);
    break;
  case MATPRODUCT_ABt:
    MatProductSetFromOptions_SeqAIJMKL_ABt(C);
    break;
  case MATPRODUCT_PtAP:
    MatProductSetFromOptions_SeqAIJMKL_PtAP(C);
    break;
  case MATPRODUCT_RARt:
    MatProductSetFromOptions_SeqAIJMKL_RARt(C);
    break;
  case MATPRODUCT_ABC:
    MatProductSetFromOptions_SeqAIJMKL_ABC(C);
    break;
  default:
    break;
  }
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
/* ------------------------ End MatProduct code ------------------------ */
/* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat           B = *newmat;
  Mat_SeqAIJMKL *aijmkl;
  PetscBool     set;
  PetscBool     sametype;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  PetscObjectTypeCompare((PetscObject)A,type,&sametype);
  if (sametype) return(0);

  PetscNewLog(B,&aijmkl);
  B->spptr = (void*) aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy     = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
  PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);
  PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);
  PetscOptionsEnd();
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  if (!aijmkl->no_SpMV2) {
    PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
# if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
# if !defined(PETSC_USE_COMPLEX)
  B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
# else
  B->ops->ptapnumeric = NULL;
# endif
# endif
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
   * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
   * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
   * versions in which the older interface has not been deprecated, we use the old interface. */
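  /* Summary of which kernels end up being used (a reading of the logic above and below,
   * not an exhaustive support matrix):
   *
   *   SpMV2 available, no_SpMV2 unset          -> *_SpMV2 kernels, with mkl_sparse_optimize()
   *   SpMV2 available, no_SpMV2 set,
   *     old interface deprecated               -> *_SpMV2 kernels, without mkl_sparse_optimize()
   *   old interface available, no_SpMV2 set    -> old mkl_xcsrmv()-based kernels (set just below)
   */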
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);

  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);
  *newmat = B;
  return(0);
}
/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
   symmetric A) operations are currently supported.
   Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.

   Collective

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given then nz is ignored
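   Example Usage:
   A minimal sketch (sizes and fill pattern are illustrative only):
.vb
   Mat         A;
   PetscInt    i = 0, j = 0;
   PetscScalar v = 1.0;

   MatCreateSeqAIJMKL(PETSC_COMM_SELF,10,10,3,NULL,&A);  /* at most 3 nonzeros per row */
   MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);           /* ... more MatSetValues() calls ... */
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);                 /* MKL inspection happens lazily, or here with -mat_aijmkl_eager_inspection */
   MatDestroy(&A);
.ve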
   Level: intermediate

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQAIJMKL);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
  return(0);
}
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
  return(0);
}