#include <../src/mat/impls/baij/mpi/mpibaij.h>

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Perform the usual MPIBAIJ preallocation, then convert the local diagonal
     and off-diagonal blocks to the MKL-accelerated sequential type in place. */
  ierr = MatMPIBAIJSetPreallocation_MPIBAIJ(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&b->A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->B,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&b->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B = *newmat;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJMKL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJMKL);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
#endif
/*@C
   MatCreateBAIJMKL - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).

   This type inherits from BAIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
   operations are currently supported.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   Default PETSc kernels are used otherwise.

   For good matrix assembly performance the user should preallocate the matrix
   storage by setting the parameters d_nz (or d_nnz) and o_nz (or o_nnz).
   By setting these parameters accurately, performance can be increased by more
   than a factor of 50.
   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
        blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.
   Output Parameter:
.  A - the matrix
   Options Database Keys:
+  -mat_block_size - size of the blocks to use
-  -mat_use_hash_table <fact> - set hash table factor
   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()]
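   For example, a minimal sketch of that paradigm (comm, the sizes, bs, and the
   preallocation counts below are application-chosen placeholders):

.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATBAIJMKL);
   MatSetFromOptions(A);
   MatSeqBAIJSetPreallocation(A,bs,d_nz,d_nnz);            /* takes effect on one process   */
   MatMPIBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz); /* takes effect on many processes */
.ve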
   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).
   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
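   For example, a minimal sketch of the dynamic-allocation case (assuming a
   matrix A of this type with block size bs):

.vb
   MatMPIBAIJSetPreallocation(A,bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);
.ve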
   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o o o
   row 4  |o o o d d d o o o o o o
   row 5  |o o o d d d o o o o o o
          --------------------------
.ve
   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJMKL format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
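   As a worked example for the figure above (taking bs=1, so each block is a
   single entry, and assuming every depicted location is actually nonzero),
   this processor could preallocate its three local rows with

.vb
   PetscInt d_nnz[3] = {3,3,3}, o_nnz[3] = {9,9,9};   /* per-row block counts */
.ve

   or, since the counts are the same for every row, simply d_nz=3 and o_nz=9.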
   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJMKL(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatCreateBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    ierr = MatSetType(*A,MATMPIBAIJMKL);CHKERRQ(ierr);
#else
    ierr = PetscInfo(*A,"MKL baij routines are not supported by the installed version of MKL; using PETSc default routines. Please use MKL version 11.3 or higher.\n");CHKERRQ(ierr);
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
#endif
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr);
#else
    ierr = PetscInfo(*A,"MKL baij routines are not supported by the installed version of MKL; using PETSc default routines. Please use MKL version 11.3 or higher.\n");CHKERRQ(ierr);
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
#endif
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  ierr = MatConvert_MPIBAIJ_MPIBAIJMKL(A,MATMPIBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
#else
  ierr = PetscInfo(A,"MKL baij routines are not supported by the installed version of MKL; using PETSc default routines. Please use MKL version 11.3 or higher.\n");CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
/*MC
   MATBAIJMKL - MATBAIJMKL = "baijmkl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQBAIJMKL when constructed with a single process communicator,
   and MATMPIBAIJMKL otherwise. As a result, for single process communicators,
   MatSeqBAIJSetPreallocation() is supported, and similarly MatMPIBAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.
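   For example, a minimal sketch, assuming A has already been created and given
   the type MATBAIJMKL, with application-chosen bs and preallocation counts:

.vb
   MatSeqBAIJSetPreallocation(A,bs,d_nz,d_nnz);
   MatMPIBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz);
.ve

   The call that does not match the communicator size is ignored, so issuing both is safe.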
   Options Database Keys:
.  -mat_type baijmkl - sets the matrix type to "baijmkl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateBAIJMKL(), MATSEQBAIJMKL, MATMPIBAIJMKL
M*/