#include <../src/mat/impls/baij/mpi/mpibaij.h>

PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);

static PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Perform the standard MPIBAIJ preallocation, then convert the local
     diagonal (A) and off-diagonal (B) blocks to the MKL-backed type. */
  ierr = MatMPIBAIJSetPreallocation_MPIBAIJ(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&b->A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->B,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&b->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
static PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B = *newmat;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }
  /* Relabel the matrix and compose the MKL-aware preallocation routine
     so that subsequent preallocation calls dispatch to it. */
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJMKL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJMKL);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
/*@C
   MatCreateBAIJMKL - Creates a sparse parallel matrix in block AIJ format
   (block compressed row). This type inherits from BAIJ and is largely
   identical, but uses sparse BLAS routines from Intel MKL whenever possible.
   MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
   operations are currently supported. If the installed version of MKL
   supports the "SpMV2" sparse inspector-executor routines, then those are
   used by default; otherwise the default PETSc kernels are used.

   For good matrix assembly performance the user should preallocate the matrix
   storage by setting the parameters d_nz (or d_nnz) and o_nz (or o_nnz).
   By setting these parameters accurately, performance can be increased by more
   than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize, but the row
        blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs().
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row), or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
           submatrix (same for all local rows)
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_block_size - size of the blocks to use
-  -mat_use_hash_table <fact> - set hash table factor

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()]
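
   A minimal sketch of that paradigm (comm, the sizes, bs, and the
   preallocation counts below are illustrative placeholders, not prescriptions):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATBAIJMKL);
   MatSetFromOptions(A);
   MatSeqBAIJSetPreallocation(A,bs,d_nz,NULL);
   MatMPIBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
.ve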
   Notes:
   If the *_nnz parameter is given, then the *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor,
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJMKL format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
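
   As a concrete illustration, if every location marked in the figure above is
   actually nonzero and the block size is 1 (assumptions made for this sketch),
   the owning process could preallocate its three block rows as:
.vb
   PetscInt d_nnz[3] = {3,3,3};
   PetscInt o_nnz[3] = {9,9,9};
   MatMPIBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);
.ve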
   Level: intermediate

.seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJMKL(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatCreateBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    /* Multiple processes: use the MPI variant */
    ierr = MatSetType(*A,MATMPIBAIJMKL);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    /* Single process: use the sequential variant */
    ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Build a plain MPIBAIJ matrix, then convert it in place to the MKL type. */
  ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatConvert_MPIBAIJ_MPIBAIJMKL(A,MATMPIBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*MC
   MATBAIJMKL - MATBAIJMKL = "baijmkl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQBAIJMKL when constructed with a single process communicator,
   and MATMPIBAIJMKL otherwise. As a result, for single process communicators,
   MatSeqBAIJSetPreallocation() is supported, and similarly MatMPIBAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity, as shown in the sketch below.
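
   A minimal sketch of that convention (bs, d_nz, and o_nz are illustrative
   values; the preallocation call that does not match the actual run-time type
   is simply ignored):
.vb
   MatCreate(comm,&A);
   MatSetType(A,MATBAIJMKL);
   MatSeqBAIJSetPreallocation(A,bs,d_nz,NULL);
   MatMPIBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
.ve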
   Options Database Keys:
.  -mat_type baijmkl - sets the matrix type to "baijmkl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateBAIJMKL(), MATSEQBAIJMKL, MATMPIBAIJMKL
M*/