Actual source code: mpibaijmkl.c
1: #include <../src/mat/impls/baij/mpi/mpibaij.h>
3: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
5: static PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
6: {
7: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data;
9: MatMPIBAIJSetPreallocation_MPIBAIJ(B,bs,d_nz,d_nnz,o_nz,o_nnz);
10: MatConvert_SeqBAIJ_SeqBAIJMKL(b->A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&b->A);
11: MatConvert_SeqBAIJ_SeqBAIJMKL(b->B,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&b->B);
12: return 0;
13: }
15: static PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
16: {
17: Mat B = *newmat;
19: if (reuse == MAT_INITIAL_MATRIX) {
20: MatDuplicate(A,MAT_COPY_VALUES,&B);
21: }
23: PetscObjectChangeTypeName((PetscObject) B, MATMPIBAIJMKL);
24: PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJMKL);
25: *newmat = B;
26: return 0;
27: }
29: /*@C
30: MatCreateBAIJMKL - Creates a sparse parallel matrix in block AIJ format
31: (block compressed row).
32: This type inherits from BAIJ and is largely identical, but uses sparse BLAS
33: routines from Intel MKL whenever possible.
34: MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
35: operations are currently supported.
36: If the installed version of MKL supports the "SpMV2" sparse
37: inspector-executor routines, then those are used by default.
38: Default PETSc kernels are used otherwise.
39: For good matrix assembly performance the user should preallocate the matrix
40: storage by setting the parameters d_nz (or d_nnz) and o_nz (or o_nnz).
41: By setting these parameters accurately, performance can be increased by more
42: than a factor of 50.
44: Collective
46: Input Parameters:
47: + comm - MPI communicator
48: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
49: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
50: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
51: This value should be the same as the local size used in creating the
52: y vector for the matrix-vector product y = Ax.
53: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
54: This value should be the same as the local size used in creating the
55: x vector for the matrix-vector product y = Ax.
56: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
57: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
58: . d_nz - number of nonzero blocks per block row in diagonal portion of local
59: submatrix (same for all local rows)
60: . d_nnz - array containing the number of nonzero blocks in the various block rows
61: of the in diagonal portion of the local (possibly different for each block
62: row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
63: and set it even if it is zero.
64: . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
65: submatrix (same for all local rows).
66: - o_nnz - array containing the number of nonzero blocks in the various block rows of the
67: off-diagonal portion of the local submatrix (possibly different for
68: each block row) or NULL.
70: Output Parameter:
71: . A - the matrix
73: Options Database Keys:
74: + -mat_block_size - size of the blocks to use
75: - -mat_use_hash_table <fact> - set hash table factor
77: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
78: MatXXXXSetPreallocation() paradigm instead of this routine directly.
79: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
81: Notes:
82: If the *_nnz parameter is given then the *_nz parameter is ignored
84: A nonzero block is any block that as 1 or more nonzeros in it
86: The user MUST specify either the local or global matrix dimensions
87: (possibly both).
89: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
90: than it must be used on all processors that share the object for that argument.
92: Storage Information:
93: For a square global matrix we define each processor's diagonal portion
94: to be its local rows and the corresponding columns (a square submatrix);
95: each processor's off-diagonal portion encompasses the remainder of the
96: local matrix (a rectangular submatrix).
98: The user can specify preallocated storage for the diagonal part of
99: the local submatrix with either d_nz or d_nnz (not both). Set
100: d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
101: memory allocation. Likewise, specify preallocated storage for the
102: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
104: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
105: the figure below we depict these three local rows and all columns (0-11).
107: .vb
108: 0 1 2 3 4 5 6 7 8 9 10 11
109: --------------------------
110: row 3 |o o o d d d o o o o o o
111: row 4 |o o o d d d o o o o o o
112: row 5 |o o o d d d o o o o o o
113: --------------------------
114: .ve
116: Thus, any entries in the d locations are stored in the d (diagonal)
117: submatrix, and any entries in the o locations are stored in the
118: o (off-diagonal) submatrix. Note that the d and the o submatrices are
119: stored simply in the MATSEQBAIJMKL format for compressed row storage.
121: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
122: and o_nz should indicate the number of block nonzeros per row in the o matrix.
123: In general, for PDE problems in which most nonzeros are near the diagonal,
124: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
125: or you will get TERRIBLE performance; see the users' manual chapter on
126: matrices.
128: Level: intermediate
130: .seealso: MatCreate(), MatCreateSeqBAIJMKL(), MatSetValues(), MatCreateBAIJMKL(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
131: @*/
133: PetscErrorCode MatCreateBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
134: {
135: PetscMPIInt size;
137: MatCreate(comm,A);
138: MatSetSizes(*A,m,n,M,N);
139: MPI_Comm_size(comm,&size);
140: if (size > 1) {
141: MatSetType(*A,MATMPIBAIJMKL);
142: MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
143: } else {
144: MatSetType(*A,MATSEQBAIJMKL);
145: MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
146: }
147: return 0;
148: }
150: PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A)
151: {
152: MatSetType(A,MATMPIBAIJ);
153: MatConvert_MPIBAIJ_MPIBAIJMKL(A,MATMPIBAIJMKL,MAT_INPLACE_MATRIX,&A);
154: return 0;
155: }
157: /*MC
158: MATBAIJMKL - MATBAIJMKL = "BAIJMKL" - A matrix type to be used for sparse matrices.
160: This matrix type is identical to MATSEQBAIJMKL when constructed with a single process communicator,
161: and MATMPIBAIJMKL otherwise. As a result, for single process communicators,
162: MatSeqBAIJSetPreallocation() is supported, and similarly MatMPIBAIJSetPreallocation() is supported
163: for communicators controlling multiple processes. It is recommended that you call both of
164: the above preallocation routines for simplicity.
166: Options Database Keys:
167: . -mat_type baijmkl - sets the matrix type to "BAIJMKL" during a call to MatSetFromOptions()
169: Level: beginner
171: .seealso: MatCreateBAIJMKL(), MATSEQBAIJMKL, MATMPIBAIJMKL
172: M*/