Actual source code: mpibaijmkl.c

petsc-3.8.4 2018-03-24
#include <../src/mat/impls/baij/mpi/mpibaij.h>

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);

PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPIBAIJ     *b = (Mat_MPIBAIJ*)B->data;

  /* Preallocate as a standard MPIBAIJ matrix, then convert its diagonal (b->A) and
     off-diagonal (b->B) blocks to the MKL-accelerated sequential BAIJ format. */
  MatMPIBAIJSetPreallocation_MPIBAIJ(B,bs,d_nz,d_nnz,o_nz,o_nnz);
  MatConvert_SeqBAIJ_SeqBAIJMKL(b->A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&b->A);
  MatConvert_SeqBAIJ_SeqBAIJMKL(b->B,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&b->B);
  return(0);
}

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B = *newmat;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  /* Rename the type and install the MKL-aware preallocation routine on the converted matrix. */
  PetscObjectChangeTypeName((PetscObject) B, MATMPIBAIJMKL);
  PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJMKL);
  *newmat = B;
  return(0);
}
#endif

/*@C
   MatCreateBAIJMKL - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).
   This type inherits from BAIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
   operations are currently supported.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   Default PETSc kernels are used otherwise.
   For good matrix assembly performance the user should preallocate the matrix
   storage by setting the parameters d_nz (or d_nnz) and o_nz (or o_nnz).
   By setting these parameters accurately, performance can be increased by more
   than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in the diagonal portion of the local
           submatrix (same for all local block rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row), or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
           submatrix (same for all local block rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
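
   For illustration, here is a minimal sketch of that paradigm (the sizes, block size, and
   preallocation counts below are placeholders, not values taken from this file):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATBAIJMKL);                                /* or -mat_type baijmkl via MatSetFromOptions() */
   MatSetFromOptions(A);
   MatSeqBAIJSetPreallocation(A,bs,d_nz,d_nnz);             /* takes effect on one process */
   MatMPIBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz);  /* takes effect on more than one process */
   /* ... MatSetValuesBlocked(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve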

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
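
   As a sketch of that option (assuming a matrix A that has already been created with one
   of the BAIJMKL types), letting PETSc manage the allocation dynamically looks like:
.vb
   MatMPIBAIJSetPreallocation(A,bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);
.ve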

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJMKL format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
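
   As a concrete illustration (assuming block size bs=3 and that every o location in the
   figure above is actually nonzero), the depicted process owns a single block row with one
   nonzero block in its d part (columns 3-5) and three in its o part (columns 0-2, 6-8 and
   9-11), so it could preallocate with:
.vb
   MatMPIBAIJSetPreallocation(A,3,1,NULL,3,NULL);
.ve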

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJMKL(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/

PetscErrorCode  MatCreateBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscMPIInt    size;

  MatCreate(comm,A);
  MatSetSizes(*A,m,n,M,N);
  MPI_Comm_size(comm,&size);
  if (size > 1) {
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    MatSetType(*A,MATMPIBAIJMKL);
#else
    PetscInfo(A,"MKL BAIJ routines are not supported by the installed version of MKL; using the default PETSc routines instead. Please use MKL version 11.3 or later.\n");
    MatSetType(*A,MATMPIBAIJ);
#endif
    MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
  } else {
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    MatSetType(*A,MATSEQBAIJMKL);
#else
    PetscInfo(A,"MKL BAIJ routines are not supported by the installed version of MKL; using the default PETSc routines instead. Please use MKL version 11.3 or later.\n");
    MatSetType(*A,MATSEQBAIJ);
#endif
    MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
  }
  return(0);
}

PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A)
{
  /* Build an ordinary MPIBAIJ matrix and, when MKL's sparse optimizations are available,
     convert it in place to the MKL-accelerated type. */
  MatSetType(A,MATMPIBAIJ);
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  MatConvert_MPIBAIJ_MPIBAIJMKL(A,MATMPIBAIJMKL,MAT_INPLACE_MATRIX,&A);
#else
  PetscInfo(A,"MKL BAIJ routines are not supported by the installed version of MKL; using the default PETSc routines instead. Please use MKL version 11.3 or later.\n");
#endif
  return(0);
}

/*MC
   MATBAIJMKL - MATBAIJMKL = "BAIJMKL" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQBAIJMKL when constructed with a single process communicator,
   and MATMPIBAIJMKL otherwise.  As a result, for single process communicators,
   MatSeqBAIJSetPreallocation() is supported, and similarly MatMPIBAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.
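
   As a sketch of that recommendation (assuming A has already been created and given the
   BAIJMKL type), one can simply call both; the call that does not match the communicator
   size is ignored:
.vb
   MatSeqBAIJSetPreallocation(A,bs,d_nz,d_nnz);
   MatMPIBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz);
.ve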

   Options Database Keys:
. -mat_type baijmkl - sets the matrix type to "BAIJMKL" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateBAIJMKL(), MATSEQBAIJMKL, MATMPIBAIJMKL
M*/