Actual source code: mkl_cpardiso.c

petsc-3.13.6 2020-09-29
Report Typos and Errors

  2:  #include <petscsys.h>
  3:  #include <../src/mat/impls/aij/mpi/mpiaij.h>
  4:  #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

  6: #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
  7: #define MKL_ILP64
  8: #endif
  9: #include <mkl.h>
 10: #include <mkl_cluster_sparse_solver.h>

 12: /*
 13:  *  Possible mkl_cpardiso phases that controls the execution of the solver.
 14:  *  For more information check mkl_cpardiso manual.
 15:  */
 16: #define JOB_ANALYSIS 11
 17: #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
 18: #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
 19: #define JOB_NUMERICAL_FACTORIZATION 22
 20: #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
 21: #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
 22: #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
 23: #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
 24: #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
 25: #define JOB_RELEASE_OF_LU_MEMORY 0
 26: #define JOB_RELEASE_OF_ALL_MEMORY -1

 28: #define IPARM_SIZE 64
 29: #define INT_TYPE MKL_INT

 31: static const char *Err_MSG_CPardiso(int errNo) {
 32:   switch (errNo) {
 33:     case -1:
 34:       return "input inconsistent"; break;
 35:     case -2:
 36:       return "not enough memory"; break;
 37:     case -3:
 38:       return "reordering problem"; break;
 39:     case -4:
 40:       return "zero pivot, numerical factorization or iterative refinement problem"; break;
 41:     case -5:
 42:       return "unclassified (internal) error"; break;
 43:     case -6:
 44:       return "preordering failed (matrix types 11, 13 only)"; break;
 45:     case -7:
 46:       return "diagonal matrix problem"; break;
 47:     case -8:
 48:       return "32-bit integer overflow problem"; break;
 49:     case -9:
 50:       return "not enough memory for OOC"; break;
 51:     case -10:
 52:       return "problems with opening OOC temporary files"; break;
 53:     case -11:
 54:       return "read/write problems with the OOC data file"; break;
 55:     default : 
 56:       return "unknown error";
 57:   }
 58: }

 60: /*
 61:  *  Internal data structure.
 62:  *  For more information check mkl_cpardiso manual.
 63:  */

 65: typedef struct {

 67:   /* Configuration vector */
 68:   INT_TYPE     iparm[IPARM_SIZE];

 70:   /*
 71:    * Internal mkl_cpardiso memory location.
 72:    * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
 73:    */
 74:   void         *pt[IPARM_SIZE];

 76:   MPI_Comm     comm_mkl_cpardiso;

 78:   /* Basic mkl_cpardiso info*/
 79:   INT_TYPE     phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;

 81:   /* Matrix structure */
 82:   PetscScalar  *a;

 84:   INT_TYPE     *ia, *ja;

 86:   /* Number of non-zero elements */
 87:   INT_TYPE     nz;

 89:   /* Row permutaton vector*/
 90:   INT_TYPE     *perm;

 92:   /* Define is matrix preserve sparce structure. */
 93:   MatStructure matstruc;

 95:   PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);

 97:   /* True if mkl_cpardiso function have been used. */
 98:   PetscBool CleanUp;
 99: } Mat_MKL_CPARDISO;

101: /*
102:  * Copy the elements of matrix A.
103:  * Input:
104:  *   - Mat A: MATSEQAIJ matrix
105:  *   - int shift: matrix index.
106:  *     - 0 for c representation
107:  *     - 1 for fortran representation
108:  *   - MatReuse reuse:
109:  *     - MAT_INITIAL_MATRIX: Create a new aij representation
110:  *     - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
111:  * Output:
112:  *   - int *nnz: Number of nonzero-elements.
113:  *   - int **r pointer to i index
114:  *   - int **c pointer to j elements
115:  *   - MATRIXTYPE **v: Non-zero elements
116:  */
117: PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
118: {
119:   Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;

122:   *v=aa->a;
123:   if (reuse == MAT_INITIAL_MATRIX) {
124:     *r   = (INT_TYPE*)aa->i;
125:     *c   = (INT_TYPE*)aa->j;
126:     *nnz = aa->nz;
127:   }
128:   return(0);
129: }

131: PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
132: {
133:   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
134:   PetscErrorCode    ierr;
135:   PetscInt          rstart,nz,i,j,countA,countB;
136:   PetscInt          *row,*col;
137:   const PetscScalar *av, *bv;
138:   PetscScalar       *val;
139:   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
140:   Mat_SeqAIJ        *aa  = (Mat_SeqAIJ*)(mat->A)->data;
141:   Mat_SeqAIJ        *bb  = (Mat_SeqAIJ*)(mat->B)->data;
142:   PetscInt          colA_start,jB,jcol;

145:   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart=A->rmap->rstart;
146:   av=aa->a; bv=bb->a;

148:   garray = mat->garray;

150:   if (reuse == MAT_INITIAL_MATRIX) {
151:     nz   = aa->nz + bb->nz;
152:     *nnz = nz;
153:     PetscMalloc3(m+1,&row,nz,&col,nz,&val);
154:     *r = row; *c = col; *v = val;
155:   } else {
156:     row = *r; col = *c; val = *v;
157:   }

159:   nz = 0;
160:   for (i=0; i<m; i++) {
161:     row[i] = nz;
162:     countA     = ai[i+1] - ai[i];
163:     countB     = bi[i+1] - bi[i];
164:     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
165:     bjj        = bj + bi[i];

167:     /* B part, smaller col index */
168:     colA_start = rstart + ajj[0]; /* the smallest global col index of A */
169:     jB         = 0;
170:     for (j=0; j<countB; j++) {
171:       jcol = garray[bjj[j]];
172:       if (jcol > colA_start) break;
173:       col[nz]   = jcol;
174:       val[nz++] = *bv++;
175:     }
176:     jB = j;

178:     /* A part */
179:     for (j=0; j<countA; j++) {
180:       col[nz]   = rstart + ajj[j];
181:       val[nz++] = *av++;
182:     }

184:     /* B part, larger col index */
185:     for (j=jB; j<countB; j++) {
186:       col[nz]   = garray[bjj[j]];
187:       val[nz++] = *bv++;
188:     }
189:   }
190:   row[m] = nz;

192:   return(0);
193: }

195: PetscErrorCode MatConvertToTriples_mpibaij_mpibaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
196: {
197:   const PetscInt    *ai, *aj, *bi, *bj,*garray,bs=A->rmap->bs,bs2=bs*bs,m=A->rmap->n/bs,*ajj,*bjj;
198:   PetscErrorCode    ierr;
199:   PetscInt          rstart,nz,i,j,countA,countB;
200:   PetscInt          *row,*col;
201:   const PetscScalar *av, *bv;
202:   PetscScalar       *val;
203:   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
204:   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
205:   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
206:   PetscInt          colA_start,jB,jcol;

209:   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart=A->rmap->rstart/bs;
210:   av=aa->a; bv=bb->a;

212:   garray = mat->garray;

214:   if (reuse == MAT_INITIAL_MATRIX) {
215:     nz   = aa->nz + bb->nz;
216:     *nnz = nz;
217:     PetscMalloc3(m+1,&row,nz,&col,nz*bs2,&val);
218:     *r = row; *c = col; *v = val;
219:   } else {
220:     row = *r; col = *c; val = *v;
221:   }

223:   nz = 0;
224:   for (i=0; i<m; i++) {
225:     row[i]     = nz+1;
226:     countA     = ai[i+1] - ai[i];
227:     countB     = bi[i+1] - bi[i];
228:     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
229:     bjj        = bj + bi[i];

231:     /* B part, smaller col index */
232:     colA_start = rstart + (countA > 0 ? ajj[0] : 0); /* the smallest global col index of A */
233:     jB         = 0;
234:     for (j=0; j<countB; j++) {
235:       jcol = garray[bjj[j]];
236:       if (jcol > colA_start) break;
237:       col[nz++] = jcol + 1;
238:     }
239:     jB = j;
240:     PetscArraycpy(val,bv,jB*bs2);
241:     val += jB*bs2;
242:     bv  += jB*bs2;

244:     /* A part */
245:     for (j=0; j<countA; j++) col[nz++] = rstart + ajj[j] + 1;
246:     PetscArraycpy(val,av,countA*bs2);
247:     val += countA*bs2;
248:     av  += countA*bs2;

250:     /* B part, larger col index */
251:     for (j=jB; j<countB; j++) col[nz++] = garray[bjj[j]] + 1;
252:     PetscArraycpy(val,bv,(countB-jB)*bs2);
253:     val += (countB-jB)*bs2;
254:     bv  += (countB-jB)*bs2;
255:   }
256:   row[m] = nz+1;

258:   return(0);
259: }

261: PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
262: {
263:   const PetscInt    *ai, *aj, *bi, *bj,*garray,bs=A->rmap->bs,bs2=bs*bs,m=A->rmap->n/bs,*ajj,*bjj;
264:   PetscErrorCode    ierr;
265:   PetscInt          rstart,nz,i,j,countA,countB;
266:   PetscInt          *row,*col;
267:   const PetscScalar *av, *bv;
268:   PetscScalar       *val;
269:   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
270:   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
271:   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;

274:   ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart=A->rmap->rstart/bs;
275:   av=aa->a; bv=bb->a;

277:   garray = mat->garray;

279:   if (reuse == MAT_INITIAL_MATRIX) {
280:     nz   = aa->nz + bb->nz;
281:     *nnz = nz;
282:     PetscMalloc3(m+1,&row,nz,&col,nz*bs2,&val);
283:     *r = row; *c = col; *v = val;
284:   } else {
285:     row = *r; col = *c; val = *v;
286:   }

288:   nz = 0;
289:   for (i=0; i<m; i++) {
290:     row[i]     = nz+1;
291:     countA     = ai[i+1] - ai[i];
292:     countB     = bi[i+1] - bi[i];
293:     ajj        = aj + ai[i]; /* ptr to the beginning of this row */
294:     bjj        = bj + bi[i];

296:     /* A part */
297:     for (j=0; j<countA; j++) col[nz++] = rstart + ajj[j] + 1;
298:     PetscArraycpy(val,av,countA*bs2);
299:     val += countA*bs2;
300:     av  += countA*bs2;

302:     /* B part, larger col index */
303:     for (j=0; j<countB; j++) col[nz++] = garray[bjj[j]] + 1;
304:     PetscArraycpy(val,bv,countB*bs2);
305:     val += countB*bs2;
306:     bv  += countB*bs2;
307:   }
308:   row[m] = nz+1;

310:   return(0);
311: }

313: /*
314:  * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
315:  */
316: PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
317: {
318:   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
319:   PetscErrorCode   ierr;

322:   /* Terminate instance, deallocate memories */
323:   if (mat_mkl_cpardiso->CleanUp) {
324:     mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;

326:     cluster_sparse_solver (
327:       mat_mkl_cpardiso->pt,
328:       &mat_mkl_cpardiso->maxfct,
329:       &mat_mkl_cpardiso->mnum,
330:       &mat_mkl_cpardiso->mtype,
331:       &mat_mkl_cpardiso->phase,
332:       &mat_mkl_cpardiso->n,
333:       NULL,
334:       NULL,
335:       NULL,
336:       mat_mkl_cpardiso->perm,
337:       &mat_mkl_cpardiso->nrhs,
338:       mat_mkl_cpardiso->iparm,
339:       &mat_mkl_cpardiso->msglvl,
340:       NULL,
341:       NULL,
342:       &mat_mkl_cpardiso->comm_mkl_cpardiso,
343:       (PetscInt*)&mat_mkl_cpardiso->err);
344:   }

346:   if (mat_mkl_cpardiso->ConvertToTriples != MatCopy_seqaij_seqaij_MKL_CPARDISO) {
347:     PetscFree3(mat_mkl_cpardiso->ia,mat_mkl_cpardiso->ja,mat_mkl_cpardiso->a);
348:   }
349:   MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));
350:   PetscFree(A->data);

352:   /* clear composed functions */
353:   PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
354:   PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);
355:   return(0);
356: }

358: /*
359:  * Computes Ax = b
360:  */
361: PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x)
362: {
363:   Mat_MKL_CPARDISO   *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
364:   PetscErrorCode    ierr;
365:   PetscScalar       *xarray;
366:   const PetscScalar *barray;

369:   mat_mkl_cpardiso->nrhs = 1;
370:   VecGetArray(x,&xarray);
371:   VecGetArrayRead(b,&barray);

373:   /* solve phase */
374:   /*-------------*/
375:   mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
376:   cluster_sparse_solver (
377:     mat_mkl_cpardiso->pt,
378:     &mat_mkl_cpardiso->maxfct,
379:     &mat_mkl_cpardiso->mnum,
380:     &mat_mkl_cpardiso->mtype,
381:     &mat_mkl_cpardiso->phase,
382:     &mat_mkl_cpardiso->n,
383:     mat_mkl_cpardiso->a,
384:     mat_mkl_cpardiso->ia,
385:     mat_mkl_cpardiso->ja,
386:     mat_mkl_cpardiso->perm,
387:     &mat_mkl_cpardiso->nrhs,
388:     mat_mkl_cpardiso->iparm,
389:     &mat_mkl_cpardiso->msglvl,
390:     (void*)barray,
391:     (void*)xarray,
392:     &mat_mkl_cpardiso->comm_mkl_cpardiso,
393:     (PetscInt*)&mat_mkl_cpardiso->err);

395:   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));

397:   VecRestoreArray(x,&xarray);
398:   VecRestoreArrayRead(b,&barray);
399:   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
400:   return(0);
401: }

403: PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x)
404: {
405:   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
406:   PetscErrorCode   ierr;

409: #if defined(PETSC_USE_COMPLEX)
410:   mat_mkl_cpardiso->iparm[12 - 1] = 1;
411: #else
412:   mat_mkl_cpardiso->iparm[12 - 1] = 2;
413: #endif
414:   MatSolve_MKL_CPARDISO(A,b,x);
415:   mat_mkl_cpardiso->iparm[12 - 1] = 0;
416:   return(0);
417: }

419: PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X)
420: {
421:   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
422:   PetscErrorCode    ierr;
423:   PetscScalar       *xarray;
424:   const PetscScalar *barray;

427:   MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);

429:   if (mat_mkl_cpardiso->nrhs > 0) {
430:     MatDenseGetArrayRead(B,&barray);
431:     MatDenseGetArray(X,&xarray);

433:     if (barray == xarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"B and X cannot share the same memory location");

435:     /* solve phase */
436:     /*-------------*/
437:     mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
438:     cluster_sparse_solver (
439:       mat_mkl_cpardiso->pt,
440:       &mat_mkl_cpardiso->maxfct,
441:       &mat_mkl_cpardiso->mnum,
442:       &mat_mkl_cpardiso->mtype,
443:       &mat_mkl_cpardiso->phase,
444:       &mat_mkl_cpardiso->n,
445:       mat_mkl_cpardiso->a,
446:       mat_mkl_cpardiso->ia,
447:       mat_mkl_cpardiso->ja,
448:       mat_mkl_cpardiso->perm,
449:       &mat_mkl_cpardiso->nrhs,
450:       mat_mkl_cpardiso->iparm,
451:       &mat_mkl_cpardiso->msglvl,
452:       (void*)barray,
453:       (void*)xarray,
454:       &mat_mkl_cpardiso->comm_mkl_cpardiso,
455:       (PetscInt*)&mat_mkl_cpardiso->err);
456:     if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
457:     MatDenseRestoreArrayRead(B,&barray);
458:     MatDenseRestoreArray(X,&xarray);

460:   }
461:   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
462:   return(0);
463: }

465: /*
466:  * LU Decomposition
467:  */
468: PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info)
469: {
470:   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data;
471:   PetscErrorCode   ierr;

474:   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
475:   (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);

477:   mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
478:   cluster_sparse_solver (
479:     mat_mkl_cpardiso->pt,
480:     &mat_mkl_cpardiso->maxfct,
481:     &mat_mkl_cpardiso->mnum,
482:     &mat_mkl_cpardiso->mtype,
483:     &mat_mkl_cpardiso->phase,
484:     &mat_mkl_cpardiso->n,
485:     mat_mkl_cpardiso->a,
486:     mat_mkl_cpardiso->ia,
487:     mat_mkl_cpardiso->ja,
488:     mat_mkl_cpardiso->perm,
489:     &mat_mkl_cpardiso->nrhs,
490:     mat_mkl_cpardiso->iparm,
491:     &mat_mkl_cpardiso->msglvl,
492:     NULL,
493:     NULL,
494:     &mat_mkl_cpardiso->comm_mkl_cpardiso,
495:     &mat_mkl_cpardiso->err);
496:   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));

498:   mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
499:   mat_mkl_cpardiso->CleanUp  = PETSC_TRUE;
500:   return(0);
501: }

503: /* Sets mkl_cpardiso options from the options database */
504: PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A)
505: {
506:   Mat_MKL_CPARDISO    *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
507:   PetscErrorCode      ierr;
508:   PetscInt            icntl,threads;
509:   PetscBool           flg;

512:   PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");
513:   PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);
514:   if (flg) mkl_set_num_threads((int)threads);

516:   PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);
517:   if (flg) mat_mkl_cpardiso->maxfct = icntl;

519:   PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);
520:   if (flg) mat_mkl_cpardiso->mnum = icntl;

522:   PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);
523:   if (flg) mat_mkl_cpardiso->msglvl = icntl;

525:   PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);
526:   if (flg) mat_mkl_cpardiso->mtype = icntl;
527:   PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);

529:   if (flg && icntl != 0) {
530:     PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);
531:     if (flg) mat_mkl_cpardiso->iparm[1] = icntl;

533:     PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);
534:     if (flg) mat_mkl_cpardiso->iparm[3] = icntl;

536:     PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);
537:     if (flg) mat_mkl_cpardiso->iparm[4] = icntl;

539:     PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);
540:     if (flg) mat_mkl_cpardiso->iparm[5] = icntl;

542:     PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);
543:     if (flg) mat_mkl_cpardiso->iparm[7] = icntl;

545:     PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);
546:     if (flg) mat_mkl_cpardiso->iparm[9] = icntl;

548:     PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);
549:     if (flg) mat_mkl_cpardiso->iparm[10] = icntl;

551:     PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);
552:     if (flg) mat_mkl_cpardiso->iparm[11] = icntl;

554:     PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl,
555:       &flg);
556:     if (flg) mat_mkl_cpardiso->iparm[12] = icntl;

558:     PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl,
559:       &flg);
560:     if (flg) mat_mkl_cpardiso->iparm[17] = icntl;

562:     PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);
563:     if (flg) mat_mkl_cpardiso->iparm[18] = icntl;

565:     PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);
566:     if (flg) mat_mkl_cpardiso->iparm[20] = icntl;

568:     PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);
569:     if (flg) mat_mkl_cpardiso->iparm[23] = icntl;

571:     PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);
572:     if (flg) mat_mkl_cpardiso->iparm[24] = icntl;

574:     PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);
575:     if (flg) mat_mkl_cpardiso->iparm[26] = icntl;

577:     PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);
578:     if (flg) mat_mkl_cpardiso->iparm[30] = icntl;

580:     PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);
581:     if (flg) mat_mkl_cpardiso->iparm[33] = icntl;

583:     PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);
584:     if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
585:   }

587:   PetscOptionsEnd();
588:   return(0);
589: }

591: PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
592: {
593:   PetscErrorCode  ierr;
594:   PetscInt        bs;
595:   PetscBool       match;
596:   PetscMPIInt     size;


600:   MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));
601:   MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);

603:   mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
604:   mat_mkl_cpardiso->maxfct = 1;
605:   mat_mkl_cpardiso->mnum = 1;
606:   mat_mkl_cpardiso->n = A->rmap->N;
607:   if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36];
608:   mat_mkl_cpardiso->msglvl = 0;
609:   mat_mkl_cpardiso->nrhs = 1;
610:   mat_mkl_cpardiso->err = 0;
611:   mat_mkl_cpardiso->phase = -1;
612: #if defined(PETSC_USE_COMPLEX)
613:   mat_mkl_cpardiso->mtype = 13;
614: #else
615:   mat_mkl_cpardiso->mtype = 11;
616: #endif

618: #if defined(PETSC_USE_REAL_SINGLE)
619:   mat_mkl_cpardiso->iparm[27] = 1;
620: #else
621:   mat_mkl_cpardiso->iparm[27] = 0;
622: #endif

624:   mat_mkl_cpardiso->iparm[ 0] =  1; /* Solver default parameters overriden with provided by iparm */
625:   mat_mkl_cpardiso->iparm[ 1] =  2; /* Use METIS for fill-in reordering */
626:   mat_mkl_cpardiso->iparm[ 5] =  0; /* Write solution into x */
627:   mat_mkl_cpardiso->iparm[ 7] =  2; /* Max number of iterative refinement steps */
628:   mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
629:   mat_mkl_cpardiso->iparm[10] =  1; /* Use nonsymmetric permutation and scaling MPS */
630:   mat_mkl_cpardiso->iparm[12] =  1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
631:   mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
632:   mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
633:   mat_mkl_cpardiso->iparm[26] =  1; /* Check input data for correctness */

635:   mat_mkl_cpardiso->iparm[39] = 0;
636:   if (size > 1) {
637:     mat_mkl_cpardiso->iparm[39] = 2;
638:     mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
639:     mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
640:   }
641:   PetscObjectTypeCompareAny((PetscObject)A,&match,MATMPIBAIJ,MATMPISBAIJ,"");
642:   if (match) {
643:     MatGetBlockSize(A,&bs);
644:     mat_mkl_cpardiso->iparm[36] = bs;
645:     mat_mkl_cpardiso->iparm[40] /= bs;
646:     mat_mkl_cpardiso->iparm[41] /= bs;
647:     mat_mkl_cpardiso->iparm[40]++;
648:     mat_mkl_cpardiso->iparm[41]++;
649:     mat_mkl_cpardiso->iparm[34] = 0;  /* Fortran style */
650:   } else {
651:     mat_mkl_cpardiso->iparm[34] = 1;  /* C style */
652:   }

654:   mat_mkl_cpardiso->perm = 0;
655:   return(0);
656: }

658: /*
659:  * Symbolic decomposition. Mkl_Pardiso analysis phase.
660:  */
661: PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
662: {
663:   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
664:   PetscErrorCode  ierr;

667:   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;

669:   /* Set MKL_CPARDISO options from the options database */
670:   PetscSetMKL_CPARDISOFromOptions(F,A);
671:   (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);

673:   mat_mkl_cpardiso->n = A->rmap->N;
674:   if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36];

676:   /* analysis phase */
677:   /*----------------*/
678:   mat_mkl_cpardiso->phase = JOB_ANALYSIS;

680:   cluster_sparse_solver (
681:     mat_mkl_cpardiso->pt,
682:     &mat_mkl_cpardiso->maxfct,
683:     &mat_mkl_cpardiso->mnum,
684:     &mat_mkl_cpardiso->mtype,
685:     &mat_mkl_cpardiso->phase,
686:     &mat_mkl_cpardiso->n,
687:     mat_mkl_cpardiso->a,
688:     mat_mkl_cpardiso->ia,
689:     mat_mkl_cpardiso->ja,
690:     mat_mkl_cpardiso->perm,
691:     &mat_mkl_cpardiso->nrhs,
692:     mat_mkl_cpardiso->iparm,
693:     &mat_mkl_cpardiso->msglvl,
694:     NULL,
695:     NULL,
696:     &mat_mkl_cpardiso->comm_mkl_cpardiso,
697:     (PetscInt*)&mat_mkl_cpardiso->err);

699:   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));

701:   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
702:   F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
703:   F->ops->solve           = MatSolve_MKL_CPARDISO;
704:   F->ops->solvetranspose  = MatSolveTranspose_MKL_CPARDISO;
705:   F->ops->matsolve        = MatMatSolve_MKL_CPARDISO;
706:   return(0);
707: }

709: PetscErrorCode MatCholeskyFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS perm,const MatFactorInfo *info)
710: {
711:   Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
712:   PetscErrorCode  ierr;

715:   mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;


718:   /* Set MKL_CPARDISO options from the options database */
719:   PetscSetMKL_CPARDISOFromOptions(F,A);
720:   (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);

722:   mat_mkl_cpardiso->n = A->rmap->N;
723:   if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36];
724: #if defined(PETSC_USE_COMPLEX)
725:   SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO CHOLESKY with complex scalars! Use MAT_FACTOR_LU instead",((PetscObject)A)->type_name);
726: #endif
727:   if (A->spd_set && A->spd) mat_mkl_cpardiso->mtype = 2;
728:   else                      mat_mkl_cpardiso->mtype = -2;

730:   /* analysis phase */
731:   /*----------------*/
732:   mat_mkl_cpardiso->phase = JOB_ANALYSIS;

734:   cluster_sparse_solver (
735:     mat_mkl_cpardiso->pt,
736:     &mat_mkl_cpardiso->maxfct,
737:     &mat_mkl_cpardiso->mnum,
738:     &mat_mkl_cpardiso->mtype,
739:     &mat_mkl_cpardiso->phase,
740:     &mat_mkl_cpardiso->n,
741:     mat_mkl_cpardiso->a,
742:     mat_mkl_cpardiso->ia,
743:     mat_mkl_cpardiso->ja,
744:     mat_mkl_cpardiso->perm,
745:     &mat_mkl_cpardiso->nrhs,
746:     mat_mkl_cpardiso->iparm,
747:     &mat_mkl_cpardiso->msglvl,
748:     NULL,
749:     NULL,
750:     &mat_mkl_cpardiso->comm_mkl_cpardiso,
751:     (PetscInt*)&mat_mkl_cpardiso->err);

753:   if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));

755:   mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
756:   F->ops->choleskyfactornumeric = MatFactorNumeric_MKL_CPARDISO;
757:   F->ops->solve                 = MatSolve_MKL_CPARDISO;
758:   F->ops->solvetranspose        = MatSolveTranspose_MKL_CPARDISO;
759:   F->ops->matsolve              = MatMatSolve_MKL_CPARDISO;
760:   return(0);
761: }

763: PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
764: {
765:   PetscErrorCode    ierr;
766:   PetscBool         iascii;
767:   PetscViewerFormat format;
768:   Mat_MKL_CPARDISO  *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
769:   PetscInt          i;

772:   /* check if matrix is mkl_cpardiso type */
773:   if (A->ops->solve != MatSolve_MKL_CPARDISO) return(0);

775:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
776:   if (iascii) {
777:     PetscViewerGetFormat(viewer,&format);
778:     if (format == PETSC_VIEWER_ASCII_INFO) {
779:       PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");
780:       PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase:             %d \n",mat_mkl_cpardiso->phase);
781:       for (i = 1; i <= 64; i++) {
782:         PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]:     %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);
783:       }
784:       PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct:     %d \n", mat_mkl_cpardiso->maxfct);
785:       PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum:     %d \n", mat_mkl_cpardiso->mnum);
786:       PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype:     %d \n", mat_mkl_cpardiso->mtype);
787:       PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n:     %d \n", mat_mkl_cpardiso->n);
788:       PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs:     %d \n", mat_mkl_cpardiso->nrhs);
789:       PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl:     %d \n", mat_mkl_cpardiso->msglvl);
790:     }
791:   }
792:   return(0);
793: }

795: PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
796: {
797:   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;

800:   info->block_size        = 1.0;
801:   info->nz_allocated      = mat_mkl_cpardiso->nz + 0.0;
802:   info->nz_unneeded       = 0.0;
803:   info->assemblies        = 0.0;
804:   info->mallocs           = 0.0;
805:   info->memory            = 0.0;
806:   info->fill_ratio_given  = 0;
807:   info->fill_ratio_needed = 0;
808:   info->factor_mallocs    = 0;
809:   return(0);
810: }

812: PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival)
813: {
814:   Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)F->data;

817:   if (icntl <= 64) {
818:     mat_mkl_cpardiso->iparm[icntl - 1] = ival;
819:   } else {
820:     if (icntl == 65) mkl_set_num_threads((int)ival);
821:     else if (icntl == 66) mat_mkl_cpardiso->maxfct = ival;
822:     else if (icntl == 67) mat_mkl_cpardiso->mnum = ival;
823:     else if (icntl == 68) mat_mkl_cpardiso->msglvl = ival;
824:     else if (icntl == 69) mat_mkl_cpardiso->mtype = ival;
825:   }
826:   return(0);
827: }

829: /*@
830:   MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters

832:    Logically Collective on Mat

834:    Input Parameters:
835: +  F - the factored matrix obtained by calling MatGetFactor()
836: .  icntl - index of Mkl_Pardiso parameter
837: -  ival - value of Mkl_Pardiso parameter

839:   Options Database:
840: .   -mat_mkl_cpardiso_<icntl> <ival>

842:    Level: Intermediate

844:    Notes:
845:     This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options 
846:           database approach when working with TS, SNES, or KSP.

848:    References:
849: .      Mkl_Pardiso Users' Guide

851: .seealso: MatGetFactor()
852: @*/
853: PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
854: {

858:   PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
859:   return(0);
860: }

862: static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type)
863: {
865:   *type = MATSOLVERMKL_CPARDISO;
866:   return(0);
867: }

869: /* MatGetFactor for MPI AIJ matrices */
870: static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F)
871: {
872:   Mat              B;
873:   PetscErrorCode   ierr;
874:   Mat_MKL_CPARDISO *mat_mkl_cpardiso;
875:   PetscBool        isSeqAIJ,isMPIBAIJ,isMPISBAIJ;

878:   /* Create the factorization matrix */

880:   PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);
881:   PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&isMPIBAIJ);
882:   PetscObjectTypeCompare((PetscObject)A,MATMPISBAIJ,&isMPISBAIJ);
883:   MatCreate(PetscObjectComm((PetscObject)A),&B);
884:   MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
885:   PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);
886:   MatSetUp(B);

888:   PetscNewLog(B,&mat_mkl_cpardiso);

890:   if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
891:   else if (isMPIBAIJ) mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpibaij_mpibaij_MKL_CPARDISO;
892:   else if (isMPISBAIJ) mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij_MKL_CPARDISO;
893:   else          mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;

895:   if (ftype == MAT_FACTOR_LU) B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
896:   else B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_AIJMKL_CPARDISO;
897:   B->ops->destroy = MatDestroy_MKL_CPARDISO;

899:   B->ops->view    = MatView_MKL_CPARDISO;
900:   B->ops->getinfo = MatGetInfo_MKL_CPARDISO;

902:   B->factortype   = ftype;
903:   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */

905:   B->data = mat_mkl_cpardiso;

907:   /* set solvertype */
908:   PetscFree(B->solvertype);
909:   PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);

911:   PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mkl_cpardiso);
912:   PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);
913:   PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);

915:   *F = B;
916:   return(0);
917: }

919: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void)
920: {
922:   
924:   MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);
925:   MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);
926:   MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);
927:   MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_mpiaij_mkl_cpardiso);
928:   return(0);
929: }