/* mkl_pardiso.c — PETSc 3.10.5 (2019-03-28) */
1: #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
2: #define MKL_ILP64
3: #endif
5: #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
6: #include <../src/mat/impls/sbaij/seq/sbaij.h>
7: #include <../src/mat/impls/dense/seq/dense.h>
9: #include <stdio.h>
10: #include <stdlib.h>
11: #include <math.h>
12: #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
13: #define MKL_ILP64
14: #endif
15: #include <mkl_pardiso.h>
17: PETSC_EXTERN void PetscSetMKL_PARDISOThreads(int);
19: /*
20: * Possible mkl_pardiso phases that controls the execution of the solver.
21: * For more information check mkl_pardiso manual.
22: */
23: #define JOB_ANALYSIS 11
24: #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
25: #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
26: #define JOB_NUMERICAL_FACTORIZATION 22
27: #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
28: #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
29: #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
30: #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
31: #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
32: #define JOB_RELEASE_OF_LU_MEMORY 0
33: #define JOB_RELEASE_OF_ALL_MEMORY -1
35: #define IPARM_SIZE 64
37: #if defined(PETSC_USE_64BIT_INDICES)
38: #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
39: #define INT_TYPE long long int
40: #define MKL_PARDISO pardiso
41: #define MKL_PARDISO_INIT pardisoinit
42: #else
/* this is the case where the MKL BLAS/LAPACK use 32 bit integers but the 64 bit integer
   version of the Pardiso code is used; hence the need for the 64 below */
45: #define INT_TYPE long long int
46: #define MKL_PARDISO pardiso_64
47: #define MKL_PARDISO_INIT pardiso_64init
48: void pardiso_64init(void *pt, INT_TYPE *mtype, INT_TYPE iparm [])
49: {
50: int iparm_copy[IPARM_SIZE], mtype_copy, i;
51:
52: mtype_copy = *mtype;
53: pardisoinit(pt, &mtype_copy, iparm_copy);
54: for(i = 0; i < IPARM_SIZE; i++){
55: iparm[i] = iparm_copy[i];
56: }
57: }
58: #endif
59: #else
60: #define INT_TYPE int
61: #define MKL_PARDISO pardiso
62: #define MKL_PARDISO_INIT pardisoinit
63: #endif
/*
 * Internal data structure holding all state for one MKL PARDISO factorization.
 * For the meaning of iparm/pt/phase/mtype etc. check the mkl_pardiso manual.
 */
typedef struct {

  /* Configuration vector: PARDISO's 64-entry iparm control/output array */
  INT_TYPE iparm[IPARM_SIZE];

  /*
   * Internal mkl_pardiso memory location.
   * After the first call to mkl_pardiso do not modify pt, as that could cause a serious memory leak.
   */
  void *pt[IPARM_SIZE];

  /* Basic mkl_pardiso info: solve phase, factor bookkeeping, sizes, verbosity, last error code */
  INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;

  /* Matrix structure: CSR arrays (values a, row pointers ia, column indices ja) passed to PARDISO */
  void *a;
  INT_TYPE *ia, *ja;

  /* Number of non-zero elements */
  INT_TYPE nz;

  /* Row permutation vector (also doubles as the 0/1 Schur-row mask when iparm[36-1] == 2) */
  INT_TYPE *perm;

  /* Define if matrix preserves sparse structure.*/
  MatStructure matstruc;

  /* PETSC_TRUE when only the upper triangular part must be handed to PARDISO (symmetric types) */
  PetscBool needsym;
  /* PETSC_TRUE when ia/ja/a were allocated by a Convert routine and must be freed here */
  PetscBool freeaij;

  /* Schur complement: dense schur_size x schur_size block, index list, and workspace */
  PetscScalar *schur;
  PetscInt schur_size;
  PetscInt *schur_idxs;
  PetscScalar *schur_work;
  PetscBLASInt schur_work_size;
  PetscBool solve_interior;

  /* True if mkl_pardiso function have been used (i.e. internal memory must be released on destroy). */
  PetscBool CleanUp;

  /* Conversion to a format suitable for MKL */
  PetscErrorCode (*Convert)(Mat, PetscBool, MatReuse, PetscBool*, INT_TYPE*, INT_TYPE**, INT_TYPE**, PetscScalar**);
} Mat_MKL_PARDISO;
115: PetscErrorCode MatMKLPardiso_Convert_seqsbaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,PetscScalar **v)
116: {
117: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data;
118: PetscInt bs = A->rmap->bs;
121: if (!sym) {
122: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"This should not happen");
123: }
124: if (bs == 1) { /* already in the correct format */
125: *v = aa->a;
126: /* though PetscInt and INT_TYPE are of the same size since they are defined differently the Intel compiler requires a cast */
127: *r = (INT_TYPE*)aa->i;
128: *c = (INT_TYPE*)aa->j;
129: *nnz = (INT_TYPE)aa->nz;
130: *free = PETSC_FALSE;
131: } else {
132: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Conversion from SeqSBAIJ to MKL Pardiso format still need to be implemented");
133: }
134: return(0);
135: }
/* Placeholder converter for SeqBAIJ matrices: always raises PETSC_ERR_SUP,
   since blocked AIJ storage has not yet been translated to the CSR layout
   MKL PARDISO expects.  All parameters are unused. */
PetscErrorCode MatMKLPardiso_Convert_seqbaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,PetscScalar **v)
{
  SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Conversion from SeqBAIJ to MKL Pardiso format still need to be implemented");
  return(0);
}
/*
  Convert a SeqAIJ matrix into the CSR layout MKL PARDISO expects.

  Unsymmetric case (sym == PETSC_FALSE): the AIJ arrays are borrowed in place,
  no copy is made (*free = PETSC_FALSE).

  Symmetric case: PARDISO wants only the upper triangular part including the
  diagonal; it is extracted row by row starting at aa->diag[i] (the position
  of the diagonal entry of row i in the AIJ arrays).  With MAT_INITIAL_MATRIX
  fresh row/col/val arrays are allocated (*free = PETSC_TRUE, caller owns
  them); with MAT_REUSE_MATRIX the sparsity is assumed unchanged and only the
  numerical values in the caller-supplied *v are refreshed.
*/
PetscErrorCode MatMKLPardiso_Convert_seqaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,PetscScalar **v)
{
  Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data;

  if (!sym) { /* already in the correct format */
    *v = aa->a;
    *r = (INT_TYPE*)aa->i;
    *c = (INT_TYPE*)aa->j;
    *nnz = (INT_TYPE)aa->nz;
    *free = PETSC_FALSE;
    return(0);
  }
  /* need to get the triangular part */
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscScalar *vals,*vv;
    PetscInt *row,*col,*jj;
    PetscInt m = A->rmap->n,nz,i;

    /* count entries from the diagonal to the end of each row */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += aa->i[i+1] - aa->diag[i];
    }
    PetscMalloc2(m+1,&row,nz,&col);
    PetscMalloc1(nz,&vals);
    jj = col;
    vv = vals;

    row[0] = 0;
    for (i=0; i<m; i++) {
      PetscInt *aj = aa->j + aa->diag[i];
      PetscScalar *av = aa->a + aa->diag[i];
      PetscInt rl = aa->i[i+1] - aa->diag[i],j; /* upper-triangular length of row i */
      for (j=0; j<rl; j++) {
        *jj = *aj; jj++; aj++;
        *vv = *av; vv++; av++;
      }
      row[i+1] = row[i] + rl;
    }
    *v = vals;
    *r = (INT_TYPE*)row;
    *c = (INT_TYPE*)col;
    *nnz = (INT_TYPE)nz;
    *free = PETSC_TRUE;
  } else {
    /* same sparsity pattern as a previous call: only refresh the values in *v */
    PetscScalar *vv;
    PetscInt m = A->rmap->n,i;

    vv = *v;
    for (i=0; i<m; i++) {
      PetscScalar *av = aa->a + aa->diag[i];
      PetscInt rl = aa->i[i+1] - aa->diag[i],j;
      for (j=0; j<rl; j++) {
        *vv = *av; vv++; av++;
      }
    }
    *free = PETSC_TRUE;
  }
  return(0);
}
/*
  Solve with (or apply) the Schur complement S for all current right-hand sides.

  B and X are dense arrays of size schur_size x nrhs, wrapped (without copies)
  in sequential dense matrices.  Depending on the factor's Schur status either
  a triangular solve (factored S) or a matrix-matrix product (explicitly
  inverted S) is performed; a nonzero iparm[12-1] selects the transposed
  variant.
*/
static PetscErrorCode MatMKLPardisoSolveSchur_Private(Mat F, PetscScalar *B, PetscScalar *X)
{
  Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->data;
  Mat S,Xmat,Bmat;
  MatFactorSchurStatus schurstatus;
  PetscErrorCode ierr;

  MatFactorFactorizeSchurComplement(F);
  MatFactorGetSchurComplement(F,&S,&schurstatus);
  /* with an inverted S the product reads B while writing X, so they cannot alias */
  if (X == B && schurstatus == MAT_FACTOR_SCHUR_INVERTED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"X and B cannot point to the same address");
  MatCreateSeqDense(PETSC_COMM_SELF,mpardiso->schur_size,mpardiso->nrhs,B,&Bmat);
  MatCreateSeqDense(PETSC_COMM_SELF,mpardiso->schur_size,mpardiso->nrhs,X,&Xmat);
  if (X != B) { /* using MatMatSolve */
    MatCopy(Bmat,Xmat,SAME_NONZERO_PATTERN);
  }

#if defined(PETSC_USE_COMPLEX)
  if (mpardiso->iparm[12-1] == 1) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Hermitian solve not implemented yet");
#endif

  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    if (!mpardiso->iparm[12-1]) {
      MatMatSolve(S,Bmat,Xmat);
    } else { /* transpose solve */
      MatMatSolveTranspose(S,Bmat,Xmat);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    if (!mpardiso->iparm[12-1]) {
      MatMatMult(S,Bmat,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Xmat);
    } else { /* transpose solve */
      MatTransposeMatMult(S,Bmat,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Xmat);
    }
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
    break;
  }
  MatFactorRestoreSchurComplement(F,&S,schurstatus);
  MatDestroy(&Bmat);
  MatDestroy(&Xmat);
  return(0);
}
/*
  Register the (sorted, sequential) index set selecting the rows/columns of
  the Schur complement.  (Re)allocates the Schur buffers when the size
  changes, wraps the schur buffer in a dense matrix stored in F->schur, and
  encodes the selected rows as a 0/1 mask in mpardiso->perm, which PARDISO
  interprets as the Schur selector when iparm[36-1] == 2.
*/
PetscErrorCode MatFactorSetSchurIS_MKL_PARDISO(Mat F, IS is)
{
  Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->data;
  const PetscInt *idxs;
  PetscInt size,i;
  PetscMPIInt csize;
  PetscBool sorted;
  PetscErrorCode ierr;

  MPI_Comm_size(PetscObjectComm((PetscObject)F),&csize);
  if (csize > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MKL_PARDISO parallel Schur complements not yet supported from PETSc");
  ISSorted(is,&sorted);
  if (!sorted) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS for MKL_PARDISO Schur complements needs to be sorted");
  }
  ISGetLocalSize(is,&size);
  if (mpardiso->schur_size != size) {
    mpardiso->schur_size = size;
    PetscFree2(mpardiso->schur,mpardiso->schur_work);
    PetscFree(mpardiso->schur_idxs);
    /* workspace sized max(n, 2*size): large enough for one full-length vector
       or two Schur-sized blocks (shift trick used by the solve routines) */
    PetscBLASIntCast(PetscMax(mpardiso->n,2*size),&mpardiso->schur_work_size);
    PetscMalloc2(size*size,&mpardiso->schur,mpardiso->schur_work_size,&mpardiso->schur_work);
    PetscMalloc1(size,&mpardiso->schur_idxs);
  }
  /* NOTE(review): F->schur is overwritten here without an explicit destroy of
     a previously attached Schur matrix -- confirm callers reset it first */
  MatCreateSeqDense(PETSC_COMM_SELF,mpardiso->schur_size,mpardiso->schur_size,mpardiso->schur,&F->schur);
  if (mpardiso->mtype == 2) { /* PARDISO mtype 2: real symmetric positive definite */
    MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);
  }

  /* rebuild the 0/1 Schur mask in perm and cache the index list */
  PetscMemzero(mpardiso->perm,mpardiso->n*sizeof(INT_TYPE));
  ISGetIndices(is,&idxs);
  PetscMemcpy(mpardiso->schur_idxs,idxs,size*sizeof(PetscInt));
  for (i=0;i<size;i++) mpardiso->perm[idxs[i]] = 1;
  ISRestoreIndices(is,&idxs);
  if (size) { /* turn on Schur switch if the set of indices is not empty */
    mpardiso->iparm[36-1] = 2;
  }
  return(0);
}
/*
  Destructor for the MKL PARDISO factor.  If the solver was ever invoked
  (CleanUp), PARDISO's internal memory is released with phase -1
  (JOB_RELEASE_OF_ALL_MEMORY); then all PETSc-side buffers are freed and the
  composed function slots are cleared.
*/
PetscErrorCode MatDestroy_MKL_PARDISO(Mat A)
{
  Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->data;
  PetscErrorCode ierr;

  if (mat_mkl_pardiso->CleanUp) {
    mat_mkl_pardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;

    /* matrix/rhs/solution pointers are not needed for the release phase */
    MKL_PARDISO (mat_mkl_pardiso->pt,
      &mat_mkl_pardiso->maxfct,
      &mat_mkl_pardiso->mnum,
      &mat_mkl_pardiso->mtype,
      &mat_mkl_pardiso->phase,
      &mat_mkl_pardiso->n,
      NULL,
      NULL,
      NULL,
      NULL,
      &mat_mkl_pardiso->nrhs,
      mat_mkl_pardiso->iparm,
      &mat_mkl_pardiso->msglvl,
      NULL,
      NULL,
      &mat_mkl_pardiso->err);
  }
  PetscFree(mat_mkl_pardiso->perm);
  PetscFree2(mat_mkl_pardiso->schur,mat_mkl_pardiso->schur_work);
  PetscFree(mat_mkl_pardiso->schur_idxs);
  if (mat_mkl_pardiso->freeaij) { /* CSR copy was allocated by a Convert routine */
    PetscFree2(mat_mkl_pardiso->ia,mat_mkl_pardiso->ja);
    PetscFree(mat_mkl_pardiso->a);
  }
  PetscFree(A->data);

  /* clear composed functions */
  PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
  PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);
  PetscObjectComposeFunction((PetscObject)A,"MatMkl_PardisoSetCntl_C",NULL);
  return(0);
}
336: static PetscErrorCode MatMKLPardisoScatterSchur_Private(Mat_MKL_PARDISO *mpardiso, PetscScalar *whole, PetscScalar *schur, PetscBool reduce)
337: {
339: if (reduce) { /* data given for the whole matrix */
340: PetscInt i,m=0,p=0;
341: for (i=0;i<mpardiso->nrhs;i++) {
342: PetscInt j;
343: for (j=0;j<mpardiso->schur_size;j++) {
344: schur[p+j] = whole[m+mpardiso->schur_idxs[j]];
345: }
346: m += mpardiso->n;
347: p += mpardiso->schur_size;
348: }
349: } else { /* from Schur to whole */
350: PetscInt i,m=0,p=0;
351: for (i=0;i<mpardiso->nrhs;i++) {
352: PetscInt j;
353: for (j=0;j<mpardiso->schur_size;j++) {
354: whole[m+mpardiso->schur_idxs[j]] = schur[p+j];
355: }
356: m += mpardiso->n;
357: p += mpardiso->schur_size;
358: }
359: }
360: return(0);
361: }
/*
  Solve A x = b using the PARDISO factorization held in A->data.

  Without a Schur complement this is a complete solve with iterative
  refinement (phase 33).  With a Schur complement only the forward
  substitution (phase 331) is run first; the reduced Schur system is then
  solved (or zeroed out when solve_interior is set) and a final backward
  substitution (phase 333) expands the solution back to the full space.

  iparm[6-1] == 1 asks PARDISO to write the solution on top of the
  right-hand-side slot (see -mat_mkl_pardiso_6 "Write solution on x").
*/
PetscErrorCode MatSolve_MKL_PARDISO(Mat A,Vec b,Vec x)
{
  Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->data;
  PetscErrorCode ierr;
  PetscScalar *xarray;
  const PetscScalar *barray;

  mat_mkl_pardiso->nrhs = 1;
  VecGetArray(x,&xarray);
  VecGetArrayRead(b,&barray);

  if (!mat_mkl_pardiso->schur) mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
  else mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION;

  if (barray == xarray) { /* if the two vectors share the same memory */
    PetscScalar *work;
    if (!mat_mkl_pardiso->schur_work) {
      PetscMalloc1(mat_mkl_pardiso->n,&work);
    } else {
      /* reuse the Schur workspace; it is sized at least n (see MatFactorSetSchurIS) */
      work = mat_mkl_pardiso->schur_work;
    }
    mat_mkl_pardiso->iparm[6-1] = 1; /* in-place: solution overwrites the rhs slot */
    MKL_PARDISO (mat_mkl_pardiso->pt,
      &mat_mkl_pardiso->maxfct,
      &mat_mkl_pardiso->mnum,
      &mat_mkl_pardiso->mtype,
      &mat_mkl_pardiso->phase,
      &mat_mkl_pardiso->n,
      mat_mkl_pardiso->a,
      mat_mkl_pardiso->ia,
      mat_mkl_pardiso->ja,
      NULL,
      &mat_mkl_pardiso->nrhs,
      mat_mkl_pardiso->iparm,
      &mat_mkl_pardiso->msglvl,
      (void*)xarray,
      (void*)work,
      &mat_mkl_pardiso->err);
    if (!mat_mkl_pardiso->schur_work) {
      PetscFree(work);
    }
  } else {
    mat_mkl_pardiso->iparm[6-1] = 0; /* out-of-place: read barray, write xarray */
    MKL_PARDISO (mat_mkl_pardiso->pt,
      &mat_mkl_pardiso->maxfct,
      &mat_mkl_pardiso->mnum,
      &mat_mkl_pardiso->mtype,
      &mat_mkl_pardiso->phase,
      &mat_mkl_pardiso->n,
      mat_mkl_pardiso->a,
      mat_mkl_pardiso->ia,
      mat_mkl_pardiso->ja,
      mat_mkl_pardiso->perm,
      &mat_mkl_pardiso->nrhs,
      mat_mkl_pardiso->iparm,
      &mat_mkl_pardiso->msglvl,
      (void*)barray,
      (void*)xarray,
      &mat_mkl_pardiso->err);
  }
  VecRestoreArrayRead(b,&barray);

  if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual",mat_mkl_pardiso->err);

  if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */
    PetscInt shift = mat_mkl_pardiso->schur_size;

    /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */
    if (A->schur_status != MAT_FACTOR_SCHUR_INVERTED) {
      shift = 0; /* *TRS solves in place, so input and output may coincide */
    }

    if (!mat_mkl_pardiso->solve_interior) {
      /* solve Schur complement */
      MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work,PETSC_TRUE);
      MatMKLPardisoSolveSchur_Private(A,mat_mkl_pardiso->schur_work,mat_mkl_pardiso->schur_work+shift);
      MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work+shift,PETSC_FALSE);
    } else { /* if we are solving for the interior problem, any value in barray[schur] forward-substituted to xarray[schur] will be neglected */
      PetscInt i;
      for (i=0;i<mat_mkl_pardiso->schur_size;i++) {
        xarray[mat_mkl_pardiso->schur_idxs[i]] = 0.;
      }
    }

    /* expansion phase */
    mat_mkl_pardiso->iparm[6-1] = 1;
    mat_mkl_pardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION;
    MKL_PARDISO (mat_mkl_pardiso->pt,
      &mat_mkl_pardiso->maxfct,
      &mat_mkl_pardiso->mnum,
      &mat_mkl_pardiso->mtype,
      &mat_mkl_pardiso->phase,
      &mat_mkl_pardiso->n,
      mat_mkl_pardiso->a,
      mat_mkl_pardiso->ia,
      mat_mkl_pardiso->ja,
      mat_mkl_pardiso->perm,
      &mat_mkl_pardiso->nrhs,
      mat_mkl_pardiso->iparm,
      &mat_mkl_pardiso->msglvl,
      (void*)xarray,
      (void*)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */
      &mat_mkl_pardiso->err);

    if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual",mat_mkl_pardiso->err);
    mat_mkl_pardiso->iparm[6-1] = 0; /* restore out-of-place default */
  }
  VecRestoreArray(x,&xarray);
  mat_mkl_pardiso->CleanUp = PETSC_TRUE;
  return(0);
}
476: PetscErrorCode MatSolveTranspose_MKL_PARDISO(Mat A,Vec b,Vec x)
477: {
478: Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->data;
479: PetscInt oiparm12;
480: PetscErrorCode ierr;
483: oiparm12 = mat_mkl_pardiso->iparm[12 - 1];
484: mat_mkl_pardiso->iparm[12 - 1] = 2;
485: MatSolve_MKL_PARDISO(A,b,x);
486: mat_mkl_pardiso->iparm[12 - 1] = oiparm12;
487: return(0);
488: }
/*
  Multi-right-hand-side solve A X = B for sequential dense B and X.

  Mirrors MatSolve_MKL_PARDISO: full solve (phase 33) when no Schur complement
  is present, otherwise forward substitution (phase 331), Schur solve on the
  reduced block, and backward substitution (phase 333).  The Schur workspace
  is temporarily enlarged when nrhs makes the default buffer too small.

  NOTE(review): barray/xarray obtained via MatDenseGetArray are never handed
  back with MatDenseRestoreArray -- confirm against the upstream source.
*/
PetscErrorCode MatMatSolve_MKL_PARDISO(Mat A,Mat B,Mat X)
{
  Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(A)->data;
  PetscErrorCode ierr;
  PetscScalar *barray, *xarray;
  PetscBool flg;

  PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix");
  PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix");

  /* nrhs = number of columns of B; the cast assumes PetscInt and INT_TYPE
     have the same size in this configuration (guarded by the #defines above) */
  MatGetSize(B,NULL,(PetscInt*)&mat_mkl_pardiso->nrhs);

  if (mat_mkl_pardiso->nrhs > 0) {
    MatDenseGetArray(B,&barray);
    MatDenseGetArray(X,&xarray);

    if (barray == xarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"B and X cannot share the same memory location");
    if (!mat_mkl_pardiso->schur) mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
    else mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION;
    mat_mkl_pardiso->iparm[6-1] = 0; /* out-of-place: read barray, write xarray */

    MKL_PARDISO (mat_mkl_pardiso->pt,
      &mat_mkl_pardiso->maxfct,
      &mat_mkl_pardiso->mnum,
      &mat_mkl_pardiso->mtype,
      &mat_mkl_pardiso->phase,
      &mat_mkl_pardiso->n,
      mat_mkl_pardiso->a,
      mat_mkl_pardiso->ia,
      mat_mkl_pardiso->ja,
      mat_mkl_pardiso->perm,
      &mat_mkl_pardiso->nrhs,
      mat_mkl_pardiso->iparm,
      &mat_mkl_pardiso->msglvl,
      (void*)barray,
      (void*)xarray,
      &mat_mkl_pardiso->err);
    if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual",mat_mkl_pardiso->err);

    if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */
      PetscScalar *o_schur_work = NULL;
      PetscInt shift = mat_mkl_pardiso->schur_size*mat_mkl_pardiso->nrhs,scale;
      PetscInt mem = mat_mkl_pardiso->n*mat_mkl_pardiso->nrhs;

      /* allocate extra memory if it is needed: the inverted-Schur path needs
         separate input and output blocks (scale = 2) */
      scale = 1;
      if (A->schur_status == MAT_FACTOR_SCHUR_INVERTED) scale = 2;

      mem *= scale;
      if (mem > mat_mkl_pardiso->schur_work_size) {
        o_schur_work = mat_mkl_pardiso->schur_work;
        PetscMalloc1(mem,&mat_mkl_pardiso->schur_work);
      }

      /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */
      if (A->schur_status != MAT_FACTOR_SCHUR_INVERTED) shift = 0;

      /* solve Schur complement */
      if (!mat_mkl_pardiso->solve_interior) {
        MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work,PETSC_TRUE);
        MatMKLPardisoSolveSchur_Private(A,mat_mkl_pardiso->schur_work,mat_mkl_pardiso->schur_work+shift);
        MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work+shift,PETSC_FALSE);
      } else { /* if we are solving for the interior problem, any value in barray[schur,n] forward-substituted to xarray[schur,n] will be neglected */
        PetscInt i,n,m=0;
        for (n=0;n<mat_mkl_pardiso->nrhs;n++) {
          for (i=0;i<mat_mkl_pardiso->schur_size;i++) {
            xarray[mat_mkl_pardiso->schur_idxs[i]+m] = 0.;
          }
          m += mat_mkl_pardiso->n;
        }
      }

      /* expansion phase */
      mat_mkl_pardiso->iparm[6-1] = 1;
      mat_mkl_pardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION;
      MKL_PARDISO (mat_mkl_pardiso->pt,
        &mat_mkl_pardiso->maxfct,
        &mat_mkl_pardiso->mnum,
        &mat_mkl_pardiso->mtype,
        &mat_mkl_pardiso->phase,
        &mat_mkl_pardiso->n,
        mat_mkl_pardiso->a,
        mat_mkl_pardiso->ia,
        mat_mkl_pardiso->ja,
        mat_mkl_pardiso->perm,
        &mat_mkl_pardiso->nrhs,
        mat_mkl_pardiso->iparm,
        &mat_mkl_pardiso->msglvl,
        (void*)xarray,
        (void*)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */
        &mat_mkl_pardiso->err);
      if (o_schur_work) { /* restore original schur_work (minimal size) */
        PetscFree(mat_mkl_pardiso->schur_work);
        mat_mkl_pardiso->schur_work = o_schur_work;
      }
      if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual",mat_mkl_pardiso->err);
      mat_mkl_pardiso->iparm[6-1] = 0; /* restore out-of-place default */
    }
  }
  mat_mkl_pardiso->CleanUp = PETSC_TRUE;
  return(0);
}
/*
  Numerical factorization (phase 22).  Refreshes the CSR values from A via the
  Convert callback with MAT_REUSE_MATRIX (sparsity assumed unchanged since the
  symbolic phase) and runs PARDISO; when a Schur complement was requested its
  output (written into mat_mkl_pardiso->schur) is transposed in place because
  PARDISO produces it in row-major order.
*/
PetscErrorCode MatFactorNumeric_MKL_PARDISO(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(F)->data;
  PetscErrorCode ierr;

  mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN;
  (*mat_mkl_pardiso->Convert)(A,mat_mkl_pardiso->needsym,MAT_REUSE_MATRIX,&mat_mkl_pardiso->freeaij,&mat_mkl_pardiso->nz,&mat_mkl_pardiso->ia,&mat_mkl_pardiso->ja,(PetscScalar**)&mat_mkl_pardiso->a);

  mat_mkl_pardiso->phase = JOB_NUMERICAL_FACTORIZATION;
  MKL_PARDISO (mat_mkl_pardiso->pt,
    &mat_mkl_pardiso->maxfct,
    &mat_mkl_pardiso->mnum,
    &mat_mkl_pardiso->mtype,
    &mat_mkl_pardiso->phase,
    &mat_mkl_pardiso->n,
    mat_mkl_pardiso->a,
    mat_mkl_pardiso->ia,
    mat_mkl_pardiso->ja,
    mat_mkl_pardiso->perm,
    &mat_mkl_pardiso->nrhs,
    mat_mkl_pardiso->iparm,
    &mat_mkl_pardiso->msglvl,
    NULL,
    (void*)mat_mkl_pardiso->schur, /* Schur block is deposited here when iparm[36-1] == 2 */
    &mat_mkl_pardiso->err);
  if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual",mat_mkl_pardiso->err);

  if (F->schur) { /* schur output from pardiso is in row major format */
    MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);
    MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);
  }
  mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN;
  mat_mkl_pardiso->CleanUp = PETSC_TRUE;
  return(0);
}
/*
  Process -mat_mkl_pardiso_NN runtime options.

  Options 1..64 map onto PARDISO's iparm[NN-1] control entries; 65..69 are
  PETSc-side extras (threads, maxfct, mnum, msglvl, mtype).  Changing the
  matrix type (-mat_mkl_pardiso_69) re-runs the PARDISO initializer and then
  re-applies the precision flag and 0-based indexing.  The individual iparm
  options are honored only when -mat_mkl_pardiso_1 is given a nonzero value
  (i.e. when not deferring to the solver defaults).
*/
PetscErrorCode PetscSetMKL_PARDISOFromOptions(Mat F, Mat A)
{
  Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO*)F->data;
  PetscErrorCode ierr;
  PetscInt icntl,threads=1;
  PetscBool flg;

  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_PARDISO Options","Mat");

  PetscOptionsInt("-mat_mkl_pardiso_65","Number of threads to use within PARDISO","None",threads,&threads,&flg);
  if (flg) PetscSetMKL_PARDISOThreads((int)threads);

  PetscOptionsInt("-mat_mkl_pardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_pardiso->maxfct,&icntl,&flg);
  if (flg) mat_mkl_pardiso->maxfct = icntl;

  PetscOptionsInt("-mat_mkl_pardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_pardiso->mnum,&icntl,&flg);
  if (flg) mat_mkl_pardiso->mnum = icntl;

  PetscOptionsInt("-mat_mkl_pardiso_68","Message level information","None",mat_mkl_pardiso->msglvl,&icntl,&flg);
  if (flg) mat_mkl_pardiso->msglvl = icntl;

  PetscOptionsInt("-mat_mkl_pardiso_69","Defines the matrix type","None",mat_mkl_pardiso->mtype,&icntl,&flg);
  if(flg){
    void *pt[IPARM_SIZE]; /* throwaway handle: only iparm defaults are wanted here */
    mat_mkl_pardiso->mtype = icntl;
    MKL_PARDISO_INIT(pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm);
#if defined(PETSC_USE_REAL_SINGLE)
    mat_mkl_pardiso->iparm[27] = 1;  /* single precision input arrays */
#else
    mat_mkl_pardiso->iparm[27] = 0;  /* double precision input arrays */
#endif
    mat_mkl_pardiso->iparm[34] = 1; /* use 0-based indexing */
  }
  PetscOptionsInt("-mat_mkl_pardiso_1","Use default values","None",mat_mkl_pardiso->iparm[0],&icntl,&flg);

  if (flg && icntl != 0) {
    PetscOptionsInt("-mat_mkl_pardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_pardiso->iparm[1],&icntl,&flg);
    if (flg) mat_mkl_pardiso->iparm[1] = icntl;

    PetscOptionsInt("-mat_mkl_pardiso_4","Preconditioned CGS/CG","None",mat_mkl_pardiso->iparm[3],&icntl,&flg);
    if (flg) mat_mkl_pardiso->iparm[3] = icntl;

    PetscOptionsInt("-mat_mkl_pardiso_5","User permutation","None",mat_mkl_pardiso->iparm[4],&icntl,&flg);
    if (flg) mat_mkl_pardiso->iparm[4] = icntl;

    PetscOptionsInt("-mat_mkl_pardiso_6","Write solution on x","None",mat_mkl_pardiso->iparm[5],&icntl,&flg);
    if (flg) mat_mkl_pardiso->iparm[5] = icntl;

    PetscOptionsInt("-mat_mkl_pardiso_8","Iterative refinement step","None",mat_mkl_pardiso->iparm[7],&icntl,&flg);
    if (flg) mat_mkl_pardiso->iparm[7] = icntl;

    PetscOptionsInt("-mat_mkl_pardiso_10","Pivoting perturbation","None",mat_mkl_pardiso->iparm[9],&icntl,&flg);
    if (flg) mat_mkl_pardiso->iparm[9] = icntl;

    PetscOptionsInt("-mat_mkl_pardiso_11","Scaling vectors","None",mat_mkl_pardiso->iparm[10],&icntl,&flg);
    if (flg) mat_mkl_pardiso->iparm[10] = icntl;

    PetscOptionsInt("-mat_mkl_pardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_pardiso->iparm[11],&icntl,&flg);
    if (flg) mat_mkl_pardiso->iparm[11] = icntl;

    PetscOptionsInt("-mat_mkl_pardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_pardiso->iparm[12],&icntl,&flg);
    if (flg) mat_mkl_pardiso->iparm[12] = icntl;

    PetscOptionsInt("-mat_mkl_pardiso_18","Numbers of non-zero elements","None",mat_mkl_pardiso->iparm[17],&icntl,&flg);
    if (flg) mat_mkl_pardiso->iparm[17] = icntl;

    PetscOptionsInt("-mat_mkl_pardiso_19","Report number of floating point operations","None",mat_mkl_pardiso->iparm[18],&icntl,&flg);
    if (flg) mat_mkl_pardiso->iparm[18] = icntl;

    PetscOptionsInt("-mat_mkl_pardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_pardiso->iparm[20],&icntl,&flg);
    if (flg) mat_mkl_pardiso->iparm[20] = icntl;

    PetscOptionsInt("-mat_mkl_pardiso_24","Parallel factorization control","None",mat_mkl_pardiso->iparm[23],&icntl,&flg);
    if (flg) mat_mkl_pardiso->iparm[23] = icntl;

    PetscOptionsInt("-mat_mkl_pardiso_25","Parallel forward/backward solve control","None",mat_mkl_pardiso->iparm[24],&icntl,&flg);
    if (flg) mat_mkl_pardiso->iparm[24] = icntl;

    PetscOptionsInt("-mat_mkl_pardiso_27","Matrix checker","None",mat_mkl_pardiso->iparm[26],&icntl,&flg);
    if (flg) mat_mkl_pardiso->iparm[26] = icntl;

    PetscOptionsInt("-mat_mkl_pardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_pardiso->iparm[30],&icntl,&flg);
    if (flg) mat_mkl_pardiso->iparm[30] = icntl;

    PetscOptionsInt("-mat_mkl_pardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_pardiso->iparm[33],&icntl,&flg);
    if (flg) mat_mkl_pardiso->iparm[33] = icntl;

    PetscOptionsInt("-mat_mkl_pardiso_60","Intel MKL_PARDISO mode","None",mat_mkl_pardiso->iparm[59],&icntl,&flg);
    if (flg) mat_mkl_pardiso->iparm[59] = icntl;
  }
  PetscOptionsEnd();
  return(0);
}
728: PetscErrorCode MatFactorMKL_PARDISOInitialize_Private(Mat A, MatFactorType ftype, Mat_MKL_PARDISO *mat_mkl_pardiso)
729: {
731: PetscInt i;
734: for ( i = 0; i < IPARM_SIZE; i++ ){
735: mat_mkl_pardiso->iparm[i] = 0;
736: }
737: for ( i = 0; i < IPARM_SIZE; i++ ){
738: mat_mkl_pardiso->pt[i] = 0;
739: }
740: /* Default options for both sym and unsym */
741: mat_mkl_pardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */
742: mat_mkl_pardiso->iparm[ 1] = 2; /* Metis reordering */
743: mat_mkl_pardiso->iparm[ 5] = 0; /* Write solution into x */
744: mat_mkl_pardiso->iparm[ 7] = 0; /* Max number of iterative refinement steps */
745: mat_mkl_pardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
746: mat_mkl_pardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
747: #if 0
748: mat_mkl_pardiso->iparm[23] = 1; /* Parallel factorization control*/
749: #endif
750: mat_mkl_pardiso->iparm[34] = 1; /* Cluster Sparse Solver use C-style indexing for ia and ja arrays */
751: mat_mkl_pardiso->iparm[39] = 0; /* Input: matrix/rhs/solution stored on master */
752:
753: mat_mkl_pardiso->CleanUp = PETSC_FALSE;
754: mat_mkl_pardiso->maxfct = 1; /* Maximum number of numerical factorizations. */
755: mat_mkl_pardiso->mnum = 1; /* Which factorization to use. */
756: mat_mkl_pardiso->msglvl = 0; /* 0: do not print 1: Print statistical information in file */
757: mat_mkl_pardiso->phase = -1;
758: mat_mkl_pardiso->err = 0;
759:
760: mat_mkl_pardiso->n = A->rmap->N;
761: mat_mkl_pardiso->nrhs = 1;
762: mat_mkl_pardiso->err = 0;
763: mat_mkl_pardiso->phase = -1;
764:
765: if(ftype == MAT_FACTOR_LU){
766: mat_mkl_pardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
767: mat_mkl_pardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */
768: mat_mkl_pardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
770: } else {
771: mat_mkl_pardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
772: mat_mkl_pardiso->iparm[10] = 0; /* Use nonsymmetric permutation and scaling MPS */
773: mat_mkl_pardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
774: /* mat_mkl_pardiso->iparm[20] = 1; */ /* Apply 1x1 and 2x2 Bunch-Kaufman pivoting during the factorization process */
775: #if defined(PETSC_USE_DEBUG)
776: mat_mkl_pardiso->iparm[26] = 1; /* Matrix checker */
777: #endif
778: }
779: PetscMalloc1(A->rmap->N*sizeof(INT_TYPE), &mat_mkl_pardiso->perm);
780: for(i = 0; i < A->rmap->N; i++){
781: mat_mkl_pardiso->perm[i] = 0;
782: }
783: mat_mkl_pardiso->schur_size = 0;
784: return(0);
785: }
/*
  Symbolic factorization shared by LU and Cholesky (phase 11): processes
  runtime options, converts A into PARDISO's CSR layout (freeing any copy
  left over from a previous symbolic phase), runs the analysis phase, and
  installs the numeric/solve function pointers on F.
*/
PetscErrorCode MatFactorSymbolic_AIJMKL_PARDISO_Private(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO*)F->data;
  PetscErrorCode ierr;

  mat_mkl_pardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
  PetscSetMKL_PARDISOFromOptions(F,A);

  /* throw away any previously computed structure */
  if (mat_mkl_pardiso->freeaij) {
    PetscFree2(mat_mkl_pardiso->ia,mat_mkl_pardiso->ja);
    PetscFree(mat_mkl_pardiso->a);
  }
  (*mat_mkl_pardiso->Convert)(A,mat_mkl_pardiso->needsym,MAT_INITIAL_MATRIX,&mat_mkl_pardiso->freeaij,&mat_mkl_pardiso->nz,&mat_mkl_pardiso->ia,&mat_mkl_pardiso->ja,(PetscScalar**)&mat_mkl_pardiso->a);
  mat_mkl_pardiso->n = A->rmap->N;

  mat_mkl_pardiso->phase = JOB_ANALYSIS;

  MKL_PARDISO (mat_mkl_pardiso->pt,
    &mat_mkl_pardiso->maxfct,
    &mat_mkl_pardiso->mnum,
    &mat_mkl_pardiso->mtype,
    &mat_mkl_pardiso->phase,
    &mat_mkl_pardiso->n,
    mat_mkl_pardiso->a,
    mat_mkl_pardiso->ia,
    mat_mkl_pardiso->ja,
    mat_mkl_pardiso->perm,
    &mat_mkl_pardiso->nrhs,
    mat_mkl_pardiso->iparm,
    &mat_mkl_pardiso->msglvl,
    NULL,
    NULL,
    &mat_mkl_pardiso->err);
  if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual",mat_mkl_pardiso->err);

  mat_mkl_pardiso->CleanUp = PETSC_TRUE;

  /* hook the numeric stage matching the requested factor kind */
  if (F->factortype == MAT_FACTOR_LU) F->ops->lufactornumeric = MatFactorNumeric_MKL_PARDISO;
  else F->ops->choleskyfactornumeric = MatFactorNumeric_MKL_PARDISO;

  F->ops->solve           = MatSolve_MKL_PARDISO;
  F->ops->solvetranspose  = MatSolveTranspose_MKL_PARDISO;
  F->ops->matsolve        = MatMatSolve_MKL_PARDISO;
  return(0);
}
/* LU symbolic factorization: thin wrapper over the shared AIJ symbolic path.
   The ordering index sets r and c are accepted for interface compatibility
   but unused (PARDISO computes its own ordering). */
PetscErrorCode MatLUFactorSymbolic_AIJMKL_PARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info);
  return(0);
}
844: #if !defined(PETSC_USE_COMPLEX)
845: PetscErrorCode MatGetInertia_MKL_PARDISO(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
846: {
847: Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)F->data;
850: if (nneg) *nneg = mat_mkl_pardiso->iparm[22];
851: if (npos) *npos = mat_mkl_pardiso->iparm[21];
852: if (nzero) *nzero = F->rmap->N -(mat_mkl_pardiso->iparm[22] + mat_mkl_pardiso->iparm[21]);
853: return(0);
854: }
855: #endif
/*
  Cholesky symbolic factorization: delegates to the shared AIJ symbolic path
  (the ordering IS r is accepted for interface compatibility but unused) and
  hooks up inertia computation for real scalars; inertia is unavailable in
  complex builds.
*/
PetscErrorCode MatCholeskyFactorSymbolic_AIJMKL_PARDISO(Mat F,Mat A,IS r,const MatFactorInfo *info)
{
  MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info);
#if defined(PETSC_USE_COMPLEX)
  F->ops->getinertia = NULL;
#else
  F->ops->getinertia = MatGetInertia_MKL_PARDISO;
#endif
  return(0);
}
/*
  ASCII viewer for the PARDISO factor: with the PETSC_VIEWER_ASCII_INFO
  format, prints the current phase, all 64 iparm entries, and the basic
  solver settings.  Does nothing unless this really is a PARDISO factor
  (checked via the solve function pointer).

  NOTE(review): the printed fields are of type INT_TYPE, which is
  'long long int' in the 64-bit-index builds, yet they are formatted with
  %d -- confirm the format specifiers on those configurations.
*/
PetscErrorCode MatView_MKL_PARDISO(Mat A, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool iascii;
  PetscViewerFormat format;
  Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->data;
  PetscInt i;

  /* check if matrix is mkl_pardiso type */
  if (A->ops->solve != MatSolve_MKL_PARDISO) return(0);

  PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
  if (iascii) {
    PetscViewerGetFormat(viewer,&format);
    if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscViewerASCIIPrintf(viewer,"MKL_PARDISO run parameters:\n");
      PetscViewerASCIIPrintf(viewer,"MKL_PARDISO phase:             %d \n",mat_mkl_pardiso->phase);
      for(i = 1; i <= 64; i++){
        PetscViewerASCIIPrintf(viewer,"MKL_PARDISO iparm[%d]:     %d \n",i, mat_mkl_pardiso->iparm[i - 1]);
      }
      PetscViewerASCIIPrintf(viewer,"MKL_PARDISO maxfct:     %d \n", mat_mkl_pardiso->maxfct);
      PetscViewerASCIIPrintf(viewer,"MKL_PARDISO mnum:     %d \n", mat_mkl_pardiso->mnum);
      PetscViewerASCIIPrintf(viewer,"MKL_PARDISO mtype:     %d \n", mat_mkl_pardiso->mtype);
      PetscViewerASCIIPrintf(viewer,"MKL_PARDISO n:     %d \n", mat_mkl_pardiso->n);
      PetscViewerASCIIPrintf(viewer,"MKL_PARDISO nrhs:     %d \n", mat_mkl_pardiso->nrhs);
      PetscViewerASCIIPrintf(viewer,"MKL_PARDISO msglvl:     %d \n", mat_mkl_pardiso->msglvl);
    }
  }
  return(0);
}
903: PetscErrorCode MatGetInfo_MKL_PARDISO(Mat A, MatInfoType flag, MatInfo *info)
904: {
905: Mat_MKL_PARDISO *mat_mkl_pardiso =(Mat_MKL_PARDISO*)A->data;
908: info->block_size = 1.0;
909: info->nz_used = mat_mkl_pardiso->nz;
910: info->nz_allocated = mat_mkl_pardiso->nz;
911: info->nz_unneeded = 0.0;
912: info->assemblies = 0.0;
913: info->mallocs = 0.0;
914: info->memory = 0.0;
915: info->fill_ratio_given = 0;
916: info->fill_ratio_needed = 0;
917: info->factor_mallocs = 0;
918: return(0);
919: }
921: PetscErrorCode MatMkl_PardisoSetCntl_MKL_PARDISO(Mat F,PetscInt icntl,PetscInt ival)
922: {
923: Mat_MKL_PARDISO *mat_mkl_pardiso =(Mat_MKL_PARDISO*)F->data;
926: if(icntl <= 64){
927: mat_mkl_pardiso->iparm[icntl - 1] = ival;
928: } else {
929: if(icntl == 65) PetscSetMKL_PARDISOThreads(ival);
930: else if(icntl == 66) mat_mkl_pardiso->maxfct = ival;
931: else if(icntl == 67) mat_mkl_pardiso->mnum = ival;
932: else if(icntl == 68) mat_mkl_pardiso->msglvl = ival;
933: else if(icntl == 69){
934: void *pt[IPARM_SIZE];
935: mat_mkl_pardiso->mtype = ival;
936: MKL_PARDISO_INIT(pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm);
937: #if defined(PETSC_USE_REAL_SINGLE)
938: mat_mkl_pardiso->iparm[27] = 1;
939: #else
940: mat_mkl_pardiso->iparm[27] = 0;
941: #endif
942: mat_mkl_pardiso->iparm[34] = 1;
943: } else if(icntl==70) mat_mkl_pardiso->solve_interior = (PetscBool)!!ival;
944: }
945: return(0);
946: }
948: /*@
949: MatMkl_PardisoSetCntl - Set Mkl_Pardiso parameters
951: Logically Collective on Mat
953: Input Parameters:
954: + F - the factored matrix obtained by calling MatGetFactor()
955: . icntl - index of Mkl_Pardiso parameter
956: - ival - value of Mkl_Pardiso parameter
958: Options Database:
959: . -mat_mkl_pardiso_<icntl> <ival>
961: Level: beginner
963: References:
964: . Mkl_Pardiso Users' Guide
966: .seealso: MatGetFactor()
967: @*/
968: PetscErrorCode MatMkl_PardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
969: {
973: PetscTryMethod(F,"MatMkl_PardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
974: return(0);
975: }
977: /*MC
 978:   MATSOLVERMKL_PARDISO -  A matrix type providing direct solvers (LU and Cholesky) for
 979:   sequential matrices via the external package MKL_PARDISO.
 981:   Works with MATSEQAIJ and MATSEQSBAIJ matrices
983: Use -pc_type lu -pc_factor_mat_solver_type mkl_pardiso to use this direct solver
985: Options Database Keys:
986: + -mat_mkl_pardiso_65 - Number of threads to use within MKL_PARDISO
987: . -mat_mkl_pardiso_66 - Maximum number of factors with identical sparsity structure that must be kept in memory at the same time
988: . -mat_mkl_pardiso_67 - Indicates the actual matrix for the solution phase
989: . -mat_mkl_pardiso_68 - Message level information
990: . -mat_mkl_pardiso_69 - Defines the matrix type. IMPORTANT: When you set this flag, iparm parameters are going to be set to the default ones for the matrix type
991: . -mat_mkl_pardiso_1 - Use default values
992: . -mat_mkl_pardiso_2 - Fill-in reducing ordering for the input matrix
993: . -mat_mkl_pardiso_4 - Preconditioned CGS/CG
994: . -mat_mkl_pardiso_5 - User permutation
995: . -mat_mkl_pardiso_6 - Write solution on x
996: . -mat_mkl_pardiso_8 - Iterative refinement step
997: . -mat_mkl_pardiso_10 - Pivoting perturbation
998: . -mat_mkl_pardiso_11 - Scaling vectors
999: . -mat_mkl_pardiso_12 - Solve with transposed or conjugate transposed matrix A
1000: . -mat_mkl_pardiso_13 - Improved accuracy using (non-) symmetric weighted matching
1001: . -mat_mkl_pardiso_18 - Numbers of non-zero elements
1002: . -mat_mkl_pardiso_19 - Report number of floating point operations
1003: . -mat_mkl_pardiso_21 - Pivoting for symmetric indefinite matrices
1004: . -mat_mkl_pardiso_24 - Parallel factorization control
1005: . -mat_mkl_pardiso_25 - Parallel forward/backward solve control
1006: . -mat_mkl_pardiso_27 - Matrix checker
1007: . -mat_mkl_pardiso_31 - Partial solve and computing selected components of the solution vectors
1008: . -mat_mkl_pardiso_34 - Optimal number of threads for conditional numerical reproducibility (CNR) mode
1009: - -mat_mkl_pardiso_60 - Intel MKL_PARDISO mode
1011: Level: beginner
1013: For more information please check mkl_pardiso manual
1015: .seealso: PCFactorSetMatSolverType(), MatSolverType
1017: M*/
1018: static PetscErrorCode MatFactorGetSolverType_mkl_pardiso(Mat A, MatSolverType *type)
1019: {
1021: *type = MATSOLVERMKL_PARDISO;
1022: return(0);
1023: }
1025: PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mkl_pardiso(Mat A,MatFactorType ftype,Mat *F)
1026: {
1027: Mat B;
1028: PetscErrorCode ierr;
1029: Mat_MKL_PARDISO *mat_mkl_pardiso;
1030: PetscBool isSeqAIJ,isSeqBAIJ,isSeqSBAIJ;
1033: PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);
1034: PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);
1035: PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);
1036: MatCreate(PetscObjectComm((PetscObject)A),&B);
1037: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1038: PetscStrallocpy("mkl_pardiso",&((PetscObject)B)->type_name);
1039: MatSetUp(B);
1041: PetscNewLog(B,&mat_mkl_pardiso);
1042: B->data = mat_mkl_pardiso;
1044: MatFactorMKL_PARDISOInitialize_Private(A, ftype, mat_mkl_pardiso);
1045: if (ftype == MAT_FACTOR_LU) {
1046: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_PARDISO;
1047: B->factortype = MAT_FACTOR_LU;
1048: mat_mkl_pardiso->needsym = PETSC_FALSE;
1049: if (isSeqAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij;
1050: else if (isSeqBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij;
1051: else if (isSeqSBAIJ) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO LU factor with SEQSBAIJ format! Use MAT_FACTOR_CHOLESKY instead");
1052: else SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO LU with %s format",((PetscObject)A)->type_name);
1053: #if defined(PETSC_USE_COMPLEX)
1054: mat_mkl_pardiso->mtype = 13;
1055: #else
1056: mat_mkl_pardiso->mtype = 11;
1057: #endif
1058: } else {
1059: #if defined(PETSC_USE_COMPLEX)
1060: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO CHOLESKY with complex scalars! Use MAT_FACTOR_LU instead",((PetscObject)A)->type_name);
1061: #endif
1062: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_AIJMKL_PARDISO;
1063: B->factortype = MAT_FACTOR_CHOLESKY;
1064: if (isSeqAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij;
1065: else if (isSeqBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij;
1066: else if (isSeqSBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqsbaij;
1067: else SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO CHOLESKY with %s format",((PetscObject)A)->type_name);
1069: mat_mkl_pardiso->needsym = PETSC_TRUE;
1070: if (A->spd_set && A->spd) mat_mkl_pardiso->mtype = 2;
1071: else mat_mkl_pardiso->mtype = -2;
1072: }
1073: B->ops->destroy = MatDestroy_MKL_PARDISO;
1074: B->ops->view = MatView_MKL_PARDISO;
1075: B->factortype = ftype;
1076: B->ops->getinfo = MatGetInfo_MKL_PARDISO;
1077: B->assembled = PETSC_TRUE;
1079: PetscFree(B->solvertype);
1080: PetscStrallocpy(MATSOLVERMKL_PARDISO,&B->solvertype);
1082: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mkl_pardiso);
1083: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MKL_PARDISO);
1084: PetscObjectComposeFunction((PetscObject)B,"MatMkl_PardisoSetCntl_C",MatMkl_PardisoSetCntl_MKL_PARDISO);
1086: *F = B;
1087: return(0);
1088: }
1090: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_Pardiso(void)
1091: {
1095: MatSolverTypeRegister(MATSOLVERMKL_PARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mkl_pardiso);
1096: MatSolverTypeRegister(MATSOLVERMKL_PARDISO,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mkl_pardiso);
1097: MatSolverTypeRegister(MATSOLVERMKL_PARDISO,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mkl_pardiso);
1098: return(0);
1099: }