Actual source code: mkl_pardiso.c
petsc-3.9.4 2018-09-11
1: #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
2: #define MKL_ILP64
3: #endif
5: #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
6: #include <../src/mat/impls/sbaij/seq/sbaij.h>
7: #include <../src/mat/impls/dense/seq/dense.h>
9: #include <stdio.h>
10: #include <stdlib.h>
11: #include <math.h>
12: #include <mkl_pardiso.h>
14: PETSC_EXTERN void PetscSetMKL_PARDISOThreads(int);
16: /*
17: * Possible mkl_pardiso phases that controls the execution of the solver.
18: * For more information check mkl_pardiso manual.
19: */
20: #define JOB_ANALYSIS 11
21: #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
22: #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
23: #define JOB_NUMERICAL_FACTORIZATION 22
24: #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
25: #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
26: #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
27: #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
28: #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
29: #define JOB_RELEASE_OF_LU_MEMORY 0
30: #define JOB_RELEASE_OF_ALL_MEMORY -1
32: #define IPARM_SIZE 64
34: #if defined(PETSC_USE_64BIT_INDICES)
35: #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
36: #define INT_TYPE long long int
37: #define MKL_PARDISO pardiso
38: #define MKL_PARDISO_INIT pardisoinit
39: #else
40: #define INT_TYPE long long int
41: #define MKL_PARDISO pardiso_64
42: #define MKL_PARDISO_INIT pardiso_64init
43: #endif
44: #else
45: #define INT_TYPE int
46: #define MKL_PARDISO pardiso
47: #define MKL_PARDISO_INIT pardisoinit
48: #endif
51: /*
52: * Internal data structure.
53: * For more information check mkl_pardiso manual.
54: */
55: typedef struct {
57: /* Configuration vector*/
58: INT_TYPE iparm[IPARM_SIZE];
60: /*
61: * Internal mkl_pardiso memory location.
62: * After the first call to mkl_pardiso do not modify pt, as that could cause a serious memory leak.
63: */
64: void *pt[IPARM_SIZE];
66: /* Basic mkl_pardiso info*/
67: INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
69: /* Matrix structure*/
70: void *a;
71: INT_TYPE *ia, *ja;
73: /* Number of non-zero elements*/
74: INT_TYPE nz;
76: /* Row permutaton vector*/
77: INT_TYPE *perm;
79: /* Define if matrix preserves sparse structure.*/
80: MatStructure matstruc;
82: PetscBool needsym;
83: PetscBool freeaij;
85: /* Schur complement */
86: PetscScalar *schur;
87: PetscInt schur_size;
88: PetscInt *schur_idxs;
89: PetscScalar *schur_work;
90: PetscBLASInt schur_work_size;
91: PetscBool solve_interior;
93: /* True if mkl_pardiso function have been used.*/
94: PetscBool CleanUp;
96: /* Conversion to a format suitable for MKL */
97: PetscErrorCode (*Convert)(Mat, PetscBool, MatReuse, PetscBool*, INT_TYPE*, INT_TYPE**, INT_TYPE**, PetscScalar**);
98: } Mat_MKL_PARDISO;
100: PetscErrorCode MatMKLPardiso_Convert_seqsbaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,PetscScalar **v)
101: {
102: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data;
103: PetscInt bs = A->rmap->bs;
106: if (!sym) {
107: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"This should not happen");
108: }
109: if (bs == 1) { /* already in the correct format */
110: *v = aa->a;
111: /* though PetscInt and INT_TYPE are of the same size since they are defined differently the Intel compiler requires a cast */
112: *r = (INT_TYPE*)aa->i;
113: *c = (INT_TYPE*)aa->j;
114: *nnz = (INT_TYPE)aa->nz;
115: *free = PETSC_FALSE;
116: } else {
117: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Conversion from SeqSBAIJ to MKL Pardiso format still need to be implemented");
118: }
119: return(0);
120: }
122: PetscErrorCode MatMKLPardiso_Convert_seqbaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,PetscScalar **v)
123: {
125: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Conversion from SeqBAIJ to MKL Pardiso format still need to be implemented");
126: return(0);
127: }
129: PetscErrorCode MatMKLPardiso_Convert_seqaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,PetscScalar **v)
130: {
131: Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data;
135: if (!sym) { /* already in the correct format */
136: *v = aa->a;
137: *r = (INT_TYPE*)aa->i;
138: *c = (INT_TYPE*)aa->j;
139: *nnz = (INT_TYPE)aa->nz;
140: *free = PETSC_FALSE;
141: return(0);
142: }
143: /* need to get the triangular part */
144: if (reuse == MAT_INITIAL_MATRIX) {
145: PetscScalar *vals,*vv;
146: PetscInt *row,*col,*jj;
147: PetscInt m = A->rmap->n,nz,i;
149: nz = 0;
150: for (i=0; i<m; i++) {
151: nz += aa->i[i+1] - aa->diag[i];
152: }
153: PetscMalloc2(m+1,&row,nz,&col);
154: PetscMalloc1(nz,&vals);
155: jj = col;
156: vv = vals;
158: row[0] = 0;
159: for (i=0; i<m; i++) {
160: PetscInt *aj = aa->j + aa->diag[i];
161: PetscScalar *av = aa->a + aa->diag[i];
162: PetscInt rl = aa->i[i+1] - aa->diag[i],j;
163: for (j=0; j<rl; j++) {
164: *jj = *aj; jj++; aj++;
165: *vv = *av; vv++; av++;
166: }
167: row[i+1] = row[i] + rl;
168: }
169: *v = vals;
170: *r = (INT_TYPE*)row;
171: *c = (INT_TYPE*)col;
172: *nnz = (INT_TYPE)nz;
173: *free = PETSC_TRUE;
174: } else {
175: PetscScalar *vv;
176: PetscInt m = A->rmap->n,i;
178: vv = *v;
179: for (i=0; i<m; i++) {
180: PetscScalar *av = aa->a + aa->diag[i];
181: PetscInt rl = aa->i[i+1] - aa->diag[i],j;
182: for (j=0; j<rl; j++) {
183: *vv = *av; vv++; av++;
184: }
185: }
186: *free = PETSC_TRUE;
187: }
188: return(0);
189: }
191: void pardiso_64init(void *pt, INT_TYPE *mtype, INT_TYPE iparm [])
192: {
193: int iparm_copy[IPARM_SIZE], mtype_copy, i;
194:
195: mtype_copy = *mtype;
196: pardisoinit(pt, &mtype_copy, iparm_copy);
197: for(i = 0; i < IPARM_SIZE; i++){
198: iparm[i] = iparm_copy[i];
199: }
200: }
202: static PetscErrorCode MatMKLPardisoSolveSchur_Private(Mat F, PetscScalar *B, PetscScalar *X)
203: {
204: Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->data;
205: Mat S,Xmat,Bmat;
206: MatFactorSchurStatus schurstatus;
207: PetscErrorCode ierr;
210: MatFactorFactorizeSchurComplement(F);
211: MatFactorGetSchurComplement(F,&S,&schurstatus);
212: if (X == B && schurstatus == MAT_FACTOR_SCHUR_INVERTED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"X and B cannot point to the same address");
213: MatCreateSeqDense(PETSC_COMM_SELF,mpardiso->schur_size,mpardiso->nrhs,B,&Bmat);
214: MatCreateSeqDense(PETSC_COMM_SELF,mpardiso->schur_size,mpardiso->nrhs,X,&Xmat);
215: if (X != B) { /* using MatMatSolve */
216: MatCopy(Bmat,Xmat,SAME_NONZERO_PATTERN);
217: }
219: #if defined(PETSC_USE_COMPLEX)
220: if (mpardiso->iparm[12-1] == 1) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Hermitian solve not implemented yet");
221: #endif
223: switch (schurstatus) {
224: case MAT_FACTOR_SCHUR_FACTORED:
225: if (!mpardiso->iparm[12-1]) {
226: MatMatSolve(S,Bmat,Xmat);
227: } else { /* transpose solve */
228: MatMatSolveTranspose(S,Bmat,Xmat);
229: }
230: break;
231: case MAT_FACTOR_SCHUR_INVERTED:
232: if (!mpardiso->iparm[12-1]) {
233: MatMatMult(S,Bmat,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Xmat);
234: } else { /* transpose solve */
235: MatTransposeMatMult(S,Bmat,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Xmat);
236: }
237: break;
238: default:
239: SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
240: break;
241: }
242: MatFactorRestoreSchurComplement(F,&S,schurstatus);
243: MatDestroy(&Bmat);
244: MatDestroy(&Xmat);
245: return(0);
246: }
248: PetscErrorCode MatFactorSetSchurIS_MKL_PARDISO(Mat F, IS is)
249: {
250: Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->data;
251: const PetscInt *idxs;
252: PetscInt size,i;
253: PetscMPIInt csize;
254: PetscBool sorted;
255: PetscErrorCode ierr;
258: MPI_Comm_size(PetscObjectComm((PetscObject)F),&csize);
259: if (csize > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MKL_PARDISO parallel Schur complements not yet supported from PETSc");
260: ISSorted(is,&sorted);
261: if (!sorted) {
262: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS for MKL_PARDISO Schur complements needs to be sorted");
263: }
264: ISGetLocalSize(is,&size);
265: if (mpardiso->schur_size != size) {
266: mpardiso->schur_size = size;
267: PetscFree2(mpardiso->schur,mpardiso->schur_work);
268: PetscFree(mpardiso->schur_idxs);
269: PetscBLASIntCast(PetscMax(mpardiso->n,2*size),&mpardiso->schur_work_size);
270: PetscMalloc2(size*size,&mpardiso->schur,mpardiso->schur_work_size,&mpardiso->schur_work);
271: PetscMalloc1(size,&mpardiso->schur_idxs);
272: }
273: MatCreateSeqDense(PETSC_COMM_SELF,mpardiso->schur_size,mpardiso->schur_size,mpardiso->schur,&F->schur);
274: if (mpardiso->mtype == 2) {
275: MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);
276: }
278: PetscMemzero(mpardiso->perm,mpardiso->n*sizeof(INT_TYPE));
279: ISGetIndices(is,&idxs);
280: PetscMemcpy(mpardiso->schur_idxs,idxs,size*sizeof(PetscInt));
281: for (i=0;i<size;i++) mpardiso->perm[idxs[i]] = 1;
282: ISRestoreIndices(is,&idxs);
283: if (size) { /* turn on Schur switch if the set of indices is not empty */
284: mpardiso->iparm[36-1] = 2;
285: }
286: return(0);
287: }
289: PetscErrorCode MatDestroy_MKL_PARDISO(Mat A)
290: {
291: Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->data;
292: PetscErrorCode ierr;
295: if (mat_mkl_pardiso->CleanUp) {
296: mat_mkl_pardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
298: MKL_PARDISO (mat_mkl_pardiso->pt,
299: &mat_mkl_pardiso->maxfct,
300: &mat_mkl_pardiso->mnum,
301: &mat_mkl_pardiso->mtype,
302: &mat_mkl_pardiso->phase,
303: &mat_mkl_pardiso->n,
304: NULL,
305: NULL,
306: NULL,
307: NULL,
308: &mat_mkl_pardiso->nrhs,
309: mat_mkl_pardiso->iparm,
310: &mat_mkl_pardiso->msglvl,
311: NULL,
312: NULL,
313: &mat_mkl_pardiso->err);
314: }
315: PetscFree(mat_mkl_pardiso->perm);
316: PetscFree2(mat_mkl_pardiso->schur,mat_mkl_pardiso->schur_work);
317: PetscFree(mat_mkl_pardiso->schur_idxs);
318: if (mat_mkl_pardiso->freeaij) {
319: PetscFree2(mat_mkl_pardiso->ia,mat_mkl_pardiso->ja);
320: PetscFree(mat_mkl_pardiso->a);
321: }
322: PetscFree(A->data);
324: /* clear composed functions */
325: PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
326: PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);
327: PetscObjectComposeFunction((PetscObject)A,"MatMkl_PardisoSetCntl_C",NULL);
328: return(0);
329: }
331: static PetscErrorCode MatMKLPardisoScatterSchur_Private(Mat_MKL_PARDISO *mpardiso, PetscScalar *whole, PetscScalar *schur, PetscBool reduce)
332: {
334: if (reduce) { /* data given for the whole matrix */
335: PetscInt i,m=0,p=0;
336: for (i=0;i<mpardiso->nrhs;i++) {
337: PetscInt j;
338: for (j=0;j<mpardiso->schur_size;j++) {
339: schur[p+j] = whole[m+mpardiso->schur_idxs[j]];
340: }
341: m += mpardiso->n;
342: p += mpardiso->schur_size;
343: }
344: } else { /* from Schur to whole */
345: PetscInt i,m=0,p=0;
346: for (i=0;i<mpardiso->nrhs;i++) {
347: PetscInt j;
348: for (j=0;j<mpardiso->schur_size;j++) {
349: whole[m+mpardiso->schur_idxs[j]] = schur[p+j];
350: }
351: m += mpardiso->n;
352: p += mpardiso->schur_size;
353: }
354: }
355: return(0);
356: }
358: PetscErrorCode MatSolve_MKL_PARDISO(Mat A,Vec b,Vec x)
359: {
360: Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->data;
361: PetscErrorCode ierr;
362: PetscScalar *xarray;
363: const PetscScalar *barray;
366: mat_mkl_pardiso->nrhs = 1;
367: VecGetArray(x,&xarray);
368: VecGetArrayRead(b,&barray);
370: if (!mat_mkl_pardiso->schur) mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
371: else mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION;
373: if (barray == xarray) { /* if the two vectors share the same memory */
374: PetscScalar *work;
375: if (!mat_mkl_pardiso->schur_work) {
376: PetscMalloc1(mat_mkl_pardiso->n,&work);
377: } else {
378: work = mat_mkl_pardiso->schur_work;
379: }
380: mat_mkl_pardiso->iparm[6-1] = 1;
381: MKL_PARDISO (mat_mkl_pardiso->pt,
382: &mat_mkl_pardiso->maxfct,
383: &mat_mkl_pardiso->mnum,
384: &mat_mkl_pardiso->mtype,
385: &mat_mkl_pardiso->phase,
386: &mat_mkl_pardiso->n,
387: mat_mkl_pardiso->a,
388: mat_mkl_pardiso->ia,
389: mat_mkl_pardiso->ja,
390: NULL,
391: &mat_mkl_pardiso->nrhs,
392: mat_mkl_pardiso->iparm,
393: &mat_mkl_pardiso->msglvl,
394: (void*)xarray,
395: (void*)work,
396: &mat_mkl_pardiso->err);
397: if (!mat_mkl_pardiso->schur_work) {
398: PetscFree(work);
399: }
400: } else {
401: mat_mkl_pardiso->iparm[6-1] = 0;
402: MKL_PARDISO (mat_mkl_pardiso->pt,
403: &mat_mkl_pardiso->maxfct,
404: &mat_mkl_pardiso->mnum,
405: &mat_mkl_pardiso->mtype,
406: &mat_mkl_pardiso->phase,
407: &mat_mkl_pardiso->n,
408: mat_mkl_pardiso->a,
409: mat_mkl_pardiso->ia,
410: mat_mkl_pardiso->ja,
411: mat_mkl_pardiso->perm,
412: &mat_mkl_pardiso->nrhs,
413: mat_mkl_pardiso->iparm,
414: &mat_mkl_pardiso->msglvl,
415: (void*)barray,
416: (void*)xarray,
417: &mat_mkl_pardiso->err);
418: }
419: VecRestoreArrayRead(b,&barray);
421: if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual",mat_mkl_pardiso->err);
423: if (A->schur) { /* solve Schur complement and expand solution */
424: PetscInt shift = mat_mkl_pardiso->schur_size;
426: /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */
427: if (A->schur_status != MAT_FACTOR_SCHUR_INVERTED) {
428: shift = 0;
429: }
431: if (!mat_mkl_pardiso->solve_interior) {
432: /* solve Schur complement */
433: MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work,PETSC_TRUE);
434: MatMKLPardisoSolveSchur_Private(A,mat_mkl_pardiso->schur_work,mat_mkl_pardiso->schur_work+shift);
435: MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work+shift,PETSC_FALSE);
436: } else { /* if we are solving for the interior problem, any value in barray[schur] forward-substituted to xarray[schur] will be neglected */
437: PetscInt i;
438: for (i=0;i<mat_mkl_pardiso->schur_size;i++) {
439: xarray[mat_mkl_pardiso->schur_idxs[i]] = 0.;
440: }
441: }
443: /* expansion phase */
444: mat_mkl_pardiso->iparm[6-1] = 1;
445: mat_mkl_pardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION;
446: MKL_PARDISO (mat_mkl_pardiso->pt,
447: &mat_mkl_pardiso->maxfct,
448: &mat_mkl_pardiso->mnum,
449: &mat_mkl_pardiso->mtype,
450: &mat_mkl_pardiso->phase,
451: &mat_mkl_pardiso->n,
452: mat_mkl_pardiso->a,
453: mat_mkl_pardiso->ia,
454: mat_mkl_pardiso->ja,
455: mat_mkl_pardiso->perm,
456: &mat_mkl_pardiso->nrhs,
457: mat_mkl_pardiso->iparm,
458: &mat_mkl_pardiso->msglvl,
459: (void*)xarray,
460: (void*)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */
461: &mat_mkl_pardiso->err);
463: if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual",mat_mkl_pardiso->err);
464: mat_mkl_pardiso->iparm[6-1] = 0;
465: }
466: VecRestoreArray(x,&xarray);
467: mat_mkl_pardiso->CleanUp = PETSC_TRUE;
468: return(0);
469: }
471: PetscErrorCode MatSolveTranspose_MKL_PARDISO(Mat A,Vec b,Vec x)
472: {
473: Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->data;
474: PetscInt oiparm12;
475: PetscErrorCode ierr;
478: oiparm12 = mat_mkl_pardiso->iparm[12 - 1];
479: mat_mkl_pardiso->iparm[12 - 1] = 2;
480: MatSolve_MKL_PARDISO(A,b,x);
481: mat_mkl_pardiso->iparm[12 - 1] = oiparm12;
482: return(0);
483: }
485: PetscErrorCode MatMatSolve_MKL_PARDISO(Mat A,Mat B,Mat X)
486: {
487: Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(A)->data;
488: PetscErrorCode ierr;
489: PetscScalar *barray, *xarray;
490: PetscBool flg;
493: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);
494: if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix");
495: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);
496: if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix");
498: MatGetSize(B,NULL,(PetscInt*)&mat_mkl_pardiso->nrhs);
500: if (mat_mkl_pardiso->nrhs > 0) {
501: MatDenseGetArray(B,&barray);
502: MatDenseGetArray(X,&xarray);
504: if (barray == xarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"B and X cannot share the same memory location");
505: if (!mat_mkl_pardiso->schur) mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
506: else mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION;
507: mat_mkl_pardiso->iparm[6-1] = 0;
509: MKL_PARDISO (mat_mkl_pardiso->pt,
510: &mat_mkl_pardiso->maxfct,
511: &mat_mkl_pardiso->mnum,
512: &mat_mkl_pardiso->mtype,
513: &mat_mkl_pardiso->phase,
514: &mat_mkl_pardiso->n,
515: mat_mkl_pardiso->a,
516: mat_mkl_pardiso->ia,
517: mat_mkl_pardiso->ja,
518: mat_mkl_pardiso->perm,
519: &mat_mkl_pardiso->nrhs,
520: mat_mkl_pardiso->iparm,
521: &mat_mkl_pardiso->msglvl,
522: (void*)barray,
523: (void*)xarray,
524: &mat_mkl_pardiso->err);
525: if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual",mat_mkl_pardiso->err);
527: if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */
528: PetscScalar *o_schur_work = NULL;
529: PetscInt shift = mat_mkl_pardiso->schur_size*mat_mkl_pardiso->nrhs,scale;
530: PetscInt mem = mat_mkl_pardiso->n*mat_mkl_pardiso->nrhs;
532: /* allocate extra memory if it is needed */
533: scale = 1;
534: if (A->schur_status == MAT_FACTOR_SCHUR_INVERTED) scale = 2;
536: mem *= scale;
537: if (mem > mat_mkl_pardiso->schur_work_size) {
538: o_schur_work = mat_mkl_pardiso->schur_work;
539: PetscMalloc1(mem,&mat_mkl_pardiso->schur_work);
540: }
542: /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */
543: if (A->schur_status != MAT_FACTOR_SCHUR_INVERTED) shift = 0;
545: /* solve Schur complement */
546: if (!mat_mkl_pardiso->solve_interior) {
547: MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work,PETSC_TRUE);
548: MatMKLPardisoSolveSchur_Private(A,mat_mkl_pardiso->schur_work,mat_mkl_pardiso->schur_work+shift);
549: MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work+shift,PETSC_FALSE);
550: } else { /* if we are solving for the interior problem, any value in barray[schur,n] forward-substituted to xarray[schur,n] will be neglected */
551: PetscInt i,n,m=0;
552: for (n=0;n<mat_mkl_pardiso->nrhs;n++) {
553: for (i=0;i<mat_mkl_pardiso->schur_size;i++) {
554: xarray[mat_mkl_pardiso->schur_idxs[i]+m] = 0.;
555: }
556: m += mat_mkl_pardiso->n;
557: }
558: }
560: /* expansion phase */
561: mat_mkl_pardiso->iparm[6-1] = 1;
562: mat_mkl_pardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION;
563: MKL_PARDISO (mat_mkl_pardiso->pt,
564: &mat_mkl_pardiso->maxfct,
565: &mat_mkl_pardiso->mnum,
566: &mat_mkl_pardiso->mtype,
567: &mat_mkl_pardiso->phase,
568: &mat_mkl_pardiso->n,
569: mat_mkl_pardiso->a,
570: mat_mkl_pardiso->ia,
571: mat_mkl_pardiso->ja,
572: mat_mkl_pardiso->perm,
573: &mat_mkl_pardiso->nrhs,
574: mat_mkl_pardiso->iparm,
575: &mat_mkl_pardiso->msglvl,
576: (void*)xarray,
577: (void*)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */
578: &mat_mkl_pardiso->err);
579: if (o_schur_work) { /* restore original schur_work (minimal size) */
580: PetscFree(mat_mkl_pardiso->schur_work);
581: mat_mkl_pardiso->schur_work = o_schur_work;
582: }
583: if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual",mat_mkl_pardiso->err);
584: mat_mkl_pardiso->iparm[6-1] = 0;
585: }
586: }
587: mat_mkl_pardiso->CleanUp = PETSC_TRUE;
588: return(0);
589: }
591: PetscErrorCode MatFactorNumeric_MKL_PARDISO(Mat F,Mat A,const MatFactorInfo *info)
592: {
593: Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(F)->data;
594: PetscErrorCode ierr;
597: mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN;
598: (*mat_mkl_pardiso->Convert)(A,mat_mkl_pardiso->needsym,MAT_REUSE_MATRIX,&mat_mkl_pardiso->freeaij,&mat_mkl_pardiso->nz,&mat_mkl_pardiso->ia,&mat_mkl_pardiso->ja,(PetscScalar**)&mat_mkl_pardiso->a);
600: mat_mkl_pardiso->phase = JOB_NUMERICAL_FACTORIZATION;
601: MKL_PARDISO (mat_mkl_pardiso->pt,
602: &mat_mkl_pardiso->maxfct,
603: &mat_mkl_pardiso->mnum,
604: &mat_mkl_pardiso->mtype,
605: &mat_mkl_pardiso->phase,
606: &mat_mkl_pardiso->n,
607: mat_mkl_pardiso->a,
608: mat_mkl_pardiso->ia,
609: mat_mkl_pardiso->ja,
610: mat_mkl_pardiso->perm,
611: &mat_mkl_pardiso->nrhs,
612: mat_mkl_pardiso->iparm,
613: &mat_mkl_pardiso->msglvl,
614: NULL,
615: (void*)mat_mkl_pardiso->schur,
616: &mat_mkl_pardiso->err);
617: if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual",mat_mkl_pardiso->err);
619: if (F->schur) { /* schur output from pardiso is in row major format */
620: MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);
621: MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);
622: }
623: mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN;
624: mat_mkl_pardiso->CleanUp = PETSC_TRUE;
625: return(0);
626: }
628: PetscErrorCode PetscSetMKL_PARDISOFromOptions(Mat F, Mat A)
629: {
630: Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO*)F->data;
631: PetscErrorCode ierr;
632: PetscInt icntl,threads=1;
633: PetscBool flg;
636: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_PARDISO Options","Mat");
638: PetscOptionsInt("-mat_mkl_pardiso_65","Number of threads to use within PARDISO","None",threads,&threads,&flg);
639: if (flg) PetscSetMKL_PARDISOThreads((int)threads);
641: PetscOptionsInt("-mat_mkl_pardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_pardiso->maxfct,&icntl,&flg);
642: if (flg) mat_mkl_pardiso->maxfct = icntl;
644: PetscOptionsInt("-mat_mkl_pardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_pardiso->mnum,&icntl,&flg);
645: if (flg) mat_mkl_pardiso->mnum = icntl;
646:
647: PetscOptionsInt("-mat_mkl_pardiso_68","Message level information","None",mat_mkl_pardiso->msglvl,&icntl,&flg);
648: if (flg) mat_mkl_pardiso->msglvl = icntl;
650: PetscOptionsInt("-mat_mkl_pardiso_69","Defines the matrix type","None",mat_mkl_pardiso->mtype,&icntl,&flg);
651: if(flg){
652: void *pt[IPARM_SIZE];
653: mat_mkl_pardiso->mtype = icntl;
654: MKL_PARDISO_INIT(pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm);
655: #if defined(PETSC_USE_REAL_SINGLE)
656: mat_mkl_pardiso->iparm[27] = 1;
657: #else
658: mat_mkl_pardiso->iparm[27] = 0;
659: #endif
660: mat_mkl_pardiso->iparm[34] = 1; /* use 0-based indexing */
661: }
662: PetscOptionsInt("-mat_mkl_pardiso_1","Use default values","None",mat_mkl_pardiso->iparm[0],&icntl,&flg);
664: if (flg && icntl != 0) {
665: PetscOptionsInt("-mat_mkl_pardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_pardiso->iparm[1],&icntl,&flg);
666: if (flg) mat_mkl_pardiso->iparm[1] = icntl;
668: PetscOptionsInt("-mat_mkl_pardiso_4","Preconditioned CGS/CG","None",mat_mkl_pardiso->iparm[3],&icntl,&flg);
669: if (flg) mat_mkl_pardiso->iparm[3] = icntl;
671: PetscOptionsInt("-mat_mkl_pardiso_5","User permutation","None",mat_mkl_pardiso->iparm[4],&icntl,&flg);
672: if (flg) mat_mkl_pardiso->iparm[4] = icntl;
674: PetscOptionsInt("-mat_mkl_pardiso_6","Write solution on x","None",mat_mkl_pardiso->iparm[5],&icntl,&flg);
675: if (flg) mat_mkl_pardiso->iparm[5] = icntl;
677: PetscOptionsInt("-mat_mkl_pardiso_8","Iterative refinement step","None",mat_mkl_pardiso->iparm[7],&icntl,&flg);
678: if (flg) mat_mkl_pardiso->iparm[7] = icntl;
680: PetscOptionsInt("-mat_mkl_pardiso_10","Pivoting perturbation","None",mat_mkl_pardiso->iparm[9],&icntl,&flg);
681: if (flg) mat_mkl_pardiso->iparm[9] = icntl;
683: PetscOptionsInt("-mat_mkl_pardiso_11","Scaling vectors","None",mat_mkl_pardiso->iparm[10],&icntl,&flg);
684: if (flg) mat_mkl_pardiso->iparm[10] = icntl;
686: PetscOptionsInt("-mat_mkl_pardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_pardiso->iparm[11],&icntl,&flg);
687: if (flg) mat_mkl_pardiso->iparm[11] = icntl;
689: PetscOptionsInt("-mat_mkl_pardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_pardiso->iparm[12],&icntl,&flg);
690: if (flg) mat_mkl_pardiso->iparm[12] = icntl;
692: PetscOptionsInt("-mat_mkl_pardiso_18","Numbers of non-zero elements","None",mat_mkl_pardiso->iparm[17],&icntl,&flg);
693: if (flg) mat_mkl_pardiso->iparm[17] = icntl;
695: PetscOptionsInt("-mat_mkl_pardiso_19","Report number of floating point operations","None",mat_mkl_pardiso->iparm[18],&icntl,&flg);
696: if (flg) mat_mkl_pardiso->iparm[18] = icntl;
698: PetscOptionsInt("-mat_mkl_pardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_pardiso->iparm[20],&icntl,&flg);
699: if (flg) mat_mkl_pardiso->iparm[20] = icntl;
701: PetscOptionsInt("-mat_mkl_pardiso_24","Parallel factorization control","None",mat_mkl_pardiso->iparm[23],&icntl,&flg);
702: if (flg) mat_mkl_pardiso->iparm[23] = icntl;
704: PetscOptionsInt("-mat_mkl_pardiso_25","Parallel forward/backward solve control","None",mat_mkl_pardiso->iparm[24],&icntl,&flg);
705: if (flg) mat_mkl_pardiso->iparm[24] = icntl;
707: PetscOptionsInt("-mat_mkl_pardiso_27","Matrix checker","None",mat_mkl_pardiso->iparm[26],&icntl,&flg);
708: if (flg) mat_mkl_pardiso->iparm[26] = icntl;
710: PetscOptionsInt("-mat_mkl_pardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_pardiso->iparm[30],&icntl,&flg);
711: if (flg) mat_mkl_pardiso->iparm[30] = icntl;
713: PetscOptionsInt("-mat_mkl_pardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_pardiso->iparm[33],&icntl,&flg);
714: if (flg) mat_mkl_pardiso->iparm[33] = icntl;
716: PetscOptionsInt("-mat_mkl_pardiso_60","Intel MKL_PARDISO mode","None",mat_mkl_pardiso->iparm[59],&icntl,&flg);
717: if (flg) mat_mkl_pardiso->iparm[59] = icntl;
718: }
719: PetscOptionsEnd();
720: return(0);
721: }
723: PetscErrorCode MatFactorMKL_PARDISOInitialize_Private(Mat A, MatFactorType ftype, Mat_MKL_PARDISO *mat_mkl_pardiso)
724: {
726: PetscInt i;
729: for ( i = 0; i < IPARM_SIZE; i++ ){
730: mat_mkl_pardiso->iparm[i] = 0;
731: }
732: for ( i = 0; i < IPARM_SIZE; i++ ){
733: mat_mkl_pardiso->pt[i] = 0;
734: }
735: /* Default options for both sym and unsym */
736: mat_mkl_pardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */
737: mat_mkl_pardiso->iparm[ 1] = 2; /* Metis reordering */
738: mat_mkl_pardiso->iparm[ 5] = 0; /* Write solution into x */
739: mat_mkl_pardiso->iparm[ 7] = 0; /* Max number of iterative refinement steps */
740: mat_mkl_pardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
741: mat_mkl_pardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
742: #if 0
743: mat_mkl_pardiso->iparm[23] = 1; /* Parallel factorization control*/
744: #endif
745: mat_mkl_pardiso->iparm[34] = 1; /* Cluster Sparse Solver use C-style indexing for ia and ja arrays */
746: mat_mkl_pardiso->iparm[39] = 0; /* Input: matrix/rhs/solution stored on master */
747:
748: mat_mkl_pardiso->CleanUp = PETSC_FALSE;
749: mat_mkl_pardiso->maxfct = 1; /* Maximum number of numerical factorizations. */
750: mat_mkl_pardiso->mnum = 1; /* Which factorization to use. */
751: mat_mkl_pardiso->msglvl = 0; /* 0: do not print 1: Print statistical information in file */
752: mat_mkl_pardiso->phase = -1;
753: mat_mkl_pardiso->err = 0;
754:
755: mat_mkl_pardiso->n = A->rmap->N;
756: mat_mkl_pardiso->nrhs = 1;
757: mat_mkl_pardiso->err = 0;
758: mat_mkl_pardiso->phase = -1;
759:
760: if(ftype == MAT_FACTOR_LU){
761: mat_mkl_pardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
762: mat_mkl_pardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */
763: mat_mkl_pardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
765: } else {
766: mat_mkl_pardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
767: mat_mkl_pardiso->iparm[10] = 0; /* Use nonsymmetric permutation and scaling MPS */
768: mat_mkl_pardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
769: /* mat_mkl_pardiso->iparm[20] = 1; */ /* Apply 1x1 and 2x2 Bunch-Kaufman pivoting during the factorization process */
770: #if defined(PETSC_USE_DEBUG)
771: mat_mkl_pardiso->iparm[26] = 1; /* Matrix checker */
772: #endif
773: }
774: PetscMalloc1(A->rmap->N*sizeof(INT_TYPE), &mat_mkl_pardiso->perm);
775: for(i = 0; i < A->rmap->N; i++){
776: mat_mkl_pardiso->perm[i] = 0;
777: }
778: mat_mkl_pardiso->schur_size = 0;
779: return(0);
780: }
782: PetscErrorCode MatFactorSymbolic_AIJMKL_PARDISO_Private(Mat F,Mat A,const MatFactorInfo *info)
783: {
784: Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO*)F->data;
785: PetscErrorCode ierr;
788: mat_mkl_pardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
789: PetscSetMKL_PARDISOFromOptions(F,A);
791: /* throw away any previously computed structure */
792: if (mat_mkl_pardiso->freeaij) {
793: PetscFree2(mat_mkl_pardiso->ia,mat_mkl_pardiso->ja);
794: PetscFree(mat_mkl_pardiso->a);
795: }
796: (*mat_mkl_pardiso->Convert)(A,mat_mkl_pardiso->needsym,MAT_INITIAL_MATRIX,&mat_mkl_pardiso->freeaij,&mat_mkl_pardiso->nz,&mat_mkl_pardiso->ia,&mat_mkl_pardiso->ja,(PetscScalar**)&mat_mkl_pardiso->a);
797: mat_mkl_pardiso->n = A->rmap->N;
799: mat_mkl_pardiso->phase = JOB_ANALYSIS;
801: MKL_PARDISO (mat_mkl_pardiso->pt,
802: &mat_mkl_pardiso->maxfct,
803: &mat_mkl_pardiso->mnum,
804: &mat_mkl_pardiso->mtype,
805: &mat_mkl_pardiso->phase,
806: &mat_mkl_pardiso->n,
807: mat_mkl_pardiso->a,
808: mat_mkl_pardiso->ia,
809: mat_mkl_pardiso->ja,
810: mat_mkl_pardiso->perm,
811: &mat_mkl_pardiso->nrhs,
812: mat_mkl_pardiso->iparm,
813: &mat_mkl_pardiso->msglvl,
814: NULL,
815: NULL,
816: &mat_mkl_pardiso->err);
817: if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual",mat_mkl_pardiso->err);
819: mat_mkl_pardiso->CleanUp = PETSC_TRUE;
821: if (F->factortype == MAT_FACTOR_LU) F->ops->lufactornumeric = MatFactorNumeric_MKL_PARDISO;
822: else F->ops->choleskyfactornumeric = MatFactorNumeric_MKL_PARDISO;
824: F->ops->solve = MatSolve_MKL_PARDISO;
825: F->ops->solvetranspose = MatSolveTranspose_MKL_PARDISO;
826: F->ops->matsolve = MatMatSolve_MKL_PARDISO;
827: return(0);
828: }
830: PetscErrorCode MatLUFactorSymbolic_AIJMKL_PARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
831: {
835: MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info);
836: return(0);
837: }
839: #if !defined(PETSC_USE_COMPLEX)
840: PetscErrorCode MatGetInertia_MKL_PARDISO(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
841: {
842: Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)F->data;
845: if (nneg) *nneg = mat_mkl_pardiso->iparm[22];
846: if (npos) *npos = mat_mkl_pardiso->iparm[21];
847: if (nzero) *nzero = F->rmap->N -(mat_mkl_pardiso->iparm[22] + mat_mkl_pardiso->iparm[21]);
848: return(0);
849: }
850: #endif
852: PetscErrorCode MatCholeskyFactorSymbolic_AIJMKL_PARDISO(Mat F,Mat A,IS r,const MatFactorInfo *info)
853: {
857: MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info);
858: #if defined(PETSC_USE_COMPLEX)
859: F->ops->getinertia = NULL;
860: #else
861: F->ops->getinertia = MatGetInertia_MKL_PARDISO;
862: #endif
863: return(0);
864: }
866: PetscErrorCode MatView_MKL_PARDISO(Mat A, PetscViewer viewer)
867: {
868: PetscErrorCode ierr;
869: PetscBool iascii;
870: PetscViewerFormat format;
871: Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->data;
872: PetscInt i;
875: if (A->ops->solve != MatSolve_MKL_PARDISO) return(0);
877: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
878: if (iascii) {
879: PetscViewerGetFormat(viewer,&format);
880: if (format == PETSC_VIEWER_ASCII_INFO) {
881: PetscViewerASCIIPrintf(viewer,"MKL_PARDISO run parameters:\n");
882: PetscViewerASCIIPrintf(viewer,"MKL_PARDISO phase: %d \n",mat_mkl_pardiso->phase);
883: for(i = 1; i <= 64; i++){
884: PetscViewerASCIIPrintf(viewer,"MKL_PARDISO iparm[%d]: %d \n",i, mat_mkl_pardiso->iparm[i - 1]);
885: }
886: PetscViewerASCIIPrintf(viewer,"MKL_PARDISO maxfct: %d \n", mat_mkl_pardiso->maxfct);
887: PetscViewerASCIIPrintf(viewer,"MKL_PARDISO mnum: %d \n", mat_mkl_pardiso->mnum);
888: PetscViewerASCIIPrintf(viewer,"MKL_PARDISO mtype: %d \n", mat_mkl_pardiso->mtype);
889: PetscViewerASCIIPrintf(viewer,"MKL_PARDISO n: %d \n", mat_mkl_pardiso->n);
890: PetscViewerASCIIPrintf(viewer,"MKL_PARDISO nrhs: %d \n", mat_mkl_pardiso->nrhs);
891: PetscViewerASCIIPrintf(viewer,"MKL_PARDISO msglvl: %d \n", mat_mkl_pardiso->msglvl);
892: }
893: }
894: return(0);
895: }
898: PetscErrorCode MatGetInfo_MKL_PARDISO(Mat A, MatInfoType flag, MatInfo *info)
899: {
900: Mat_MKL_PARDISO *mat_mkl_pardiso =(Mat_MKL_PARDISO*)A->data;
903: info->block_size = 1.0;
904: info->nz_used = mat_mkl_pardiso->nz;
905: info->nz_allocated = mat_mkl_pardiso->nz;
906: info->nz_unneeded = 0.0;
907: info->assemblies = 0.0;
908: info->mallocs = 0.0;
909: info->memory = 0.0;
910: info->fill_ratio_given = 0;
911: info->fill_ratio_needed = 0;
912: info->factor_mallocs = 0;
913: return(0);
914: }
916: PetscErrorCode MatMkl_PardisoSetCntl_MKL_PARDISO(Mat F,PetscInt icntl,PetscInt ival)
917: {
918: Mat_MKL_PARDISO *mat_mkl_pardiso =(Mat_MKL_PARDISO*)F->data;
921: if(icntl <= 64){
922: mat_mkl_pardiso->iparm[icntl - 1] = ival;
923: } else {
924: if(icntl == 65) PetscSetMKL_PARDISOThreads(ival);
925: else if(icntl == 66) mat_mkl_pardiso->maxfct = ival;
926: else if(icntl == 67) mat_mkl_pardiso->mnum = ival;
927: else if(icntl == 68) mat_mkl_pardiso->msglvl = ival;
928: else if(icntl == 69){
929: void *pt[IPARM_SIZE];
930: mat_mkl_pardiso->mtype = ival;
931: MKL_PARDISO_INIT(pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm);
932: #if defined(PETSC_USE_REAL_SINGLE)
933: mat_mkl_pardiso->iparm[27] = 1;
934: #else
935: mat_mkl_pardiso->iparm[27] = 0;
936: #endif
937: mat_mkl_pardiso->iparm[34] = 1;
938: } else if(icntl==70) mat_mkl_pardiso->solve_interior = (PetscBool)!!ival;
939: }
940: return(0);
941: }
943: /*@
944: MatMkl_PardisoSetCntl - Set Mkl_Pardiso parameters
946: Logically Collective on Mat
948: Input Parameters:
949: + F - the factored matrix obtained by calling MatGetFactor()
950: . icntl - index of Mkl_Pardiso parameter
951: - ival - value of Mkl_Pardiso parameter
953: Options Database:
954: . -mat_mkl_pardiso_<icntl> <ival>
956: Level: beginner
958: References:
959: . Mkl_Pardiso Users' Guide
961: .seealso: MatGetFactor()
962: @*/
963: PetscErrorCode MatMkl_PardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
964: {
968: PetscTryMethod(F,"MatMkl_PardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
969: return(0);
970: }
972: /*MC
973: MATSOLVERMKL_PARDISO - A matrix type providing direct solvers (LU) for
974: sequential matrices via the external package MKL_PARDISO.
976: Works with MATSEQAIJ matrices
978: Use -pc_type lu -pc_factor_mat_solver_type mkl_pardiso to use this direct solver
980: Options Database Keys:
981: + -mat_mkl_pardiso_65 - Number of threads to use within MKL_PARDISO
982: . -mat_mkl_pardiso_66 - Maximum number of factors with identical sparsity structure that must be kept in memory at the same time
983: . -mat_mkl_pardiso_67 - Indicates the actual matrix for the solution phase
984: . -mat_mkl_pardiso_68 - Message level information
985: . -mat_mkl_pardiso_69 - Defines the matrix type. IMPORTANT: When you set this flag, iparm parameters are going to be set to the default ones for the matrix type
986: . -mat_mkl_pardiso_1 - Use default values
987: . -mat_mkl_pardiso_2 - Fill-in reducing ordering for the input matrix
988: . -mat_mkl_pardiso_4 - Preconditioned CGS/CG
989: . -mat_mkl_pardiso_5 - User permutation
990: . -mat_mkl_pardiso_6 - Write solution on x
991: . -mat_mkl_pardiso_8 - Iterative refinement step
992: . -mat_mkl_pardiso_10 - Pivoting perturbation
993: . -mat_mkl_pardiso_11 - Scaling vectors
994: . -mat_mkl_pardiso_12 - Solve with transposed or conjugate transposed matrix A
995: . -mat_mkl_pardiso_13 - Improved accuracy using (non-) symmetric weighted matching
996: . -mat_mkl_pardiso_18 - Numbers of non-zero elements
997: . -mat_mkl_pardiso_19 - Report number of floating point operations
998: . -mat_mkl_pardiso_21 - Pivoting for symmetric indefinite matrices
999: . -mat_mkl_pardiso_24 - Parallel factorization control
1000: . -mat_mkl_pardiso_25 - Parallel forward/backward solve control
1001: . -mat_mkl_pardiso_27 - Matrix checker
1002: . -mat_mkl_pardiso_31 - Partial solve and computing selected components of the solution vectors
1003: . -mat_mkl_pardiso_34 - Optimal number of threads for conditional numerical reproducibility (CNR) mode
1004: - -mat_mkl_pardiso_60 - Intel MKL_PARDISO mode
1006: Level: beginner
1008: For more information please check mkl_pardiso manual
1010: .seealso: PCFactorSetMatSolverType(), MatSolverType
1012: M*/
1013: static PetscErrorCode MatFactorGetSolverType_mkl_pardiso(Mat A, MatSolverType *type)
1014: {
1016: *type = MATSOLVERMKL_PARDISO;
1017: return(0);
1018: }
1020: PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mkl_pardiso(Mat A,MatFactorType ftype,Mat *F)
1021: {
1022: Mat B;
1023: PetscErrorCode ierr;
1024: Mat_MKL_PARDISO *mat_mkl_pardiso;
1025: PetscBool isSeqAIJ,isSeqBAIJ,isSeqSBAIJ;
1028: PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);
1029: PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);
1030: PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);
1031: MatCreate(PetscObjectComm((PetscObject)A),&B);
1032: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1033: PetscStrallocpy("mkl_pardiso",&((PetscObject)B)->type_name);
1034: MatSetUp(B);
1036: PetscNewLog(B,&mat_mkl_pardiso);
1037: B->data = mat_mkl_pardiso;
1039: MatFactorMKL_PARDISOInitialize_Private(A, ftype, mat_mkl_pardiso);
1040: if (ftype == MAT_FACTOR_LU) {
1041: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_PARDISO;
1042: B->factortype = MAT_FACTOR_LU;
1043: mat_mkl_pardiso->needsym = PETSC_FALSE;
1044: if (isSeqAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij;
1045: else if (isSeqBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij;
1046: else if (isSeqSBAIJ) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO LU factor with SEQSBAIJ format! Use MAT_FACTOR_CHOLESKY instead");
1047: else SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO LU with %s format",((PetscObject)A)->type_name);
1048: #if defined(PETSC_USE_COMPLEX)
1049: mat_mkl_pardiso->mtype = 13;
1050: #else
1051: mat_mkl_pardiso->mtype = 11;
1052: #endif
1053: } else {
1054: #if defined(PETSC_USE_COMPLEX)
1055: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO CHOLESKY with complex scalars! Use MAT_FACTOR_LU instead",((PetscObject)A)->type_name);
1056: #endif
1057: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_AIJMKL_PARDISO;
1058: B->factortype = MAT_FACTOR_CHOLESKY;
1059: if (isSeqAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij;
1060: else if (isSeqBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij;
1061: else if (isSeqSBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqsbaij;
1062: else SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO CHOLESKY with %s format",((PetscObject)A)->type_name);
1064: mat_mkl_pardiso->needsym = PETSC_TRUE;
1065: if (A->spd_set && A->spd) mat_mkl_pardiso->mtype = 2;
1066: else mat_mkl_pardiso->mtype = -2;
1067: }
1068: B->ops->destroy = MatDestroy_MKL_PARDISO;
1069: B->ops->view = MatView_MKL_PARDISO;
1070: B->factortype = ftype;
1071: B->ops->getinfo = MatGetInfo_MKL_PARDISO;
1072: B->assembled = PETSC_TRUE;
1074: PetscFree(B->solvertype);
1075: PetscStrallocpy(MATSOLVERMKL_PARDISO,&B->solvertype);
1077: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mkl_pardiso);
1078: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MKL_PARDISO);
1079: PetscObjectComposeFunction((PetscObject)B,"MatMkl_PardisoSetCntl_C",MatMkl_PardisoSetCntl_MKL_PARDISO);
1081: *F = B;
1082: return(0);
1083: }
1085: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_Pardiso(void)
1086: {
1090: MatSolverTypeRegister(MATSOLVERMKL_PARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mkl_pardiso);
1091: MatSolverTypeRegister(MATSOLVERMKL_PARDISO,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mkl_pardiso);
1092: MatSolverTypeRegister(MATSOLVERMKL_PARDISO,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mkl_pardiso);
1093: return(0);
1094: }