Actual source code: mkl_pardiso.c
petsc-3.7.3 2016-08-01
1: #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
2: #define MKL_ILP64
3: #endif
5: #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
6: #include <../src/mat/impls/sbaij/seq/sbaij.h>
7: #include <../src/mat/impls/dense/seq/dense.h>
8: #include <petscblaslapack.h>
10: #include <stdio.h>
11: #include <stdlib.h>
12: #include <math.h>
13: #include <mkl_pardiso.h>
15: PETSC_EXTERN void PetscSetMKL_PARDISOThreads(int);
17: /*
18: * Possible mkl_pardiso phases that controls the execution of the solver.
19: * For more information check mkl_pardiso manual.
20: */
21: #define JOB_ANALYSIS 11
22: #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
23: #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
24: #define JOB_NUMERICAL_FACTORIZATION 22
25: #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
26: #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
27: #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
28: #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
29: #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
30: #define JOB_RELEASE_OF_LU_MEMORY 0
31: #define JOB_RELEASE_OF_ALL_MEMORY -1
33: #define IPARM_SIZE 64
35: #if defined(PETSC_USE_64BIT_INDICES)
36: #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
37: /* sizeof(MKL_INT) == sizeof(long long int) if ilp64*/
38: #define INT_TYPE long long int
39: #define MKL_PARDISO pardiso
40: #define MKL_PARDISO_INIT pardisoinit
41: #else
42: #define INT_TYPE long long int
43: #define MKL_PARDISO pardiso_64
44: #define MKL_PARDISO_INIT pardiso_64init
45: #endif
46: #else
47: #define INT_TYPE int
48: #define MKL_PARDISO pardiso
49: #define MKL_PARDISO_INIT pardisoinit
50: #endif
53: /*
54: * Internal data structure.
55: * For more information check mkl_pardiso manual.
56: */
57: typedef struct {
59: /* Configuration vector*/
60: INT_TYPE iparm[IPARM_SIZE];
62: /*
63: * Internal mkl_pardiso memory location.
64: * After the first call to mkl_pardiso do not modify pt, as that could cause a serious memory leak.
65: */
66: void *pt[IPARM_SIZE];
68: /* Basic mkl_pardiso info*/
69: INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
71: /* Matrix structure*/
72: void *a;
73: INT_TYPE *ia, *ja;
75: /* Number of non-zero elements*/
76: INT_TYPE nz;
78: /* Row permutaton vector*/
79: INT_TYPE *perm;
81: /* Define if matrix preserves sparse structure.*/
82: MatStructure matstruc;
84: PetscBool needsym;
85: PetscBool freeaij;
87: /* Schur complement */
88: PetscScalar *schur;
89: PetscInt schur_size;
90: PetscInt *schur_idxs;
91: PetscScalar *schur_work;
92: PetscBLASInt schur_work_size;
93: PetscInt schur_solver_type;
94: PetscInt *schur_pivots;
95: PetscBool schur_factored;
96: PetscBool schur_inverted;
97: PetscBool solve_interior;
99: /* True if mkl_pardiso function have been used.*/
100: PetscBool CleanUp;
102: /* Conversion to a format suitable for MKL */
103: PetscErrorCode (*Convert)(Mat, PetscBool, MatReuse, PetscBool*, INT_TYPE*, INT_TYPE**, INT_TYPE**, PetscScalar**);
104: PetscErrorCode (*Destroy)(Mat);
105: } Mat_MKL_PARDISO;
109: PetscErrorCode MatMKLPardiso_Convert_seqsbaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,PetscScalar **v)
110: {
111: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data;
112: PetscInt bs = A->rmap->bs;
115: if (!sym) {
116: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"This should not happen");
117: }
118: if (bs == 1) { /* already in the correct format */
119: *v = aa->a;
120: *r = aa->i;
121: *c = aa->j;
122: *nnz = aa->nz;
123: *free = PETSC_FALSE;
124: } else {
125: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Conversion from SeqSBAIJ to MKL Pardiso format still need to be implemented");
126: }
127: return(0);
128: }
132: PetscErrorCode MatMKLPardiso_Convert_seqbaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,PetscScalar **v)
133: {
135: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Conversion from SeqBAIJ to MKL Pardiso format still need to be implemented");
136: return(0);
137: }
141: PetscErrorCode MatMKLPardiso_Convert_seqaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool *free,INT_TYPE *nnz,INT_TYPE **r,INT_TYPE **c,PetscScalar **v)
142: {
143: Mat_SeqAIJ *aa = (Mat_SeqAIJ*)A->data;
147: if (!sym) { /* already in the correct format */
148: *v = aa->a;
149: *r = aa->i;
150: *c = aa->j;
151: *nnz = aa->nz;
152: *free = PETSC_FALSE;
153: return(0);
154: }
155: /* need to get the triangular part */
156: if (reuse == MAT_INITIAL_MATRIX) {
157: PetscScalar *vals,*vv;
158: PetscInt *row,*col,*jj;
159: PetscInt m = A->rmap->n,nz,i;
161: nz = 0;
162: for (i=0; i<m; i++) {
163: nz += aa->i[i+1] - aa->diag[i];
164: }
165: PetscMalloc2(m+1,&row,nz,&col);
166: PetscMalloc1(nz,&vals);
167: jj = col;
168: vv = vals;
170: row[0] = 0;
171: for (i=0; i<m; i++) {
172: PetscInt *aj = aa->j + aa->diag[i];
173: PetscScalar *av = aa->a + aa->diag[i];
174: PetscInt rl = aa->i[i+1] - aa->diag[i],j;
175: for (j=0; j<rl; j++) {
176: *jj = *aj; jj++; aj++;
177: *vv = *av; vv++; av++;
178: }
179: row[i+1] = row[i] + rl;
180: }
181: *v = vals;
182: *r = row;
183: *c = col;
184: *nnz = nz;
185: *free = PETSC_TRUE;
186: } else {
187: PetscScalar *vv;
188: PetscInt m = A->rmap->n,i;
190: vv = *v;
191: for (i=0; i<m; i++) {
192: PetscScalar *av = aa->a + aa->diag[i];
193: PetscInt rl = aa->i[i+1] - aa->diag[i],j;
194: for (j=0; j<rl; j++) {
195: *vv = *av; vv++; av++;
196: }
197: }
198: *free = PETSC_TRUE;
199: }
200: return(0);
201: }
203: void pardiso_64init(void *pt, INT_TYPE *mtype, INT_TYPE iparm [])
204: {
205: int iparm_copy[IPARM_SIZE], mtype_copy, i;
206:
207: mtype_copy = *mtype;
208: pardisoinit(pt, &mtype_copy, iparm_copy);
209: for(i = 0; i < IPARM_SIZE; i++){
210: iparm[i] = iparm_copy[i];
211: }
212: }
216: static PetscErrorCode MatMKLPardisoFactorSchur_Private(Mat_MKL_PARDISO* mpardiso)
217: {
218: PetscBLASInt B_N,B_ierr;
219: PetscScalar *work,val;
220: PetscBLASInt lwork = -1;
224: if (mpardiso->schur_factored) {
225: return(0);
226: }
227: PetscBLASIntCast(mpardiso->schur_size,&B_N);
228: switch (mpardiso->schur_solver_type) {
229: case 1: /* hermitian solver */
230: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
231: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,mpardiso->schur,&B_N,&B_ierr));
232: PetscFPTrapPop();
233: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
234: break;
235: case 2: /* symmetric */
236: if (!mpardiso->schur_pivots) {
237: PetscMalloc1(B_N,&mpardiso->schur_pivots);
238: }
239: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
240: PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,&val,&lwork,&B_ierr));
241: PetscBLASIntCast((PetscInt)PetscRealPart(val),&lwork);
242: PetscMalloc1(lwork,&work);
243: PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,work,&lwork,&B_ierr));
244: PetscFree(work);
245: PetscFPTrapPop();
246: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr);
247: break;
248: default: /* general */
249: if (!mpardiso->schur_pivots) {
250: PetscMalloc1(B_N,&mpardiso->schur_pivots);
251: }
252: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
253: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,&B_ierr));
254: PetscFPTrapPop();
255: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
256: break;
257: }
258: mpardiso->schur_factored = PETSC_TRUE;
259: return(0);
260: }
264: static PetscErrorCode MatMKLPardisoInvertSchur_Private(Mat_MKL_PARDISO* mpardiso)
265: {
266: PetscBLASInt B_N,B_ierr;
270: MatMKLPardisoFactorSchur_Private(mpardiso);
271: PetscBLASIntCast(mpardiso->schur_size,&B_N);
272: switch (mpardiso->schur_solver_type) {
273: case 1: /* hermitian solver */
274: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
275: PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,mpardiso->schur,&B_N,&B_ierr));
276: PetscFPTrapPop();
277: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
278: break;
279: case 2: /* symmetric */
280: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
281: PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,mpardiso->schur_work,&B_ierr));
282: PetscFPTrapPop();
283: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr);
284: break;
285: default: /* general */
286: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
287: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,mpardiso->schur,&B_N,mpardiso->schur_pivots,mpardiso->schur_work,&mpardiso->schur_work_size,&B_ierr));
288: PetscFPTrapPop();
289: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
290: break;
291: }
292: mpardiso->schur_inverted = PETSC_TRUE;
293: return(0);
294: }
298: static PetscErrorCode MatMKLPardisoSolveSchur_Private(Mat_MKL_PARDISO* mpardiso, PetscScalar *B, PetscScalar *X)
299: {
300: PetscScalar one=1.,zero=0.,*schur_rhs,*schur_sol;
301: PetscBLASInt B_N,B_Nrhs,B_ierr;
302: char type[2];
306: MatMKLPardisoFactorSchur_Private(mpardiso);
307: PetscBLASIntCast(mpardiso->schur_size,&B_N);
308: PetscBLASIntCast(mpardiso->nrhs,&B_Nrhs);
309: if (X == B && mpardiso->schur_inverted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"X and B cannot point to the same address");
310: if (X != B) { /* using LAPACK *TRS subroutines */
311: PetscMemcpy(X,B,B_N*B_Nrhs*sizeof(PetscScalar));
312: }
313: schur_rhs = B;
314: schur_sol = X;
315: switch (mpardiso->schur_solver_type) {
316: case 1: /* hermitian solver */
317: if (mpardiso->schur_inverted) { /* BLAShemm should go here */
318: PetscStackCallBLAS("BLASsymm",BLASsymm_("L","L",&B_N,&B_Nrhs,&one,mpardiso->schur,&B_N,schur_rhs,&B_N,&zero,schur_sol,&B_N));
319: } else {
320: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
321: PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("L",&B_N,&B_Nrhs,mpardiso->schur,&B_N,schur_sol,&B_N,&B_ierr));
322: PetscFPTrapPop();
323: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRS Lapack routine %d",(int)B_ierr);
324: }
325: break;
326: case 2: /* symmetric solver */
327: if (mpardiso->schur_inverted) {
328: PetscStackCallBLAS("BLASsymm",BLASsymm_("L","L",&B_N,&B_Nrhs,&one,mpardiso->schur,&B_N,schur_rhs,&B_N,&zero,schur_sol,&B_N));
329: } else {
330: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
331: PetscStackCallBLAS("LAPACKsytrs",LAPACKsytrs_("L",&B_N,&B_Nrhs,mpardiso->schur,&B_N,mpardiso->schur_pivots,schur_sol,&B_N,&B_ierr));
332: PetscFPTrapPop();
333: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRS Lapack routine %d",(int)B_ierr);
334: }
335: break;
336: default: /* general */
337: switch (mpardiso->iparm[12-1]) {
338: case 1:
339: sprintf(type,"C");
340: break;
341: case 2:
342: sprintf(type,"T");
343: break;
344: default:
345: sprintf(type,"N");
346: break;
347: }
348: if (mpardiso->schur_inverted) {
349: PetscStackCallBLAS("BLASgemm",BLASgemm_(type,"N",&B_N,&B_Nrhs,&B_N,&one,mpardiso->schur,&B_N,schur_rhs,&B_N,&zero,schur_sol,&B_N));
350: } else {
351: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
352: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_(type,&B_N,&B_Nrhs,mpardiso->schur,&B_N,mpardiso->schur_pivots,schur_sol,&B_N,&B_ierr));
353: PetscFPTrapPop();
354: if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRS Lapack routine %d",(int)B_ierr);
355: }
356: break;
357: }
358: return(0);
359: }
364: PetscErrorCode MatFactorSetSchurIS_MKL_PARDISO(Mat F, IS is)
365: {
366: Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr;
367: const PetscInt *idxs;
368: PetscInt size,i;
369: PetscMPIInt csize;
370: PetscBool sorted;
371: PetscErrorCode ierr;
374: MPI_Comm_size(PetscObjectComm((PetscObject)F),&csize);
375: if (csize > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MKL_PARDISO parallel Schur complements not yet supported from PETSc\n");
376: ISSorted(is,&sorted);
377: if (!sorted) {
378: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS for MKL_PARDISO Schur complements needs to be sorted\n");
379: }
380: ISGetLocalSize(is,&size);
381: if (mpardiso->schur_size != size) {
382: mpardiso->schur_size = size;
383: PetscFree2(mpardiso->schur,mpardiso->schur_work);
384: PetscFree(mpardiso->schur_idxs);
385: PetscFree(mpardiso->schur_pivots);
386: PetscBLASIntCast(PetscMax(mpardiso->n,2*size),&mpardiso->schur_work_size);
387: PetscMalloc2(size*size,&mpardiso->schur,mpardiso->schur_work_size,&mpardiso->schur_work);
388: PetscMalloc1(size,&mpardiso->schur_idxs);
389: }
390: PetscMemzero(mpardiso->perm,mpardiso->n*sizeof(INT_TYPE));
391: ISGetIndices(is,&idxs);
392: PetscMemcpy(mpardiso->schur_idxs,idxs,size*sizeof(PetscInt));
393: for (i=0;i<size;i++) mpardiso->perm[idxs[i]] = 1;
394: ISRestoreIndices(is,&idxs);
395: if (size) { /* turn on Schur switch if we the set of indices is not empty */
396: mpardiso->iparm[36-1] = 2;
397: }
398: mpardiso->schur_factored = PETSC_FALSE;
399: mpardiso->schur_inverted = PETSC_FALSE;
400: return(0);
401: }
405: PetscErrorCode MatFactorCreateSchurComplement_MKL_PARDISO(Mat F,Mat* S)
406: {
407: Mat St;
408: Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr;
409: PetscScalar *array;
410: PetscErrorCode ierr;
413: if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
414: else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before");
416: MatCreate(PetscObjectComm((PetscObject)F),&St);
417: MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mpardiso->schur_size,mpardiso->schur_size);
418: MatSetType(St,MATDENSE);
419: MatSetUp(St);
420: MatDenseGetArray(St,&array);
421: PetscMemcpy(array,mpardiso->schur,mpardiso->schur_size*mpardiso->schur_size*sizeof(PetscScalar));
422: MatDenseRestoreArray(St,&array);
423: *S = St;
424: return(0);
425: }
429: PetscErrorCode MatFactorGetSchurComplement_MKL_PARDISO(Mat F,Mat* S)
430: {
431: Mat St;
432: Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr;
433: PetscErrorCode ierr;
436: if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
437: else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before");
439: MatCreateSeqDense(PetscObjectComm((PetscObject)F),mpardiso->schur_size,mpardiso->schur_size,mpardiso->schur,&St);
440: *S = St;
441: return(0);
442: }
446: PetscErrorCode MatFactorInvertSchurComplement_MKL_PARDISO(Mat F)
447: {
448: Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr;
449: PetscErrorCode ierr;
452: if (!mpardiso->iparm[36-1]) { /* do nothing */
453: return(0);
454: }
455: if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before");
456: MatMKLPardisoInvertSchur_Private(mpardiso);
457: return(0);
458: }
462: PetscErrorCode MatFactorSolveSchurComplement_MKL_PARDISO(Mat F, Vec rhs, Vec sol)
463: {
464: Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr;
465: PetscScalar *asol,*arhs;
469: if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
470: else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before");
472: mpardiso->nrhs = 1;
473: VecGetArrayRead(rhs,(const PetscScalar**)&arhs);
474: VecGetArray(sol,&asol);
475: MatMKLPardisoSolveSchur_Private(mpardiso,arhs,asol);
476: VecRestoreArrayRead(rhs,(const PetscScalar**)&arhs);
477: VecRestoreArray(sol,&asol);
478: return(0);
479: }
483: PetscErrorCode MatFactorSolveSchurComplementTranspose_MKL_PARDISO(Mat F, Vec rhs, Vec sol)
484: {
485: Mat_MKL_PARDISO *mpardiso =(Mat_MKL_PARDISO*)F->spptr;
486: PetscScalar *asol,*arhs;
487: PetscInt oiparm12;
488: PetscErrorCode ierr;
491: if (!mpardiso->iparm[36-1]) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
492: else if (!mpardiso->schur_size) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur indices not set! You should call MatFactorSetSchurIS before");
494: mpardiso->nrhs = 1;
495: VecGetArrayRead(rhs,(const PetscScalar**)&arhs);
496: VecGetArray(sol,&asol);
497: oiparm12 = mpardiso->iparm[12 - 1];
498: mpardiso->iparm[12 - 1] = 2;
499: MatMKLPardisoSolveSchur_Private(mpardiso,arhs,asol);
500: mpardiso->iparm[12 - 1] = oiparm12;
501: VecRestoreArrayRead(rhs,(const PetscScalar**)&arhs);
502: VecRestoreArray(sol,&asol);
503: return(0);
504: }
508: PetscErrorCode MatDestroy_MKL_PARDISO(Mat A)
509: {
510: Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->spptr;
511: PetscErrorCode ierr;
514: if (mat_mkl_pardiso->CleanUp) {
515: mat_mkl_pardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
517: MKL_PARDISO (mat_mkl_pardiso->pt,
518: &mat_mkl_pardiso->maxfct,
519: &mat_mkl_pardiso->mnum,
520: &mat_mkl_pardiso->mtype,
521: &mat_mkl_pardiso->phase,
522: &mat_mkl_pardiso->n,
523: NULL,
524: NULL,
525: NULL,
526: mat_mkl_pardiso->perm,
527: &mat_mkl_pardiso->nrhs,
528: mat_mkl_pardiso->iparm,
529: &mat_mkl_pardiso->msglvl,
530: NULL,
531: NULL,
532: &mat_mkl_pardiso->err);
533: }
534: PetscFree(mat_mkl_pardiso->perm);
535: PetscFree2(mat_mkl_pardiso->schur,mat_mkl_pardiso->schur_work);
536: PetscFree(mat_mkl_pardiso->schur_idxs);
537: PetscFree(mat_mkl_pardiso->schur_pivots);
538: if (mat_mkl_pardiso->freeaij) {
539: PetscFree2(mat_mkl_pardiso->ia,mat_mkl_pardiso->ja);
540: PetscFree(mat_mkl_pardiso->a);
541: }
542: if (mat_mkl_pardiso->Destroy) {
543: (mat_mkl_pardiso->Destroy)(A);
544: }
545: PetscFree(A->spptr);
547: /* clear composed functions */
548: PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);
549: PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);
550: PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);
551: PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSchurComplement_C",NULL);
552: PetscObjectComposeFunction((PetscObject)A,"MatFactorInvertSchurComplement_C",NULL);
553: PetscObjectComposeFunction((PetscObject)A,"MatFactorSolveSchurComplement_C",NULL);
554: PetscObjectComposeFunction((PetscObject)A,"MatFactorSolveSchurComplementTranspose_C",NULL);
555: PetscObjectComposeFunction((PetscObject)A,"MatMkl_PardisoSetCntl_C",NULL);
556: return(0);
557: }
561: static PetscErrorCode MatMKLPardisoScatterSchur_Private(Mat_MKL_PARDISO *mpardiso, PetscScalar *whole, PetscScalar *schur, PetscBool reduce)
562: {
564: if (reduce) { /* data given for the whole matrix */
565: PetscInt i,m=0,p=0;
566: for (i=0;i<mpardiso->nrhs;i++) {
567: PetscInt j;
568: for (j=0;j<mpardiso->schur_size;j++) {
569: schur[p+j] = whole[m+mpardiso->schur_idxs[j]];
570: }
571: m += mpardiso->n;
572: p += mpardiso->schur_size;
573: }
574: } else { /* from Schur to whole */
575: PetscInt i,m=0,p=0;
576: for (i=0;i<mpardiso->nrhs;i++) {
577: PetscInt j;
578: for (j=0;j<mpardiso->schur_size;j++) {
579: whole[m+mpardiso->schur_idxs[j]] = schur[p+j];
580: }
581: m += mpardiso->n;
582: p += mpardiso->schur_size;
583: }
584: }
585: return(0);
586: }
590: PetscErrorCode MatSolve_MKL_PARDISO(Mat A,Vec b,Vec x)
591: {
592: Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(A)->spptr;
593: PetscErrorCode ierr;
594: PetscScalar *xarray;
595: const PetscScalar *barray;
598: mat_mkl_pardiso->nrhs = 1;
599: VecGetArray(x,&xarray);
600: VecGetArrayRead(b,&barray);
602: if (!mat_mkl_pardiso->schur) {
603: mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
604: } else {
605: mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION;
606: }
607: mat_mkl_pardiso->iparm[6-1] = 0;
609: MKL_PARDISO (mat_mkl_pardiso->pt,
610: &mat_mkl_pardiso->maxfct,
611: &mat_mkl_pardiso->mnum,
612: &mat_mkl_pardiso->mtype,
613: &mat_mkl_pardiso->phase,
614: &mat_mkl_pardiso->n,
615: mat_mkl_pardiso->a,
616: mat_mkl_pardiso->ia,
617: mat_mkl_pardiso->ja,
618: mat_mkl_pardiso->perm,
619: &mat_mkl_pardiso->nrhs,
620: mat_mkl_pardiso->iparm,
621: &mat_mkl_pardiso->msglvl,
622: (void*)barray,
623: (void*)xarray,
624: &mat_mkl_pardiso->err);
626: if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err);
628: if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */
629: PetscInt shift = mat_mkl_pardiso->schur_size;
631: /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */
632: if (!mat_mkl_pardiso->schur_inverted) {
633: shift = 0;
634: }
636: if (!mat_mkl_pardiso->solve_interior) {
637: /* solve Schur complement */
638: MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work,PETSC_TRUE);
639: MatMKLPardisoSolveSchur_Private(mat_mkl_pardiso,mat_mkl_pardiso->schur_work,mat_mkl_pardiso->schur_work+shift);
640: MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work+shift,PETSC_FALSE);
641: } else { /* if we are solving for the interior problem, any value in barray[schur] forward-substitued to xarray[schur] will be neglected */
642: PetscInt i;
643: for (i=0;i<mat_mkl_pardiso->schur_size;i++) {
644: xarray[mat_mkl_pardiso->schur_idxs[i]] = 0.;
645: }
646: }
647: /* expansion phase */
648: mat_mkl_pardiso->iparm[6-1] = 1;
649: mat_mkl_pardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION;
650: MKL_PARDISO (mat_mkl_pardiso->pt,
651: &mat_mkl_pardiso->maxfct,
652: &mat_mkl_pardiso->mnum,
653: &mat_mkl_pardiso->mtype,
654: &mat_mkl_pardiso->phase,
655: &mat_mkl_pardiso->n,
656: mat_mkl_pardiso->a,
657: mat_mkl_pardiso->ia,
658: mat_mkl_pardiso->ja,
659: mat_mkl_pardiso->perm,
660: &mat_mkl_pardiso->nrhs,
661: mat_mkl_pardiso->iparm,
662: &mat_mkl_pardiso->msglvl,
663: (void*)xarray,
664: (void*)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */
665: &mat_mkl_pardiso->err);
667: if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err);
668: mat_mkl_pardiso->iparm[6-1] = 0;
669: }
670: VecRestoreArray(x,&xarray);
671: VecRestoreArrayRead(b,&barray);
672: mat_mkl_pardiso->CleanUp = PETSC_TRUE;
673: return(0);
674: }
678: PetscErrorCode MatSolveTranspose_MKL_PARDISO(Mat A,Vec b,Vec x)
679: {
680: Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->spptr;
681: PetscInt oiparm12;
682: PetscErrorCode ierr;
685: oiparm12 = mat_mkl_pardiso->iparm[12 - 1];
686: mat_mkl_pardiso->iparm[12 - 1] = 2;
687: MatSolve_MKL_PARDISO(A,b,x);
688: mat_mkl_pardiso->iparm[12 - 1] = oiparm12;
689: return(0);
690: }
694: PetscErrorCode MatMatSolve_MKL_PARDISO(Mat A,Mat B,Mat X)
695: {
696: Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(A)->spptr;
697: PetscErrorCode ierr;
698: PetscScalar *barray, *xarray;
699: PetscBool flg;
702: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);
703: if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix");
704: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);
705: if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix");
707: MatGetSize(B,NULL,(PetscInt*)&mat_mkl_pardiso->nrhs);
709: if(mat_mkl_pardiso->nrhs > 0){
710: MatDenseGetArray(B,&barray);
711: MatDenseGetArray(X,&xarray);
713: if (!mat_mkl_pardiso->schur) {
714: mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
715: } else {
716: mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION;
717: }
718: mat_mkl_pardiso->iparm[6-1] = 0;
720: MKL_PARDISO (mat_mkl_pardiso->pt,
721: &mat_mkl_pardiso->maxfct,
722: &mat_mkl_pardiso->mnum,
723: &mat_mkl_pardiso->mtype,
724: &mat_mkl_pardiso->phase,
725: &mat_mkl_pardiso->n,
726: mat_mkl_pardiso->a,
727: mat_mkl_pardiso->ia,
728: mat_mkl_pardiso->ja,
729: mat_mkl_pardiso->perm,
730: &mat_mkl_pardiso->nrhs,
731: mat_mkl_pardiso->iparm,
732: &mat_mkl_pardiso->msglvl,
733: (void*)barray,
734: (void*)xarray,
735: &mat_mkl_pardiso->err);
736: if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err);
738: if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */
739: PetscScalar *o_schur_work = NULL;
740: PetscInt shift = mat_mkl_pardiso->schur_size*mat_mkl_pardiso->nrhs,scale;
741: PetscInt mem = mat_mkl_pardiso->n*mat_mkl_pardiso->nrhs;
743: /* allocate extra memory if it is needed */
744: scale = 1;
745: if (mat_mkl_pardiso->schur_inverted) {
746: scale = 2;
747: }
748: mem *= scale;
749: if (mem > mat_mkl_pardiso->schur_work_size) {
750: o_schur_work = mat_mkl_pardiso->schur_work;
751: PetscMalloc1(mem,&mat_mkl_pardiso->schur_work);
752: }
754: /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */
755: if (!mat_mkl_pardiso->schur_inverted) {
756: shift = 0;
757: }
759: /* solve Schur complement */
760: if (!mat_mkl_pardiso->solve_interior) {
761: MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work,PETSC_TRUE);
762: MatMKLPardisoSolveSchur_Private(mat_mkl_pardiso,mat_mkl_pardiso->schur_work,mat_mkl_pardiso->schur_work+shift);
763: MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso,xarray,mat_mkl_pardiso->schur_work+shift,PETSC_FALSE);
764: } else { /* if we are solving for the interior problem, any value in barray[schur,n] forward-substitued to xarray[schur,n] will be neglected */
765: PetscInt i,n,m=0;
766: for (n=0;n<mat_mkl_pardiso->nrhs;n++) {
767: for (i=0;i<mat_mkl_pardiso->schur_size;i++) {
768: xarray[mat_mkl_pardiso->schur_idxs[i]+m] = 0.;
769: }
770: m += mat_mkl_pardiso->n;
771: }
772: }
773: /* expansion phase */
774: mat_mkl_pardiso->iparm[6-1] = 1;
775: mat_mkl_pardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION;
776: MKL_PARDISO (mat_mkl_pardiso->pt,
777: &mat_mkl_pardiso->maxfct,
778: &mat_mkl_pardiso->mnum,
779: &mat_mkl_pardiso->mtype,
780: &mat_mkl_pardiso->phase,
781: &mat_mkl_pardiso->n,
782: mat_mkl_pardiso->a,
783: mat_mkl_pardiso->ia,
784: mat_mkl_pardiso->ja,
785: mat_mkl_pardiso->perm,
786: &mat_mkl_pardiso->nrhs,
787: mat_mkl_pardiso->iparm,
788: &mat_mkl_pardiso->msglvl,
789: (void*)xarray,
790: (void*)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */
791: &mat_mkl_pardiso->err);
792: if (o_schur_work) { /* restore original schur_work (minimal size) */
793: PetscFree(mat_mkl_pardiso->schur_work);
794: mat_mkl_pardiso->schur_work = o_schur_work;
795: }
796: if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err);
797: mat_mkl_pardiso->iparm[6-1] = 0;
798: }
799: }
800: mat_mkl_pardiso->CleanUp = PETSC_TRUE;
801: return(0);
802: }
806: PetscErrorCode MatFactorNumeric_MKL_PARDISO(Mat F,Mat A,const MatFactorInfo *info)
807: {
808: Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)(F)->spptr;
809: PetscErrorCode ierr;
812: mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN;
813: (*mat_mkl_pardiso->Convert)(A,mat_mkl_pardiso->needsym,MAT_REUSE_MATRIX,&mat_mkl_pardiso->freeaij,&mat_mkl_pardiso->nz,&mat_mkl_pardiso->ia,&mat_mkl_pardiso->ja,(PetscScalar**)&mat_mkl_pardiso->a);
815: mat_mkl_pardiso->phase = JOB_NUMERICAL_FACTORIZATION;
816: MKL_PARDISO (mat_mkl_pardiso->pt,
817: &mat_mkl_pardiso->maxfct,
818: &mat_mkl_pardiso->mnum,
819: &mat_mkl_pardiso->mtype,
820: &mat_mkl_pardiso->phase,
821: &mat_mkl_pardiso->n,
822: mat_mkl_pardiso->a,
823: mat_mkl_pardiso->ia,
824: mat_mkl_pardiso->ja,
825: mat_mkl_pardiso->perm,
826: &mat_mkl_pardiso->nrhs,
827: mat_mkl_pardiso->iparm,
828: &mat_mkl_pardiso->msglvl,
829: NULL,
830: (void*)mat_mkl_pardiso->schur,
831: &mat_mkl_pardiso->err);
832: if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d. Please check manual\n",mat_mkl_pardiso->err);
834: if (mat_mkl_pardiso->schur) { /* schur output from pardiso is in row major format */
835: PetscInt j,k,n=mat_mkl_pardiso->schur_size;
836: if (!mat_mkl_pardiso->schur_solver_type) {
837: for (j=0; j<n; j++) {
838: for (k=0; k<j; k++) {
839: PetscScalar tmp = mat_mkl_pardiso->schur[j + k*n];
840: mat_mkl_pardiso->schur[j + k*n] = mat_mkl_pardiso->schur[k + j*n];
841: mat_mkl_pardiso->schur[k + j*n] = tmp;
842: }
843: }
844: } else { /* we could use row-major in LAPACK routines (e.g. use 'U' instead of 'L'; instead, I prefer consistency between data structures and swap to column major */
845: for (j=0; j<n; j++) {
846: for (k=0; k<j; k++) {
847: mat_mkl_pardiso->schur[j + k*n] = mat_mkl_pardiso->schur[k + j*n];
848: }
849: }
850: }
851: }
852: mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN;
853: mat_mkl_pardiso->CleanUp = PETSC_TRUE;
854: mat_mkl_pardiso->schur_factored = PETSC_FALSE;
855: mat_mkl_pardiso->schur_inverted = PETSC_FALSE;
856: return(0);
857: }
861: PetscErrorCode PetscSetMKL_PARDISOFromOptions(Mat F, Mat A)
862: {
863: Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO*)F->spptr;
864: PetscErrorCode ierr;
865: PetscInt icntl,threads=1;
866: PetscBool flg;
869: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_PARDISO Options","Mat");
871: PetscOptionsInt("-mat_mkl_pardiso_65","Number of threads to use within PARDISO","None",threads,&threads,&flg);
872: if (flg) PetscSetMKL_PARDISOThreads((int)threads);
874: PetscOptionsInt("-mat_mkl_pardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_pardiso->maxfct,&icntl,&flg);
875: if (flg) mat_mkl_pardiso->maxfct = icntl;
877: PetscOptionsInt("-mat_mkl_pardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_pardiso->mnum,&icntl,&flg);
878: if (flg) mat_mkl_pardiso->mnum = icntl;
879:
880: PetscOptionsInt("-mat_mkl_pardiso_68","Message level information","None",mat_mkl_pardiso->msglvl,&icntl,&flg);
881: if (flg) mat_mkl_pardiso->msglvl = icntl;
883: PetscOptionsInt("-mat_mkl_pardiso_69","Defines the matrix type","None",mat_mkl_pardiso->mtype,&icntl,&flg);
884: if(flg){
885: void *pt[IPARM_SIZE];
886: mat_mkl_pardiso->mtype = icntl;
887: MKL_PARDISO_INIT(pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm);
888: #if defined(PETSC_USE_REAL_SINGLE)
889: mat_mkl_pardiso->iparm[27] = 1;
890: #else
891: mat_mkl_pardiso->iparm[27] = 0;
892: #endif
893: mat_mkl_pardiso->iparm[34] = 1; /* use 0-based indexing */
894: }
895: PetscOptionsInt("-mat_mkl_pardiso_1","Use default values","None",mat_mkl_pardiso->iparm[0],&icntl,&flg);
897: if(flg && icntl != 0){
898: PetscOptionsInt("-mat_mkl_pardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_pardiso->iparm[1],&icntl,&flg);
899: if (flg) mat_mkl_pardiso->iparm[1] = icntl;
901: PetscOptionsInt("-mat_mkl_pardiso_4","Preconditioned CGS/CG","None",mat_mkl_pardiso->iparm[3],&icntl,&flg);
902: if (flg) mat_mkl_pardiso->iparm[3] = icntl;
904: PetscOptionsInt("-mat_mkl_pardiso_5","User permutation","None",mat_mkl_pardiso->iparm[4],&icntl,&flg);
905: if (flg) mat_mkl_pardiso->iparm[4] = icntl;
907: PetscOptionsInt("-mat_mkl_pardiso_6","Write solution on x","None",mat_mkl_pardiso->iparm[5],&icntl,&flg);
908: if (flg) mat_mkl_pardiso->iparm[5] = icntl;
910: PetscOptionsInt("-mat_mkl_pardiso_8","Iterative refinement step","None",mat_mkl_pardiso->iparm[7],&icntl,&flg);
911: if (flg) mat_mkl_pardiso->iparm[7] = icntl;
913: PetscOptionsInt("-mat_mkl_pardiso_10","Pivoting perturbation","None",mat_mkl_pardiso->iparm[9],&icntl,&flg);
914: if (flg) mat_mkl_pardiso->iparm[9] = icntl;
916: PetscOptionsInt("-mat_mkl_pardiso_11","Scaling vectors","None",mat_mkl_pardiso->iparm[10],&icntl,&flg);
917: if (flg) mat_mkl_pardiso->iparm[10] = icntl;
919: PetscOptionsInt("-mat_mkl_pardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_pardiso->iparm[11],&icntl,&flg);
920: if (flg) mat_mkl_pardiso->iparm[11] = icntl;
922: PetscOptionsInt("-mat_mkl_pardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_pardiso->iparm[12],&icntl,&flg);
923: if (flg) mat_mkl_pardiso->iparm[12] = icntl;
925: PetscOptionsInt("-mat_mkl_pardiso_18","Numbers of non-zero elements","None",mat_mkl_pardiso->iparm[17],&icntl,&flg);
926: if (flg) mat_mkl_pardiso->iparm[17] = icntl;
928: PetscOptionsInt("-mat_mkl_pardiso_19","Report number of floating point operations","None",mat_mkl_pardiso->iparm[18],&icntl,&flg);
929: if (flg) mat_mkl_pardiso->iparm[18] = icntl;
931: PetscOptionsInt("-mat_mkl_pardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_pardiso->iparm[20],&icntl,&flg);
932: if (flg) mat_mkl_pardiso->iparm[20] = icntl;
934: PetscOptionsInt("-mat_mkl_pardiso_24","Parallel factorization control","None",mat_mkl_pardiso->iparm[23],&icntl,&flg);
935: if (flg) mat_mkl_pardiso->iparm[23] = icntl;
937: PetscOptionsInt("-mat_mkl_pardiso_25","Parallel forward/backward solve control","None",mat_mkl_pardiso->iparm[24],&icntl,&flg);
938: if (flg) mat_mkl_pardiso->iparm[24] = icntl;
940: PetscOptionsInt("-mat_mkl_pardiso_27","Matrix checker","None",mat_mkl_pardiso->iparm[26],&icntl,&flg);
941: if (flg) mat_mkl_pardiso->iparm[26] = icntl;
943: PetscOptionsInt("-mat_mkl_pardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_pardiso->iparm[30],&icntl,&flg);
944: if (flg) mat_mkl_pardiso->iparm[30] = icntl;
946: PetscOptionsInt("-mat_mkl_pardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_pardiso->iparm[33],&icntl,&flg);
947: if (flg) mat_mkl_pardiso->iparm[33] = icntl;
949: PetscOptionsInt("-mat_mkl_pardiso_60","Intel MKL_PARDISO mode","None",mat_mkl_pardiso->iparm[59],&icntl,&flg);
950: if (flg) mat_mkl_pardiso->iparm[59] = icntl;
951: }
952: PetscOptionsEnd();
953: return(0);
954: }
958: PetscErrorCode MatFactorMKL_PARDISOInitialize_Private(Mat A, MatFactorType ftype, Mat_MKL_PARDISO *mat_mkl_pardiso)
959: {
961: PetscInt i;
964: for ( i = 0; i < IPARM_SIZE; i++ ){
965: mat_mkl_pardiso->iparm[i] = 0;
966: }
967: for ( i = 0; i < IPARM_SIZE; i++ ){
968: mat_mkl_pardiso->pt[i] = 0;
969: }
970: /* Default options for both sym and unsym */
971: mat_mkl_pardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */
972: mat_mkl_pardiso->iparm[ 1] = 2; /* Metis reordering */
973: mat_mkl_pardiso->iparm[ 5] = 0; /* Write solution into x */
974: mat_mkl_pardiso->iparm[ 7] = 0; /* Max number of iterative refinement steps */
975: mat_mkl_pardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
976: mat_mkl_pardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
977: #if 0
978: mat_mkl_pardiso->iparm[23] = 1; /* Parallel factorization control*/
979: #endif
980: mat_mkl_pardiso->iparm[34] = 1; /* Cluster Sparse Solver use C-style indexing for ia and ja arrays */
981: mat_mkl_pardiso->iparm[39] = 0; /* Input: matrix/rhs/solution stored on master */
982:
983: mat_mkl_pardiso->CleanUp = PETSC_FALSE;
984: mat_mkl_pardiso->maxfct = 1; /* Maximum number of numerical factorizations. */
985: mat_mkl_pardiso->mnum = 1; /* Which factorization to use. */
986: mat_mkl_pardiso->msglvl = 0; /* 0: do not print 1: Print statistical information in file */
987: mat_mkl_pardiso->phase = -1;
988: mat_mkl_pardiso->err = 0;
989:
990: mat_mkl_pardiso->n = A->rmap->N;
991: mat_mkl_pardiso->nrhs = 1;
992: mat_mkl_pardiso->err = 0;
993: mat_mkl_pardiso->phase = -1;
994:
995: if(ftype == MAT_FACTOR_LU){
996: mat_mkl_pardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
997: mat_mkl_pardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */
998: mat_mkl_pardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
1000: } else {
1001: mat_mkl_pardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
1002: mat_mkl_pardiso->iparm[10] = 0; /* Use nonsymmetric permutation and scaling MPS */
1003: mat_mkl_pardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
1004: /* mat_mkl_pardiso->iparm[20] = 1; */ /* Apply 1x1 and 2x2 Bunch-Kaufman pivoting during the factorization process */
1005: #if defined(PETSC_USE_DEBUG)
1006: mat_mkl_pardiso->iparm[26] = 1; /* Matrix checker */
1007: #endif
1008: }
1009: PetscMalloc1(A->rmap->N*sizeof(INT_TYPE), &mat_mkl_pardiso->perm);
1010: for(i = 0; i < A->rmap->N; i++){
1011: mat_mkl_pardiso->perm[i] = 0;
1012: }
1013: mat_mkl_pardiso->schur_size = 0;
1014: return(0);
1015: }
1019: PetscErrorCode MatFactorSymbolic_AIJMKL_PARDISO_Private(Mat F,Mat A,const MatFactorInfo *info)
1020: {
1021: Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO*)F->spptr;
1022: PetscErrorCode ierr;
1025: mat_mkl_pardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
1026: PetscSetMKL_PARDISOFromOptions(F,A);
1028: /* throw away any previously computed structure */
1029: if (mat_mkl_pardiso->freeaij) {
1030: PetscFree2(mat_mkl_pardiso->ia,mat_mkl_pardiso->ja);
1031: PetscFree(mat_mkl_pardiso->a);
1032: }
1033: (*mat_mkl_pardiso->Convert)(A,mat_mkl_pardiso->needsym,MAT_INITIAL_MATRIX,&mat_mkl_pardiso->freeaij,&mat_mkl_pardiso->nz,&mat_mkl_pardiso->ia,&mat_mkl_pardiso->ja,(PetscScalar**)&mat_mkl_pardiso->a);
1034: mat_mkl_pardiso->n = A->rmap->N;
1036: mat_mkl_pardiso->phase = JOB_ANALYSIS;
1038: MKL_PARDISO (mat_mkl_pardiso->pt,
1039: &mat_mkl_pardiso->maxfct,
1040: &mat_mkl_pardiso->mnum,
1041: &mat_mkl_pardiso->mtype,
1042: &mat_mkl_pardiso->phase,
1043: &mat_mkl_pardiso->n,
1044: mat_mkl_pardiso->a,
1045: mat_mkl_pardiso->ia,
1046: mat_mkl_pardiso->ja,
1047: mat_mkl_pardiso->perm,
1048: &mat_mkl_pardiso->nrhs,
1049: mat_mkl_pardiso->iparm,
1050: &mat_mkl_pardiso->msglvl,
1051: NULL,
1052: NULL,
1053: &mat_mkl_pardiso->err);
1054: if (mat_mkl_pardiso->err < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_PARDISO: err=%d\n. Please check manual",mat_mkl_pardiso->err);
1056: mat_mkl_pardiso->CleanUp = PETSC_TRUE;
1058: if (F->factortype == MAT_FACTOR_LU) {
1059: F->ops->lufactornumeric = MatFactorNumeric_MKL_PARDISO;
1060: } else {
1061: F->ops->choleskyfactornumeric = MatFactorNumeric_MKL_PARDISO;
1062: }
1063: F->ops->solve = MatSolve_MKL_PARDISO;
1064: F->ops->solvetranspose = MatSolveTranspose_MKL_PARDISO;
1065: F->ops->matsolve = MatMatSolve_MKL_PARDISO;
1066: return(0);
1067: }
1071: PetscErrorCode MatLUFactorSymbolic_AIJMKL_PARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1072: {
1076: MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info);
1077: return(0);
1078: }
1082: PetscErrorCode MatCholeskyFactorSymbolic_AIJMKL_PARDISO(Mat F,Mat A,IS r,const MatFactorInfo *info)
1083: {
1087: MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info);
1088: return(0);
1089: }
1093: PetscErrorCode MatView_MKL_PARDISO(Mat A, PetscViewer viewer)
1094: {
1095: PetscErrorCode ierr;
1096: PetscBool iascii;
1097: PetscViewerFormat format;
1098: Mat_MKL_PARDISO *mat_mkl_pardiso=(Mat_MKL_PARDISO*)A->spptr;
1099: PetscInt i;
1102: if (A->ops->solve != MatSolve_MKL_PARDISO) return(0);
1104: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1105: if (iascii) {
1106: PetscViewerGetFormat(viewer,&format);
1107: if (format == PETSC_VIEWER_ASCII_INFO) {
1108: PetscViewerASCIIPrintf(viewer,"MKL_PARDISO run parameters:\n");
1109: PetscViewerASCIIPrintf(viewer,"MKL_PARDISO phase: %d \n",mat_mkl_pardiso->phase);
1110: for(i = 1; i <= 64; i++){
1111: PetscViewerASCIIPrintf(viewer,"MKL_PARDISO iparm[%d]: %d \n",i, mat_mkl_pardiso->iparm[i - 1]);
1112: }
1113: PetscViewerASCIIPrintf(viewer,"MKL_PARDISO maxfct: %d \n", mat_mkl_pardiso->maxfct);
1114: PetscViewerASCIIPrintf(viewer,"MKL_PARDISO mnum: %d \n", mat_mkl_pardiso->mnum);
1115: PetscViewerASCIIPrintf(viewer,"MKL_PARDISO mtype: %d \n", mat_mkl_pardiso->mtype);
1116: PetscViewerASCIIPrintf(viewer,"MKL_PARDISO n: %d \n", mat_mkl_pardiso->n);
1117: PetscViewerASCIIPrintf(viewer,"MKL_PARDISO nrhs: %d \n", mat_mkl_pardiso->nrhs);
1118: PetscViewerASCIIPrintf(viewer,"MKL_PARDISO msglvl: %d \n", mat_mkl_pardiso->msglvl);
1119: }
1120: }
1121: return(0);
1122: }
1127: PetscErrorCode MatGetInfo_MKL_PARDISO(Mat A, MatInfoType flag, MatInfo *info)
1128: {
1129: Mat_MKL_PARDISO *mat_mkl_pardiso =(Mat_MKL_PARDISO*)A->spptr;
1132: info->block_size = 1.0;
1133: info->nz_used = mat_mkl_pardiso->nz;
1134: info->nz_allocated = mat_mkl_pardiso->nz;
1135: info->nz_unneeded = 0.0;
1136: info->assemblies = 0.0;
1137: info->mallocs = 0.0;
1138: info->memory = 0.0;
1139: info->fill_ratio_given = 0;
1140: info->fill_ratio_needed = 0;
1141: info->factor_mallocs = 0;
1142: return(0);
1143: }
1147: PetscErrorCode MatMkl_PardisoSetCntl_MKL_PARDISO(Mat F,PetscInt icntl,PetscInt ival)
1148: {
1149: Mat_MKL_PARDISO *mat_mkl_pardiso =(Mat_MKL_PARDISO*)F->spptr;
1152: if(icntl <= 64){
1153: mat_mkl_pardiso->iparm[icntl - 1] = ival;
1154: } else {
1155: if(icntl == 65)
1156: PetscSetMKL_PARDISOThreads(ival);
1157: else if(icntl == 66)
1158: mat_mkl_pardiso->maxfct = ival;
1159: else if(icntl == 67)
1160: mat_mkl_pardiso->mnum = ival;
1161: else if(icntl == 68)
1162: mat_mkl_pardiso->msglvl = ival;
1163: else if(icntl == 69){
1164: void *pt[IPARM_SIZE];
1165: mat_mkl_pardiso->mtype = ival;
1166: MKL_PARDISO_INIT(pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm);
1167: #if defined(PETSC_USE_REAL_SINGLE)
1168: mat_mkl_pardiso->iparm[27] = 1;
1169: #else
1170: mat_mkl_pardiso->iparm[27] = 0;
1171: #endif
1172: mat_mkl_pardiso->iparm[34] = 1;
1173: } else if(icntl==70) {
1174: mat_mkl_pardiso->solve_interior = (PetscBool)!!ival;
1175: }
1176: }
1177: return(0);
1178: }
1182: /*@
1183: MatMkl_PardisoSetCntl - Set Mkl_Pardiso parameters
1185: Logically Collective on Mat
1187: Input Parameters:
1188: + F - the factored matrix obtained by calling MatGetFactor()
1189: . icntl - index of Mkl_Pardiso parameter
1190: - ival - value of Mkl_Pardiso parameter
1192: Options Database:
1193: . -mat_mkl_pardiso_<icntl> <ival>
1195: Level: beginner
1197: References:
1198: . Mkl_Pardiso Users' Guide
1200: .seealso: MatGetFactor()
1201: @*/
1202: PetscErrorCode MatMkl_PardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
1203: {
1207: PetscTryMethod(F,"MatMkl_PardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
1208: return(0);
1209: }
1211: /*MC
1212: MATSOLVERMKL_PARDISO - A matrix type providing direct solvers (LU) for
1213: sequential matrices via the external package MKL_PARDISO.
1215: Works with MATSEQAIJ matrices
1217: Use -pc_type lu -pc_factor_mat_solver_package mkl_pardiso to us this direct solver
1219: Options Database Keys:
1220: + -mat_mkl_pardiso_65 - Number of threads to use within MKL_PARDISO
1221: . -mat_mkl_pardiso_66 - Maximum number of factors with identical sparsity structure that must be kept in memory at the same time
1222: . -mat_mkl_pardiso_67 - Indicates the actual matrix for the solution phase
1223: . -mat_mkl_pardiso_68 - Message level information
1224: . -mat_mkl_pardiso_69 - Defines the matrix type. IMPORTANT: When you set this flag, iparm parameters are going to be set to the default ones for the matrix type
1225: . -mat_mkl_pardiso_1 - Use default values
1226: . -mat_mkl_pardiso_2 - Fill-in reducing ordering for the input matrix
1227: . -mat_mkl_pardiso_4 - Preconditioned CGS/CG
1228: . -mat_mkl_pardiso_5 - User permutation
1229: . -mat_mkl_pardiso_6 - Write solution on x
1230: . -mat_mkl_pardiso_8 - Iterative refinement step
1231: . -mat_mkl_pardiso_10 - Pivoting perturbation
1232: . -mat_mkl_pardiso_11 - Scaling vectors
1233: . -mat_mkl_pardiso_12 - Solve with transposed or conjugate transposed matrix A
1234: . -mat_mkl_pardiso_13 - Improved accuracy using (non-) symmetric weighted matching
1235: . -mat_mkl_pardiso_18 - Numbers of non-zero elements
1236: . -mat_mkl_pardiso_19 - Report number of floating point operations
1237: . -mat_mkl_pardiso_21 - Pivoting for symmetric indefinite matrices
1238: . -mat_mkl_pardiso_24 - Parallel factorization control
1239: . -mat_mkl_pardiso_25 - Parallel forward/backward solve control
1240: . -mat_mkl_pardiso_27 - Matrix checker
1241: . -mat_mkl_pardiso_31 - Partial solve and computing selected components of the solution vectors
1242: . -mat_mkl_pardiso_34 - Optimal number of threads for conditional numerical reproducibility (CNR) mode
1243: - -mat_mkl_pardiso_60 - Intel MKL_PARDISO mode
1245: Level: beginner
1247: For more information please check mkl_pardiso manual
1249: .seealso: PCFactorSetMatSolverPackage(), MatSolverPackage
1251: M*/
1254: static PetscErrorCode MatFactorGetSolverPackage_mkl_pardiso(Mat A, const MatSolverPackage *type)
1255: {
1257: *type = MATSOLVERMKL_PARDISO;
1258: return(0);
1259: }
1263: PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mkl_pardiso(Mat A,MatFactorType ftype,Mat *F)
1264: {
1265: Mat B;
1266: PetscErrorCode ierr;
1267: Mat_MKL_PARDISO *mat_mkl_pardiso;
1268: PetscBool isSeqAIJ,isSeqBAIJ,isSeqSBAIJ;
1271: PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);
1272: PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);
1273: PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);
1274: MatCreate(PetscObjectComm((PetscObject)A),&B);
1275: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1276: MatSetType(B,((PetscObject)A)->type_name);
1277: MatSeqAIJSetPreallocation(B,0,NULL);
1278: MatSeqBAIJSetPreallocation(B,A->rmap->bs,0,NULL);
1279: MatSeqSBAIJSetPreallocation(B,A->rmap->bs,0,NULL);
1281: PetscNewLog(B,&mat_mkl_pardiso);
1282: B->spptr = mat_mkl_pardiso;
1284: MatFactorMKL_PARDISOInitialize_Private(A, ftype, mat_mkl_pardiso);
1285: if (ftype == MAT_FACTOR_LU) {
1286: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_PARDISO;
1287: B->factortype = MAT_FACTOR_LU;
1288: mat_mkl_pardiso->needsym = PETSC_FALSE;
1289: if (isSeqAIJ) {
1290: mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij;
1291: } else if (isSeqBAIJ) {
1292: mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij;
1293: } else if (isSeqSBAIJ) {
1294: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO LU factor with SEQSBAIJ format! Use MAT_FACTOR_CHOLESKY instead");
1295: } else {
1296: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO LU with %s format",((PetscObject)A)->type_name);
1297: }
1298: #if defined(PETSC_USE_COMPLEX)
1299: mat_mkl_pardiso->mtype = 13;
1300: #else
1301: if (A->structurally_symmetric) {
1302: mat_mkl_pardiso->mtype = 1;
1303: } else {
1304: mat_mkl_pardiso->mtype = 11;
1305: }
1306: #endif
1307: mat_mkl_pardiso->schur_solver_type = 0;
1308: } else {
1309: #if defined(PETSC_USE_COMPLEX)
1310: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO CHOLESKY with complex scalars! Use MAT_FACTOR_LU instead",((PetscObject)A)->type_name);
1311: #endif
1312: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_AIJMKL_PARDISO;
1313: B->factortype = MAT_FACTOR_CHOLESKY;
1314: if (isSeqAIJ) {
1315: mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij;
1316: } else if (isSeqBAIJ) {
1317: mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij;
1318: } else if (isSeqSBAIJ) {
1319: mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqsbaij;
1320: } else {
1321: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO CHOLESKY with %s format",((PetscObject)A)->type_name);
1322: }
1323: mat_mkl_pardiso->needsym = PETSC_TRUE;
1324: if (A->spd_set && A->spd) {
1325: mat_mkl_pardiso->schur_solver_type = 1;
1326: mat_mkl_pardiso->mtype = 2;
1327: } else {
1328: mat_mkl_pardiso->schur_solver_type = 2;
1329: mat_mkl_pardiso->mtype = -2;
1330: }
1331: }
1332: mat_mkl_pardiso->Destroy = B->ops->destroy;
1333: B->ops->destroy = MatDestroy_MKL_PARDISO;
1334: B->ops->view = MatView_MKL_PARDISO;
1335: B->factortype = ftype;
1336: B->ops->getinfo = MatGetInfo_MKL_PARDISO;
1337: B->assembled = PETSC_TRUE;
1339: PetscFree(B->solvertype);
1340: PetscStrallocpy(MATSOLVERMKL_PARDISO,&B->solvertype);
1343: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mkl_pardiso);
1344: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MKL_PARDISO);
1345: PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MKL_PARDISO);
1346: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSchurComplement_C",MatFactorGetSchurComplement_MKL_PARDISO);
1347: PetscObjectComposeFunction((PetscObject)B,"MatFactorInvertSchurComplement_C",MatFactorInvertSchurComplement_MKL_PARDISO);
1348: PetscObjectComposeFunction((PetscObject)B,"MatFactorSolveSchurComplement_C",MatFactorSolveSchurComplement_MKL_PARDISO);
1349: PetscObjectComposeFunction((PetscObject)B,"MatFactorSolveSchurComplementTranspose_C",MatFactorSolveSchurComplementTranspose_MKL_PARDISO);
1350: PetscObjectComposeFunction((PetscObject)B,"MatMkl_PardisoSetCntl_C",MatMkl_PardisoSetCntl_MKL_PARDISO);
1352: *F = B;
1353: return(0);
1354: }
1358: PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_MKL_Pardiso(void)
1359: {
1363: MatSolverPackageRegister(MATSOLVERMKL_PARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mkl_pardiso);
1364: MatSolverPackageRegister(MATSOLVERMKL_PARDISO,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mkl_pardiso);
1365: MatSolverPackageRegister(MATSOLVERMKL_PARDISO,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mkl_pardiso);
1366: return(0);
1367: }