Actual source code: mkl_cpardiso.c
petsc-3.7.3 2016-08-01
1: #if defined(PETSC_HAVE_LIBMKL_INTEL_ILP64)
2: #define MKL_ILP64
3: #endif
5: #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
8: #include <stdio.h>
9: #include <stdlib.h>
10: #include <math.h>
11: #include <mkl.h>
12: #include <mkl_cluster_sparse_solver.h>
14: /*
15: * Possible mkl_cpardiso phases that controls the execution of the solver.
16: * For more information check mkl_cpardiso manual.
17: */
18: #define JOB_ANALYSIS 11
19: #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
20: #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
21: #define JOB_NUMERICAL_FACTORIZATION 22
22: #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
23: #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
24: #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
25: #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
26: #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
27: #define JOB_RELEASE_OF_LU_MEMORY 0
28: #define JOB_RELEASE_OF_ALL_MEMORY -1
30: #define IPARM_SIZE 64
31: #define INT_TYPE MKL_INT
33: static const char *Err_MSG_CPardiso(int errNo){
34: switch (errNo) {
35: case -1:
36: return "input inconsistent"; break;
37: case -2:
38: return "not enough memory"; break;
39: case -3:
40: return "reordering problem"; break;
41: case -4:
42: return "zero pivot, numerical factorization or iterative refinement problem"; break;
43: case -5:
44: return "unclassified (internal) error"; break;
45: case -6:
46: return "preordering failed (matrix types 11, 13 only)"; break;
47: case -7:
48: return "diagonal matrix problem"; break;
49: case -8:
50: return "32-bit integer overflow problem"; break;
51: case -9:
52: return "not enough memory for OOC"; break;
53: case -10:
54: return "problems with opening OOC temporary files"; break;
55: case -11:
56: return "read/write problems with the OOC data file"; break;
57: default :
58: return "unknown error";
59: }
60: }
62: /*
63: * Internal data structure.
64: * For more information check mkl_cpardiso manual.
65: */
67: typedef struct {
69: /* Configuration vector */
70: INT_TYPE iparm[IPARM_SIZE];
72: /*
73: * Internal mkl_cpardiso memory location.
74: * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
75: */
76: void *pt[IPARM_SIZE];
78: MPI_Comm comm_mkl_cpardiso;
80: /* Basic mkl_cpardiso info*/
81: INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
83: /* Matrix structure */
84: PetscScalar *a;
86: INT_TYPE *ia, *ja;
88: /* Number of non-zero elements */
89: INT_TYPE nz;
91: /* Row permutaton vector*/
92: INT_TYPE *perm;
94: /* Define is matrix preserve sparce structure. */
95: MatStructure matstruc;
97: PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);
99: /* True if mkl_cpardiso function have been used. */
100: PetscBool CleanUp;
101: } Mat_MKL_CPARDISO;
103: /*
104: * Copy the elements of matrix A.
105: * Input:
106: * - Mat A: MATSEQAIJ matrix
107: * - int shift: matrix index.
108: * - 0 for c representation
109: * - 1 for fortran representation
110: * - MatReuse reuse:
111: * - MAT_INITIAL_MATRIX: Create a new aij representation
112: * - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
113: * Output:
114: * - int *nnz: Number of nonzero-elements.
115: * - int **r pointer to i index
116: * - int **c pointer to j elements
117: * - MATRIXTYPE **v: Non-zero elements
118: */
121: PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
122: {
123: Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
126: *v=aa->a;
127: if (reuse == MAT_INITIAL_MATRIX) {
128: *r = (INT_TYPE*)aa->i;
129: *c = (INT_TYPE*)aa->j;
130: *nnz = aa->nz;
131: }
132: return(0);
133: }
137: PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
138: {
139: const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
140: PetscErrorCode ierr;
141: PetscInt rstart,nz,i,j,jj,irow,countA,countB;
142: PetscInt *row,*col;
143: const PetscScalar *av, *bv,*v1,*v2;
144: PetscScalar *val;
145: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
146: Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data;
147: Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data;
148: PetscInt nn, colA_start,jB,jcol;
151: ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap->rstart;
152: av=aa->a; bv=bb->a;
154: garray = mat->garray;
156: if (reuse == MAT_INITIAL_MATRIX) {
157: nz = aa->nz + bb->nz;
158: *nnz = nz;
159: PetscMalloc((nz*(sizeof(PetscInt)+sizeof(PetscScalar)) + (m+1)*sizeof(PetscInt)), &row);
160: col = row + m + 1;
161: val = (PetscScalar*)(col + nz);
162: *r = row; *c = col; *v = val;
163: row[0] = 0;
164: } else {
165: row = *r; col = *c; val = *v;
166: }
168: nz = 0;
169: for (i=0; i<m; i++) {
170: row[i] = nz;
171: countA = ai[i+1] - ai[i];
172: countB = bi[i+1] - bi[i];
173: ajj = aj + ai[i]; /* ptr to the beginning of this row */
174: bjj = bj + bi[i];
176: /* B part, smaller col index */
177: colA_start = rstart + ajj[0]; /* the smallest global col index of A */
178: jB = 0;
179: for (j=0; j<countB; j++) {
180: jcol = garray[bjj[j]];
181: if (jcol > colA_start) {
182: jB = j;
183: break;
184: }
185: col[nz] = jcol;
186: val[nz++] = *bv++;
187: if (j==countB-1) jB = countB;
188: }
190: /* A part */
191: for (j=0; j<countA; j++) {
192: col[nz] = rstart + ajj[j];
193: val[nz++] = *av++;
194: }
196: /* B part, larger col index */
197: for (j=jB; j<countB; j++) {
198: col[nz] = garray[bjj[j]];
199: val[nz++] = *bv++;
200: }
201: }
202: row[m] = nz;
204: return(0);
205: }
207: /*
208: * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
209: */
212: PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
213: {
214: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr;
215: PetscErrorCode ierr;
216: PetscBool flg;
219: /* Terminate instance, deallocate memories */
220: if (mat_mkl_cpardiso->CleanUp) {
221: mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
223: cluster_sparse_solver (
224: mat_mkl_cpardiso->pt,
225: &mat_mkl_cpardiso->maxfct,
226: &mat_mkl_cpardiso->mnum,
227: &mat_mkl_cpardiso->mtype,
228: &mat_mkl_cpardiso->phase,
229: &mat_mkl_cpardiso->n,
230: NULL,
231: NULL,
232: NULL,
233: mat_mkl_cpardiso->perm,
234: &mat_mkl_cpardiso->nrhs,
235: mat_mkl_cpardiso->iparm,
236: &mat_mkl_cpardiso->msglvl,
237: NULL,
238: NULL,
239: &mat_mkl_cpardiso->comm_mkl_cpardiso,
240: &mat_mkl_cpardiso->err);
241: }
243: PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&flg);
244: if (flg) {
245: MatDestroy_SeqAIJ(A);
246: } else {
247: PetscFree(mat_mkl_cpardiso->ia);
248: MatDestroy_MPIAIJ(A);
249: }
250: MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));
251: PetscFree(A->spptr);
253: /* clear composed functions */
254: PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);
255: PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);
256: return(0);
257: }
259: /*
260: * Computes Ax = b
261: */
264: PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x)
265: {
266: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->spptr;
267: PetscErrorCode ierr;
268: PetscScalar *xarray;
269: const PetscScalar *barray;
272: mat_mkl_cpardiso->nrhs = 1;
273: VecGetArray(x,&xarray);
274: VecGetArrayRead(b,&barray);
276: /* solve phase */
277: /*-------------*/
278: mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
279: cluster_sparse_solver (
280: mat_mkl_cpardiso->pt,
281: &mat_mkl_cpardiso->maxfct,
282: &mat_mkl_cpardiso->mnum,
283: &mat_mkl_cpardiso->mtype,
284: &mat_mkl_cpardiso->phase,
285: &mat_mkl_cpardiso->n,
286: mat_mkl_cpardiso->a,
287: mat_mkl_cpardiso->ia,
288: mat_mkl_cpardiso->ja,
289: mat_mkl_cpardiso->perm,
290: &mat_mkl_cpardiso->nrhs,
291: mat_mkl_cpardiso->iparm,
292: &mat_mkl_cpardiso->msglvl,
293: (void*)barray,
294: (void*)xarray,
295: &mat_mkl_cpardiso->comm_mkl_cpardiso,
296: &mat_mkl_cpardiso->err);
298: if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
300: VecRestoreArray(x,&xarray);
301: VecRestoreArrayRead(b,&barray);
302: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
303: return(0);
304: }
308: PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x)
309: {
310: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr;
311: PetscErrorCode ierr;
314: #if defined(PETSC_USE_COMPLEX)
315: mat_mkl_cpardiso->iparm[12 - 1] = 1;
316: #else
317: mat_mkl_cpardiso->iparm[12 - 1] = 2;
318: #endif
319: MatSolve_MKL_CPARDISO(A,b,x);
320: mat_mkl_cpardiso->iparm[12 - 1] = 0;
321: return(0);
322: }
326: PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X)
327: {
328: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->spptr;
329: PetscErrorCode ierr;
330: PetscScalar *barray, *xarray;
331: PetscBool flg;
334: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&flg);
335: if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATSEQDENSE matrix");
336: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&flg);
337: if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATSEQDENSE matrix");
339: MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);
341: if(mat_mkl_cpardiso->nrhs > 0){
342: MatDenseGetArray(B,&barray);
343: MatDenseGetArray(X,&xarray);
345: /* solve phase */
346: /*-------------*/
347: mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
348: cluster_sparse_solver (
349: mat_mkl_cpardiso->pt,
350: &mat_mkl_cpardiso->maxfct,
351: &mat_mkl_cpardiso->mnum,
352: &mat_mkl_cpardiso->mtype,
353: &mat_mkl_cpardiso->phase,
354: &mat_mkl_cpardiso->n,
355: mat_mkl_cpardiso->a,
356: mat_mkl_cpardiso->ia,
357: mat_mkl_cpardiso->ja,
358: mat_mkl_cpardiso->perm,
359: &mat_mkl_cpardiso->nrhs,
360: mat_mkl_cpardiso->iparm,
361: &mat_mkl_cpardiso->msglvl,
362: (void*)barray,
363: (void*)xarray,
364: &mat_mkl_cpardiso->comm_mkl_cpardiso,
365: &mat_mkl_cpardiso->err);
366: if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
367: }
368: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
369: return(0);
371: }
373: /*
374: * LU Decomposition
375: */
378: PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info)
379: {
380: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->spptr;
381: PetscErrorCode ierr;
383: /* numerical factorization phase */
384: /*-------------------------------*/
387: mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
388: (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);
390: /* numerical factorization phase */
391: /*-------------------------------*/
392: mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
393: cluster_sparse_solver (
394: mat_mkl_cpardiso->pt,
395: &mat_mkl_cpardiso->maxfct,
396: &mat_mkl_cpardiso->mnum,
397: &mat_mkl_cpardiso->mtype,
398: &mat_mkl_cpardiso->phase,
399: &mat_mkl_cpardiso->n,
400: mat_mkl_cpardiso->a,
401: mat_mkl_cpardiso->ia,
402: mat_mkl_cpardiso->ja,
403: mat_mkl_cpardiso->perm,
404: &mat_mkl_cpardiso->nrhs,
405: mat_mkl_cpardiso->iparm,
406: &mat_mkl_cpardiso->msglvl,
407: NULL,
408: NULL,
409: &mat_mkl_cpardiso->comm_mkl_cpardiso,
410: &mat_mkl_cpardiso->err);
411: if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
413: mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
414: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
415: return(0);
416: }
418: /* Sets mkl_cpardiso options from the options database */
421: PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A)
422: {
423: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->spptr;
424: PetscErrorCode ierr;
425: PetscInt icntl;
426: PetscBool flg;
427: int pt[IPARM_SIZE], threads;
430: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");
431: PetscOptionsInt("-mat_mkl_cpardiso_65",
432: "Number of threads to use",
433: "None",
434: threads,
435: &threads,
436: &flg);
437: if (flg) mkl_set_num_threads(threads);
439: PetscOptionsInt("-mat_mkl_cpardiso_66",
440: "Maximum number of factors with identical sparsity structure that must be kept in memory at the same time",
441: "None",
442: mat_mkl_cpardiso->maxfct,
443: &icntl,
444: &flg);
445: if (flg) mat_mkl_cpardiso->maxfct = icntl;
447: PetscOptionsInt("-mat_mkl_cpardiso_67",
448: "Indicates the actual matrix for the solution phase",
449: "None",
450: mat_mkl_cpardiso->mnum,
451: &icntl,
452: &flg);
453: if (flg) mat_mkl_cpardiso->mnum = icntl;
455: PetscOptionsInt("-mat_mkl_cpardiso_68",
456: "Message level information",
457: "None",
458: mat_mkl_cpardiso->msglvl,
459: &icntl,
460: &flg);
461: if (flg) mat_mkl_cpardiso->msglvl = icntl;
463: PetscOptionsInt("-mat_mkl_cpardiso_69",
464: "Defines the matrix type",
465: "None",
466: mat_mkl_cpardiso->mtype,
467: &icntl,
468: &flg);
469: if(flg){
470: mat_mkl_cpardiso->mtype = icntl;
471: #if defined(PETSC_USE_REAL_SINGLE)
472: mat_mkl_cpardiso->iparm[27] = 1;
473: #else
474: mat_mkl_cpardiso->iparm[27] = 0;
475: #endif
476: mat_mkl_cpardiso->iparm[34] = 1;
477: }
478: PetscOptionsInt("-mat_mkl_cpardiso_1",
479: "Use default values",
480: "None",
481: mat_mkl_cpardiso->iparm[0],
482: &icntl,
483: &flg);
485: if(flg && icntl != 0){
486: PetscOptionsInt("-mat_mkl_cpardiso_2",
487: "Fill-in reducing ordering for the input matrix",
488: "None",
489: mat_mkl_cpardiso->iparm[1],
490: &icntl,
491: &flg);
492: if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
494: PetscOptionsInt("-mat_mkl_cpardiso_4",
495: "Preconditioned CGS/CG",
496: "None",
497: mat_mkl_cpardiso->iparm[3],
498: &icntl,
499: &flg);
500: if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
502: PetscOptionsInt("-mat_mkl_cpardiso_5",
503: "User permutation",
504: "None",
505: mat_mkl_cpardiso->iparm[4],
506: &icntl,
507: &flg);
508: if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
510: PetscOptionsInt("-mat_mkl_cpardiso_6",
511: "Write solution on x",
512: "None",
513: mat_mkl_cpardiso->iparm[5],
514: &icntl,
515: &flg);
516: if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
518: PetscOptionsInt("-mat_mkl_cpardiso_8",
519: "Iterative refinement step",
520: "None",
521: mat_mkl_cpardiso->iparm[7],
522: &icntl,
523: &flg);
524: if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
526: PetscOptionsInt("-mat_mkl_cpardiso_10",
527: "Pivoting perturbation",
528: "None",
529: mat_mkl_cpardiso->iparm[9],
530: &icntl,
531: &flg);
532: if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
534: PetscOptionsInt("-mat_mkl_cpardiso_11",
535: "Scaling vectors",
536: "None",
537: mat_mkl_cpardiso->iparm[10],
538: &icntl,
539: &flg);
540: if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
542: PetscOptionsInt("-mat_mkl_cpardiso_12",
543: "Solve with transposed or conjugate transposed matrix A",
544: "None",
545: mat_mkl_cpardiso->iparm[11],
546: &icntl,
547: &flg);
548: if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
550: PetscOptionsInt("-mat_mkl_cpardiso_13",
551: "Improved accuracy using (non-) symmetric weighted matching",
552: "None",
553: mat_mkl_cpardiso->iparm[12],
554: &icntl,
555: &flg);
556: if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
558: PetscOptionsInt("-mat_mkl_cpardiso_18",
559: "Numbers of non-zero elements",
560: "None",
561: mat_mkl_cpardiso->iparm[17],
562: &icntl,
563: &flg);
564: if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
566: PetscOptionsInt("-mat_mkl_cpardiso_19",
567: "Report number of floating point operations",
568: "None",
569: mat_mkl_cpardiso->iparm[18],
570: &icntl,
571: &flg);
572: if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
574: PetscOptionsInt("-mat_mkl_cpardiso_21",
575: "Pivoting for symmetric indefinite matrices",
576: "None",
577: mat_mkl_cpardiso->iparm[20],
578: &icntl,
579: &flg);
580: if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
582: PetscOptionsInt("-mat_mkl_cpardiso_24",
583: "Parallel factorization control",
584: "None",
585: mat_mkl_cpardiso->iparm[23],
586: &icntl,
587: &flg);
588: if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
590: PetscOptionsInt("-mat_mkl_cpardiso_25",
591: "Parallel forward/backward solve control",
592: "None",
593: mat_mkl_cpardiso->iparm[24],
594: &icntl,
595: &flg);
596: if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
598: PetscOptionsInt("-mat_mkl_cpardiso_27",
599: "Matrix checker",
600: "None",
601: mat_mkl_cpardiso->iparm[26],
602: &icntl,
603: &flg);
604: if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
606: PetscOptionsInt("-mat_mkl_cpardiso_31",
607: "Partial solve and computing selected components of the solution vectors",
608: "None",
609: mat_mkl_cpardiso->iparm[30],
610: &icntl,
611: &flg);
612: if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
614: PetscOptionsInt("-mat_mkl_cpardiso_34",
615: "Optimal number of threads for conditional numerical reproducibility (CNR) mode",
616: "None",
617: mat_mkl_cpardiso->iparm[33],
618: &icntl,
619: &flg);
620: if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
622: PetscOptionsInt("-mat_mkl_cpardiso_60",
623: "Intel MKL_CPARDISO mode",
624: "None",
625: mat_mkl_cpardiso->iparm[59],
626: &icntl,
627: &flg);
628: if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
629: }
631: PetscOptionsEnd();
632: return(0);
633: }
637: PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
638: {
639: PetscErrorCode ierr;
640: PetscInt i;
641: PetscMPIInt size;
645: MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));
646: MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);
648: mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
649: mat_mkl_cpardiso->maxfct = 1;
650: mat_mkl_cpardiso->mnum = 1;
651: mat_mkl_cpardiso->n = A->rmap->N;
652: mat_mkl_cpardiso->msglvl = 0;
653: mat_mkl_cpardiso->nrhs = 1;
654: mat_mkl_cpardiso->err = 0;
655: mat_mkl_cpardiso->phase = -1;
656: #if defined(PETSC_USE_COMPLEX)
657: mat_mkl_cpardiso->mtype = 13;
658: #else
659: mat_mkl_cpardiso->mtype = 11;
660: #endif
662: #if defined(PETSC_USE_REAL_SINGLE)
663: mat_mkl_cpardiso->iparm[27] = 1;
664: #else
665: mat_mkl_cpardiso->iparm[27] = 0;
666: #endif
668: mat_mkl_cpardiso->iparm[34] = 1; /* C style */
670: mat_mkl_cpardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */
671: mat_mkl_cpardiso->iparm[ 1] = 2; /* Use METIS for fill-in reordering */
672: mat_mkl_cpardiso->iparm[ 5] = 0; /* Write solution into x */
673: mat_mkl_cpardiso->iparm[ 7] = 2; /* Max number of iterative refinement steps */
674: mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
675: mat_mkl_cpardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */
676: mat_mkl_cpardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
677: mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
678: mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
679: mat_mkl_cpardiso->iparm[26] = 1; /* Check input data for correctness */
681: mat_mkl_cpardiso->iparm[39] = 0;
682: if (size > 1) {
683: mat_mkl_cpardiso->iparm[39] = 2;
684: mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
685: mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
686: }
687: mat_mkl_cpardiso->perm = 0;
688: return(0);
689: }
691: /*
692: * Symbolic decomposition. Mkl_Pardiso analysis phase.
693: */
696: PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
697: {
698: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->spptr;
699: PetscErrorCode ierr;
702: mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
704: /* Set MKL_CPARDISO options from the options database */
705: PetscSetMKL_CPARDISOFromOptions(F,A);
707: (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);
709: mat_mkl_cpardiso->n = A->rmap->N;
711: /* analysis phase */
712: /*----------------*/
713: mat_mkl_cpardiso->phase = JOB_ANALYSIS;
715: cluster_sparse_solver (
716: mat_mkl_cpardiso->pt,
717: &mat_mkl_cpardiso->maxfct,
718: &mat_mkl_cpardiso->mnum,
719: &mat_mkl_cpardiso->mtype,
720: &mat_mkl_cpardiso->phase,
721: &mat_mkl_cpardiso->n,
722: mat_mkl_cpardiso->a,
723: mat_mkl_cpardiso->ia,
724: mat_mkl_cpardiso->ja,
725: mat_mkl_cpardiso->perm,
726: &mat_mkl_cpardiso->nrhs,
727: mat_mkl_cpardiso->iparm,
728: &mat_mkl_cpardiso->msglvl,
729: NULL,
730: NULL,
731: &mat_mkl_cpardiso->comm_mkl_cpardiso,
732: &mat_mkl_cpardiso->err);
734: if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
736: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
737: F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
738: F->ops->solve = MatSolve_MKL_CPARDISO;
739: F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO;
740: F->ops->matsolve = MatMatSolve_MKL_CPARDISO;
741: return(0);
742: }
746: PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
747: {
748: PetscErrorCode ierr;
749: PetscBool iascii;
750: PetscViewerFormat format;
751: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->spptr;
752: PetscInt i;
755: /* check if matrix is mkl_cpardiso type */
756: if (A->ops->solve != MatSolve_MKL_CPARDISO) return(0);
758: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
759: if (iascii) {
760: PetscViewerGetFormat(viewer,&format);
761: if (format == PETSC_VIEWER_ASCII_INFO) {
762: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");
763: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase: %d \n",mat_mkl_cpardiso->phase);
764: for(i = 1; i <= 64; i++){
765: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]: %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);
766: }
767: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct: %d \n", mat_mkl_cpardiso->maxfct);
768: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum: %d \n", mat_mkl_cpardiso->mnum);
769: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype: %d \n", mat_mkl_cpardiso->mtype);
770: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n: %d \n", mat_mkl_cpardiso->n);
771: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs: %d \n", mat_mkl_cpardiso->nrhs);
772: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl: %d \n", mat_mkl_cpardiso->msglvl);
773: }
774: }
775: return(0);
776: }
780: PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
781: {
782: Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)A->spptr;
785: info->block_size = 1.0;
786: info->nz_allocated = mat_mkl_cpardiso->nz + 0.0;
787: info->nz_unneeded = 0.0;
788: info->assemblies = 0.0;
789: info->mallocs = 0.0;
790: info->memory = 0.0;
791: info->fill_ratio_given = 0;
792: info->fill_ratio_needed = 0;
793: info->factor_mallocs = 0;
794: return(0);
795: }
799: PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival)
800: {
801: Mat_MKL_CPARDISO *mat_mkl_cpardiso =(Mat_MKL_CPARDISO*)F->spptr;
804: if(icntl <= 64){
805: mat_mkl_cpardiso->iparm[icntl - 1] = ival;
806: } else {
807: if(icntl == 65)
808: mkl_set_num_threads((int)ival);
809: else if(icntl == 66)
810: mat_mkl_cpardiso->maxfct = ival;
811: else if(icntl == 67)
812: mat_mkl_cpardiso->mnum = ival;
813: else if(icntl == 68)
814: mat_mkl_cpardiso->msglvl = ival;
815: else if(icntl == 69){
816: int pt[IPARM_SIZE];
817: mat_mkl_cpardiso->mtype = ival;
818: #if defined(PETSC_USE_REAL_SINGLE)
819: mat_mkl_cpardiso->iparm[27] = 1;
820: #else
821: mat_mkl_cpardiso->iparm[27] = 0;
822: #endif
823: mat_mkl_cpardiso->iparm[34] = 1;
824: }
825: }
826: return(0);
827: }
831: /*@
832: MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters
834: Logically Collective on Mat
836: Input Parameters:
837: + F - the factored matrix obtained by calling MatGetFactor()
838: . icntl - index of Mkl_Pardiso parameter
839: - ival - value of Mkl_Pardiso parameter
841: Options Database:
842: . -mat_mkl_cpardiso_<icntl> <ival>
844: Level: beginner
846: References:
847: . Mkl_Pardiso Users' Guide
849: .seealso: MatGetFactor()
850: @*/
851: PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
852: {
856: PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
857: return(0);
858: }
862: static PetscErrorCode MatFactorGetSolverPackage_mkl_cpardiso(Mat A, const MatSolverPackage *type)
863: {
865: *type = MATSOLVERMKL_CPARDISO;
866: return(0);
867: }
869: /* MatGetFactor for MPI AIJ matrices */
872: static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F)
873: {
874: Mat B;
875: PetscErrorCode ierr;
876: Mat_MKL_CPARDISO *mat_mkl_cpardiso;
877: PetscBool isSeqAIJ;
880: /* Create the factorization matrix */
882: PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);
883: MatCreate(PetscObjectComm((PetscObject)A),&B);
884: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
885: MatSetType(B,((PetscObject)A)->type_name);
887: PetscNewLog(B,&mat_mkl_cpardiso);
889: if (isSeqAIJ) {
890: MatSeqAIJSetPreallocation(B,0,NULL);
891: mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
892: } else {
893: mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
894: MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);
895: }
897: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
898: B->ops->destroy = MatDestroy_MKL_CPARDISO;
900: B->ops->view = MatView_MKL_CPARDISO;
901: B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
903: B->factortype = ftype;
904: B->assembled = PETSC_TRUE; /* required by -ksp_view */
906: B->spptr = mat_mkl_cpardiso;
908: /* set solvertype */
909: PetscFree(B->solvertype);
910: PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);
912: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_mkl_cpardiso);
913: PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);
914: PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);
916: *F = B;
917: return(0);
918: }
922: PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_MKL_CPardiso(void)
923: {
925:
927: MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);
928: MatSolverPackageRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);
929: return(0);
930: }