Actual source code: mkl_cpardiso.c
2: #include <petscsys.h>
3: #include <../src/mat/impls/aij/mpi/mpiaij.h>
4: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
6: #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
7: #define MKL_ILP64
8: #endif
9: #include <mkl.h>
10: #include <mkl_cluster_sparse_solver.h>
12: /*
13: * Possible mkl_cpardiso phases that controls the execution of the solver.
14: * For more information check mkl_cpardiso manual.
15: */
16: #define JOB_ANALYSIS 11
17: #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
18: #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
19: #define JOB_NUMERICAL_FACTORIZATION 22
20: #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
21: #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
22: #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
23: #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
24: #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
25: #define JOB_RELEASE_OF_LU_MEMORY 0
26: #define JOB_RELEASE_OF_ALL_MEMORY -1
28: #define IPARM_SIZE 64
29: #define INT_TYPE MKL_INT
31: static const char *Err_MSG_CPardiso(int errNo) {
32: switch (errNo) {
33: case -1:
34: return "input inconsistent"; break;
35: case -2:
36: return "not enough memory"; break;
37: case -3:
38: return "reordering problem"; break;
39: case -4:
40: return "zero pivot, numerical factorization or iterative refinement problem"; break;
41: case -5:
42: return "unclassified (internal) error"; break;
43: case -6:
44: return "preordering failed (matrix types 11, 13 only)"; break;
45: case -7:
46: return "diagonal matrix problem"; break;
47: case -8:
48: return "32-bit integer overflow problem"; break;
49: case -9:
50: return "not enough memory for OOC"; break;
51: case -10:
52: return "problems with opening OOC temporary files"; break;
53: case -11:
54: return "read/write problems with the OOC data file"; break;
55: default :
56: return "unknown error";
57: }
58: }
60: /*
61: * Internal data structure.
62: * For more information check mkl_cpardiso manual.
63: */
65: typedef struct {
67: /* Configuration vector */
68: INT_TYPE iparm[IPARM_SIZE];
70: /*
71: * Internal mkl_cpardiso memory location.
72: * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
73: */
74: void *pt[IPARM_SIZE];
76: MPI_Fint comm_mkl_cpardiso;
78: /* Basic mkl_cpardiso info*/
79: INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
81: /* Matrix structure */
82: PetscScalar *a;
84: INT_TYPE *ia, *ja;
86: /* Number of non-zero elements */
87: INT_TYPE nz;
89: /* Row permutaton vector*/
90: INT_TYPE *perm;
92: /* Define is matrix preserve sparce structure. */
93: MatStructure matstruc;
95: PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);
97: /* True if mkl_cpardiso function have been used. */
98: PetscBool CleanUp;
99: } Mat_MKL_CPARDISO;
101: /*
102: * Copy the elements of matrix A.
103: * Input:
104: * - Mat A: MATSEQAIJ matrix
105: * - int shift: matrix index.
106: * - 0 for c representation
107: * - 1 for fortran representation
108: * - MatReuse reuse:
109: * - MAT_INITIAL_MATRIX: Create a new aij representation
110: * - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
111: * Output:
112: * - int *nnz: Number of nonzero-elements.
113: * - int **r pointer to i index
114: * - int **c pointer to j elements
115: * - MATRIXTYPE **v: Non-zero elements
116: */
117: PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
118: {
119: Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
121: *v=aa->a;
122: if (reuse == MAT_INITIAL_MATRIX) {
123: *r = (INT_TYPE*)aa->i;
124: *c = (INT_TYPE*)aa->j;
125: *nnz = aa->nz;
126: }
127: return 0;
128: }
130: PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
131: {
132: const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
133: PetscInt rstart,nz,i,j,countA,countB;
134: PetscInt *row,*col;
135: const PetscScalar *av, *bv;
136: PetscScalar *val;
137: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
138: Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data;
139: Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data;
140: PetscInt colA_start,jB,jcol;
142: ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart=A->rmap->rstart;
143: av=aa->a; bv=bb->a;
145: garray = mat->garray;
147: if (reuse == MAT_INITIAL_MATRIX) {
148: nz = aa->nz + bb->nz;
149: *nnz = nz;
150: PetscMalloc3(m+1,&row,nz,&col,nz,&val);
151: *r = row; *c = col; *v = val;
152: } else {
153: row = *r; col = *c; val = *v;
154: }
156: nz = 0;
157: for (i=0; i<m; i++) {
158: row[i] = nz;
159: countA = ai[i+1] - ai[i];
160: countB = bi[i+1] - bi[i];
161: ajj = aj + ai[i]; /* ptr to the beginning of this row */
162: bjj = bj + bi[i];
164: /* B part, smaller col index */
165: colA_start = rstart + ajj[0]; /* the smallest global col index of A */
166: jB = 0;
167: for (j=0; j<countB; j++) {
168: jcol = garray[bjj[j]];
169: if (jcol > colA_start) break;
170: col[nz] = jcol;
171: val[nz++] = *bv++;
172: }
173: jB = j;
175: /* A part */
176: for (j=0; j<countA; j++) {
177: col[nz] = rstart + ajj[j];
178: val[nz++] = *av++;
179: }
181: /* B part, larger col index */
182: for (j=jB; j<countB; j++) {
183: col[nz] = garray[bjj[j]];
184: val[nz++] = *bv++;
185: }
186: }
187: row[m] = nz;
189: return 0;
190: }
192: PetscErrorCode MatConvertToTriples_mpibaij_mpibaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
193: {
194: const PetscInt *ai, *aj, *bi, *bj,*garray,bs=A->rmap->bs,bs2=bs*bs,m=A->rmap->n/bs,*ajj,*bjj;
195: PetscInt rstart,nz,i,j,countA,countB;
196: PetscInt *row,*col;
197: const PetscScalar *av, *bv;
198: PetscScalar *val;
199: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data;
200: Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data;
201: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data;
202: PetscInt colA_start,jB,jcol;
204: ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart=A->rmap->rstart/bs;
205: av=aa->a; bv=bb->a;
207: garray = mat->garray;
209: if (reuse == MAT_INITIAL_MATRIX) {
210: nz = aa->nz + bb->nz;
211: *nnz = nz;
212: PetscMalloc3(m+1,&row,nz,&col,nz*bs2,&val);
213: *r = row; *c = col; *v = val;
214: } else {
215: row = *r; col = *c; val = *v;
216: }
218: nz = 0;
219: for (i=0; i<m; i++) {
220: row[i] = nz+1;
221: countA = ai[i+1] - ai[i];
222: countB = bi[i+1] - bi[i];
223: ajj = aj + ai[i]; /* ptr to the beginning of this row */
224: bjj = bj + bi[i];
226: /* B part, smaller col index */
227: colA_start = rstart + (countA > 0 ? ajj[0] : 0); /* the smallest global col index of A */
228: jB = 0;
229: for (j=0; j<countB; j++) {
230: jcol = garray[bjj[j]];
231: if (jcol > colA_start) break;
232: col[nz++] = jcol + 1;
233: }
234: jB = j;
235: PetscArraycpy(val,bv,jB*bs2);
236: val += jB*bs2;
237: bv += jB*bs2;
239: /* A part */
240: for (j=0; j<countA; j++) col[nz++] = rstart + ajj[j] + 1;
241: PetscArraycpy(val,av,countA*bs2);
242: val += countA*bs2;
243: av += countA*bs2;
245: /* B part, larger col index */
246: for (j=jB; j<countB; j++) col[nz++] = garray[bjj[j]] + 1;
247: PetscArraycpy(val,bv,(countB-jB)*bs2);
248: val += (countB-jB)*bs2;
249: bv += (countB-jB)*bs2;
250: }
251: row[m] = nz+1;
253: return 0;
254: }
256: PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
257: {
258: const PetscInt *ai, *aj, *bi, *bj,*garray,bs=A->rmap->bs,bs2=bs*bs,m=A->rmap->n/bs,*ajj,*bjj;
259: PetscInt rstart,nz,i,j,countA,countB;
260: PetscInt *row,*col;
261: const PetscScalar *av, *bv;
262: PetscScalar *val;
263: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data;
264: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data;
265: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data;
267: ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart=A->rmap->rstart/bs;
268: av=aa->a; bv=bb->a;
270: garray = mat->garray;
272: if (reuse == MAT_INITIAL_MATRIX) {
273: nz = aa->nz + bb->nz;
274: *nnz = nz;
275: PetscMalloc3(m+1,&row,nz,&col,nz*bs2,&val);
276: *r = row; *c = col; *v = val;
277: } else {
278: row = *r; col = *c; val = *v;
279: }
281: nz = 0;
282: for (i=0; i<m; i++) {
283: row[i] = nz+1;
284: countA = ai[i+1] - ai[i];
285: countB = bi[i+1] - bi[i];
286: ajj = aj + ai[i]; /* ptr to the beginning of this row */
287: bjj = bj + bi[i];
289: /* A part */
290: for (j=0; j<countA; j++) col[nz++] = rstart + ajj[j] + 1;
291: PetscArraycpy(val,av,countA*bs2);
292: val += countA*bs2;
293: av += countA*bs2;
295: /* B part, larger col index */
296: for (j=0; j<countB; j++) col[nz++] = garray[bjj[j]] + 1;
297: PetscArraycpy(val,bv,countB*bs2);
298: val += countB*bs2;
299: bv += countB*bs2;
300: }
301: row[m] = nz+1;
303: return 0;
304: }
306: /*
307: * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
308: */
309: PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
310: {
311: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
312: MPI_Comm comm;
314: /* Terminate instance, deallocate memories */
315: if (mat_mkl_cpardiso->CleanUp) {
316: mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
318: cluster_sparse_solver (
319: mat_mkl_cpardiso->pt,
320: &mat_mkl_cpardiso->maxfct,
321: &mat_mkl_cpardiso->mnum,
322: &mat_mkl_cpardiso->mtype,
323: &mat_mkl_cpardiso->phase,
324: &mat_mkl_cpardiso->n,
325: NULL,
326: NULL,
327: NULL,
328: mat_mkl_cpardiso->perm,
329: &mat_mkl_cpardiso->nrhs,
330: mat_mkl_cpardiso->iparm,
331: &mat_mkl_cpardiso->msglvl,
332: NULL,
333: NULL,
334: &mat_mkl_cpardiso->comm_mkl_cpardiso,
335: (PetscInt*)&mat_mkl_cpardiso->err);
336: }
338: if (mat_mkl_cpardiso->ConvertToTriples != MatCopy_seqaij_seqaij_MKL_CPARDISO) {
339: PetscFree3(mat_mkl_cpardiso->ia,mat_mkl_cpardiso->ja,mat_mkl_cpardiso->a);
340: }
341: comm = MPI_Comm_f2c(mat_mkl_cpardiso->comm_mkl_cpardiso);
342: MPI_Comm_free(&comm);
343: PetscFree(A->data);
345: /* clear composed functions */
346: PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
347: PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);
348: return 0;
349: }
351: /*
352: * Computes Ax = b
353: */
354: PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x)
355: {
356: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
357: PetscScalar *xarray;
358: const PetscScalar *barray;
360: mat_mkl_cpardiso->nrhs = 1;
361: VecGetArray(x,&xarray);
362: VecGetArrayRead(b,&barray);
364: /* solve phase */
365: /*-------------*/
366: mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
367: cluster_sparse_solver (
368: mat_mkl_cpardiso->pt,
369: &mat_mkl_cpardiso->maxfct,
370: &mat_mkl_cpardiso->mnum,
371: &mat_mkl_cpardiso->mtype,
372: &mat_mkl_cpardiso->phase,
373: &mat_mkl_cpardiso->n,
374: mat_mkl_cpardiso->a,
375: mat_mkl_cpardiso->ia,
376: mat_mkl_cpardiso->ja,
377: mat_mkl_cpardiso->perm,
378: &mat_mkl_cpardiso->nrhs,
379: mat_mkl_cpardiso->iparm,
380: &mat_mkl_cpardiso->msglvl,
381: (void*)barray,
382: (void*)xarray,
383: &mat_mkl_cpardiso->comm_mkl_cpardiso,
384: (PetscInt*)&mat_mkl_cpardiso->err);
388: VecRestoreArray(x,&xarray);
389: VecRestoreArrayRead(b,&barray);
390: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
391: return 0;
392: }
394: PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x)
395: {
396: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
398: #if defined(PETSC_USE_COMPLEX)
399: mat_mkl_cpardiso->iparm[12 - 1] = 1;
400: #else
401: mat_mkl_cpardiso->iparm[12 - 1] = 2;
402: #endif
403: MatSolve_MKL_CPARDISO(A,b,x);
404: mat_mkl_cpardiso->iparm[12 - 1] = 0;
405: return 0;
406: }
408: PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X)
409: {
410: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
411: PetscScalar *xarray;
412: const PetscScalar *barray;
414: MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);
416: if (mat_mkl_cpardiso->nrhs > 0) {
417: MatDenseGetArrayRead(B,&barray);
418: MatDenseGetArray(X,&xarray);
422: /* solve phase */
423: /*-------------*/
424: mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
425: cluster_sparse_solver (
426: mat_mkl_cpardiso->pt,
427: &mat_mkl_cpardiso->maxfct,
428: &mat_mkl_cpardiso->mnum,
429: &mat_mkl_cpardiso->mtype,
430: &mat_mkl_cpardiso->phase,
431: &mat_mkl_cpardiso->n,
432: mat_mkl_cpardiso->a,
433: mat_mkl_cpardiso->ia,
434: mat_mkl_cpardiso->ja,
435: mat_mkl_cpardiso->perm,
436: &mat_mkl_cpardiso->nrhs,
437: mat_mkl_cpardiso->iparm,
438: &mat_mkl_cpardiso->msglvl,
439: (void*)barray,
440: (void*)xarray,
441: &mat_mkl_cpardiso->comm_mkl_cpardiso,
442: (PetscInt*)&mat_mkl_cpardiso->err);
444: MatDenseRestoreArrayRead(B,&barray);
445: MatDenseRestoreArray(X,&xarray);
447: }
448: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
449: return 0;
450: }
452: /*
453: * LU Decomposition
454: */
455: PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info)
456: {
457: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data;
459: mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
460: (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);
462: mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
463: cluster_sparse_solver (
464: mat_mkl_cpardiso->pt,
465: &mat_mkl_cpardiso->maxfct,
466: &mat_mkl_cpardiso->mnum,
467: &mat_mkl_cpardiso->mtype,
468: &mat_mkl_cpardiso->phase,
469: &mat_mkl_cpardiso->n,
470: mat_mkl_cpardiso->a,
471: mat_mkl_cpardiso->ia,
472: mat_mkl_cpardiso->ja,
473: mat_mkl_cpardiso->perm,
474: &mat_mkl_cpardiso->nrhs,
475: mat_mkl_cpardiso->iparm,
476: &mat_mkl_cpardiso->msglvl,
477: NULL,
478: NULL,
479: &mat_mkl_cpardiso->comm_mkl_cpardiso,
480: &mat_mkl_cpardiso->err);
483: mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
484: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
485: return 0;
486: }
488: /* Sets mkl_cpardiso options from the options database */
489: PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A)
490: {
491: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
492: PetscErrorCode ierr;
493: PetscInt icntl,threads;
494: PetscBool flg;
496: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");
497: PetscOptionsInt("-mat_mkl_cpardiso_65","Suggested number of threads to use within MKL_CPARDISO","None",threads,&threads,&flg);
498: if (flg) mkl_set_num_threads((int)threads);
500: PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);
501: if (flg) mat_mkl_cpardiso->maxfct = icntl;
503: PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);
504: if (flg) mat_mkl_cpardiso->mnum = icntl;
506: PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);
507: if (flg) mat_mkl_cpardiso->msglvl = icntl;
509: PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);
510: if (flg) mat_mkl_cpardiso->mtype = icntl;
511: PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);
513: if (flg && icntl != 0) {
514: PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);
515: if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
517: PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);
518: if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
520: PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);
521: if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
523: PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);
524: if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
526: PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);
527: if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
529: PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);
530: if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
532: PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);
533: if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
535: PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);
536: if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
538: PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl,
539: &flg);
540: if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
542: PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl,
543: &flg);
544: if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
546: PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);
547: if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
549: PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);
550: if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
552: PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);
553: if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
555: PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);
556: if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
558: PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);
559: if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
561: PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);
562: if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
564: PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);
565: if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
567: PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);
568: if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
569: }
571: PetscOptionsEnd();
572: return 0;
573: }
575: PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
576: {
577: PetscInt bs;
578: PetscBool match;
579: PetscMPIInt size;
580: MPI_Comm comm;
583: MPI_Comm_dup(PetscObjectComm((PetscObject)A),&comm);
584: MPI_Comm_size(comm, &size);
585: mat_mkl_cpardiso->comm_mkl_cpardiso = MPI_Comm_c2f(comm);
587: mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
588: mat_mkl_cpardiso->maxfct = 1;
589: mat_mkl_cpardiso->mnum = 1;
590: mat_mkl_cpardiso->n = A->rmap->N;
591: if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36];
592: mat_mkl_cpardiso->msglvl = 0;
593: mat_mkl_cpardiso->nrhs = 1;
594: mat_mkl_cpardiso->err = 0;
595: mat_mkl_cpardiso->phase = -1;
596: #if defined(PETSC_USE_COMPLEX)
597: mat_mkl_cpardiso->mtype = 13;
598: #else
599: mat_mkl_cpardiso->mtype = 11;
600: #endif
602: #if defined(PETSC_USE_REAL_SINGLE)
603: mat_mkl_cpardiso->iparm[27] = 1;
604: #else
605: mat_mkl_cpardiso->iparm[27] = 0;
606: #endif
608: mat_mkl_cpardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */
609: mat_mkl_cpardiso->iparm[ 1] = 2; /* Use METIS for fill-in reordering */
610: mat_mkl_cpardiso->iparm[ 5] = 0; /* Write solution into x */
611: mat_mkl_cpardiso->iparm[ 7] = 2; /* Max number of iterative refinement steps */
612: mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
613: mat_mkl_cpardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */
614: mat_mkl_cpardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
615: mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
616: mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
617: mat_mkl_cpardiso->iparm[26] = 1; /* Check input data for correctness */
619: mat_mkl_cpardiso->iparm[39] = 0;
620: if (size > 1) {
621: mat_mkl_cpardiso->iparm[39] = 2;
622: mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
623: mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
624: }
625: PetscObjectTypeCompareAny((PetscObject)A,&match,MATMPIBAIJ,MATMPISBAIJ,"");
626: if (match) {
627: MatGetBlockSize(A,&bs);
628: mat_mkl_cpardiso->iparm[36] = bs;
629: mat_mkl_cpardiso->iparm[40] /= bs;
630: mat_mkl_cpardiso->iparm[41] /= bs;
631: mat_mkl_cpardiso->iparm[40]++;
632: mat_mkl_cpardiso->iparm[41]++;
633: mat_mkl_cpardiso->iparm[34] = 0; /* Fortran style */
634: } else {
635: mat_mkl_cpardiso->iparm[34] = 1; /* C style */
636: }
638: mat_mkl_cpardiso->perm = 0;
639: return 0;
640: }
642: /*
643: * Symbolic decomposition. Mkl_Pardiso analysis phase.
644: */
645: PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
646: {
647: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
649: mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
651: /* Set MKL_CPARDISO options from the options database */
652: PetscSetMKL_CPARDISOFromOptions(F,A);
653: (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);
655: mat_mkl_cpardiso->n = A->rmap->N;
656: if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36];
658: /* analysis phase */
659: /*----------------*/
660: mat_mkl_cpardiso->phase = JOB_ANALYSIS;
662: cluster_sparse_solver (
663: mat_mkl_cpardiso->pt,
664: &mat_mkl_cpardiso->maxfct,
665: &mat_mkl_cpardiso->mnum,
666: &mat_mkl_cpardiso->mtype,
667: &mat_mkl_cpardiso->phase,
668: &mat_mkl_cpardiso->n,
669: mat_mkl_cpardiso->a,
670: mat_mkl_cpardiso->ia,
671: mat_mkl_cpardiso->ja,
672: mat_mkl_cpardiso->perm,
673: &mat_mkl_cpardiso->nrhs,
674: mat_mkl_cpardiso->iparm,
675: &mat_mkl_cpardiso->msglvl,
676: NULL,
677: NULL,
678: &mat_mkl_cpardiso->comm_mkl_cpardiso,
679: (PetscInt*)&mat_mkl_cpardiso->err);
683: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
684: F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
685: F->ops->solve = MatSolve_MKL_CPARDISO;
686: F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO;
687: F->ops->matsolve = MatMatSolve_MKL_CPARDISO;
688: return 0;
689: }
691: PetscErrorCode MatCholeskyFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS perm,const MatFactorInfo *info)
692: {
693: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
695: mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
697: /* Set MKL_CPARDISO options from the options database */
698: PetscSetMKL_CPARDISOFromOptions(F,A);
699: (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);
701: mat_mkl_cpardiso->n = A->rmap->N;
702: if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36];
703: #if defined(PETSC_USE_COMPLEX)
704: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO CHOLESKY with complex scalars! Use MAT_FACTOR_LU instead",((PetscObject)A)->type_name);
705: #endif
706: if (A->spd_set && A->spd) mat_mkl_cpardiso->mtype = 2;
707: else mat_mkl_cpardiso->mtype = -2;
709: /* analysis phase */
710: /*----------------*/
711: mat_mkl_cpardiso->phase = JOB_ANALYSIS;
713: cluster_sparse_solver (
714: mat_mkl_cpardiso->pt,
715: &mat_mkl_cpardiso->maxfct,
716: &mat_mkl_cpardiso->mnum,
717: &mat_mkl_cpardiso->mtype,
718: &mat_mkl_cpardiso->phase,
719: &mat_mkl_cpardiso->n,
720: mat_mkl_cpardiso->a,
721: mat_mkl_cpardiso->ia,
722: mat_mkl_cpardiso->ja,
723: mat_mkl_cpardiso->perm,
724: &mat_mkl_cpardiso->nrhs,
725: mat_mkl_cpardiso->iparm,
726: &mat_mkl_cpardiso->msglvl,
727: NULL,
728: NULL,
729: &mat_mkl_cpardiso->comm_mkl_cpardiso,
730: (PetscInt*)&mat_mkl_cpardiso->err);
734: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
735: F->ops->choleskyfactornumeric = MatFactorNumeric_MKL_CPARDISO;
736: F->ops->solve = MatSolve_MKL_CPARDISO;
737: F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO;
738: F->ops->matsolve = MatMatSolve_MKL_CPARDISO;
739: return 0;
740: }
742: PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
743: {
744: PetscBool iascii;
745: PetscViewerFormat format;
746: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
747: PetscInt i;
749: /* check if matrix is mkl_cpardiso type */
750: if (A->ops->solve != MatSolve_MKL_CPARDISO) return 0;
752: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
753: if (iascii) {
754: PetscViewerGetFormat(viewer,&format);
755: if (format == PETSC_VIEWER_ASCII_INFO) {
756: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");
757: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase: %d \n",mat_mkl_cpardiso->phase);
758: for (i = 1; i <= 64; i++) {
759: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]: %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);
760: }
761: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct: %d \n", mat_mkl_cpardiso->maxfct);
762: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum: %d \n", mat_mkl_cpardiso->mnum);
763: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype: %d \n", mat_mkl_cpardiso->mtype);
764: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n: %d \n", mat_mkl_cpardiso->n);
765: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs: %d \n", mat_mkl_cpardiso->nrhs);
766: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl: %d \n", mat_mkl_cpardiso->msglvl);
767: }
768: }
769: return 0;
770: }
772: PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
773: {
774: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
776: info->block_size = 1.0;
777: info->nz_allocated = mat_mkl_cpardiso->nz + 0.0;
778: info->nz_unneeded = 0.0;
779: info->assemblies = 0.0;
780: info->mallocs = 0.0;
781: info->memory = 0.0;
782: info->fill_ratio_given = 0;
783: info->fill_ratio_needed = 0;
784: info->factor_mallocs = 0;
785: return 0;
786: }
788: PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival)
789: {
790: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)F->data;
792: if (icntl <= 64) {
793: mat_mkl_cpardiso->iparm[icntl - 1] = ival;
794: } else {
795: if (icntl == 65) mkl_set_num_threads((int)ival);
796: else if (icntl == 66) mat_mkl_cpardiso->maxfct = ival;
797: else if (icntl == 67) mat_mkl_cpardiso->mnum = ival;
798: else if (icntl == 68) mat_mkl_cpardiso->msglvl = ival;
799: else if (icntl == 69) mat_mkl_cpardiso->mtype = ival;
800: }
801: return 0;
802: }
804: /*@
805: MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters
807: Logically Collective on Mat
809: Input Parameters:
810: + F - the factored matrix obtained by calling MatGetFactor()
811: . icntl - index of Mkl_Pardiso parameter
812: - ival - value of Mkl_Pardiso parameter
814: Options Database:
815: . -mat_mkl_cpardiso_<icntl> <ival> - set the option numbered icntl to ival
817: Level: Intermediate
819: Notes:
820: This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options
821: database approach when working with TS, SNES, or KSP.
823: References:
824: . * - Mkl_Pardiso Users' Guide
826: .seealso: MatGetFactor()
827: @*/
828: PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
829: {
830: PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
831: return 0;
832: }
834: /*MC
835: MATSOLVERMKL_CPARDISO - A matrix type providing direct solvers (LU) for parallel matrices via the external package MKL_CPARDISO.
837: Works with MATMPIAIJ matrices
839: Use -pc_type lu -pc_factor_mat_solver_type mkl_cpardiso to use this direct solver
841: Options Database Keys:
842: + -mat_mkl_cpardiso_65 - Suggested number of threads to use within MKL_CPARDISO
843: . -mat_mkl_cpardiso_66 - Maximum number of factors with identical sparsity structure that must be kept in memory at the same time
844: . -mat_mkl_cpardiso_67 - Indicates the actual matrix for the solution phase
845: . -mat_mkl_cpardiso_68 - Message level information, use 1 to get detailed information on the solver options
846: . -mat_mkl_cpardiso_69 - Defines the matrix type. IMPORTANT: When you set this flag, iparm parameters are going to be set to the default ones for the matrix type
847: . -mat_mkl_cpardiso_1 - Use default values
848: . -mat_mkl_cpardiso_2 - Fill-in reducing ordering for the input matrix
849: . -mat_mkl_cpardiso_4 - Preconditioned CGS/CG
850: . -mat_mkl_cpardiso_5 - User permutation
851: . -mat_mkl_cpardiso_6 - Write solution on x
852: . -mat_mkl_cpardiso_8 - Iterative refinement step
853: . -mat_mkl_cpardiso_10 - Pivoting perturbation
854: . -mat_mkl_cpardiso_11 - Scaling vectors
855: . -mat_mkl_cpardiso_12 - Solve with transposed or conjugate transposed matrix A
856: . -mat_mkl_cpardiso_13 - Improved accuracy using (non-) symmetric weighted matching
857: . -mat_mkl_cpardiso_18 - Numbers of non-zero elements
858: . -mat_mkl_cpardiso_19 - Report number of floating point operations
859: . -mat_mkl_cpardiso_21 - Pivoting for symmetric indefinite matrices
860: . -mat_mkl_cpardiso_24 - Parallel factorization control
861: . -mat_mkl_cpardiso_25 - Parallel forward/backward solve control
862: . -mat_mkl_cpardiso_27 - Matrix checker
863: . -mat_mkl_cpardiso_31 - Partial solve and computing selected components of the solution vectors
864: . -mat_mkl_cpardiso_34 - Optimal number of threads for conditional numerical reproducibility (CNR) mode
865: - -mat_mkl_cpardiso_60 - Intel MKL_CPARDISO mode
867: Level: beginner
869: Notes:
870: Use -mat_mkl_cpardiso_68 1 to display the number of threads the solver is using. MKL does not provide a way to directly access this
871: information.
873: For more information on the options check the MKL_CPARDISO manual
875: .seealso: PCFactorSetMatSolverType(), MatSolverType
877: M*/
879: static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type)
880: {
881: *type = MATSOLVERMKL_CPARDISO;
882: return 0;
883: }
885: /* MatGetFactor for MPI AIJ matrices */
886: static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F)
887: {
888: Mat B;
889: Mat_MKL_CPARDISO *mat_mkl_cpardiso;
890: PetscBool isSeqAIJ,isMPIBAIJ,isMPISBAIJ;
892: /* Create the factorization matrix */
894: PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);
895: PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&isMPIBAIJ);
896: PetscObjectTypeCompare((PetscObject)A,MATMPISBAIJ,&isMPISBAIJ);
897: MatCreate(PetscObjectComm((PetscObject)A),&B);
898: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
899: PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);
900: MatSetUp(B);
902: PetscNewLog(B,&mat_mkl_cpardiso);
904: if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
905: else if (isMPIBAIJ) mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpibaij_mpibaij_MKL_CPARDISO;
906: else if (isMPISBAIJ) mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij_MKL_CPARDISO;
907: else mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
909: if (ftype == MAT_FACTOR_LU) B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
910: else B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_AIJMKL_CPARDISO;
911: B->ops->destroy = MatDestroy_MKL_CPARDISO;
913: B->ops->view = MatView_MKL_CPARDISO;
914: B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
916: B->factortype = ftype;
917: B->assembled = PETSC_TRUE; /* required by -ksp_view */
919: B->data = mat_mkl_cpardiso;
921: /* set solvertype */
922: PetscFree(B->solvertype);
923: PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);
925: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mkl_cpardiso);
926: PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);
927: PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);
929: *F = B;
930: return 0;
931: }
933: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void)
934: {
935: MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);
936: MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);
937: MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);
938: MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_mpiaij_mkl_cpardiso);
939: return 0;
940: }