Actual source code: mumps.c
2: /*
3: Provides an interface to the MUMPS sparse solver
4: */
5: #include <petscpkg_version.h>
6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
7: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8: #include <../src/mat/impls/sell/mpi/mpisell.h>
10: EXTERN_C_BEGIN
11: #if defined(PETSC_USE_COMPLEX)
12: #if defined(PETSC_USE_REAL_SINGLE)
13: #include <cmumps_c.h>
14: #else
15: #include <zmumps_c.h>
16: #endif
17: #else
18: #if defined(PETSC_USE_REAL_SINGLE)
19: #include <smumps_c.h>
20: #else
21: #include <dmumps_c.h>
22: #endif
23: #endif
24: EXTERN_C_END
25: #define JOB_INIT -1
26: #define JOB_FACTSYMBOLIC 1
27: #define JOB_FACTNUMERIC 2
28: #define JOB_SOLVE 3
29: #define JOB_END -2
31: /* calls to MUMPS */
32: #if defined(PETSC_USE_COMPLEX)
33: #if defined(PETSC_USE_REAL_SINGLE)
34: #define MUMPS_c cmumps_c
35: #else
36: #define MUMPS_c zmumps_c
37: #endif
38: #else
39: #if defined(PETSC_USE_REAL_SINGLE)
40: #define MUMPS_c smumps_c
41: #else
42: #define MUMPS_c dmumps_c
43: #endif
44: #endif
46: /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
47: the number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
48: naming convention of PetscMPIInt, PetscBLASInt, etc.
49: */
50: typedef MUMPS_INT PetscMUMPSInt;
52: #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
53: #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, so this macro is the more reliable check */
54: #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
55: #endif
56: #else
57: #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
58: #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
59: #endif
60: #endif
62: #define MPIU_MUMPSINT MPI_INT
63: #define PETSC_MUMPS_INT_MAX 2147483647
64: #define PETSC_MUMPS_INT_MIN -2147483648
66: /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
67: static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
68: {
69: #if PetscDefined(USE_64BIT_INDICES)
70: PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
71: #endif
72: *b = (PetscMUMPSInt)(a);
73: return 0;
74: }
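/*
  Illustrative sketch (not compiled, added for exposition): a typical use of PetscMUMPSIntCast() in this
  file is to narrow a PetscInt row/column index to the MUMPS integer type before storing it in a triple;
  with 64-bit PetscInt it errors out if the value does not fit in a MUMPS_INT:

      PetscMUMPSInt mrow;
      PetscMUMPSIntCast(i + shift, &mrow);
      row[k] = mrow;

  Here i is a PetscInt index and shift is 0 (C numbering) or 1 (Fortran numbering). The
  MatConvertToTriples_* routines below follow exactly this pattern, usually casting directly into row[k].
*/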
76: /* Put these utility routines here since they are only used in this file */
77: static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
78: {
79: PetscInt myval;
80: PetscBool myset;
81: /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
82: PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);
83: if (myset) PetscMUMPSIntCast(myval,value);
84: if (set) *set = myset;
85: return 0;
86: }
87: #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
89: /* if using PETSc OpenMP support, we only call MUMPS on the master ranks. Before/after the call, we change/restore the set of CPUs the master ranks can run on */
90: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
91: #define PetscMUMPS_c(mumps) \
92: do { \
93: if (mumps->use_petsc_omp_support) { \
94: if (mumps->is_omp_master) { \
95: PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl); \
96: MUMPS_c(&mumps->id); \
97: PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl); \
98: } \
99: PetscOmpCtrlBarrier(mumps->omp_ctrl); \
100: /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
101: to each process, so we only Bcast info[1], an error code, and leave the others (since they do not have \
102: an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p. 82. \
103: omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
104: */ \
105: MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);\
106: MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL, 0,mumps->omp_comm);\
107: MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0,mumps->omp_comm);\
108: } else { \
109: MUMPS_c(&mumps->id); \
110: } \
111: } while (0)
112: #else
113: #define PetscMUMPS_c(mumps) \
114: do { MUMPS_c(&mumps->id); } while (0)
115: #endif
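/*
  Illustrative sketch (not compiled): every MUMPS phase in this file is driven through PetscMUMPS_c()
  with the same pattern: set id.job, call the solver, then check the MUMPS status, where a negative
  INFOG(1) signals an error and INFO(2) carries the detail:

      mumps->id.job = JOB_FACTNUMERIC;
      PetscMUMPS_c(mumps);
      if (mumps->id.INFOG(1) < 0) {
        ... report or handle the error, see MatFactorNumeric_MUMPS() ...
      }
*/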
117: /* declare MumpsScalar */
118: #if defined(PETSC_USE_COMPLEX)
119: #if defined(PETSC_USE_REAL_SINGLE)
120: #define MumpsScalar mumps_complex
121: #else
122: #define MumpsScalar mumps_double_complex
123: #endif
124: #else
125: #define MumpsScalar PetscScalar
126: #endif
128: /* macros so that indices match the MUMPS documentation */
129: #define ICNTL(I) icntl[(I)-1]
130: #define CNTL(I) cntl[(I)-1]
131: #define INFOG(I) infog[(I)-1]
132: #define INFO(I) info[(I)-1]
133: #define RINFOG(I) rinfog[(I)-1]
134: #define RINFO(I) rinfo[(I)-1]
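/*
  Example: the MUMPS manual numbers its control and info arrays from 1 while the C arrays are 0-based,
  so with the macros above mumps->id.ICNTL(7) expands to mumps->id.icntl[6]. This lets the code below
  be read side by side with the MUMPS documentation.
*/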
136: typedef struct Mat_MUMPS Mat_MUMPS;
137: struct Mat_MUMPS {
138: #if defined(PETSC_USE_COMPLEX)
139: #if defined(PETSC_USE_REAL_SINGLE)
140: CMUMPS_STRUC_C id;
141: #else
142: ZMUMPS_STRUC_C id;
143: #endif
144: #else
145: #if defined(PETSC_USE_REAL_SINGLE)
146: SMUMPS_STRUC_C id;
147: #else
148: DMUMPS_STRUC_C id;
149: #endif
150: #endif
152: MatStructure matstruc;
153: PetscMPIInt myid,petsc_size;
154: PetscMUMPSInt *irn,*jcn; /* the (i,j,v) triplets passed to mumps. */
155: PetscScalar *val,*val_alloc; /* For some matrices we can directly access their data array without a buffer; for others we need a buffer, hence val_alloc. */
156: PetscInt64 nnz; /* number of nonzeros. The type is what MUMPS calls selective 64-bit */
157: PetscMUMPSInt sym;
158: MPI_Comm mumps_comm;
159: PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */
160: VecScatter scat_rhs, scat_sol; /* used by MatSolve() */
161: PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */
162: PetscMUMPSInt lrhs_loc,nloc_rhs,*irhs_loc;
163: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
164: PetscInt *rhs_nrow,max_nrhs;
165: PetscMPIInt *rhs_recvcounts,*rhs_disps;
166: PetscScalar *rhs_loc,*rhs_recvbuf;
167: #endif
168: Vec b_seq,x_seq;
169: PetscInt ninfo,*info; /* which INFO to display */
170: PetscInt sizeredrhs;
171: PetscScalar *schur_sol;
172: PetscInt schur_sizesol;
173: PetscMUMPSInt *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
174: PetscInt64 cur_ilen,cur_jlen; /* current len of ia_alloc[], ja_alloc[] */
175: PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);
177: /* stuff used by petsc/mumps OpenMP support*/
178: PetscBool use_petsc_omp_support;
179: PetscOmpCtrl omp_ctrl; /* an OpenMP controller; with it, blocked processes release their CPUs (MPI_Barrier does not give this guarantee) */
180: MPI_Comm petsc_comm,omp_comm; /* petsc_comm is the PETSc matrix's comm */
181: PetscInt64 *recvcount; /* nnz of each omp_comm rank, collected on omp_master */
182: PetscMPIInt tag,omp_comm_size;
183: PetscBool is_omp_master; /* is this rank the master of omp_comm */
184: MPI_Request *reqs;
185: };
187: /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
188: Here, nrow is the number of rows, ia[] is the row pointer and ja[] the column indices.
189: */
190: static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
191: {
192: PetscInt nnz=ia[nrow]-1; /* MUMPS uses 1-based indices. Use PetscInt instead of PetscInt64 since MUMPS only uses PetscMUMPSInt for the rhs */
194: #if defined(PETSC_USE_64BIT_INDICES)
195: {
196: PetscInt i;
197: if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
198: PetscFree(mumps->ia_alloc);
199: PetscMalloc1(nrow+1,&mumps->ia_alloc);
200: mumps->cur_ilen = nrow+1;
201: }
202: if (nnz > mumps->cur_jlen) {
203: PetscFree(mumps->ja_alloc);
204: PetscMalloc1(nnz,&mumps->ja_alloc);
205: mumps->cur_jlen = nnz;
206: }
207: for (i=0; i<nrow+1; i++) PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));
208: for (i=0; i<nnz; i++) PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));
209: *ia_mumps = mumps->ia_alloc;
210: *ja_mumps = mumps->ja_alloc;
211: }
212: #else
213: *ia_mumps = ia;
214: *ja_mumps = ja;
215: #endif
216: PetscMUMPSIntCast(nnz,nnz_mumps);
217: return 0;
218: }
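/*
  Illustrative sketch (not compiled): this cast is used on the sparse right-hand-side path of
  MatMatSolve_MUMPS(), where a 1-based row structure obtained from MatGetRowIJ() is handed to MUMPS:

      PetscInt  spnr,*ia,*ja;
      PetscBool done;
      MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&done);
      PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);

  With 64-bit PetscInt the indices are copied into ia_alloc/ja_alloc; otherwise they are passed through.
*/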
220: static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
221: {
222: PetscFree(mumps->id.listvar_schur);
223: PetscFree(mumps->id.redrhs);
224: PetscFree(mumps->schur_sol);
225: mumps->id.size_schur = 0;
226: mumps->id.schur_lld = 0;
227: mumps->id.ICNTL(19) = 0;
228: return 0;
229: }
231: /* solve with rhs in mumps->id.redrhs and return in the same location */
232: static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
233: {
234: Mat_MUMPS *mumps=(Mat_MUMPS*)F->data;
235: Mat S,B,X;
236: MatFactorSchurStatus schurstatus;
237: PetscInt sizesol;
239: MatFactorFactorizeSchurComplement(F);
240: MatFactorGetSchurComplement(F,&S,&schurstatus);
241: MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);
242: MatSetType(B,((PetscObject)S)->type_name);
243: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
244: MatBindToCPU(B,S->boundtocpu);
245: #endif
246: switch (schurstatus) {
247: case MAT_FACTOR_SCHUR_FACTORED:
248: MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);
249: MatSetType(X,((PetscObject)S)->type_name);
250: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
251: MatBindToCPU(X,S->boundtocpu);
252: #endif
253: if (!mumps->id.ICNTL(9)) { /* transpose solve */
254: MatMatSolveTranspose(S,B,X);
255: } else {
256: MatMatSolve(S,B,X);
257: }
258: break;
259: case MAT_FACTOR_SCHUR_INVERTED:
260: sizesol = mumps->id.nrhs*mumps->id.size_schur;
261: if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
262: PetscFree(mumps->schur_sol);
263: PetscMalloc1(sizesol,&mumps->schur_sol);
264: mumps->schur_sizesol = sizesol;
265: }
266: MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);
267: MatSetType(X,((PetscObject)S)->type_name);
268: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
269: MatBindToCPU(X,S->boundtocpu);
270: #endif
271: MatProductCreateWithMat(S,B,NULL,X);
272: if (!mumps->id.ICNTL(9)) { /* transpose solve */
273: MatProductSetType(X,MATPRODUCT_AtB);
274: } else {
275: MatProductSetType(X,MATPRODUCT_AB);
276: }
277: MatProductSetFromOptions(X);
278: MatProductSymbolic(X);
279: MatProductNumeric(X);
281: MatCopy(X,B,SAME_NONZERO_PATTERN);
282: break;
283: default:
284: SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status);
285: }
286: MatFactorRestoreSchurComplement(F,&S,schurstatus);
287: MatDestroy(&B);
288: MatDestroy(&X);
289: return 0;
290: }
292: static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
293: {
294: Mat_MUMPS *mumps=(Mat_MUMPS*)F->data;
296: if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
297: return 0;
298: }
299: if (!expansion) { /* prepare for the condensation step */
300: PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
301: /* allocate MUMPS internal array to store reduced right-hand sides */
302: if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
303: PetscFree(mumps->id.redrhs);
304: mumps->id.lredrhs = mumps->id.size_schur;
305: PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);
306: mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
307: }
308: mumps->id.ICNTL(26) = 1; /* condensation phase */
309: } else { /* prepare for the expansion step */
310: /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
311: MatMumpsSolveSchur_Private(F);
312: mumps->id.ICNTL(26) = 2; /* expansion phase */
313: PetscMUMPS_c(mumps);
315: /* restore defaults */
316: mumps->id.ICNTL(26) = -1;
317: /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
318: if (mumps->id.nrhs > 1) {
319: PetscFree(mumps->id.redrhs);
320: mumps->id.lredrhs = 0;
321: mumps->sizeredrhs = 0;
322: }
323: }
324: return 0;
325: }
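/*
  Illustrative sketch (not compiled) of the reduction/expansion flow this routine implements; the real
  call sites are in MatSolve_MUMPS() and MatMatSolve_MUMPS():

      MatMumpsHandleSchur_Private(F,PETSC_FALSE);   condensation: ICNTL(26)=1, allocate id.redrhs
      mumps->id.job = JOB_SOLVE;
      PetscMUMPS_c(mumps);                          MUMPS returns the reduced rhs in id.redrhs
      MatMumpsHandleSchur_Private(F,PETSC_TRUE);    solve the Schur system ourselves, then ICNTL(26)=2
                                                    expands the Schur solution back to the full system
*/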
327: /*
328: MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]
330: input:
331: A - matrix in aij,baij or sbaij format
332: shift - 0: C style output triple; 1: Fortran style output triple.
333: reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
334: MAT_REUSE_MATRIX: only the values in v array are updated
335: output:
336: nnz - dim of r, c, and v (number of local nonzero entries of A)
337: r, c, v - row and col index, matrix values (matrix triples)
339: The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
340: freed with PetscFree(mumps->irn). This is not ideal code; the fact that v is ONLY sometimes part of mumps->irn means
341: that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
343: */
345: PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
346: {
347: const PetscScalar *av;
348: const PetscInt *ai,*aj,*ajj,M=A->rmap->n;
349: PetscInt64 nz,rnz,i,j,k;
350: PetscMUMPSInt *row,*col;
351: Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
353: MatSeqAIJGetArrayRead(A,&av);
354: mumps->val = (PetscScalar*)av;
355: if (reuse == MAT_INITIAL_MATRIX) {
356: nz = aa->nz;
357: ai = aa->i;
358: aj = aa->j;
359: PetscMalloc2(nz,&row,nz,&col);
360: for (i=k=0; i<M; i++) {
361: rnz = ai[i+1] - ai[i];
362: ajj = aj + ai[i];
363: for (j=0; j<rnz; j++) {
364: PetscMUMPSIntCast(i+shift,&row[k]);
365: PetscMUMPSIntCast(ajj[j] + shift,&col[k]);
366: k++;
367: }
368: }
369: mumps->irn = row;
370: mumps->jcn = col;
371: mumps->nnz = nz;
372: }
373: MatSeqAIJRestoreArrayRead(A,&av);
374: return 0;
375: }
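/*
  Example (for exposition only): for the 2x2 sequential AIJ matrix
        [ 2  0 ]
        [ 3  4 ]
  and shift = 1 (the Fortran-style numbering MUMPS expects), the conversion above yields

        irn = {1, 2, 2},  jcn = {1, 1, 2},  nnz = 3

  while val simply aliases the AIJ value array {2, 3, 4}; no values are copied for this matrix type.
*/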
377: PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
378: {
379: PetscInt64 nz,i,j,k,r;
380: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
381: PetscMUMPSInt *row,*col;
383: mumps->val = a->val;
384: if (reuse == MAT_INITIAL_MATRIX) {
385: nz = a->sliidx[a->totalslices];
386: PetscMalloc2(nz,&row,nz,&col);
387: for (i=k=0; i<a->totalslices; i++) {
388: for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
389: PetscMUMPSIntCast(8*i+r+shift,&row[k++]);
390: }
391: }
392: for (i=0;i<nz;i++) PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);
393: mumps->irn = row;
394: mumps->jcn = col;
395: mumps->nnz = nz;
396: }
397: return 0;
398: }
400: PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
401: {
402: Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data;
403: const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
404: PetscInt64 M,nz,idx=0,rnz,i,j,k,m;
405: PetscInt bs;
406: PetscMUMPSInt *row,*col;
408: MatGetBlockSize(A,&bs);
409: M = A->rmap->N/bs;
410: mumps->val = aa->a;
411: if (reuse == MAT_INITIAL_MATRIX) {
412: ai = aa->i; aj = aa->j;
413: nz = bs2*aa->nz;
414: PetscMalloc2(nz,&row,nz,&col);
415: for (i=0; i<M; i++) {
416: ajj = aj + ai[i];
417: rnz = ai[i+1] - ai[i];
418: for (k=0; k<rnz; k++) {
419: for (j=0; j<bs; j++) {
420: for (m=0; m<bs; m++) {
421: PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);
422: PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);
423: idx++;
424: }
425: }
426: }
427: }
428: mumps->irn = row;
429: mumps->jcn = col;
430: mumps->nnz = nz;
431: }
432: return 0;
433: }
435: PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
436: {
437: const PetscInt *ai, *aj,*ajj;
438: PetscInt bs;
439: PetscInt64 nz,rnz,i,j,k,m;
440: PetscMUMPSInt *row,*col;
441: PetscScalar *val;
442: Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data;
443: const PetscInt bs2=aa->bs2,mbs=aa->mbs;
444: #if defined(PETSC_USE_COMPLEX)
445: PetscBool hermitian;
446: #endif
448: #if defined(PETSC_USE_COMPLEX)
449: MatGetOption(A,MAT_HERMITIAN,&hermitian);
451: #endif
452: ai = aa->i;
453: aj = aa->j;
454: MatGetBlockSize(A,&bs);
455: if (reuse == MAT_INITIAL_MATRIX) {
456: nz = aa->nz;
457: PetscMalloc2(bs2*nz,&row,bs2*nz,&col);
458: if (bs>1) {
459: PetscMalloc1(bs2*nz,&mumps->val_alloc);
460: mumps->val = mumps->val_alloc;
461: } else {
462: mumps->val = aa->a;
463: }
464: mumps->irn = row;
465: mumps->jcn = col;
466: } else {
467: if (bs == 1) mumps->val = aa->a;
468: row = mumps->irn;
469: col = mumps->jcn;
470: }
471: val = mumps->val;
473: nz = 0;
474: if (bs>1) {
475: for (i=0; i<mbs; i++) {
476: rnz = ai[i+1] - ai[i];
477: ajj = aj + ai[i];
478: for (j=0; j<rnz; j++) {
479: for (k=0; k<bs; k++) {
480: for (m=0; m<bs; m++) {
481: if (ajj[j]>i || k>=m) {
482: if (reuse == MAT_INITIAL_MATRIX) {
483: PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);
484: PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);
485: }
486: val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
487: }
488: }
489: }
490: }
491: }
492: } else if (reuse == MAT_INITIAL_MATRIX) {
493: for (i=0; i<mbs; i++) {
494: rnz = ai[i+1] - ai[i];
495: ajj = aj + ai[i];
496: for (j=0; j<rnz; j++) {
497: PetscMUMPSIntCast(i+shift,&row[nz]);
498: PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);
499: nz++;
500: }
501: }
503: }
504: if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
505: return 0;
506: }
508: PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
509: {
510: const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n;
511: PetscInt64 nz,rnz,i,j;
512: const PetscScalar *av,*v1;
513: PetscScalar *val;
514: PetscMUMPSInt *row,*col;
515: Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
516: PetscBool missing;
517: #if defined(PETSC_USE_COMPLEX)
518: PetscBool hermitian;
519: #endif
521: #if defined(PETSC_USE_COMPLEX)
522: MatGetOption(A,MAT_HERMITIAN,&hermitian);
524: #endif
525: MatSeqAIJGetArrayRead(A,&av);
526: ai = aa->i; aj = aa->j;
527: adiag = aa->diag;
528: MatMissingDiagonal_SeqAIJ(A,&missing,NULL);
529: if (reuse == MAT_INITIAL_MATRIX) {
530: /* count nz in the upper triangular part of A */
531: nz = 0;
532: if (missing) {
533: for (i=0; i<M; i++) {
534: if (PetscUnlikely(adiag[i] >= ai[i+1])) {
535: for (j=ai[i];j<ai[i+1];j++) {
536: if (aj[j] < i) continue;
537: nz++;
538: }
539: } else {
540: nz += ai[i+1] - adiag[i];
541: }
542: }
543: } else {
544: for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
545: }
546: PetscMalloc2(nz,&row,nz,&col);
547: PetscMalloc1(nz,&val);
548: mumps->nnz = nz;
549: mumps->irn = row;
550: mumps->jcn = col;
551: mumps->val = mumps->val_alloc = val;
553: nz = 0;
554: if (missing) {
555: for (i=0; i<M; i++) {
556: if (PetscUnlikely(adiag[i] >= ai[i+1])) {
557: for (j=ai[i];j<ai[i+1];j++) {
558: if (aj[j] < i) continue;
559: PetscMUMPSIntCast(i+shift,&row[nz]);
560: PetscMUMPSIntCast(aj[j]+shift,&col[nz]);
561: val[nz] = av[j];
562: nz++;
563: }
564: } else {
565: rnz = ai[i+1] - adiag[i];
566: ajj = aj + adiag[i];
567: v1 = av + adiag[i];
568: for (j=0; j<rnz; j++) {
569: PetscMUMPSIntCast(i+shift,&row[nz]);
570: PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);
571: val[nz++] = v1[j];
572: }
573: }
574: }
575: } else {
576: for (i=0; i<M; i++) {
577: rnz = ai[i+1] - adiag[i];
578: ajj = aj + adiag[i];
579: v1 = av + adiag[i];
580: for (j=0; j<rnz; j++) {
581: PetscMUMPSIntCast(i+shift,&row[nz]);
582: PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);
583: val[nz++] = v1[j];
584: }
585: }
586: }
587: } else {
588: nz = 0;
589: val = mumps->val;
590: if (missing) {
591: for (i=0; i <M; i++) {
592: if (PetscUnlikely(adiag[i] >= ai[i+1])) {
593: for (j=ai[i];j<ai[i+1];j++) {
594: if (aj[j] < i) continue;
595: val[nz++] = av[j];
596: }
597: } else {
598: rnz = ai[i+1] - adiag[i];
599: v1 = av + adiag[i];
600: for (j=0; j<rnz; j++) {
601: val[nz++] = v1[j];
602: }
603: }
604: }
605: } else {
606: for (i=0; i <M; i++) {
607: rnz = ai[i+1] - adiag[i];
608: v1 = av + adiag[i];
609: for (j=0; j<rnz; j++) {
610: val[nz++] = v1[j];
611: }
612: }
613: }
614: }
615: MatSeqAIJRestoreArrayRead(A,&av);
616: return 0;
617: }
619: PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
620: {
621: const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
622: PetscInt bs;
623: PetscInt64 rstart,nz,i,j,k,m,jj,irow,countA,countB;
624: PetscMUMPSInt *row,*col;
625: const PetscScalar *av,*bv,*v1,*v2;
626: PetscScalar *val;
627: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data;
628: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data;
629: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data;
630: const PetscInt bs2=aa->bs2,mbs=aa->mbs;
631: #if defined(PETSC_USE_COMPLEX)
632: PetscBool hermitian;
633: #endif
635: #if defined(PETSC_USE_COMPLEX)
636: MatGetOption(A,MAT_HERMITIAN,&hermitian);
638: #endif
639: MatGetBlockSize(A,&bs);
640: rstart = A->rmap->rstart;
641: ai = aa->i;
642: aj = aa->j;
643: bi = bb->i;
644: bj = bb->j;
645: av = aa->a;
646: bv = bb->a;
648: garray = mat->garray;
650: if (reuse == MAT_INITIAL_MATRIX) {
651: nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
652: PetscMalloc2(nz,&row,nz,&col);
653: PetscMalloc1(nz,&val);
654: /* cannot determine the exact mumps->nnz yet because only part of each SBAIJ block is kept below */
655: mumps->irn = row;
656: mumps->jcn = col;
657: mumps->val = mumps->val_alloc = val;
658: } else {
659: val = mumps->val;
660: }
662: jj = 0; irow = rstart;
663: for (i=0; i<mbs; i++) {
664: ajj = aj + ai[i]; /* ptr to the beginning of this row */
665: countA = ai[i+1] - ai[i];
666: countB = bi[i+1] - bi[i];
667: bjj = bj + bi[i];
668: v1 = av + ai[i]*bs2;
669: v2 = bv + bi[i]*bs2;
671: if (bs>1) {
672: /* A-part */
673: for (j=0; j<countA; j++) {
674: for (k=0; k<bs; k++) {
675: for (m=0; m<bs; m++) {
676: if (rstart + ajj[j]*bs>irow || k>=m) {
677: if (reuse == MAT_INITIAL_MATRIX) {
678: PetscMUMPSIntCast(irow + m + shift,&row[jj]);
679: PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);
680: }
681: val[jj++] = v1[j*bs2 + m + k*bs];
682: }
683: }
684: }
685: }
687: /* B-part */
688: for (j=0; j < countB; j++) {
689: for (k=0; k<bs; k++) {
690: for (m=0; m<bs; m++) {
691: if (reuse == MAT_INITIAL_MATRIX) {
692: PetscMUMPSIntCast(irow + m + shift,&row[jj]);
693: PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);
694: }
695: val[jj++] = v2[j*bs2 + m + k*bs];
696: }
697: }
698: }
699: } else {
700: /* A-part */
701: for (j=0; j<countA; j++) {
702: if (reuse == MAT_INITIAL_MATRIX) {
703: PetscMUMPSIntCast(irow + shift,&row[jj]);
704: PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);
705: }
706: val[jj++] = v1[j];
707: }
709: /* B-part */
710: for (j=0; j < countB; j++) {
711: if (reuse == MAT_INITIAL_MATRIX) {
712: PetscMUMPSIntCast(irow + shift,&row[jj]);
713: PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);
714: }
715: val[jj++] = v2[j];
716: }
717: }
718: irow+=bs;
719: }
720: mumps->nnz = jj;
721: return 0;
722: }
724: PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
725: {
726: const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
727: PetscInt64 rstart,nz,i,j,jj,irow,countA,countB;
728: PetscMUMPSInt *row,*col;
729: const PetscScalar *av, *bv,*v1,*v2;
730: PetscScalar *val;
731: Mat Ad,Ao;
732: Mat_SeqAIJ *aa;
733: Mat_SeqAIJ *bb;
735: MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);
736: MatSeqAIJGetArrayRead(Ad,&av);
737: MatSeqAIJGetArrayRead(Ao,&bv);
739: aa = (Mat_SeqAIJ*)(Ad)->data;
740: bb = (Mat_SeqAIJ*)(Ao)->data;
741: ai = aa->i;
742: aj = aa->j;
743: bi = bb->i;
744: bj = bb->j;
746: rstart = A->rmap->rstart;
748: if (reuse == MAT_INITIAL_MATRIX) {
749: nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
750: PetscMalloc2(nz,&row,nz,&col);
751: PetscMalloc1(nz,&val);
752: mumps->nnz = nz;
753: mumps->irn = row;
754: mumps->jcn = col;
755: mumps->val = mumps->val_alloc = val;
756: } else {
757: val = mumps->val;
758: }
760: jj = 0; irow = rstart;
761: for (i=0; i<m; i++) {
762: ajj = aj + ai[i]; /* ptr to the beginning of this row */
763: countA = ai[i+1] - ai[i];
764: countB = bi[i+1] - bi[i];
765: bjj = bj + bi[i];
766: v1 = av + ai[i];
767: v2 = bv + bi[i];
769: /* A-part */
770: for (j=0; j<countA; j++) {
771: if (reuse == MAT_INITIAL_MATRIX) {
772: PetscMUMPSIntCast(irow + shift,&row[jj]);
773: PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);
774: }
775: val[jj++] = v1[j];
776: }
778: /* B-part */
779: for (j=0; j < countB; j++) {
780: if (reuse == MAT_INITIAL_MATRIX) {
781: PetscMUMPSIntCast(irow + shift,&row[jj]);
782: PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);
783: }
784: val[jj++] = v2[j];
785: }
786: irow++;
787: }
788: MatSeqAIJRestoreArrayRead(Ad,&av);
789: MatSeqAIJRestoreArrayRead(Ao,&bv);
790: return 0;
791: }
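/*
  Note (added for exposition): an MPIAIJ matrix stores on each rank a "diagonal" block Ad (the columns
  this rank owns) and an "off-diagonal" block Ao whose columns are compressed; garray[] maps Ao's
  compressed local column indices back to global columns. The loop above therefore emits

        row = irow + shift                with irow starting at rstart (this rank's first global row)
        col = rstart + ajj[j] + shift     for entries of Ad
        col = garray[bjj[j]] + shift      for entries of Ao
*/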
793: PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
794: {
795: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data;
796: Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data;
797: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data;
798: const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
799: const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
800: const PetscInt bs2=mat->bs2;
801: PetscInt bs;
802: PetscInt64 nz,i,j,k,n,jj,irow,countA,countB,idx;
803: PetscMUMPSInt *row,*col;
804: const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
805: PetscScalar *val;
807: MatGetBlockSize(A,&bs);
808: if (reuse == MAT_INITIAL_MATRIX) {
809: nz = bs2*(aa->nz + bb->nz);
810: PetscMalloc2(nz,&row,nz,&col);
811: PetscMalloc1(nz,&val);
812: mumps->nnz = nz;
813: mumps->irn = row;
814: mumps->jcn = col;
815: mumps->val = mumps->val_alloc = val;
816: } else {
817: val = mumps->val;
818: }
820: jj = 0; irow = rstart;
821: for (i=0; i<mbs; i++) {
822: countA = ai[i+1] - ai[i];
823: countB = bi[i+1] - bi[i];
824: ajj = aj + ai[i];
825: bjj = bj + bi[i];
826: v1 = av + bs2*ai[i];
827: v2 = bv + bs2*bi[i];
829: idx = 0;
830: /* A-part */
831: for (k=0; k<countA; k++) {
832: for (j=0; j<bs; j++) {
833: for (n=0; n<bs; n++) {
834: if (reuse == MAT_INITIAL_MATRIX) {
835: PetscMUMPSIntCast(irow + n + shift,&row[jj]);
836: PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);
837: }
838: val[jj++] = v1[idx++];
839: }
840: }
841: }
843: idx = 0;
844: /* B-part */
845: for (k=0; k<countB; k++) {
846: for (j=0; j<bs; j++) {
847: for (n=0; n<bs; n++) {
848: if (reuse == MAT_INITIAL_MATRIX) {
849: PetscMUMPSIntCast(irow + n + shift,&row[jj]);
850: PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);
851: }
852: val[jj++] = v2[idx++];
853: }
854: }
855: }
856: irow += bs;
857: }
858: return 0;
859: }
861: PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
862: {
863: const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
864: PetscInt64 rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
865: PetscMUMPSInt *row,*col;
866: const PetscScalar *av, *bv,*v1,*v2;
867: PetscScalar *val;
868: Mat Ad,Ao;
869: Mat_SeqAIJ *aa;
870: Mat_SeqAIJ *bb;
871: #if defined(PETSC_USE_COMPLEX)
872: PetscBool hermitian;
873: #endif
875: #if defined(PETSC_USE_COMPLEX)
876: MatGetOption(A,MAT_HERMITIAN,&hermitian);
878: #endif
879: MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);
880: MatSeqAIJGetArrayRead(Ad,&av);
881: MatSeqAIJGetArrayRead(Ao,&bv);
883: aa = (Mat_SeqAIJ*)(Ad)->data;
884: bb = (Mat_SeqAIJ*)(Ao)->data;
885: ai = aa->i;
886: aj = aa->j;
887: adiag = aa->diag;
888: bi = bb->i;
889: bj = bb->j;
891: rstart = A->rmap->rstart;
893: if (reuse == MAT_INITIAL_MATRIX) {
894: nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
895: nzb = 0; /* num of upper triangular entries in mat->B */
896: for (i=0; i<m; i++) {
897: nza += (ai[i+1] - adiag[i]);
898: countB = bi[i+1] - bi[i];
899: bjj = bj + bi[i];
900: for (j=0; j<countB; j++) {
901: if (garray[bjj[j]] > rstart) nzb++;
902: }
903: }
905: nz = nza + nzb; /* total nz of upper triangular part of mat */
906: PetscMalloc2(nz,&row,nz,&col);
907: PetscMalloc1(nz,&val);
908: mumps->nnz = nz;
909: mumps->irn = row;
910: mumps->jcn = col;
911: mumps->val = mumps->val_alloc = val;
912: } else {
913: val = mumps->val;
914: }
916: jj = 0; irow = rstart;
917: for (i=0; i<m; i++) {
918: ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
919: v1 = av + adiag[i];
920: countA = ai[i+1] - adiag[i];
921: countB = bi[i+1] - bi[i];
922: bjj = bj + bi[i];
923: v2 = bv + bi[i];
925: /* A-part */
926: for (j=0; j<countA; j++) {
927: if (reuse == MAT_INITIAL_MATRIX) {
928: PetscMUMPSIntCast(irow + shift,&row[jj]);
929: PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);
930: }
931: val[jj++] = v1[j];
932: }
934: /* B-part */
935: for (j=0; j < countB; j++) {
936: if (garray[bjj[j]] > rstart) {
937: if (reuse == MAT_INITIAL_MATRIX) {
938: PetscMUMPSIntCast(irow + shift,&row[jj]);
939: PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);
940: }
941: val[jj++] = v2[j];
942: }
943: }
944: irow++;
945: }
946: MatSeqAIJRestoreArrayRead(Ad,&av);
947: MatSeqAIJRestoreArrayRead(Ao,&bv);
948: return 0;
949: }
951: PetscErrorCode MatDestroy_MUMPS(Mat A)
952: {
953: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
955: PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);
956: VecScatterDestroy(&mumps->scat_rhs);
957: VecScatterDestroy(&mumps->scat_sol);
958: VecDestroy(&mumps->b_seq);
959: VecDestroy(&mumps->x_seq);
960: PetscFree(mumps->id.perm_in);
961: PetscFree2(mumps->irn,mumps->jcn);
962: PetscFree(mumps->val_alloc);
963: PetscFree(mumps->info);
964: MatMumpsResetSchur_Private(mumps);
965: mumps->id.job = JOB_END;
966: PetscMUMPS_c(mumps);
968: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
969: if (mumps->use_petsc_omp_support) {
970: PetscOmpCtrlDestroy(&mumps->omp_ctrl);
971: PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);
972: PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);
973: }
974: #endif
975: PetscFree(mumps->ia_alloc);
976: PetscFree(mumps->ja_alloc);
977: PetscFree(mumps->recvcount);
978: PetscFree(mumps->reqs);
979: PetscFree(mumps->irhs_loc);
980: if (mumps->mumps_comm != MPI_COMM_NULL) PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm);
981: PetscFree(A->data);
983: /* clear composed functions */
984: PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
985: PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);
986: PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);
987: PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);
988: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);
989: PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);
990: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);
991: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);
992: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);
993: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);
994: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);
995: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);
996: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);
997: return 0;
998: }
1000: /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS; <array> points to the start of the RHS on the local process. */
1001: static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
1002: {
1003: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
1004: const PetscMPIInt ompsize=mumps->omp_comm_size;
1005: PetscInt i,m,M,rstart;
1007: MatGetSize(A,&M,NULL);
1008: MatGetLocalSize(A,&m,NULL);
1010: if (ompsize == 1) {
1011: if (!mumps->irhs_loc) {
1012: mumps->nloc_rhs = m;
1013: PetscMalloc1(m,&mumps->irhs_loc);
1014: MatGetOwnershipRange(A,&rstart,NULL);
1015: for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
1016: }
1017: mumps->id.rhs_loc = (MumpsScalar*)array;
1018: } else {
1019: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1020: const PetscInt *ranges;
1021: PetscMPIInt j,k,sendcount,*petsc_ranks,*omp_ranks;
1022: MPI_Group petsc_group,omp_group;
1023: PetscScalar *recvbuf=NULL;
1025: if (mumps->is_omp_master) {
1026: /* Lazily initialize the omp stuff for distributed rhs */
1027: if (!mumps->irhs_loc) {
1028: PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);
1029: PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);
1030: MPI_Comm_group(mumps->petsc_comm,&petsc_group);
1031: MPI_Comm_group(mumps->omp_comm,&omp_group);
1032: for (j=0; j<ompsize; j++) omp_ranks[j] = j;
1033: MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);
1035: /* Populate mumps->irhs_loc[], rhs_nrow[] */
1036: mumps->nloc_rhs = 0;
1037: MatGetOwnershipRanges(A,&ranges);
1038: for (j=0; j<ompsize; j++) {
1039: mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
1040: mumps->nloc_rhs += mumps->rhs_nrow[j];
1041: }
1042: PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);
1043: for (j=k=0; j<ompsize; j++) {
1044: for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
1045: }
1047: PetscFree2(omp_ranks,petsc_ranks);
1048: MPI_Group_free(&petsc_group);
1049: MPI_Group_free(&omp_group);
1050: }
1052: /* Reallocate buffers when the current nrhs is bigger than any we have met so far */
1053: if (nrhs > mumps->max_nrhs) {
1054: PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);
1055: PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);
1056: mumps->max_nrhs = nrhs;
1057: }
1059: /* Set up recvcounts[], disps[] and recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1060: for (j=0; j<ompsize; j++) PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);
1061: mumps->rhs_disps[0] = 0;
1062: for (j=1; j<ompsize; j++) {
1063: mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
1065: }
1066: recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1067: }
1069: PetscMPIIntCast(m*nrhs,&sendcount);
1070: MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);
1072: if (mumps->is_omp_master) {
1073: if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1074: PetscScalar *dst,*dstbase = mumps->rhs_loc;
1075: for (j=0; j<ompsize; j++) {
1076: const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1077: dst = dstbase;
1078: for (i=0; i<nrhs; i++) {
1079: PetscArraycpy(dst,src,mumps->rhs_nrow[j]);
1080: src += mumps->rhs_nrow[j];
1081: dst += mumps->nloc_rhs;
1082: }
1083: dstbase += mumps->rhs_nrow[j];
1084: }
1085: }
1086: mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
1087: }
1088: #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1089: }
1090: mumps->id.nrhs = nrhs;
1091: mumps->id.nloc_rhs = mumps->nloc_rhs;
1092: mumps->id.lrhs_loc = mumps->nloc_rhs;
1093: mumps->id.irhs_loc = mumps->irhs_loc;
1094: return 0;
1095: }
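/*
  Example (for exposition only): with 2 MPI ranks, a 6x6 matrix split 3+3 and no OpenMP grouping
  (ompsize == 1), rank 1 owns global rows 3..5 and the routine above fills

        nloc_rhs = 3,  irhs_loc = {4, 5, 6}      (1-based global row numbers)
        lrhs_loc = 3,  rhs_loc  = pointer to the local part of the dense rhs

  which is the distributed dense RHS layout MUMPS expects with ICNTL(20)=10.
*/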
1097: PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
1098: {
1099: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
1100: const PetscScalar *rarray = NULL;
1101: PetscScalar *array;
1102: IS is_iden,is_petsc;
1103: PetscInt i;
1104: PetscBool second_solve = PETSC_FALSE;
1105: static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;
1107: PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);
1108: PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);
1110: if (A->factorerrortype) {
1111: PetscInfo(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1112: VecSetInf(x);
1113: return 0;
1114: }
1116: mumps->id.nrhs = 1;
1117: if (mumps->petsc_size > 1) {
1118: if (mumps->ICNTL20 == 10) {
1119: mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1120: VecGetArrayRead(b,&rarray);
1121: MatMumpsSetUpDistRHSInfo(A,1,rarray);
1122: } else {
1123: mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
1124: VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);
1125: VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);
1126: if (!mumps->myid) {
1127: VecGetArray(mumps->b_seq,&array);
1128: mumps->id.rhs = (MumpsScalar*)array;
1129: }
1130: }
1131: } else { /* petsc_size == 1 */
1132: mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1133: VecCopy(b,x);
1134: VecGetArray(x,&array);
1135: mumps->id.rhs = (MumpsScalar*)array;
1136: }
1138: /*
1139: handle condensation step of Schur complement (if any)
1140: We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1141: According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
1142: Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
1143: This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
1144: */
1145: if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1147: second_solve = PETSC_TRUE;
1148: MatMumpsHandleSchur_Private(A,PETSC_FALSE);
1149: }
1150: /* solve phase */
1151: /*-------------*/
1152: mumps->id.job = JOB_SOLVE;
1153: PetscMUMPS_c(mumps);
1156: /* handle expansion step of Schur complement (if any) */
1157: if (second_solve) {
1158: MatMumpsHandleSchur_Private(A,PETSC_TRUE);
1159: }
1161: if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1162: if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1163: /* when id.ICNTL(9) changes, the contents of sol_loc/isol_loc may change (though not their size lsol_loc), so recreate scat_sol */
1164: VecScatterDestroy(&mumps->scat_sol);
1165: }
1166: if (!mumps->scat_sol) { /* create scatter scat_sol */
1167: PetscInt *isol2_loc=NULL;
1168: ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden); /* from */
1169: PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);
1170: for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
1171: ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc); /* to */
1172: VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);
1173: ISDestroy(&is_iden);
1174: ISDestroy(&is_petsc);
1175: mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1176: }
1178: VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);
1179: VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);
1180: }
1182: if (mumps->petsc_size > 1) {
1183: if (mumps->ICNTL20 == 10) {
1184: VecRestoreArrayRead(b,&rarray);
1185: } else if (!mumps->myid) {
1186: VecRestoreArray(mumps->b_seq,&array);
1187: }
1188: } else VecRestoreArray(x,&array);
1190: PetscLogFlops(2.0*mumps->id.RINFO(3));
1191: return 0;
1192: }
1194: PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
1195: {
1196: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
1198: mumps->id.ICNTL(9) = 0;
1199: MatSolve_MUMPS(A,b,x);
1200: mumps->id.ICNTL(9) = 1;
1201: return 0;
1202: }
1204: PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
1205: {
1206: Mat Bt = NULL;
1207: PetscBool denseX,denseB,flg,flgT;
1208: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
1209: PetscInt i,nrhs,M;
1210: PetscScalar *array;
1211: const PetscScalar *rbray;
1212: PetscInt lsol_loc,nlsol_loc,*idxx,iidx = 0;
1213: PetscMUMPSInt *isol_loc,*isol_loc_save;
1214: PetscScalar *bray,*sol_loc,*sol_loc_save;
1215: IS is_to,is_from;
1216: PetscInt k,proc,j,m,myrstart;
1217: const PetscInt *rstart;
1218: Vec v_mpi,msol_loc;
1219: VecScatter scat_sol;
1220: Vec b_seq;
1221: VecScatter scat_rhs;
1222: PetscScalar *aa;
1223: PetscInt spnr,*ia,*ja;
1224: Mat_MPIAIJ *b = NULL;
1226: PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);
1229: PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);
1230: if (denseB) {
1232: mumps->id.ICNTL(20)= 0; /* dense RHS */
1233: } else { /* sparse B */
1235: PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);
1236: if (flgT) { /* input B is the transpose of the actual RHS matrix,
1237: because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1238: MatTransposeGetMat(B,&Bt);
1239: } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
1240: mumps->id.ICNTL(20)= 1; /* sparse RHS */
1241: }
1243: MatGetSize(B,&M,&nrhs);
1244: mumps->id.nrhs = nrhs;
1245: mumps->id.lrhs = M;
1246: mumps->id.rhs = NULL;
1248: if (mumps->petsc_size == 1) {
1249: PetscScalar *aa;
1250: PetscInt spnr,*ia,*ja;
1251: PetscBool second_solve = PETSC_FALSE;
1253: MatDenseGetArray(X,&array);
1254: mumps->id.rhs = (MumpsScalar*)array;
1256: if (denseB) {
1257: /* copy B to X */
1258: MatDenseGetArrayRead(B,&rbray);
1259: PetscArraycpy(array,rbray,M*nrhs);
1260: MatDenseRestoreArrayRead(B,&rbray);
1261: } else { /* sparse B */
1262: MatSeqAIJGetArray(Bt,&aa);
1263: MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
1265: PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);
1266: mumps->id.rhs_sparse = (MumpsScalar*)aa;
1267: }
1268: /* handle condensation step of Schur complement (if any) */
1269: if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1270: second_solve = PETSC_TRUE;
1271: MatMumpsHandleSchur_Private(A,PETSC_FALSE);
1272: }
1273: /* solve phase */
1274: /*-------------*/
1275: mumps->id.job = JOB_SOLVE;
1276: PetscMUMPS_c(mumps);
1279: /* handle expansion step of Schur complement (if any) */
1280: if (second_solve) {
1281: MatMumpsHandleSchur_Private(A,PETSC_TRUE);
1282: }
1283: if (!denseB) { /* sparse B */
1284: MatSeqAIJRestoreArray(Bt,&aa);
1285: MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
1287: }
1288: MatDenseRestoreArray(X,&array);
1289: return 0;
1290: }
1292: /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1295: /* create msol_loc to hold mumps local solution */
1296: isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1297: sol_loc_save = (PetscScalar*)mumps->id.sol_loc;
1299: lsol_loc = mumps->id.lsol_loc;
1300: nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */
1301: PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);
1302: mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1303: mumps->id.isol_loc = isol_loc;
1305: VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);
1307: if (denseB) {
1308: if (mumps->ICNTL20 == 10) {
1309: mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1310: MatDenseGetArrayRead(B,&rbray);
1311: MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);
1312: MatDenseRestoreArrayRead(B,&rbray);
1313: MatGetLocalSize(B,&m,NULL);
1314: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);
1315: } else {
1316: mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1317: /* TODO: Because of non-contiguous indices, the vecscatter scat_rhs created here cannot map to an MPI_Gather, resulting in
1318: very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
1319: 0, re-arrange B into the desired order, which is a local operation.
1320: */
1322: /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1323: /* wrap dense rhs matrix B into a vector v_mpi */
1324: MatGetLocalSize(B,&m,NULL);
1325: MatDenseGetArray(B,&bray);
1326: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);
1327: MatDenseRestoreArray(B,&bray);
1329: /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1330: if (!mumps->myid) {
1331: PetscInt *idx;
1332: /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1333: PetscMalloc1(nrhs*M,&idx);
1334: MatGetOwnershipRanges(B,&rstart);
1335: k = 0;
1336: for (proc=0; proc<mumps->petsc_size; proc++) {
1337: for (j=0; j<nrhs; j++) {
1338: for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
1339: }
1340: }
1342: VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);
1343: ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);
1344: ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);
1345: } else {
1346: VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);
1347: ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);
1348: ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);
1349: }
1350: VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);
1351: VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);
1352: ISDestroy(&is_to);
1353: ISDestroy(&is_from);
1354: VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);
1356: if (!mumps->myid) { /* define rhs on the host */
1357: VecGetArray(b_seq,&bray);
1358: mumps->id.rhs = (MumpsScalar*)bray;
1359: VecRestoreArray(b_seq,&bray);
1360: }
1361: }
1362: } else { /* sparse B */
1363: b = (Mat_MPIAIJ*)Bt->data;
1365: /* wrap dense X into a vector v_mpi */
1366: MatGetLocalSize(X,&m,NULL);
1367: MatDenseGetArray(X,&bray);
1368: VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);
1369: MatDenseRestoreArray(X,&bray);
1371: if (!mumps->myid) {
1372: MatSeqAIJGetArray(b->A,&aa);
1373: MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
1375: PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);
1376: mumps->id.rhs_sparse = (MumpsScalar*)aa;
1377: } else {
1378: mumps->id.irhs_ptr = NULL;
1379: mumps->id.irhs_sparse = NULL;
1380: mumps->id.nz_rhs = 0;
1381: mumps->id.rhs_sparse = NULL;
1382: }
1383: }
1385: /* solve phase */
1386: /*-------------*/
1387: mumps->id.job = JOB_SOLVE;
1388: PetscMUMPS_c(mumps);
1391: /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1392: MatDenseGetArray(X,&array);
1393: VecPlaceArray(v_mpi,array);
1395: /* create scatter scat_sol */
1396: MatGetOwnershipRanges(X,&rstart);
1397: /* iidx: index for scatter mumps solution to petsc X */
1399: ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);
1400: PetscMalloc1(nlsol_loc,&idxx);
1401: for (i=0; i<lsol_loc; i++) {
1402: isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1404: for (proc=0; proc<mumps->petsc_size; proc++) {
1405: if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
1406: myrstart = rstart[proc];
1407: k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */
1408: iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */
1409: m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
1410: break;
1411: }
1412: }
1414: for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
1415: }
1416: ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);
1417: VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);
1418: VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);
1419: ISDestroy(&is_from);
1420: ISDestroy(&is_to);
1421: VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);
1422: MatDenseRestoreArray(X,&array);
1424: /* free spaces */
1425: mumps->id.sol_loc = (MumpsScalar*)sol_loc_save;
1426: mumps->id.isol_loc = isol_loc_save;
1428: PetscFree2(sol_loc,isol_loc);
1429: PetscFree(idxx);
1430: VecDestroy(&msol_loc);
1431: VecDestroy(&v_mpi);
1432: if (!denseB) {
1433: if (!mumps->myid) {
1434: b = (Mat_MPIAIJ*)Bt->data;
1435: MatSeqAIJRestoreArray(b->A,&aa);
1436: MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
1438: }
1439: } else {
1440: if (mumps->ICNTL20 == 0) {
1441: VecDestroy(&b_seq);
1442: VecScatterDestroy(&scat_rhs);
1443: }
1444: }
1445: VecScatterDestroy(&scat_sol);
1446: PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));
1447: return 0;
1448: }
1450: PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1451: {
1452: PetscBool flg;
1453: Mat B;
1455: PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);
1458: /* Create B=Bt^T that uses Bt's data structure */
1459: MatCreateTranspose(Bt,&B);
1461: MatMatSolve_MUMPS(A,B,X);
1462: MatDestroy(&B);
1463: return 0;
1464: }
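/*
  Illustrative usage sketch (hypothetical caller, not compiled): MUMPS wants a sparse rhs in
  compressed-column form, so a caller that wants to solve A X = B with a sparse B assembles
  Bt = B^T in (Seq/MPI)AIJ format and calls

      MatMatTransposeSolve(F,Bt,X);

  where F is the MUMPS factor; that call lands here and reuses MatMatSolve_MUMPS() through the
  lightweight transpose wrapper created by MatCreateTranspose().
*/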
1466: #if !defined(PETSC_USE_COMPLEX)
1467: /*
1468: input:
1469: F: numeric factor
1470: output:
1471: nneg: total number of negative pivots
1472: nzero: total number of zero pivots
1473: npos: (global dimension of F) - nneg - nzero
1474: */
1475: PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1476: {
1477: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
1478: PetscMPIInt size;
1480: MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);
1481: /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (the default), which does not offer the possibility of computing the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1484: if (nneg) *nneg = mumps->id.INFOG(12);
1485: if (nzero || npos) {
1487: if (nzero) *nzero = mumps->id.INFOG(28);
1488: if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1489: }
1490: return 0;
1491: }
1492: #endif
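/*
  Example (for exposition only): for a 100x100 symmetric factor where MUMPS reports INFOG(12)=3
  negative pivots and INFOG(28)=1 null pivot, the routine above returns nneg=3, nzero=1 and
  npos=100-3-1=96. Callers typically reach it through MatGetInertia(), e.g. for inertia-based
  spectrum slicing.
*/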
1494: PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
1495: {
1496: PetscInt i,nreqs;
1497: PetscMUMPSInt *irn,*jcn;
1498: PetscMPIInt count;
1499: PetscInt64 totnnz,remain;
1500: const PetscInt osize=mumps->omp_comm_size;
1501: PetscScalar *val;
1503: if (osize > 1) {
1504: if (reuse == MAT_INITIAL_MATRIX) {
1505: /* master first gathers counts of nonzeros to receive */
1506: if (mumps->is_omp_master) PetscMalloc1(osize,&mumps->recvcount);
1507: MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);
1509: /* Then each computes number of send/recvs */
1510: if (mumps->is_omp_master) {
1511: /* Start from 1 since self-communication is not done via MPI */
1512: nreqs = 0;
1513: for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1514: } else {
1515: nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1516: }
1517: PetscMalloc1(nreqs*3,&mumps->reqs); /* Triple the requests since we send irn, jcn and val separately */
1519: /* The following code does something very simple: the omp_master rank gathers irn/jcn/val from the others.
1520: MPI_Gatherv would suffice if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1521: might be a prime number > 2^31-1, we have to slice the message. Since omp_comm_size
1522: is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1523: */
1524: nreqs = 0; /* counter for actual send/recvs */
1525: if (mumps->is_omp_master) {
1526: for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1527: PetscMalloc2(totnnz,&irn,totnnz,&jcn);
1528: PetscMalloc1(totnnz,&val);
1530: /* Self communication */
1531: PetscArraycpy(irn,mumps->irn,mumps->nnz);
1532: PetscArraycpy(jcn,mumps->jcn,mumps->nnz);
1533: PetscArraycpy(val,mumps->val,mumps->nnz);
1535: /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1536: PetscFree2(mumps->irn,mumps->jcn);
1537: PetscFree(mumps->val_alloc);
1538: mumps->nnz = totnnz;
1539: mumps->irn = irn;
1540: mumps->jcn = jcn;
1541: mumps->val = mumps->val_alloc = val;
1543: irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1544: jcn += mumps->recvcount[0];
1545: val += mumps->recvcount[0];
1547: /* Remote communication */
1548: for (i=1; i<osize; i++) {
1549: count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1550: remain = mumps->recvcount[i] - count;
1551: while (count>0) {
1552: MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1553: MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1554: MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1555: irn += count;
1556: jcn += count;
1557: val += count;
1558: count = PetscMin(remain,PETSC_MPI_INT_MAX);
1559: remain -= count;
1560: }
1561: }
1562: } else {
1563: irn = mumps->irn;
1564: jcn = mumps->jcn;
1565: val = mumps->val;
1566: count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1567: remain = mumps->nnz - count;
1568: while (count>0) {
1569: MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1570: MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1571: MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1572: irn += count;
1573: jcn += count;
1574: val += count;
1575: count = PetscMin(remain,PETSC_MPI_INT_MAX);
1576: remain -= count;
1577: }
1578: }
1579: } else {
1580: nreqs = 0;
1581: if (mumps->is_omp_master) {
1582: val = mumps->val + mumps->recvcount[0];
1583: for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
1584: count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1585: remain = mumps->recvcount[i] - count;
1586: while (count>0) {
1587: MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1588: val += count;
1589: count = PetscMin(remain,PETSC_MPI_INT_MAX);
1590: remain -= count;
1591: }
1592: }
1593: } else {
1594: val = mumps->val;
1595: count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1596: remain = mumps->nnz - count;
1597: while (count>0) {
1598: MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1599: val += count;
1600: count = PetscMin(remain,PETSC_MPI_INT_MAX);
1601: remain -= count;
1602: }
1603: }
1604: }
1605: MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);
1606: mumps->tag++; /* It is fine for the above sends/recvs to share one MPI tag */
1607: }
1608: return 0;
1609: }
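/*
  Worked example (for exposition only): if a non-master rank contributes nnz = 5,000,000,000 entries,
  the slicing above posts ceil(5e9 / 2147483647) = 3 sends per array, i.e. 9 requests for irn, jcn and
  val together, each with a count that fits in a signed 32-bit int as MPI requires; the matching
  receives on the omp master are sliced the same way.
*/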
1611: PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1612: {
1613: Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data;
1614: PetscBool isMPIAIJ;
1616: if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
1617: if (mumps->id.INFOG(1) == -6) {
1618: PetscInfo(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1619: }
1620: PetscInfo(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1621: return 0;
1622: }
1624: (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);
1625: MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);
1627: /* numerical factorization phase */
1628: /*-------------------------------*/
1629: mumps->id.job = JOB_FACTNUMERIC;
1630: if (!mumps->id.ICNTL(18)) { /* A is centralized */
1631: if (!mumps->myid) {
1632: mumps->id.a = (MumpsScalar*)mumps->val;
1633: }
1634: } else {
1635: mumps->id.a_loc = (MumpsScalar*)mumps->val;
1636: }
1637: PetscMUMPS_c(mumps);
1638: if (mumps->id.INFOG(1) < 0) {
1639: if (A->erroriffailure) {
1640: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
1641: } else {
1642: if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1643: PetscInfo(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1644: F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1645: } else if (mumps->id.INFOG(1) == -13) {
1646: PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1647: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1648: } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1649: PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1650: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1651: } else {
1652: PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1653: F->factorerrortype = MAT_FACTOR_OTHER;
1654: }
1655: }
1656: }
1659: F->assembled = PETSC_TRUE;
1661: if (F->schur) { /* reset Schur status to unfactored */
1662: #if defined(PETSC_HAVE_CUDA)
1663: F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1664: #endif
1665: if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1666: mumps->id.ICNTL(19) = 2;
1667: MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);
1668: }
1669: MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);
1670: }
1672: /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1673: if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1675: if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1676: if (mumps->petsc_size > 1) {
1677: PetscInt lsol_loc;
1678: PetscScalar *sol_loc;
1680: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);
1682: /* distributed solution; Create x_seq=sol_loc for repeated use */
1683: if (mumps->x_seq) {
1684: VecScatterDestroy(&mumps->scat_sol);
1685: PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);
1686: VecDestroy(&mumps->x_seq);
1687: }
1688: lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1689: PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);
1690: mumps->id.lsol_loc = lsol_loc;
1691: mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1692: VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);
1693: }
1694: PetscLogFlops(mumps->id.RINFO(2));
1695: return 0;
1696: }
1698: /* Sets MUMPS options from the options database */
1699: PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
1700: {
1701: Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
1703: PetscMUMPSInt icntl=0;
1704: PetscInt info[80],i,ninfo=80;
1705: PetscBool flg=PETSC_FALSE;
1707: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");
1708: PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);
1709: if (flg) mumps->id.ICNTL(1) = icntl;
1710: PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);
1711: if (flg) mumps->id.ICNTL(2) = icntl;
1712: PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);
1713: if (flg) mumps->id.ICNTL(3) = icntl;
1715: PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);
1716: if (flg) mumps->id.ICNTL(4) = icntl;
1717: if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default ICNTL(3) = 6 */
1719: PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);
1720: if (flg) mumps->id.ICNTL(6) = icntl;
1722: PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);
1723: if (flg) {
1725: mumps->id.ICNTL(7) = icntl;
1726: }
1728: PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);
1729: /* PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL); handled by MatSolveTranspose_MUMPS() */
1730: PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);
1731: PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);
1732: PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);
1733: PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);
1734: PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);
1735: PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);
1736: if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1737: MatDestroy(&F->schur);
1738: MatMumpsResetSchur_Private(mumps);
1739: }
1741: /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1742: and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1743: and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1744: This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1745: see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1746: In short, we could not use distributed RHS with MPICH until v4.0b1.
1747: */
1748: #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1749: mumps->ICNTL20 = 0; /* Centralized dense RHS*/
1750: #else
1751: mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1752: #endif
1753: PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg);
1755: #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1756: if (flg && mumps->ICNTL20 == 10) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 (distributed dense RHS) is not supported before MUMPS-5.3.0");
1757: #endif
1758: /* PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL); we only use distributed solution vector */
1760: PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);
1761: PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);
1762: PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);
1763: if (mumps->id.ICNTL(24)) {
1764: mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
1765: }
1767: PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);
1768: PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);
1769: PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);
1770: PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);
1771: PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);
1772: /* PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL); */ /* call MatMumpsGetInverse() directly */
1773: PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);
1774: /* PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);  -- not supported by PETSc API */
1775: PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);
1776: PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);
1777: PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);
1778: PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);
1780: PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);
1781: PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);
1782: PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);
1783: PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);
1784: PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);
1785: PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);
1787: PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);
1789: PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);
1790: if (ninfo) {
1792: PetscMalloc1(ninfo,&mumps->info);
1793: mumps->ninfo = ninfo;
1794: for (i=0; i<ninfo; i++) {
1795: if (info[i] < 1 || info[i] > 80) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index of MUMPS INFO() must be between 1 and 80");
1796: else mumps->info[i] = info[i];
1797: }
1798: }
1800: PetscOptionsEnd();
1801: return 0;
1802: }
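/*
   An illustrative command-line sketch of the options parsed above (the executable name "app" is hypothetical;
   the option names are the ones registered in PetscSetMUMPSFromOptions()):

     ./app -pc_type lu -pc_factor_mat_solver_type mumps \
           -mat_mumps_icntl_14 50 -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1e-12 -ksp_view

   Here ICNTL(14)=50 raises the estimated working-space increase to 50 percent, ICNTL(24)=1 enables null-pivot
   detection (which, as coded above, also sets ICNTL(13)=1), and CNTL(3)=1e-12 sets the absolute pivoting
   threshold; -ksp_view prints the resulting MUMPS parameters via MatView_MUMPS().
*/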
1804: PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
1805: {
1806: PetscInt nthreads=0;
1808: mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1809: MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);
1810: MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);/* "if (!myid)" still works even if mumps_comm is different */
1812: PetscOptionsHasName(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);
1813: if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1814: PetscOptionsGetInt(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL);
1815: if (mumps->use_petsc_omp_support) {
1816: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1817: PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);
1818: PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);
1819: #else
1820: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)A)->prefix?((PetscObject)A)->prefix:"");
1821: #endif
1822: } else {
1823: mumps->omp_comm = PETSC_COMM_SELF;
1824: mumps->mumps_comm = mumps->petsc_comm;
1825: mumps->is_omp_master = PETSC_TRUE;
1826: }
1827: MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);
1828: mumps->reqs = NULL;
1829: mumps->tag = 0;
1831: /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1832: if (mumps->mumps_comm != MPI_COMM_NULL) {
1833: PetscCommGetComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm);
1834: }
1836: mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1837: mumps->id.job = JOB_INIT;
1838: mumps->id.par = 1; /* host participates in factorization and solve */
1839: mumps->id.sym = mumps->sym;
1841: PetscMUMPS_c(mumps);
1844: /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1845: For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1846: */
1847: MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm);
1848: MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);
1850: mumps->scat_rhs = NULL;
1851: mumps->scat_sol = NULL;
1853: /* set PETSc-MUMPS default options - override MUMPS default */
1854: mumps->id.ICNTL(3) = 0;
1855: mumps->id.ICNTL(4) = 0;
1856: if (mumps->petsc_size == 1) {
1857: mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
1858: mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */
1859: } else {
1860: mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
1861: mumps->id.ICNTL(21) = 1; /* distributed solution */
1862: }
1864: /* schur */
1865: mumps->id.size_schur = 0;
1866: mumps->id.listvar_schur = NULL;
1867: mumps->id.schur = NULL;
1868: mumps->sizeredrhs = 0;
1869: mumps->schur_sol = NULL;
1870: mumps->schur_sizesol = 0;
1871: return 0;
1872: }
1874: PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1875: {
1876: if (mumps->id.INFOG(1) < 0) {
1877: if (A->erroriffailure) {
1878: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1));
1879: } else {
1880: if (mumps->id.INFOG(1) == -6) {
1881: PetscInfo(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1882: F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1883: } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1884: PetscInfo(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1885: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1886: } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* empty matrix: MUMPS reports -16 with INFO(2)=N=0 */
1887: PetscInfo(F,"Empty matrix\n");
1888: } else {
1889: PetscInfo(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1890: F->factorerrortype = MAT_FACTOR_OTHER;
1891: }
1892: }
1893: }
1894: return 0;
1895: }
1897: PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1898: {
1899: Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
1900: Vec b;
1901: const PetscInt M = A->rmap->N;
1903: if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1904: /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1905: return 0;
1906: }
1908: /* Set MUMPS options from the options database */
1909: PetscSetMUMPSFromOptions(F,A);
1911: (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);
1912: MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);
1914: /* analysis phase */
1915: /*----------------*/
1916: mumps->id.job = JOB_FACTSYMBOLIC;
1917: mumps->id.n = M;
1918: switch (mumps->id.ICNTL(18)) {
1919: case 0: /* centralized assembled matrix input */
1920: if (!mumps->myid) {
1921: mumps->id.nnz = mumps->nnz;
1922: mumps->id.irn = mumps->irn;
1923: mumps->id.jcn = mumps->jcn;
1924: if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
1925: if (r) {
1926: mumps->id.ICNTL(7) = 1;
1927: if (!mumps->myid) {
1928: const PetscInt *idx;
1929: PetscInt i;
1931: PetscMalloc1(M,&mumps->id.perm_in);
1932: ISGetIndices(r,&idx);
1933: for (i=0; i<M; i++) PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
1934: ISRestoreIndices(r,&idx);
1935: }
1936: }
1937: }
1938: break;
1939: case 3: /* distributed assembled matrix input (size>1) */
1940: mumps->id.nnz_loc = mumps->nnz;
1941: mumps->id.irn_loc = mumps->irn;
1942: mumps->id.jcn_loc = mumps->jcn;
1943: if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
1944: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1945: MatCreateVecs(A,NULL,&b);
1946: VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);
1947: VecDestroy(&b);
1948: }
1949: break;
1950: }
1951: PetscMUMPS_c(mumps);
1952: MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);
1954: F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
1955: F->ops->solve = MatSolve_MUMPS;
1956: F->ops->solvetranspose = MatSolveTranspose_MUMPS;
1957: F->ops->matsolve = MatMatSolve_MUMPS;
1958: F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
1960: mumps->matstruc = SAME_NONZERO_PATTERN;
1961: return 0;
1962: }
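/*
   A minimal sketch of the factorization sequence that drives the routines above, assuming an assembled
   MATAIJ matrix A and vectors b,x already exist (error checking omitted; most applications reach this
   path through KSP/PC with -pc_type lu -pc_factor_mat_solver_type mumps instead):

     Mat           F;
     MatFactorInfo info;

     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
     MatFactorInfoInitialize(&info);
     MatLUFactorSymbolic(F,A,NULL,NULL,&info);   - ends up in MatLUFactorSymbolic_AIJMUMPS(); NULL orderings let MUMPS pick its own
     MatLUFactorNumeric(F,A,&info);              - ends up in MatFactorNumeric_MUMPS()
     MatSolve(F,b,x);                            - ends up in MatSolve_MUMPS()
     MatDestroy(&F);
*/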
1964: /* Note the Petsc r and c permutations are ignored */
1965: PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1966: {
1967: Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
1968: Vec b;
1969: const PetscInt M = A->rmap->N;
1971: if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1972: /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
1973: return 0;
1974: }
1976: /* Set MUMPS options from the options database */
1977: PetscSetMUMPSFromOptions(F,A);
1979: (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);
1980: MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);
1982: /* analysis phase */
1983: /*----------------*/
1984: mumps->id.job = JOB_FACTSYMBOLIC;
1985: mumps->id.n = M;
1986: switch (mumps->id.ICNTL(18)) {
1987: case 0: /* centralized assembled matrix input */
1988: if (!mumps->myid) {
1989: mumps->id.nnz = mumps->nnz;
1990: mumps->id.irn = mumps->irn;
1991: mumps->id.jcn = mumps->jcn;
1992: if (mumps->id.ICNTL(6)>1) {
1993: mumps->id.a = (MumpsScalar*)mumps->val;
1994: }
1995: }
1996: break;
1997: case 3: /* distributed assembled matrix input (size>1) */
1998: mumps->id.nnz_loc = mumps->nnz;
1999: mumps->id.irn_loc = mumps->irn;
2000: mumps->id.jcn_loc = mumps->jcn;
2001: if (mumps->id.ICNTL(6)>1) {
2002: mumps->id.a_loc = (MumpsScalar*)mumps->val;
2003: }
2004: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2005: MatCreateVecs(A,NULL,&b);
2006: VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);
2007: VecDestroy(&b);
2008: }
2009: break;
2010: }
2011: PetscMUMPS_c(mumps);
2012: MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);
2014: F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2015: F->ops->solve = MatSolve_MUMPS;
2016: F->ops->solvetranspose = MatSolveTranspose_MUMPS;
2018: mumps->matstruc = SAME_NONZERO_PATTERN;
2019: return 0;
2020: }
2022: /* Note the Petsc r permutation and factor info are ignored */
2023: PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
2024: {
2025: Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
2026: Vec b;
2027: const PetscInt M = A->rmap->N;
2029: if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2030: /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2031: return 0;
2032: }
2034: /* Set MUMPS options from the options database */
2035: PetscSetMUMPSFromOptions(F,A);
2037: (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);
2038: MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);
2040: /* analysis phase */
2041: /*----------------*/
2042: mumps->id.job = JOB_FACTSYMBOLIC;
2043: mumps->id.n = M;
2044: switch (mumps->id.ICNTL(18)) {
2045: case 0: /* centralized assembled matrix input */
2046: if (!mumps->myid) {
2047: mumps->id.nnz = mumps->nnz;
2048: mumps->id.irn = mumps->irn;
2049: mumps->id.jcn = mumps->jcn;
2050: if (mumps->id.ICNTL(6)>1) {
2051: mumps->id.a = (MumpsScalar*)mumps->val;
2052: }
2053: }
2054: break;
2055: case 3: /* distributed assembled matrix input (size>1) */
2056: mumps->id.nnz_loc = mumps->nnz;
2057: mumps->id.irn_loc = mumps->irn;
2058: mumps->id.jcn_loc = mumps->jcn;
2059: if (mumps->id.ICNTL(6)>1) {
2060: mumps->id.a_loc = (MumpsScalar*)mumps->val;
2061: }
2062: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2063: MatCreateVecs(A,NULL,&b);
2064: VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);
2065: VecDestroy(&b);
2066: }
2067: break;
2068: }
2069: PetscMUMPS_c(mumps);
2070: MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);
2072: F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2073: F->ops->solve = MatSolve_MUMPS;
2074: F->ops->solvetranspose = MatSolve_MUMPS;
2075: F->ops->matsolve = MatMatSolve_MUMPS;
2076: F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2077: #if defined(PETSC_USE_COMPLEX)
2078: F->ops->getinertia = NULL;
2079: #else
2080: F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2081: #endif
2083: mumps->matstruc = SAME_NONZERO_PATTERN;
2084: return 0;
2085: }
2087: PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
2088: {
2089: PetscBool iascii;
2090: PetscViewerFormat format;
2091: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
2093: /* check if matrix is mumps type */
2094: if (A->ops->solve != MatSolve_MUMPS) return 0;
2096: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
2097: if (iascii) {
2098: PetscViewerGetFormat(viewer,&format);
2099: if (format == PETSC_VIEWER_ASCII_INFO) {
2100: PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");
2101: PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d\n",mumps->id.sym);
2102: PetscViewerASCIIPrintf(viewer," PAR (host participation): %d\n",mumps->id.par);
2103: PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d\n",mumps->id.ICNTL(1));
2104: PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2));
2105: PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d\n",mumps->id.ICNTL(3));
2106: PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d\n",mumps->id.ICNTL(4));
2107: PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d\n",mumps->id.ICNTL(5));
2108: PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d\n",mumps->id.ICNTL(6));
2109: PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7));
2110: PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d\n",mumps->id.ICNTL(8));
2111: PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d\n",mumps->id.ICNTL(10));
2112: PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d\n",mumps->id.ICNTL(11));
2113: if (mumps->id.ICNTL(11)>0) {
2114: PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));
2115: PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));
2116: PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));
2117: PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));
2118: PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));
2119: PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));
2120: }
2121: PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d\n",mumps->id.ICNTL(12));
2122: PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d\n",mumps->id.ICNTL(13));
2123: PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14));
2124: /* ICNTL(15-17) not used */
2125: PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d\n",mumps->id.ICNTL(18));
2126: PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d\n",mumps->id.ICNTL(19));
2127: PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d\n",mumps->id.ICNTL(20));
2128: PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d\n",mumps->id.ICNTL(21));
2129: PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d\n",mumps->id.ICNTL(22));
2130: PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory that can be allocated locally):%d\n",mumps->id.ICNTL(23));
2132: PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d\n",mumps->id.ICNTL(24));
2133: PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d\n",mumps->id.ICNTL(25));
2134: PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d\n",mumps->id.ICNTL(26));
2135: PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d\n",mumps->id.ICNTL(27));
2136: PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d\n",mumps->id.ICNTL(28));
2137: PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d\n",mumps->id.ICNTL(29));
2139: PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d\n",mumps->id.ICNTL(30));
2140: PetscViewerASCIIPrintf(viewer," ICNTL(31) (which factors may be discarded during factorization): %d\n",mumps->id.ICNTL(31));
2141: PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d\n",mumps->id.ICNTL(33));
2142: PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d\n",mumps->id.ICNTL(35));
2143: PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d\n",mumps->id.ICNTL(36));
2144: PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d\n",mumps->id.ICNTL(38));
2146: PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));
2147: PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));
2148: PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));
2149: PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));
2150: PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));
2151: PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));
2153: /* information local to each processor */
2154: PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");
2155: PetscViewerASCIIPushSynchronized(viewer);
2156: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));
2157: PetscViewerFlush(viewer);
2158: PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");
2159: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));
2160: PetscViewerFlush(viewer);
2161: PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");
2162: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));
2163: PetscViewerFlush(viewer);
2165: PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n");
2166: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(15));
2167: PetscViewerFlush(viewer);
2169: PetscViewerASCIIPrintf(viewer, " INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n");
2170: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(16));
2171: PetscViewerFlush(viewer);
2173: PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");
2174: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(23));
2175: PetscViewerFlush(viewer);
2177: if (mumps->ninfo && mumps->ninfo <= 80) {
2178: PetscInt i;
2179: for (i=0; i<mumps->ninfo; i++) {
2180: PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "): \n",mumps->info[i]);
2181: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i]));
2182: PetscViewerFlush(viewer);
2183: }
2184: }
2185: PetscViewerASCIIPopSynchronized(viewer);
2187: if (!mumps->myid) { /* information from the host */
2188: PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));
2189: PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));
2190: PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));
2191: PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));
2193: PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3));
2194: PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4));
2195: PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5));
2196: PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6));
2197: PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7));
2198: PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8));
2199: PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9));
2200: PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space to store the matrix factors after factorization): %d\n",mumps->id.INFOG(10));
2201: PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11));
2202: PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12));
2203: PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13));
2204: PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compresses after factorization): %d\n",mumps->id.INFOG(14));
2205: PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15));
2206: PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16));
2207: PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17));
2208: PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18));
2209: PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19));
2210: PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20));
2211: PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21));
2212: PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22));
2213: PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23));
2214: PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24));
2215: PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25));
2216: PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));
2217: PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));
2218: PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));
2219: PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));
2220: PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));
2221: PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));
2222: PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));
2223: PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36));
2224: PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37));
2225: PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38));
2226: PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39));
2227: }
2228: }
2229: }
2230: return 0;
2231: }
2233: PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2234: {
2235: Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2237: info->block_size = 1.0;
2238: info->nz_allocated = mumps->id.INFOG(20);
2239: info->nz_used = mumps->id.INFOG(20);
2240: info->nz_unneeded = 0.0;
2241: info->assemblies = 0.0;
2242: info->mallocs = 0.0;
2243: info->memory = 0.0;
2244: info->fill_ratio_given = 0;
2245: info->fill_ratio_needed = 0;
2246: info->factor_mallocs = 0;
2247: return 0;
2248: }
2250: /* -------------------------------------------------------------------------------------------*/
2251: PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2252: {
2253: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2254: const PetscScalar *arr;
2255: const PetscInt *idxs;
2256: PetscInt size,i;
2258: ISGetLocalSize(is,&size);
2259: if (mumps->petsc_size > 1) {
2260: PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */
2262: ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2263: MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);
2264: if (!gs) SETERRQ(mumps->petsc_comm,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc");
2265: }
2267: /* Schur complement matrix */
2268: MatDestroy(&F->schur);
2269: MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);
2270: MatDenseGetArrayRead(F->schur,&arr);
2271: mumps->id.schur = (MumpsScalar*)arr;
2272: mumps->id.size_schur = size;
2273: mumps->id.schur_lld = size;
2274: MatDenseRestoreArrayRead(F->schur,&arr);
2275: if (mumps->sym == 1) {
2276: MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);
2277: }
2279: /* MUMPS expects Fortran style indices */
2280: PetscFree(mumps->id.listvar_schur);
2281: PetscMalloc1(size,&mumps->id.listvar_schur);
2282: ISGetIndices(is,&idxs);
2283: for (i=0; i<size; i++) PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));
2284: ISRestoreIndices(is,&idxs);
2285: if (mumps->petsc_size > 1) {
2286: mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
2287: } else {
2288: if (F->factortype == MAT_FACTOR_LU) {
2289: mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2290: } else {
2291: mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2292: }
2293: }
2294: /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2295: mumps->id.ICNTL(26) = -1;
2296: return 0;
2297: }
2299: /* -------------------------------------------------------------------------------------------*/
2300: PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
2301: {
2302: Mat St;
2303: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2304: PetscScalar *array;
2305: #if defined(PETSC_USE_COMPLEX)
2306: PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0);
2307: #endif
2310: MatCreate(PETSC_COMM_SELF,&St);
2311: MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);
2312: MatSetType(St,MATDENSE);
2313: MatSetUp(St);
2314: MatDenseGetArray(St,&array);
2315: if (!mumps->sym) { /* MUMPS always returns a full matrix */
2316: if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2317: PetscInt i,j,N=mumps->id.size_schur;
2318: for (i=0;i<N;i++) {
2319: for (j=0;j<N;j++) {
2320: #if !defined(PETSC_USE_COMPLEX)
2321: PetscScalar val = mumps->id.schur[i*N+j];
2322: #else
2323: PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2324: #endif
2325: array[j*N+i] = val;
2326: }
2327: }
2328: } else { /* stored by columns */
2329: PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);
2330: }
2331: } else { /* either full or lower-triangular (not packed) */
2332: if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2333: PetscInt i,j,N=mumps->id.size_schur;
2334: for (i=0;i<N;i++) {
2335: for (j=i;j<N;j++) {
2336: #if !defined(PETSC_USE_COMPLEX)
2337: PetscScalar val = mumps->id.schur[i*N+j];
2338: #else
2339: PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2340: #endif
2341: array[i*N+j] = val;
2342: array[j*N+i] = val;
2343: }
2344: }
2345: } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2346: PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);
2347: } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2348: PetscInt i,j,N=mumps->id.size_schur;
2349: for (i=0;i<N;i++) {
2350: for (j=0;j<i+1;j++) {
2351: #if !defined(PETSC_USE_COMPLEX)
2352: PetscScalar val = mumps->id.schur[i*N+j];
2353: #else
2354: PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2355: #endif
2356: array[i*N+j] = val;
2357: array[j*N+i] = val;
2358: }
2359: }
2360: }
2361: }
2362: MatDenseRestoreArray(St,&array);
2363: *S = St;
2364: return 0;
2365: }
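/*
   A minimal sketch of the Schur-complement workflow served by the two routines above, assuming a
   sequential run, an assembled MATAIJ matrix A of global size n, and that its last nschur rows/columns
   (a hypothetical choice) form the Schur block:

     Mat           F,S;
     IS            is;
     MatFactorInfo info;
     PetscInt      n,nschur = 10;

     MatGetSize(A,&n,NULL);
     ISCreateStride(PETSC_COMM_SELF,nschur,n-nschur,1,&is);   - indices defining the Schur block
     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
     MatFactorSetSchurIS(F,is);                               - dispatches to MatFactorSetSchurIS_MUMPS()
     MatFactorInfoInitialize(&info);
     MatLUFactorSymbolic(F,A,NULL,NULL,&info);
     MatLUFactorNumeric(F,A,&info);
     MatFactorCreateSchurComplement(F,&S,NULL);               - dense copy built by MatFactorCreateSchurComplement_MUMPS()
     MatDestroy(&S); MatDestroy(&F); ISDestroy(&is);
*/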
2367: /* -------------------------------------------------------------------------------------------*/
2368: PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2369: {
2370: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2372: PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));
2373: return 0;
2374: }
2376: PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2377: {
2378: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2380: *ival = mumps->id.ICNTL(icntl);
2381: return 0;
2382: }
2384: /*@
2385: MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2387: Logically Collective on Mat
2389: Input Parameters:
2390: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2391: . icntl - index of MUMPS parameter array ICNTL()
2392: - ival - value of MUMPS ICNTL(icntl)
2394: Options Database:
2395: . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2397: Level: beginner
2399: References:
2400: . * - MUMPS Users' Guide
2402: .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2403: @*/
2404: PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
2405: {
2410: PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
2411: return 0;
2412: }
2414: /*@
2415: MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2417: Logically Collective on Mat
2419: Input Parameters:
2420: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2421: - icntl - index of MUMPS parameter array ICNTL()
2423: Output Parameter:
2424: . ival - value of MUMPS ICNTL(icntl)
2426: Level: beginner
2428: References:
2429: . * - MUMPS Users' Guide
2431: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2432: @*/
2433: PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
2434: {
2439: PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2440: return 0;
2441: }
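/*
   A minimal usage sketch for the ICNTL setter/getter above, assuming F is a MUMPS factored matrix
   obtained from MatGetFactor() (or from PCFactorGetMatrix() when going through KSP/PC):

     PetscInt ival;

     MatMumpsSetIcntl(F,24,1);      - enable null-pivot detection
     MatMumpsGetIcntl(F,24,&ival);  - read the value back; ival is now 1

   The command-line equivalent is -mat_mumps_icntl_24 1.
*/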
2443: /* -------------------------------------------------------------------------------------------*/
2444: PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2445: {
2446: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2448: mumps->id.CNTL(icntl) = val;
2449: return 0;
2450: }
2452: PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2453: {
2454: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2456: *val = mumps->id.CNTL(icntl);
2457: return 0;
2458: }
2460: /*@
2461: MatMumpsSetCntl - Set MUMPS parameter CNTL()
2463: Logically Collective on Mat
2465: Input Parameters:
2466: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2467: . icntl - index of MUMPS parameter array CNTL()
2468: - val - value of MUMPS CNTL(icntl)
2470: Options Database:
2471: . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to val
2473: Level: beginner
2475: References:
2476: . * - MUMPS Users' Guide
2478: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2479: @*/
2480: PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
2481: {
2486: PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));
2487: return 0;
2488: }
2490: /*@
2491: MatMumpsGetCntl - Get MUMPS parameter CNTL()
2493: Logically Collective on Mat
2495: Input Parameters:
2496: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2497: - icntl - index of MUMPS parameter array CNTL()
2499: Output Parameter:
2500: . val - value of MUMPS CNTL(icntl)
2502: Level: beginner
2504: References:
2505: . * - MUMPS Users' Guide
2507: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2508: @*/
2509: PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
2510: {
2515: PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2516: return 0;
2517: }
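/*
   A companion sketch for the real-valued CNTL parameters, under the same assumptions as the ICNTL
   example above:

     PetscReal rval;

     MatMumpsSetCntl(F,1,0.05);     - relative pivoting threshold
     MatMumpsGetCntl(F,1,&rval);    - rval is now 0.05

   The command-line equivalent is -mat_mumps_cntl_1 0.05.
*/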
2519: PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2520: {
2521: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2523: *info = mumps->id.INFO(icntl);
2524: return 0;
2525: }
2527: PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2528: {
2529: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2531: *infog = mumps->id.INFOG(icntl);
2532: return 0;
2533: }
2535: PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2536: {
2537: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2539: *rinfo = mumps->id.RINFO(icntl);
2540: return 0;
2541: }
2543: PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2544: {
2545: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2547: *rinfog = mumps->id.RINFOG(icntl);
2548: return 0;
2549: }
2551: PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
2552: {
2553: Mat Bt = NULL,Btseq = NULL;
2554: PetscBool flg;
2555: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2556: PetscScalar *aa;
2557: PetscInt spnr,*ia,*ja,M,nrhs;
2560: PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);
2561: if (flg) {
2562: MatTransposeGetMat(spRHS,&Bt);
2563: } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT");
2565: MatMumpsSetIcntl(F,30,1);
2567: if (mumps->petsc_size > 1) {
2568: Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
2569: Btseq = b->A;
2570: } else {
2571: Btseq = Bt;
2572: }
2574: MatGetSize(spRHS,&M,&nrhs);
2575: mumps->id.nrhs = nrhs;
2576: mumps->id.lrhs = M;
2577: mumps->id.rhs = NULL;
2579: if (!mumps->myid) {
2580: MatSeqAIJGetArray(Btseq,&aa);
2581: MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
2583: PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);
2584: mumps->id.rhs_sparse = (MumpsScalar*)aa;
2585: } else {
2586: mumps->id.irhs_ptr = NULL;
2587: mumps->id.irhs_sparse = NULL;
2588: mumps->id.nz_rhs = 0;
2589: mumps->id.rhs_sparse = NULL;
2590: }
2591: mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2592: mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
2594: /* solve phase */
2595: /*-------------*/
2596: mumps->id.job = JOB_SOLVE;
2597: PetscMUMPS_c(mumps);
2598: if (mumps->id.INFOG(1) < 0)
2599: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
2601: if (!mumps->myid) {
2602: MatSeqAIJRestoreArray(Btseq,&aa);
2603: MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
2605: }
2606: return 0;
2607: }
2609: /*@
2610: MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2612: Logically Collective on Mat
2614: Input Parameters:
2615: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2616: - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0]
2618: Output Parameter:
2619: . spRHS - requested entries of inverse of A
2621: Level: beginner
2623: References:
2624: . * - MUMPS Users' Guide
2626: .seealso: MatGetFactor(), MatCreateTranspose()
2627: @*/
2628: PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
2629: {
2632: PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));
2633: return 0;
2634: }
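/*
   A minimal sketch of requesting selected entries of inv(A) with the routine above. It assumes a
   sequential run and a factored n x n matrix F produced by MatGetFactor()/MatLUFactorNumeric() with
   MATSOLVERMUMPS; requesting the diagonal of inv(A) is a hypothetical but common choice:

     Mat      spRHST,spRHS;
     PetscInt i;

     MatCreateSeqAIJ(PETSC_COMM_SELF,n,n,1,NULL,&spRHST);   - sparsity pattern marks the wanted entries
     for (i=0; i<n; i++) MatSetValue(spRHST,i,i,1.0,INSERT_VALUES);
     MatAssemblyBegin(spRHST,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(spRHST,MAT_FINAL_ASSEMBLY);
     MatCreateTranspose(spRHST,&spRHS);                     - MatMumpsGetInverse() expects a MATTRANSPOSEMAT
     MatMumpsGetInverse(F,spRHS);                           - requested entries of inv(A) overwrite the values of spRHST
     MatDestroy(&spRHS); MatDestroy(&spRHST);
*/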
2636: PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2637: {
2638: Mat spRHS;
2640: MatCreateTranspose(spRHST,&spRHS);
2641: MatMumpsGetInverse_MUMPS(F,spRHS);
2642: MatDestroy(&spRHS);
2643: return 0;
2644: }
2646: /*@
2647: MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2649: Logically Collective on Mat
2651: Input Parameters:
2652: + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2653: - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0]
2655: Output Parameter:
2656: . spRHST - requested entries of inverse of A^T
2658: Level: beginner
2660: References:
2661: . * - MUMPS Users' Guide
2663: .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
2664: @*/
2665: PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
2666: {
2667: PetscBool flg;
2671: PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);
2674: PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));
2675: return 0;
2676: }
2678: /*@
2679: MatMumpsGetInfo - Get MUMPS parameter INFO()
2681: Logically Collective on Mat
2683: Input Parameters:
2684: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2685: - icntl - index of MUMPS parameter array INFO()
2687: Output Parameter:
2688: . ival - value of MUMPS INFO(icntl)
2690: Level: beginner
2692: References:
2693: . * - MUMPS Users' Guide
2695: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2696: @*/
2697: PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2698: {
2702: PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2703: return 0;
2704: }
2706: /*@
2707: MatMumpsGetInfog - Get MUMPS parameter INFOG()
2709: Logically Collective on Mat
2711: Input Parameters:
2712: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2713: - icntl - index of MUMPS parameter array INFOG()
2715: Output Parameter:
2716: . ival - value of MUMPS INFOG(icntl)
2718: Level: beginner
2720: References:
2721: . * - MUMPS Users' Guide
2723: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2724: @*/
2725: PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2726: {
2730: PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2731: return 0;
2732: }
2734: /*@
2735: MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2737: Logically Collective on Mat
2739: Input Parameters:
2740: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2741: - icntl - index of MUMPS parameter array RINFO()
2743: Output Parameter:
2744: . val - value of MUMPS RINFO(icntl)
2746: Level: beginner
2748: References:
2749: . * - MUMPS Users' Guide
2751: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
2752: @*/
2753: PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2754: {
2758: PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2759: return 0;
2760: }
2762: /*@
2763: MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2765: Logically Collective on Mat
2767: Input Parameters:
2768: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2769: - icntl - index of MUMPS parameter array RINFOG()
2771: Output Parameter:
2772: . val - value of MUMPS RINFOG(icntl)
2774: Level: beginner
2776: References:
2777: . * - MUMPS Users' Guide
2779: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
2780: @*/
2781: PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2782: {
2786: PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2787: return 0;
2788: }
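/*
   A minimal sketch of querying MUMPS statistics through the getters above after a factorization,
   assuming a factored matrix F (for example obtained via KSPGetPC()/PCFactorGetMatrix() after KSPSolve())
   and that the determinant was requested with -mat_mumps_icntl_33 1:

     PetscInt  infog34;
     PetscReal rinfog12,rinfog13;

     MatMumpsGetInfog(F,34,&infog34);    - exponent of the determinant
     MatMumpsGetRinfog(F,12,&rinfog12);  - first component of the determinant mantissa
     MatMumpsGetRinfog(F,13,&rinfog13);  - second component (imaginary part in the complex case)
*/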
2790: /*MC
2791: MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
2792: distributed and sequential matrices via the external package MUMPS.
2794: Works with MATAIJ and MATSBAIJ matrices
2796: Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2798: Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2800: Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
2802: Options Database Keys:
2803: + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2804: . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
2805: . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
2806: . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
2807: . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
2808: . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2809: Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2810: . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
2811: . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements
2812: . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2813: . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2814: . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2815: . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
2816: . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
2817: . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2818: . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2819: . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
2820: . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
2821: . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2822: . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix
2823: . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
2824: . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2825: . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2826: . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2827: . -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2828: . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2829: . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2830: . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2831: . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
2832: . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
2833: . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2834: . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2835: . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2836: . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2837: - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
2838: Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
2840: Level: beginner
2842: Notes:
2843: MUMPS Cholesky does not handle (complex) Hermitian matrices (see http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix will produce an error.
2845: When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2846: $ KSPGetPC(ksp,&pc);
2847: $ PCFactorGetMatrix(pc,&mat);
2848: $ MatMumpsGetInfo(mat,....);
2849: $ MatMumpsGetInfog(mat,....); etc.
2850: Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message.
2852: Using MUMPS with 64-bit integers
2853: MUMPS provides 64-bit integer support in two build modes:
2854: full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2855: requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2857: selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2858: MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but fewer than 2^31 rows and
2859: columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2860: integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2862: With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
2864: Two modes to run MUMPS/PETSc with OpenMP
2865: $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2866: $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2868: $ Use -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as there are cores. For example,
2869: $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2871: To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2872: (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2873: (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2874: libraries, to get good performance you should use multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2875: (PETSc will automatically try to use a threaded BLAS if --with-openmp is provided).
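A configure sketch for this hybrid setup, here assuming an Intel MKL installation pointed to by the MKLROOT environment variable (swap in the BLAS/LAPACK of your system):
$    ./configure --with-openmp --download-hwloc --download-mumps --download-scalapack --with-blaslapack-dir=$MKLROOT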
2877: If you run your code through a job submission system, be aware of caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain the MPI
2878: processes on each compute node. Listing the processes in ascending rank order, we split the processes on a node into consecutive groups of
2879: size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
2880: are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (possibly set
2881: by CPU binding policies in job scripts) and make them available to the master, so that the OpenMP threads spawned by MUMPS can run on those CPUs.
2882: On a multi-socket compute node, MPI rank mapping matters. Continuing the example above, suppose your compute node has two sockets:
2883: if you interleave MPI ranks across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind
2884: MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores in socket 0 and half of the
2885: cores in socket 1, which hurts locality. If instead you map MPI ranks consecutively onto the two sockets, the
2886: problem does not arise. Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
2887: For example, with the Slurm job scheduler one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2888: examine the resulting mapping.
2890: PETSc does not control thread binding in MUMPS, so for best performance one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2891: for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts, as PETSc
2892: calls omp_set_num_threads(m) internally before calling MUMPS; see the sketch below.
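For example, a Slurm job-script sketch combining the two settings above with the earlier two-node, 32-cores-per-node example (the flags are illustrative and scheduler dependent):
$    export OMP_PLACES=threads
$    export OMP_PROC_BIND=spread
$    srun --cpu-bind=verbose -m block:block -n 64 ./test -mat_mumps_use_omp_threads 16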
2894: References:
2895: + * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2896: - * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2898: .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCFactorGetMatrix()
2900: M*/
2902: static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
2903: {
2904: *type = MATSOLVERMUMPS;
2905: return 0;
2906: }
2908: /* MatGetFactor for Seq and MPI AIJ matrices */
2909: static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
2910: {
2911: Mat B;
2912: Mat_MUMPS *mumps;
2913: PetscBool isSeqAIJ;
2914: PetscMPIInt size;
2916: #if defined(PETSC_USE_COMPLEX)
2917: if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
2918: #endif
2919: /* Create the factorization matrix */
2920: PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);
2921: MatCreate(PetscObjectComm((PetscObject)A),&B);
2922: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
2923: PetscStrallocpy("mumps",&((PetscObject)B)->type_name);
2924: MatSetUp(B);
2926: PetscNewLog(B,&mumps);
2928: B->ops->view = MatView_MUMPS;
2929: B->ops->getinfo = MatGetInfo_MUMPS;
2931: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);
2932: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);
2933: PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);
2934: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);
2935: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);
2936: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);
2937: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);
2938: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);
2939: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);
2940: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);
2941: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);
2942: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);
2943: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);
2945: if (ftype == MAT_FACTOR_LU) {
2946: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
2947: B->factortype = MAT_FACTOR_LU;
2948: if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
2949: else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
2950: PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);
2951: mumps->sym = 0;
2952: } else {
2953: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
2954: B->factortype = MAT_FACTOR_CHOLESKY;
2955: if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
2956: else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
2957: PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);
2958: #if defined(PETSC_USE_COMPLEX)
2959: mumps->sym = 2;
2960: #else
2961: if (A->spd_set && A->spd) mumps->sym = 1;
2962: else mumps->sym = 2;
2963: #endif
2964: }
2966: /* set solvertype */
2967: PetscFree(B->solvertype);
2968: PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);
2969: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
2970: if (size == 1) {
2971: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
2972: B->canuseordering = PETSC_TRUE;
2973: }
2974: B->ops->destroy = MatDestroy_MUMPS;
2975: B->data = (void*)mumps;
2977: PetscInitializeMUMPS(A,mumps);
2979: *F = B;
2980: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
2981: return 0;
2982: }
2984: /* MatGetFactor for Seq and MPI SBAIJ matrices */
2985: static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
2986: {
2987: Mat B;
2988: Mat_MUMPS *mumps;
2989: PetscBool isSeqSBAIJ;
2990: PetscMPIInt size;
2992: #if defined(PETSC_USE_COMPLEX)
2993: if (A->hermitian && !A->symmetric) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
2994: #endif
2995: MatCreate(PetscObjectComm((PetscObject)A),&B);
2996: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
2997: PetscStrallocpy("mumps",&((PetscObject)B)->type_name);
2998: MatSetUp(B);
3000: PetscNewLog(B,&mumps);
3001: PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);
3002: if (isSeqSBAIJ) {
3003: mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3004: } else {
3005: mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3006: }
3008: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3009: B->ops->view = MatView_MUMPS;
3010: B->ops->getinfo = MatGetInfo_MUMPS;
3012: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);
3013: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);
3014: PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);
3015: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);
3016: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);
3017: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);
3018: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);
3019: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);
3020: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);
3021: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);
3022: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);
3023: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);
3024: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);
3026: B->factortype = MAT_FACTOR_CHOLESKY;
3027: #if defined(PETSC_USE_COMPLEX)
3028: mumps->sym = 2;
3029: #else
3030: if (A->spd_set && A->spd) mumps->sym = 1;
3031: else mumps->sym = 2;
3032: #endif
3034: /* set solvertype */
3035: PetscFree(B->solvertype);
3036: PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);
3037: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3038: if (size == 1) {
3039: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3040: B->canuseordering = PETSC_TRUE;
3041: }
3042: PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);
3043: B->ops->destroy = MatDestroy_MUMPS;
3044: B->data = (void*)mumps;
3046: PetscInitializeMUMPS(A,mumps);
3048: *F = B;
3049: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3050: return 0;
3051: }
3053: static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
3054: {
3055: Mat B;
3056: Mat_MUMPS *mumps;
3057: PetscBool isSeqBAIJ;
3058: PetscMPIInt size;
3060: /* Create the factorization matrix */
3061: PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);
3062: MatCreate(PetscObjectComm((PetscObject)A),&B);
3063: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3064: PetscStrallocpy("mumps",&((PetscObject)B)->type_name);
3065: MatSetUp(B);
3067: PetscNewLog(B,&mumps);
3068: if (ftype == MAT_FACTOR_LU) {
3069: B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3070: B->factortype = MAT_FACTOR_LU;
3071: if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3072: else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3073: mumps->sym = 0;
3074: PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);
3075: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3077: B->ops->view = MatView_MUMPS;
3078: B->ops->getinfo = MatGetInfo_MUMPS;
3080: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);
3081: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);
3082: PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);
3083: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);
3084: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);
3085: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);
3086: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);
3087: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);
3088: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);
3089: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);
3090: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);
3091: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);
3092: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);
3094: /* set solvertype */
3095: PetscFree(B->solvertype);
3096: PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);
3097: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3098: if (size == 1) {
3099: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3100: B->canuseordering = PETSC_TRUE;
3101: }
3102: B->ops->destroy = MatDestroy_MUMPS;
3103: B->data = (void*)mumps;
3105: PetscInitializeMUMPS(A,mumps);
3107: *F = B;
3108: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3109: return 0;
3110: }
3112: /* MatGetFactor for Seq and MPI SELL matrices */
3113: static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
3114: {
3115: Mat B;
3116: Mat_MUMPS *mumps;
3117: PetscBool isSeqSELL;
3118: PetscMPIInt size;
3120: /* Create the factorization matrix */
3121: PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);
3122: MatCreate(PetscObjectComm((PetscObject)A),&B);
3123: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3124: PetscStrallocpy("mumps",&((PetscObject)B)->type_name);
3125: MatSetUp(B);
3127: PetscNewLog(B,&mumps);
3129: B->ops->view = MatView_MUMPS;
3130: B->ops->getinfo = MatGetInfo_MUMPS;
3132: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);
3133: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);
3134: PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);
3135: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);
3136: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);
3137: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);
3138: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);
3139: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);
3140: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);
3141: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);
3142: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);
3144: if (ftype == MAT_FACTOR_LU) {
3145: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3146: B->factortype = MAT_FACTOR_LU;
3147: if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3148: else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3149: mumps->sym = 0;
3150: PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);
3151: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3153: /* set solvertype */
3154: PetscFree(B->solvertype);
3155: PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);
3156: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3157: if (size == 1) {
3158: /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3159: B->canuseordering = PETSC_TRUE;
3160: }
3161: B->ops->destroy = MatDestroy_MUMPS;
3162: B->data = (void*)mumps;
3164: PetscInitializeMUMPS(A,mumps);
3166: *F = B;
3167: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3168: return 0;
3169: }
3171: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3172: {
3173: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);
3174: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);
3175: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);
3176: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);
3177: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);
3178: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);
3179: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);
3180: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);
3181: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);
3182: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);
3183: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);
3184: return 0;
3185: }