Actual source code: mumps.c
petsc-3.14.6 2021-03-30
2: /*
3: Provides an interface to the MUMPS sparse solver
4: */
5: #include <petscpkg_version.h>
6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
7: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8: #include <../src/mat/impls/sell/mpi/mpisell.h>
10: EXTERN_C_BEGIN
11: #if defined(PETSC_USE_COMPLEX)
12: #if defined(PETSC_USE_REAL_SINGLE)
13: #include <cmumps_c.h>
14: #else
15: #include <zmumps_c.h>
16: #endif
17: #else
18: #if defined(PETSC_USE_REAL_SINGLE)
19: #include <smumps_c.h>
20: #else
21: #include <dmumps_c.h>
22: #endif
23: #endif
24: EXTERN_C_END
25: #define JOB_INIT -1
26: #define JOB_FACTSYMBOLIC 1
27: #define JOB_FACTNUMERIC 2
28: #define JOB_SOLVE 3
29: #define JOB_END -2
31: /* calls to MUMPS */
32: #if defined(PETSC_USE_COMPLEX)
33: #if defined(PETSC_USE_REAL_SINGLE)
34: #define MUMPS_c cmumps_c
35: #else
36: #define MUMPS_c zmumps_c
37: #endif
38: #else
39: #if defined(PETSC_USE_REAL_SINGLE)
40: #define MUMPS_c smumps_c
41: #else
42: #define MUMPS_c dmumps_c
43: #endif
44: #endif
46: /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
47: number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
48: naming convention in PetscMPIInt, PetscBLASInt etc.
49: */
50: typedef MUMPS_INT PetscMUMPSInt;
52: #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
53: #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
54: #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
55: #endif
56: #else
57: #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
58: #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
59: #endif
60: #endif
62: #define MPIU_MUMPSINT MPI_INT
63: #define PETSC_MUMPS_INT_MAX 2147483647
64: #define PETSC_MUMPS_INT_MIN -2147483648
66: /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
67: PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
68: {
70: if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
71: *b = (PetscMUMPSInt)(a);
72: return(0);
73: }
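/* A minimal usage sketch (added for illustration, not part of the original source; A and mumps
   follow the naming conventions of this file, and error checking is omitted as in the rest of
   this listing): narrow a possibly 64-bit PetscInt into a MUMPS integer field, with overflow
   detection in debug builds.

     PetscMUMPSInt n;
     PetscMUMPSIntCast(A->rmap->N,&n);
     mumps->id.n = n;
*/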
75: /* Put these utility routines here since they are only used in this file */
76: PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
77: {
79: PetscInt myval;
80: PetscBool myset;
82: /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
83: PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);
84: if (myset) {PetscMUMPSIntCast(myval,value);}
85: if (set) *set = myset;
86: return(0);
87: }
88: #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
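/* Illustrative sketch (added for clarity; the option name follows the -mat_mumps_icntl_<n>
   convention of this solver interface, the description text is illustrative, and the code is
   assumed to sit inside a PetscOptionsBegin()/PetscOptionsEnd() block where PetscOptionsObject
   is defined):

     PetscMUMPSInt icntl;
     PetscBool     flg;
     PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering","None",mumps->id.ICNTL(7),&icntl,&flg);
     if (flg) mumps->id.ICNTL(7) = icntl;
*/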
90: /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
91: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
92: #define PetscMUMPS_c(mumps) \
93: do { \
94: if (mumps->use_petsc_omp_support) { \
95: if (mumps->is_omp_master) { \
96: PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl); \
97: MUMPS_c(&mumps->id); \
98: PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl); \
99: } \
100: PetscOmpCtrlBarrier(mumps->omp_ctrl); \
101:       /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is \
102:          process-specific, so we only Bcast info[1] (an error code) and leave the others alone (they do \
103:          not have an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p. 82. \
104:          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
105: */ \
106: MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm); \
107: MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL, 0,mumps->omp_comm); \
108: MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0,mumps->omp_comm); \
109: } else { \
110: MUMPS_c(&mumps->id); \
111: } \
112: } while (0)
113: #else
114: #define PetscMUMPS_c(mumps) \
115: do { MUMPS_c(&mumps->id); } while (0)
116: #endif
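/* Every call into MUMPS in this file goes through PetscMUMPS_c(), so the OpenMP bookkeeping
   above is applied uniformly to all phases. A typical call site (a sketch of the pattern used
   throughout the routines below) looks like:

     mumps->id.job = JOB_FACTSYMBOLIC;
     PetscMUMPS_c(mumps);
     if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
*/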
118: /* declare MumpsScalar */
119: #if defined(PETSC_USE_COMPLEX)
120: #if defined(PETSC_USE_REAL_SINGLE)
121: #define MumpsScalar mumps_complex
122: #else
123: #define MumpsScalar mumps_double_complex
124: #endif
125: #else
126: #define MumpsScalar PetscScalar
127: #endif
129: /* macros s.t. indices match MUMPS documentation */
130: #define ICNTL(I) icntl[(I)-1]
131: #define CNTL(I) cntl[(I)-1]
132: #define INFOG(I) infog[(I)-1]
133: #define INFO(I) info[(I)-1]
134: #define RINFOG(I) rinfog[(I)-1]
135: #define RINFO(I) rinfo[(I)-1]
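/* With these macros, control parameters can be addressed with the 1-based numbering used in the
   MUMPS manual. For instance (an illustrative sketch, assuming id is a MUMPS struct such as
   DMUMPS_STRUC_C):

     id.ICNTL(7) = 2;

   expands to id.icntl[6] = 2, i.e. the documented index shifted down by one. */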
137: typedef struct Mat_MUMPS Mat_MUMPS;
138: struct Mat_MUMPS {
139: #if defined(PETSC_USE_COMPLEX)
140: #if defined(PETSC_USE_REAL_SINGLE)
141: CMUMPS_STRUC_C id;
142: #else
143: ZMUMPS_STRUC_C id;
144: #endif
145: #else
146: #if defined(PETSC_USE_REAL_SINGLE)
147: SMUMPS_STRUC_C id;
148: #else
149: DMUMPS_STRUC_C id;
150: #endif
151: #endif
153: MatStructure matstruc;
154: PetscMPIInt myid,petsc_size;
155: PetscMUMPSInt *irn,*jcn; /* the (i,j,v) triplets passed to mumps. */
156: PetscScalar *val,*val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
157: PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */
158: PetscMUMPSInt sym;
159: MPI_Comm mumps_comm;
160: PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */
161: VecScatter scat_rhs, scat_sol; /* used by MatSolve() */
162: PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */
163: PetscMUMPSInt lrhs_loc,nloc_rhs,*irhs_loc;
164: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
165: PetscInt *rhs_nrow,max_nrhs;
166: PetscMPIInt *rhs_recvcounts,*rhs_disps;
167: PetscScalar *rhs_loc,*rhs_recvbuf;
168: #endif
169: Vec b_seq,x_seq;
170: PetscInt ninfo,*info; /* which INFO to display */
171: PetscInt sizeredrhs;
172: PetscScalar *schur_sol;
173: PetscInt schur_sizesol;
174: PetscMUMPSInt *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
175: PetscInt64 cur_ilen,cur_jlen; /* current len of ia_alloc[], ja_alloc[] */
176: PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);
178: /* stuff used by petsc/mumps OpenMP support*/
179: PetscBool use_petsc_omp_support;
180:   PetscOmpCtrl     omp_ctrl;              /* an OpenMP controller with which blocked processes release their CPUs (MPI_Barrier does not provide this guarantee) */
181: MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */
182: PetscInt64 *recvcount; /* a collection of nnz on omp_master */
183: PetscMPIInt tag,omp_comm_size;
184: PetscBool is_omp_master; /* is this rank the master of omp_comm */
185: MPI_Request *reqs;
186: };
188: /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
189: Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
190: */
191: static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
192: {
194: PetscInt nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
197: #if defined(PETSC_USE_64BIT_INDICES)
198: {
199: PetscInt i;
200: if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
201: PetscFree(mumps->ia_alloc);
202: PetscMalloc1(nrow+1,&mumps->ia_alloc);
203: mumps->cur_ilen = nrow+1;
204: }
205: if (nnz > mumps->cur_jlen) {
206: PetscFree(mumps->ja_alloc);
207: PetscMalloc1(nnz,&mumps->ja_alloc);
208: mumps->cur_jlen = nnz;
209: }
210: for (i=0; i<nrow+1; i++) {PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));}
211: for (i=0; i<nnz; i++) {PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));}
212: *ia_mumps = mumps->ia_alloc;
213: *ja_mumps = mumps->ja_alloc;
214: }
215: #else
216: *ia_mumps = ia;
217: *ja_mumps = ja;
218: #endif
219: PetscMUMPSIntCast(nnz,nnz_mumps);
220: return(0);
221: }
223: static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
224: {
228: PetscFree(mumps->id.listvar_schur);
229: PetscFree(mumps->id.redrhs);
230: PetscFree(mumps->schur_sol);
231: mumps->id.size_schur = 0;
232: mumps->id.schur_lld = 0;
233: mumps->id.ICNTL(19) = 0;
234: return(0);
235: }
237: /* solve with rhs in mumps->id.redrhs and return in the same location */
238: static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
239: {
240: Mat_MUMPS *mumps=(Mat_MUMPS*)F->data;
241: Mat S,B,X;
242: MatFactorSchurStatus schurstatus;
243: PetscInt sizesol;
244: PetscErrorCode ierr;
247: MatFactorFactorizeSchurComplement(F);
248: MatFactorGetSchurComplement(F,&S,&schurstatus);
249: MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);
250: MatSetType(B,((PetscObject)S)->type_name);
251: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
252: MatBindToCPU(B,S->boundtocpu);
253: #endif
254: switch (schurstatus) {
255: case MAT_FACTOR_SCHUR_FACTORED:
256: MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);
257: MatSetType(X,((PetscObject)S)->type_name);
258: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
259: MatBindToCPU(X,S->boundtocpu);
260: #endif
261: if (!mumps->id.ICNTL(9)) { /* transpose solve */
262: MatMatSolveTranspose(S,B,X);
263: } else {
264: MatMatSolve(S,B,X);
265: }
266: break;
267: case MAT_FACTOR_SCHUR_INVERTED:
268: sizesol = mumps->id.nrhs*mumps->id.size_schur;
269: if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
270: PetscFree(mumps->schur_sol);
271: PetscMalloc1(sizesol,&mumps->schur_sol);
272: mumps->schur_sizesol = sizesol;
273: }
274: MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);
275: MatSetType(X,((PetscObject)S)->type_name);
276: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
277: MatBindToCPU(X,S->boundtocpu);
278: #endif
279: MatProductCreateWithMat(S,B,NULL,X);
280: if (!mumps->id.ICNTL(9)) { /* transpose solve */
281: MatProductSetType(X,MATPRODUCT_AtB);
282: } else {
283: MatProductSetType(X,MATPRODUCT_AB);
284: }
285: MatProductSetFromOptions(X);
286: MatProductSymbolic(X);
287: MatProductNumeric(X);
289: MatCopy(X,B,SAME_NONZERO_PATTERN);
290: break;
291: default:
292: SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
293: }
294: MatFactorRestoreSchurComplement(F,&S,schurstatus);
295: MatDestroy(&B);
296: MatDestroy(&X);
297: return(0);
298: }
300: static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
301: {
302: Mat_MUMPS *mumps=(Mat_MUMPS*)F->data;
306: if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
307: return(0);
308: }
309: if (!expansion) { /* prepare for the condensation step */
310: PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
311: /* allocate MUMPS internal array to store reduced right-hand sides */
312: if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
313: PetscFree(mumps->id.redrhs);
314: mumps->id.lredrhs = mumps->id.size_schur;
315: PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);
316: mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
317: }
318: mumps->id.ICNTL(26) = 1; /* condensation phase */
319: } else { /* prepare for the expansion step */
320: /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
321: MatMumpsSolveSchur_Private(F);
322: mumps->id.ICNTL(26) = 2; /* expansion phase */
323: PetscMUMPS_c(mumps);
324: if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
325: /* restore defaults */
326: mumps->id.ICNTL(26) = -1;
327: /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
328: if (mumps->id.nrhs > 1) {
329: PetscFree(mumps->id.redrhs);
330: mumps->id.lredrhs = 0;
331: mumps->sizeredrhs = 0;
332: }
333: }
334: return(0);
335: }
337: /*
338:   MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]
340: input:
341: A - matrix in aij,baij or sbaij format
342: shift - 0: C style output triple; 1: Fortran style output triple.
343: reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
344: MAT_REUSE_MATRIX: only the values in v array are updated
345: output:
346: nnz - dim of r, c, and v (number of local nonzero entries of A)
347: r, c, v - row and col index, matrix values (matrix triples)
349:   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
350:   freed with PetscFree(mumps->irn). This is not ideal; the fact that v is ONLY sometimes part of mumps->irn means
351:   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
353: */
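/* Small worked example (added for illustration): for the 2x2 SeqAIJ matrix
     [ 1.0  2.0 ]
     [ 0.0  3.0 ]
   MatConvertToTriples_seqaij_seqaij() below, called with shift=1 (Fortran-style indices, as MUMPS
   expects), yields nnz = 3 and
     r = {1, 1, 2},  c = {1, 2, 2},  v = {1.0, 2.0, 3.0}
   i.e. one (row, col, value) entry per stored nonzero, with 1-based indices. */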
355: PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
356: {
357: const PetscScalar *av;
358: const PetscInt *ai,*aj,*ajj,M=A->rmap->n;
359: PetscInt64 nz,rnz,i,j,k;
360: PetscErrorCode ierr;
361: PetscMUMPSInt *row,*col;
362: Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
365: MatSeqAIJGetArrayRead(A,&av);
366: mumps->val = (PetscScalar*)av;
367: if (reuse == MAT_INITIAL_MATRIX) {
368: nz = aa->nz;
369: ai = aa->i;
370: aj = aa->j;
371: PetscMalloc2(nz,&row,nz,&col);
372: for (i=k=0; i<M; i++) {
373: rnz = ai[i+1] - ai[i];
374: ajj = aj + ai[i];
375: for (j=0; j<rnz; j++) {
376: PetscMUMPSIntCast(i+shift,&row[k]);
377: PetscMUMPSIntCast(ajj[j] + shift,&col[k]);
378: k++;
379: }
380: }
381: mumps->irn = row;
382: mumps->jcn = col;
383: mumps->nnz = nz;
384: }
385: MatSeqAIJRestoreArrayRead(A,&av);
386: return(0);
387: }
389: PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
390: {
392: PetscInt64 nz,i,j,k,r;
393: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
394: PetscMUMPSInt *row,*col;
397: mumps->val = a->val;
398: if (reuse == MAT_INITIAL_MATRIX) {
399: nz = a->sliidx[a->totalslices];
400: PetscMalloc2(nz,&row,nz,&col);
401: for (i=k=0; i<a->totalslices; i++) {
402: for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
403: PetscMUMPSIntCast(8*i+r+shift,&row[k++]);
404: }
405: }
406: for (i=0;i<nz;i++) {PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);}
407: mumps->irn = row;
408: mumps->jcn = col;
409: mumps->nnz = nz;
410: }
411: return(0);
412: }
414: PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
415: {
416: Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data;
417: const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
418: PetscInt64 M,nz,idx=0,rnz,i,j,k,m;
419: PetscInt bs;
421: PetscMUMPSInt *row,*col;
424: MatGetBlockSize(A,&bs);
425: M = A->rmap->N/bs;
426: mumps->val = aa->a;
427: if (reuse == MAT_INITIAL_MATRIX) {
428: ai = aa->i; aj = aa->j;
429: nz = bs2*aa->nz;
430: PetscMalloc2(nz,&row,nz,&col);
431: for (i=0; i<M; i++) {
432: ajj = aj + ai[i];
433: rnz = ai[i+1] - ai[i];
434: for (k=0; k<rnz; k++) {
435: for (j=0; j<bs; j++) {
436: for (m=0; m<bs; m++) {
437: PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);
438: PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);
439: idx++;
440: }
441: }
442: }
443: }
444: mumps->irn = row;
445: mumps->jcn = col;
446: mumps->nnz = nz;
447: }
448: return(0);
449: }
451: PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
452: {
453: const PetscInt *ai, *aj,*ajj;
454: PetscInt bs;
455: PetscInt64 nz,rnz,i,j,k,m;
456: PetscErrorCode ierr;
457: PetscMUMPSInt *row,*col;
458: PetscScalar *val;
459: Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data;
460: const PetscInt bs2=aa->bs2,mbs=aa->mbs;
461: #if defined(PETSC_USE_COMPLEX)
462: PetscBool hermitian;
463: #endif
466: #if defined(PETSC_USE_COMPLEX)
467: MatGetOption(A,MAT_HERMITIAN,&hermitian);
468:   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
469: #endif
470: ai = aa->i;
471: aj = aa->j;
472: MatGetBlockSize(A,&bs);
473: if (reuse == MAT_INITIAL_MATRIX) {
474: nz = aa->nz;
475: PetscMalloc2(bs2*nz,&row,bs2*nz,&col);
476: if (bs>1) {
477: PetscMalloc1(bs2*nz,&mumps->val_alloc);
478: mumps->val = mumps->val_alloc;
479: } else {
480: mumps->val = aa->a;
481: }
482: mumps->irn = row;
483: mumps->jcn = col;
484: } else {
485: if (bs == 1) mumps->val = aa->a;
486: row = mumps->irn;
487: col = mumps->jcn;
488: }
489: val = mumps->val;
491: nz = 0;
492: if (bs>1) {
493: for (i=0; i<mbs; i++) {
494: rnz = ai[i+1] - ai[i];
495: ajj = aj + ai[i];
496: for (j=0; j<rnz; j++) {
497: for (k=0; k<bs; k++) {
498: for (m=0; m<bs; m++) {
499: if (ajj[j]>i || k>=m) {
500: if (reuse == MAT_INITIAL_MATRIX) {
501: PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);
502: PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);
503: }
504: val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
505: }
506: }
507: }
508: }
509: }
510: } else if (reuse == MAT_INITIAL_MATRIX) {
511: for (i=0; i<mbs; i++) {
512: rnz = ai[i+1] - ai[i];
513: ajj = aj + ai[i];
514: for (j=0; j<rnz; j++) {
515: PetscMUMPSIntCast(i+shift,&row[nz]);
516: PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);
517: nz++;
518: }
519: }
520: if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz);
521: }
522: if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
523: return(0);
524: }
526: PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
527: {
528: const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n;
529: PetscInt64 nz,rnz,i,j;
530: const PetscScalar *av,*v1;
531: PetscScalar *val;
532: PetscErrorCode ierr;
533: PetscMUMPSInt *row,*col;
534: Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
535: PetscBool missing;
536: #if defined(PETSC_USE_COMPLEX)
537: PetscBool hermitian;
538: #endif
541: #if defined(PETSC_USE_COMPLEX)
542: MatGetOption(A,MAT_HERMITIAN,&hermitian);
543:   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
544: #endif
545: MatSeqAIJGetArrayRead(A,&av);
546: ai = aa->i; aj = aa->j;
547: adiag = aa->diag;
548: MatMissingDiagonal_SeqAIJ(A,&missing,NULL);
549: if (reuse == MAT_INITIAL_MATRIX) {
550: /* count nz in the upper triangular part of A */
551: nz = 0;
552: if (missing) {
553: for (i=0; i<M; i++) {
554: if (PetscUnlikely(adiag[i] >= ai[i+1])) {
555: for (j=ai[i];j<ai[i+1];j++) {
556: if (aj[j] < i) continue;
557: nz++;
558: }
559: } else {
560: nz += ai[i+1] - adiag[i];
561: }
562: }
563: } else {
564: for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
565: }
566: PetscMalloc2(nz,&row,nz,&col);
567: PetscMalloc1(nz,&val);
568: mumps->nnz = nz;
569: mumps->irn = row;
570: mumps->jcn = col;
571: mumps->val = mumps->val_alloc = val;
573: nz = 0;
574: if (missing) {
575: for (i=0; i<M; i++) {
576: if (PetscUnlikely(adiag[i] >= ai[i+1])) {
577: for (j=ai[i];j<ai[i+1];j++) {
578: if (aj[j] < i) continue;
579: PetscMUMPSIntCast(i+shift,&row[nz]);
580: PetscMUMPSIntCast(aj[j]+shift,&col[nz]);
581: val[nz] = av[j];
582: nz++;
583: }
584: } else {
585: rnz = ai[i+1] - adiag[i];
586: ajj = aj + adiag[i];
587: v1 = av + adiag[i];
588: for (j=0; j<rnz; j++) {
589: PetscMUMPSIntCast(i+shift,&row[nz]);
590: PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);
591: val[nz++] = v1[j];
592: }
593: }
594: }
595: } else {
596: for (i=0; i<M; i++) {
597: rnz = ai[i+1] - adiag[i];
598: ajj = aj + adiag[i];
599: v1 = av + adiag[i];
600: for (j=0; j<rnz; j++) {
601: PetscMUMPSIntCast(i+shift,&row[nz]);
602: PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);
603: val[nz++] = v1[j];
604: }
605: }
606: }
607: } else {
608: nz = 0;
609: val = mumps->val;
610: if (missing) {
611: for (i=0; i <M; i++) {
612: if (PetscUnlikely(adiag[i] >= ai[i+1])) {
613: for (j=ai[i];j<ai[i+1];j++) {
614: if (aj[j] < i) continue;
615: val[nz++] = av[j];
616: }
617: } else {
618: rnz = ai[i+1] - adiag[i];
619: v1 = av + adiag[i];
620: for (j=0; j<rnz; j++) {
621: val[nz++] = v1[j];
622: }
623: }
624: }
625: } else {
626: for (i=0; i <M; i++) {
627: rnz = ai[i+1] - adiag[i];
628: v1 = av + adiag[i];
629: for (j=0; j<rnz; j++) {
630: val[nz++] = v1[j];
631: }
632: }
633: }
634: }
635: MatSeqAIJRestoreArrayRead(A,&av);
636: return(0);
637: }
639: PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
640: {
641: PetscErrorCode ierr;
642: const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
643: PetscInt bs;
644: PetscInt64 rstart,nz,i,j,k,m,jj,irow,countA,countB;
645: PetscMUMPSInt *row,*col;
646: const PetscScalar *av,*bv,*v1,*v2;
647: PetscScalar *val;
648: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data;
649: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data;
650: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data;
651: const PetscInt bs2=aa->bs2,mbs=aa->mbs;
652: #if defined(PETSC_USE_COMPLEX)
653: PetscBool hermitian;
654: #endif
657: #if defined(PETSC_USE_COMPLEX)
658: MatGetOption(A,MAT_HERMITIAN,&hermitian);
659:   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
660: #endif
661: MatGetBlockSize(A,&bs);
662: rstart = A->rmap->rstart;
663: ai = aa->i;
664: aj = aa->j;
665: bi = bb->i;
666: bj = bb->j;
667: av = aa->a;
668: bv = bb->a;
670: garray = mat->garray;
672: if (reuse == MAT_INITIAL_MATRIX) {
673: nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
674: PetscMalloc2(nz,&row,nz,&col);
675: PetscMalloc1(nz,&val);
676:     /* cannot determine the exact mumps->nnz yet because of the SBAIJ format */
677: mumps->irn = row;
678: mumps->jcn = col;
679: mumps->val = mumps->val_alloc = val;
680: } else {
681: val = mumps->val;
682: }
684: jj = 0; irow = rstart;
685: for (i=0; i<mbs; i++) {
686: ajj = aj + ai[i]; /* ptr to the beginning of this row */
687: countA = ai[i+1] - ai[i];
688: countB = bi[i+1] - bi[i];
689: bjj = bj + bi[i];
690: v1 = av + ai[i]*bs2;
691: v2 = bv + bi[i]*bs2;
693: if (bs>1) {
694: /* A-part */
695: for (j=0; j<countA; j++) {
696: for (k=0; k<bs; k++) {
697: for (m=0; m<bs; m++) {
698: if (rstart + ajj[j]*bs>irow || k>=m) {
699: if (reuse == MAT_INITIAL_MATRIX) {
700: PetscMUMPSIntCast(irow + m + shift,&row[jj]);
701: PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);
702: }
703: val[jj++] = v1[j*bs2 + m + k*bs];
704: }
705: }
706: }
707: }
709: /* B-part */
710: for (j=0; j < countB; j++) {
711: for (k=0; k<bs; k++) {
712: for (m=0; m<bs; m++) {
713: if (reuse == MAT_INITIAL_MATRIX) {
714: PetscMUMPSIntCast(irow + m + shift,&row[jj]);
715: PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);
716: }
717: val[jj++] = v2[j*bs2 + m + k*bs];
718: }
719: }
720: }
721: } else {
722: /* A-part */
723: for (j=0; j<countA; j++) {
724: if (reuse == MAT_INITIAL_MATRIX) {
725: PetscMUMPSIntCast(irow + shift,&row[jj]);
726: PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);
727: }
728: val[jj++] = v1[j];
729: }
731: /* B-part */
732: for (j=0; j < countB; j++) {
733: if (reuse == MAT_INITIAL_MATRIX) {
734: PetscMUMPSIntCast(irow + shift,&row[jj]);
735: PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);
736: }
737: val[jj++] = v2[j];
738: }
739: }
740: irow+=bs;
741: }
742: mumps->nnz = jj;
743: return(0);
744: }
746: PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
747: {
748: const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
749: PetscErrorCode ierr;
750: PetscInt64 rstart,nz,i,j,jj,irow,countA,countB;
751: PetscMUMPSInt *row,*col;
752: const PetscScalar *av, *bv,*v1,*v2;
753: PetscScalar *val;
754: Mat Ad,Ao;
755: Mat_SeqAIJ *aa;
756: Mat_SeqAIJ *bb;
759: MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);
760: MatSeqAIJGetArrayRead(Ad,&av);
761: MatSeqAIJGetArrayRead(Ao,&bv);
763: aa = (Mat_SeqAIJ*)(Ad)->data;
764: bb = (Mat_SeqAIJ*)(Ao)->data;
765: ai = aa->i;
766: aj = aa->j;
767: bi = bb->i;
768: bj = bb->j;
770: rstart = A->rmap->rstart;
772: if (reuse == MAT_INITIAL_MATRIX) {
773: nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
774: PetscMalloc2(nz,&row,nz,&col);
775: PetscMalloc1(nz,&val);
776: mumps->nnz = nz;
777: mumps->irn = row;
778: mumps->jcn = col;
779: mumps->val = mumps->val_alloc = val;
780: } else {
781: val = mumps->val;
782: }
784: jj = 0; irow = rstart;
785: for (i=0; i<m; i++) {
786: ajj = aj + ai[i]; /* ptr to the beginning of this row */
787: countA = ai[i+1] - ai[i];
788: countB = bi[i+1] - bi[i];
789: bjj = bj + bi[i];
790: v1 = av + ai[i];
791: v2 = bv + bi[i];
793: /* A-part */
794: for (j=0; j<countA; j++) {
795: if (reuse == MAT_INITIAL_MATRIX) {
796: PetscMUMPSIntCast(irow + shift,&row[jj]);
797: PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);
798: }
799: val[jj++] = v1[j];
800: }
802: /* B-part */
803: for (j=0; j < countB; j++) {
804: if (reuse == MAT_INITIAL_MATRIX) {
805: PetscMUMPSIntCast(irow + shift,&row[jj]);
806: PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);
807: }
808: val[jj++] = v2[j];
809: }
810: irow++;
811: }
812: MatSeqAIJRestoreArrayRead(Ad,&av);
813: MatSeqAIJRestoreArrayRead(Ao,&bv);
814: return(0);
815: }
817: PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
818: {
819: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data;
820: Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data;
821: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data;
822: const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
823: const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
824: const PetscInt bs2=mat->bs2;
825: PetscErrorCode ierr;
826: PetscInt bs;
827: PetscInt64 nz,i,j,k,n,jj,irow,countA,countB,idx;
828: PetscMUMPSInt *row,*col;
829: const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
830: PetscScalar *val;
833: MatGetBlockSize(A,&bs);
834: if (reuse == MAT_INITIAL_MATRIX) {
835: nz = bs2*(aa->nz + bb->nz);
836: PetscMalloc2(nz,&row,nz,&col);
837: PetscMalloc1(nz,&val);
838: mumps->nnz = nz;
839: mumps->irn = row;
840: mumps->jcn = col;
841: mumps->val = mumps->val_alloc = val;
842: } else {
843: val = mumps->val;
844: }
846: jj = 0; irow = rstart;
847: for (i=0; i<mbs; i++) {
848: countA = ai[i+1] - ai[i];
849: countB = bi[i+1] - bi[i];
850: ajj = aj + ai[i];
851: bjj = bj + bi[i];
852: v1 = av + bs2*ai[i];
853: v2 = bv + bs2*bi[i];
855: idx = 0;
856: /* A-part */
857: for (k=0; k<countA; k++) {
858: for (j=0; j<bs; j++) {
859: for (n=0; n<bs; n++) {
860: if (reuse == MAT_INITIAL_MATRIX) {
861: PetscMUMPSIntCast(irow + n + shift,&row[jj]);
862: PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);
863: }
864: val[jj++] = v1[idx++];
865: }
866: }
867: }
869: idx = 0;
870: /* B-part */
871: for (k=0; k<countB; k++) {
872: for (j=0; j<bs; j++) {
873: for (n=0; n<bs; n++) {
874: if (reuse == MAT_INITIAL_MATRIX) {
875: PetscMUMPSIntCast(irow + n + shift,&row[jj]);
876: PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);
877: }
878: val[jj++] = v2[idx++];
879: }
880: }
881: }
882: irow += bs;
883: }
884: return(0);
885: }
887: PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
888: {
889: const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
890: PetscErrorCode ierr;
891: PetscInt64 rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
892: PetscMUMPSInt *row,*col;
893: const PetscScalar *av, *bv,*v1,*v2;
894: PetscScalar *val;
895: Mat Ad,Ao;
896: Mat_SeqAIJ *aa;
897: Mat_SeqAIJ *bb;
898: #if defined(PETSC_USE_COMPLEX)
899: PetscBool hermitian;
900: #endif
903: #if defined(PETSC_USE_COMPLEX)
904: MatGetOption(A,MAT_HERMITIAN,&hermitian);
905:   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
906: #endif
907: MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);
908: MatSeqAIJGetArrayRead(Ad,&av);
909: MatSeqAIJGetArrayRead(Ao,&bv);
911: aa = (Mat_SeqAIJ*)(Ad)->data;
912: bb = (Mat_SeqAIJ*)(Ao)->data;
913: ai = aa->i;
914: aj = aa->j;
915: adiag = aa->diag;
916: bi = bb->i;
917: bj = bb->j;
919: rstart = A->rmap->rstart;
921: if (reuse == MAT_INITIAL_MATRIX) {
922: nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
923: nzb = 0; /* num of upper triangular entries in mat->B */
924: for (i=0; i<m; i++) {
925: nza += (ai[i+1] - adiag[i]);
926: countB = bi[i+1] - bi[i];
927: bjj = bj + bi[i];
928: for (j=0; j<countB; j++) {
929: if (garray[bjj[j]] > rstart) nzb++;
930: }
931: }
933: nz = nza + nzb; /* total nz of upper triangular part of mat */
934: PetscMalloc2(nz,&row,nz,&col);
935: PetscMalloc1(nz,&val);
936: mumps->nnz = nz;
937: mumps->irn = row;
938: mumps->jcn = col;
939: mumps->val = mumps->val_alloc = val;
940: } else {
941: val = mumps->val;
942: }
944: jj = 0; irow = rstart;
945: for (i=0; i<m; i++) {
946: ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
947: v1 = av + adiag[i];
948: countA = ai[i+1] - adiag[i];
949: countB = bi[i+1] - bi[i];
950: bjj = bj + bi[i];
951: v2 = bv + bi[i];
953: /* A-part */
954: for (j=0; j<countA; j++) {
955: if (reuse == MAT_INITIAL_MATRIX) {
956: PetscMUMPSIntCast(irow + shift,&row[jj]);
957: PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);
958: }
959: val[jj++] = v1[j];
960: }
962: /* B-part */
963: for (j=0; j < countB; j++) {
964: if (garray[bjj[j]] > rstart) {
965: if (reuse == MAT_INITIAL_MATRIX) {
966: PetscMUMPSIntCast(irow + shift,&row[jj]);
967: PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);
968: }
969: val[jj++] = v2[j];
970: }
971: }
972: irow++;
973: }
974: MatSeqAIJRestoreArrayRead(Ad,&av);
975: MatSeqAIJRestoreArrayRead(Ao,&bv);
976: return(0);
977: }
979: PetscErrorCode MatDestroy_MUMPS(Mat A)
980: {
982: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
985: PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);
986: VecScatterDestroy(&mumps->scat_rhs);
987: VecScatterDestroy(&mumps->scat_sol);
988: VecDestroy(&mumps->b_seq);
989: VecDestroy(&mumps->x_seq);
990: PetscFree(mumps->id.perm_in);
991: PetscFree2(mumps->irn,mumps->jcn);
992: PetscFree(mumps->val_alloc);
993: PetscFree(mumps->info);
994: MatMumpsResetSchur_Private(mumps);
995: mumps->id.job = JOB_END;
996: PetscMUMPS_c(mumps);
997: if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
998: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
999: if (mumps->use_petsc_omp_support) {
1000: PetscOmpCtrlDestroy(&mumps->omp_ctrl);
1001: PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);
1002: PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);
1003: }
1004: #endif
1005: PetscFree(mumps->ia_alloc);
1006: PetscFree(mumps->ja_alloc);
1007: PetscFree(mumps->recvcount);
1008: PetscFree(mumps->reqs);
1009: PetscFree(mumps->irhs_loc);
1010: if (mumps->mumps_comm != MPI_COMM_NULL) {MPI_Comm_free(&mumps->mumps_comm);}
1011: PetscFree(A->data);
1013: /* clear composed functions */
1014: PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
1015: PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);
1016: PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);
1017: PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);
1018: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);
1019: PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);
1020: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);
1021: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);
1022: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);
1023: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);
1024: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);
1025: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);
1026: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);
1027: return(0);
1028: }
1030: /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1031: static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
1032: {
1033: PetscErrorCode ierr;
1034: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
1035: const PetscMPIInt ompsize=mumps->omp_comm_size;
1036: PetscInt i,m,M,rstart;
1039: MatGetSize(A,&M,NULL);
1040: MatGetLocalSize(A,&m,NULL);
1041: if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
1042: if (ompsize == 1) {
1043: if (!mumps->irhs_loc) {
1044: mumps->nloc_rhs = m;
1045: PetscMalloc1(m,&mumps->irhs_loc);
1046: MatGetOwnershipRange(A,&rstart,NULL);
1047: for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
1048: }
1049: mumps->id.rhs_loc = (MumpsScalar*)array;
1050: } else {
1051: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1052: const PetscInt *ranges;
1053: PetscMPIInt j,k,sendcount,*petsc_ranks,*omp_ranks;
1054: MPI_Group petsc_group,omp_group;
1055: PetscScalar *recvbuf=NULL;
1057: if (mumps->is_omp_master) {
1058: /* Lazily initialize the omp stuff for distributed rhs */
1059: if (!mumps->irhs_loc) {
1060: PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);
1061: PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);
1062: MPI_Comm_group(mumps->petsc_comm,&petsc_group);
1063: MPI_Comm_group(mumps->omp_comm,&omp_group);
1064: for (j=0; j<ompsize; j++) omp_ranks[j] = j;
1065: MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);
1067: /* Populate mumps->irhs_loc[], rhs_nrow[] */
1068: mumps->nloc_rhs = 0;
1069: MatGetOwnershipRanges(A,&ranges);
1070: for (j=0; j<ompsize; j++) {
1071: mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
1072: mumps->nloc_rhs += mumps->rhs_nrow[j];
1073: }
1074: PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);
1075: for (j=k=0; j<ompsize; j++) {
1076: for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
1077: }
1079: PetscFree2(omp_ranks,petsc_ranks);
1080: MPI_Group_free(&petsc_group);
1081: MPI_Group_free(&omp_group);
1082: }
1084: /* Realloc buffers when current nrhs is bigger than what we have met */
1085: if (nrhs > mumps->max_nrhs) {
1086: PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);
1087: PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);
1088: mumps->max_nrhs = nrhs;
1089: }
1091: /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1092: for (j=0; j<ompsize; j++) {PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);}
1093: mumps->rhs_disps[0] = 0;
1094: for (j=1; j<ompsize; j++) {
1095: mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
1096: if (mumps->rhs_disps[j] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
1097: }
1098: recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1099: }
1101: PetscMPIIntCast(m*nrhs,&sendcount);
1102: MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);
1104: if (mumps->is_omp_master) {
1105: if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1106: PetscScalar *dst,*dstbase = mumps->rhs_loc;
1107: for (j=0; j<ompsize; j++) {
1108: const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1109: dst = dstbase;
1110: for (i=0; i<nrhs; i++) {
1111: PetscArraycpy(dst,src,mumps->rhs_nrow[j]);
1112: src += mumps->rhs_nrow[j];
1113: dst += mumps->nloc_rhs;
1114: }
1115: dstbase += mumps->rhs_nrow[j];
1116: }
1117: }
1118: mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
1119: }
1120: #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1121: }
1122: mumps->id.nrhs = nrhs;
1123: mumps->id.nloc_rhs = mumps->nloc_rhs;
1124: mumps->id.lrhs_loc = mumps->nloc_rhs;
1125: mumps->id.irhs_loc = mumps->irhs_loc;
1126: return(0);
1127: }
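/* Sketch of the resulting layout (added for illustration): after MatMumpsSetUpDistRHSInfo(), each
   MUMPS rank has id.rhs_loc pointing to an nloc_rhs x nrhs column-major array and id.irhs_loc[i]
   holding the 1-based global row index of local row i. For example, with local rows {5,6}
   (0-based) and nrhs = 2 right-hand sides b and c:
     irhs_loc = {6, 7}
     rhs_loc  = {b5, b6, c5, c6}
   which matches the distributed dense RHS interface of MUMPS (ICNTL(20)=10). */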
1129: PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
1130: {
1131: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
1132: const PetscScalar *rarray = NULL;
1133: PetscScalar *array;
1134: IS is_iden,is_petsc;
1135: PetscErrorCode ierr;
1136: PetscInt i;
1137: PetscBool second_solve = PETSC_FALSE;
1138: static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;
1141: PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);
1142: PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);
1144: if (A->factorerrortype) {
1145: PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1146: VecSetInf(x);
1147: return(0);
1148: }
1150: mumps->id.nrhs = 1;
1151: if (mumps->petsc_size > 1) {
1152: if (mumps->ICNTL20 == 10) {
1153: mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1154: VecGetArrayRead(b,&rarray);
1155: MatMumpsSetUpDistRHSInfo(A,1,rarray);
1156: } else {
1157:       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
1158: VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);
1159: VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);
1160: if (!mumps->myid) {
1161: VecGetArray(mumps->b_seq,&array);
1162: mumps->id.rhs = (MumpsScalar*)array;
1163: }
1164: }
1165: } else { /* petsc_size == 1 */
1166: mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1167: VecCopy(b,x);
1168: VecGetArray(x,&array);
1169: mumps->id.rhs = (MumpsScalar*)array;
1170: }
1172: /*
1173: handle condensation step of Schur complement (if any)
1174:      We set ICNTL(26) == -1 by default when Schur indices have been provided by the user.
1175:      According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
1176:      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve solve the full system.
1177:      This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
1178: */
1179: if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1180: if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
1181: second_solve = PETSC_TRUE;
1182: MatMumpsHandleSchur_Private(A,PETSC_FALSE);
1183: }
1184: /* solve phase */
1185: /*-------------*/
1186: mumps->id.job = JOB_SOLVE;
1187: PetscMUMPS_c(mumps);
1188: if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1190: /* handle expansion step of Schur complement (if any) */
1191: if (second_solve) {
1192: MatMumpsHandleSchur_Private(A,PETSC_TRUE);
1193: }
1195: if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1196: if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1197:       /* when id.ICNTL(9) changes, the contents of the local solution may change (though not its size lsol_loc), so recreate scat_sol */
1198: VecScatterDestroy(&mumps->scat_sol);
1199: }
1200: if (!mumps->scat_sol) { /* create scatter scat_sol */
1201: PetscInt *isol2_loc=NULL;
1202: ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden); /* from */
1203: PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);
1204: for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
1205: ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc); /* to */
1206: VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);
1207: ISDestroy(&is_iden);
1208: ISDestroy(&is_petsc);
1209: mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1210: }
1212: VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);
1213: VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);
1214: }
1216: if (mumps->petsc_size > 1) {
1217: if (mumps->ICNTL20 == 10) {
1218: VecRestoreArrayRead(b,&rarray);
1219: } else if (!mumps->myid) {
1220: VecRestoreArray(mumps->b_seq,&array);
1221: }
1222: } else {VecRestoreArray(x,&array);}
1224: PetscLogFlops(2.0*mumps->id.RINFO(3));
1225: return(0);
1226: }
1228: PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
1229: {
1230: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
1234: mumps->id.ICNTL(9) = 0;
1235: MatSolve_MUMPS(A,b,x);
1236: mumps->id.ICNTL(9) = 1;
1237: return(0);
1238: }
1240: PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
1241: {
1242: PetscErrorCode ierr;
1243: Mat Bt = NULL;
1244: PetscBool denseX,denseB,flg,flgT;
1245: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
1246: PetscInt i,nrhs,M;
1247: PetscScalar *array;
1248: const PetscScalar *rbray;
1249: PetscInt lsol_loc,nlsol_loc,*idxx,iidx = 0;
1250: PetscMUMPSInt *isol_loc,*isol_loc_save;
1251: PetscScalar *bray,*sol_loc,*sol_loc_save;
1252: IS is_to,is_from;
1253: PetscInt k,proc,j,m,myrstart;
1254: const PetscInt *rstart;
1255: Vec v_mpi,msol_loc;
1256: VecScatter scat_sol;
1257: Vec b_seq;
1258: VecScatter scat_rhs;
1259: PetscScalar *aa;
1260: PetscInt spnr,*ia,*ja;
1261: Mat_MPIAIJ *b = NULL;
1264: PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);
1265: if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
1267: PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);
1268: if (denseB) {
1269: if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
1270: mumps->id.ICNTL(20)= 0; /* dense RHS */
1271: } else { /* sparse B */
1272: if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
1273: PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);
1274:     if (flgT) { /* input B is the transpose of the actual RHS matrix,
1275:                    because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1276: MatTransposeGetMat(B,&Bt);
1277: } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
1278: mumps->id.ICNTL(20)= 1; /* sparse RHS */
1279: }
1281: MatGetSize(B,&M,&nrhs);
1282: mumps->id.nrhs = nrhs;
1283: mumps->id.lrhs = M;
1284: mumps->id.rhs = NULL;
1286: if (mumps->petsc_size == 1) {
1287: PetscScalar *aa;
1288: PetscInt spnr,*ia,*ja;
1289: PetscBool second_solve = PETSC_FALSE;
1291: MatDenseGetArray(X,&array);
1292: mumps->id.rhs = (MumpsScalar*)array;
1294: if (denseB) {
1295: /* copy B to X */
1296: MatDenseGetArrayRead(B,&rbray);
1297: PetscArraycpy(array,rbray,M*nrhs);
1298: MatDenseRestoreArrayRead(B,&rbray);
1299: } else { /* sparse B */
1300: MatSeqAIJGetArray(Bt,&aa);
1301: MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
1302: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1303: PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);
1304: mumps->id.rhs_sparse = (MumpsScalar*)aa;
1305: }
1306: /* handle condensation step of Schur complement (if any) */
1307: if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1308: second_solve = PETSC_TRUE;
1309: MatMumpsHandleSchur_Private(A,PETSC_FALSE);
1310: }
1311: /* solve phase */
1312: /*-------------*/
1313: mumps->id.job = JOB_SOLVE;
1314: PetscMUMPS_c(mumps);
1315: if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1317: /* handle expansion step of Schur complement (if any) */
1318: if (second_solve) {
1319: MatMumpsHandleSchur_Private(A,PETSC_TRUE);
1320: }
1321: if (!denseB) { /* sparse B */
1322: MatSeqAIJRestoreArray(Bt,&aa);
1323: MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
1324: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1325: }
1326: MatDenseRestoreArray(X,&array);
1327: return(0);
1328: }
1330: /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1331: if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
1333: /* create msol_loc to hold mumps local solution */
1334: isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1335: sol_loc_save = (PetscScalar*)mumps->id.sol_loc;
1337: lsol_loc = mumps->id.lsol_loc;
1338: nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */
1339: PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);
1340: mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1341: mumps->id.isol_loc = isol_loc;
1343: VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);
1345: if (denseB) {
1346: if (mumps->ICNTL20 == 10) {
1347: mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1348: MatDenseGetArrayRead(B,&rbray);
1349: MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);
1350: MatDenseRestoreArrayRead(B,&rbray);
1351: MatGetLocalSize(B,&m,NULL);
1352: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);
1353: } else {
1354: mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1355:       /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created here cannot be implemented with an MPI_Gather,
1356:          resulting in very inefficient communication. An optimization is to use VecScatterCreateToZero() to gather B on rank 0,
1357:          then re-arrange B into the desired order on rank 0, which is a local operation.
1358:       */
1360: /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1361: /* wrap dense rhs matrix B into a vector v_mpi */
1362: MatGetLocalSize(B,&m,NULL);
1363: MatDenseGetArray(B,&bray);
1364: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);
1365: MatDenseRestoreArray(B,&bray);
1367: /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1368: if (!mumps->myid) {
1369: PetscInt *idx;
1370: /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1371: PetscMalloc1(nrhs*M,&idx);
1372: MatGetOwnershipRanges(B,&rstart);
1373: k = 0;
1374: for (proc=0; proc<mumps->petsc_size; proc++){
1375: for (j=0; j<nrhs; j++){
1376: for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
1377: }
1378: }
1380: VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);
1381: ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);
1382: ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);
1383: } else {
1384: VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);
1385: ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);
1386: ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);
1387: }
1388: VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);
1389: VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);
1390: ISDestroy(&is_to);
1391: ISDestroy(&is_from);
1392: VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);
1394: if (!mumps->myid) { /* define rhs on the host */
1395: VecGetArray(b_seq,&bray);
1396: mumps->id.rhs = (MumpsScalar*)bray;
1397: VecRestoreArray(b_seq,&bray);
1398: }
1399: }
1400: } else { /* sparse B */
1401: b = (Mat_MPIAIJ*)Bt->data;
1403: /* wrap dense X into a vector v_mpi */
1404: MatGetLocalSize(X,&m,NULL);
1405: MatDenseGetArray(X,&bray);
1406: VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);
1407: MatDenseRestoreArray(X,&bray);
1409: if (!mumps->myid) {
1410: MatSeqAIJGetArray(b->A,&aa);
1411: MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
1412: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1413: PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);
1414: mumps->id.rhs_sparse = (MumpsScalar*)aa;
1415: } else {
1416: mumps->id.irhs_ptr = NULL;
1417: mumps->id.irhs_sparse = NULL;
1418: mumps->id.nz_rhs = 0;
1419: mumps->id.rhs_sparse = NULL;
1420: }
1421: }
1423: /* solve phase */
1424: /*-------------*/
1425: mumps->id.job = JOB_SOLVE;
1426: PetscMUMPS_c(mumps);
1427: if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1429: /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1430: MatDenseGetArray(X,&array);
1431: VecPlaceArray(v_mpi,array);
1433: /* create scatter scat_sol */
1434: MatGetOwnershipRanges(X,&rstart);
1435: /* iidx: index for scatter mumps solution to petsc X */
1437: ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);
1438: PetscMalloc1(nlsol_loc,&idxx);
1439: for (i=0; i<lsol_loc; i++) {
1440:     isol_loc[i] -= 1; /* change Fortran style to C style; sol_loc[i+j*lsol_loc] contains x[isol_loc[i]] of the j-th rhs */
1442: for (proc=0; proc<mumps->petsc_size; proc++){
1443: if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
1444: myrstart = rstart[proc];
1445: k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */
1446: iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */
1447: m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
1448: break;
1449: }
1450: }
1452: for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
1453: }
1454: ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);
1455: VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);
1456: VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);
1457: ISDestroy(&is_from);
1458: ISDestroy(&is_to);
1459: VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);
1460: MatDenseRestoreArray(X,&array);
1462: /* free spaces */
1463: mumps->id.sol_loc = (MumpsScalar*)sol_loc_save;
1464: mumps->id.isol_loc = isol_loc_save;
1466: PetscFree2(sol_loc,isol_loc);
1467: PetscFree(idxx);
1468: VecDestroy(&msol_loc);
1469: VecDestroy(&v_mpi);
1470: if (!denseB) {
1471: if (!mumps->myid) {
1472: b = (Mat_MPIAIJ*)Bt->data;
1473: MatSeqAIJRestoreArray(b->A,&aa);
1474: MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
1475: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1476: }
1477: } else {
1478: if (mumps->ICNTL20 == 0) {
1479: VecDestroy(&b_seq);
1480: VecScatterDestroy(&scat_rhs);
1481: }
1482: }
1483: VecScatterDestroy(&scat_sol);
1484: PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));
1485: return(0);
1486: }
1488: PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1489: {
1491: PetscBool flg;
1492: Mat B;
1495: PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);
1496: if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");
1498: /* Create B=Bt^T that uses Bt's data structure */
1499: MatCreateTranspose(Bt,&B);
1501: MatMatSolve_MUMPS(A,B,X);
1502: MatDestroy(&B);
1503: return(0);
1504: }
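/* Usage sketch (added for illustration; MatMatTransposeSolve() is the public PETSc entry point
   that dispatches here, F is assumed to be a MUMPS factor of A, Bt the AIJ transpose of the rhs
   matrix B, and X a dense matrix): MUMPS wants a sparse RHS in compressed-column form, hence the
   transposed input.

     MatMatTransposeSolve(F,Bt,X);

   Internally Bt is wrapped with MatCreateTranspose() and passed to MatMatSolve_MUMPS() above. */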
1506: #if !defined(PETSC_USE_COMPLEX)
1507: /*
1508: input:
1509: F: numeric factor
1510: output:
1511: nneg: total number of negative pivots
1512: nzero: total number of zero pivots
1513: npos: (global dimension of F) - nneg - nzero
1514: */
1515: PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1516: {
1517: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
1519: PetscMPIInt size;
1522: MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);
1523: /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1524:   if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia\n",mumps->id.ICNTL(13));
1526: if (nneg) *nneg = mumps->id.INFOG(12);
1527: if (nzero || npos) {
1528:     if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
1529: if (nzero) *nzero = mumps->id.INFOG(28);
1530: if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1531: }
1532: return(0);
1533: }
1534: #endif
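/* Usage sketch (added for illustration; all calls below are standard PETSc API, and A is assumed
   to be a real symmetric matrix in (SB)AIJ format):

     Mat           F;
     IS            rowperm,colperm;
     MatFactorInfo info;
     PetscInt      nneg,nzero,npos;
     MatGetOrdering(A,MATORDERINGNATURAL,&rowperm,&colperm);
     MatFactorInfoInitialize(&info);
     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_CHOLESKY,&F);
     MatMumpsSetIcntl(F,13,1);
     MatMumpsSetIcntl(F,24,1);
     MatCholeskyFactorSymbolic(F,A,rowperm,&info);
     MatCholeskyFactorNumeric(F,A,&info);
     MatGetInertia(F,&nneg,&nzero,&npos);

   ICNTL(13)=1 and ICNTL(24)=1 correspond to the checks performed in MatGetInertia_SBAIJMUMPS() above. */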
1536: PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
1537: {
1539: PetscInt i,nreqs;
1540: PetscMUMPSInt *irn,*jcn;
1541: PetscMPIInt count;
1542: PetscInt64 totnnz,remain;
1543: const PetscInt osize=mumps->omp_comm_size;
1544: PetscScalar *val;
1547: if (osize > 1) {
1548: if (reuse == MAT_INITIAL_MATRIX) {
1549: /* master first gathers counts of nonzeros to receive */
1550: if (mumps->is_omp_master) {PetscMalloc1(osize,&mumps->recvcount);}
1551: MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);
1553: /* Then each computes number of send/recvs */
1554: if (mumps->is_omp_master) {
1555:       /* Start from 1 since self-communication is not done via MPI */
1556: nreqs = 0;
1557: for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1558: } else {
1559: nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1560: }
1561:     PetscMalloc1(nreqs*3,&mumps->reqs); /* Triple the requests since we send irn, jcn and val separately */
1563:     /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1564:        MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1565:        might be a prime number > 2^31-1, we have to slice the messages. Since omp_comm_size
1566:        is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1567:     */
1568: nreqs = 0; /* counter for actual send/recvs */
1569: if (mumps->is_omp_master) {
1570: for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1571: PetscMalloc2(totnnz,&irn,totnnz,&jcn);
1572: PetscMalloc1(totnnz,&val);
1574: /* Self communication */
1575: PetscArraycpy(irn,mumps->irn,mumps->nnz);
1576: PetscArraycpy(jcn,mumps->jcn,mumps->nnz);
1577: PetscArraycpy(val,mumps->val,mumps->nnz);
1579: /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1580: PetscFree2(mumps->irn,mumps->jcn);
1581: PetscFree(mumps->val_alloc);
1582: mumps->nnz = totnnz;
1583: mumps->irn = irn;
1584: mumps->jcn = jcn;
1585: mumps->val = mumps->val_alloc = val;
1587: irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1588: jcn += mumps->recvcount[0];
1589: val += mumps->recvcount[0];
1591: /* Remote communication */
1592: for (i=1; i<osize; i++) {
1593: count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1594: remain = mumps->recvcount[i] - count;
1595: while (count>0) {
1596: MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1597: MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1598: MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1599: irn += count;
1600: jcn += count;
1601: val += count;
1602: count = PetscMin(remain,PETSC_MPI_INT_MAX);
1603: remain -= count;
1604: }
1605: }
1606: } else {
1607: irn = mumps->irn;
1608: jcn = mumps->jcn;
1609: val = mumps->val;
1610: count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1611: remain = mumps->nnz - count;
1612: while (count>0) {
1613: MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1614: MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1615: MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1616: irn += count;
1617: jcn += count;
1618: val += count;
1619: count = PetscMin(remain,PETSC_MPI_INT_MAX);
1620: remain -= count;
1621: }
1622: }
1623: } else {
1624: nreqs = 0;
1625: if (mumps->is_omp_master) {
1626: val = mumps->val + mumps->recvcount[0];
1627: for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
1628: count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1629: remain = mumps->recvcount[i] - count;
1630: while (count>0) {
1631: MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1632: val += count;
1633: count = PetscMin(remain,PETSC_MPI_INT_MAX);
1634: remain -= count;
1635: }
1636: }
1637: } else {
1638: val = mumps->val;
1639: count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1640: remain = mumps->nnz - count;
1641: while (count>0) {
1642: MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1643: val += count;
1644: count = PetscMin(remain,PETSC_MPI_INT_MAX);
1645: remain -= count;
1646: }
1647: }
1648: }
1649: MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);
1650:     mumps->tag++; /* It is fine for the above sends/receives to share one MPI tag; advance it for the next gather */
1651: }
1652: return(0);
1653: }
1655: PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1656: {
1657: Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data;
1659: PetscBool isMPIAIJ;
1662:   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* INFOG(1)=-16 with INFO(2)=0 means an empty matrix, which is not treated as an error here */
1663: if (mumps->id.INFOG(1) == -6) {
1664:       PetscInfo2(A,"MatFactorNumeric is called with a structurally singular matrix, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1665: }
1666:     PetscInfo2(A,"MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1667: return(0);
1668: }
1670: (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);
1671: MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);
1673: /* numerical factorization phase */
1674: /*-------------------------------*/
1675: mumps->id.job = JOB_FACTNUMERIC;
1676: if (!mumps->id.ICNTL(18)) { /* A is centralized */
1677: if (!mumps->myid) {
1678: mumps->id.a = (MumpsScalar*)mumps->val;
1679: }
1680: } else {
1681: mumps->id.a_loc = (MumpsScalar*)mumps->val;
1682: }
1683: PetscMUMPS_c(mumps);
1684: if (mumps->id.INFOG(1) < 0) {
1685: if (A->erroriffailure) {
1686: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1687: } else {
1688: if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1689: PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1690: F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1691: } else if (mumps->id.INFOG(1) == -13) {
1692: PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1693: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1694: } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1695:         PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1696: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1697: } else {
1698: PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1699: F->factorerrortype = MAT_FACTOR_OTHER;
1700: }
1701: }
1702: }
1703:   if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.ICNTL(16));
1705: F->assembled = PETSC_TRUE;
1706: mumps->matstruc = SAME_NONZERO_PATTERN;
1707: if (F->schur) { /* reset Schur status to unfactored */
1708: #if defined(PETSC_HAVE_CUDA)
1709: F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1710: #endif
1711: if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1712: mumps->id.ICNTL(19) = 2;
1713: MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);
1714: }
1715: MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);
1716: }
1718:   /* ensure that the ICNTL(19) value later returned by MatMumpsGetIcntl() is always consistent */
1719: if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
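  /* INFO(23) (the number of pivots eliminated on this process) is used below as the length of sol_loc.
     Ranks that are not the omp master never call MUMPS, so clear any stale value to get an empty sol_loc there. */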
1721: if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1722: if (mumps->petsc_size > 1) {
1723: PetscInt lsol_loc;
1724: PetscScalar *sol_loc;
1726: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);
1728: /* distributed solution; Create x_seq=sol_loc for repeated use */
1729: if (mumps->x_seq) {
1730: VecScatterDestroy(&mumps->scat_sol);
1731: PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);
1732: VecDestroy(&mumps->x_seq);
1733: }
1734: lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1735: PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);
1736: mumps->id.lsol_loc = lsol_loc;
1737: mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1738: VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);
1739: }
1740: PetscLogFlops(mumps->id.RINFO(2));
1741: return(0);
1742: }
1744: /* Sets MUMPS options from the options database */
1745: PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
1746: {
1747: Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
1749: PetscMUMPSInt icntl=0;
1750: PetscInt info[80],i,ninfo=80;
1751: PetscBool flg=PETSC_FALSE;
1754: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");
1755: PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);
1756: if (flg) mumps->id.ICNTL(1) = icntl;
1757: PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);
1758: if (flg) mumps->id.ICNTL(2) = icntl;
1759: PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);
1760: if (flg) mumps->id.ICNTL(3) = icntl;
1762: PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);
1763: if (flg) mumps->id.ICNTL(4) = icntl;
1764:   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */
1766:   PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);
1767: if (flg) mumps->id.ICNTL(6) = icntl;
1769:   PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 1=PETSc (sequential only), 3=Scotch, 4=PORD, 5=Metis, 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);
1770: if (flg) {
1771: if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not supported by the PETSc/MUMPS interface for parallel matrices\n");
1772: else mumps->id.ICNTL(7) = icntl;
1773: }
1775: PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);
1776: /* PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL); handled by MatSolveTranspose_MUMPS() */
1777: PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);
1778: PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);
1779: PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);
1780: PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);
1781: PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);
1782: PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);
1783:   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset Schur data, if any */
1784: MatDestroy(&F->schur);
1785: MatMumpsResetSchur_Private(mumps);
1786: }
1788: /* MPICH Fortran MPI_IN_PLACE binding has a bug that prevents the use of 'mpi4py + mpich + mumps', e.g., by Firedrake.
1789: So we turn off distributed RHS for MPICH. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1790: and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1791: */
1792: #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) && defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
1793: mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1794: #else
1795: mumps->ICNTL20 = 0; /* Centralized dense RHS*/
1796: #endif
1797:   PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give MUMPS centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg);
1798: if (flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10\n",(int)mumps->ICNTL20);
1799: #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1800: if (flg && mumps->ICNTL20 == 10) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0\n");
1801: #endif
1802: /* PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL); we only use distributed solution vector */
1804: PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);
1805:   PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);
1806: PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);
1807: if (mumps->id.ICNTL(24)) {
1808:     mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
1809: }
1811: PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);
1812:   PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix has been computed","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);
1813: PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);
1814:   PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);
1815: PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);
1816: /* PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL); */ /* call MatMumpsGetInverse() directly */
1817: PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);
1818:   /* PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);  -- not supported by PETSc API */
1819: PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);
1820: PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);
1821: PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);
1822: PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);
1824: PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);
1825: PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);
1826: PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);
1827: PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);
1828: PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);
1829: PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);
1831: PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);
1833: PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);
1834: if (ninfo) {
1835:     if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must be <= 80\n",ninfo);
1836: PetscMalloc1(ninfo,&mumps->info);
1837: mumps->ninfo = ninfo;
1838: for (i=0; i<ninfo; i++) {
1839:       if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must be between 1 and 80\n",info[i]);
1840: else mumps->info[i] = info[i];
1841: }
1842: }
1844: PetscOptionsEnd();
1845: return(0);
1846: }
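/* Example (illustrative only): the options handled above are normally given on the command line, e.g.
     -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 40 -mat_mumps_icntl_24 1
   to increase the working-space estimate by 40 percent and turn on null-pivot detection. */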
1848: PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
1849: {
1851: PetscInt nthreads=0;
1852: MPI_Comm newcomm=MPI_COMM_NULL;
1855: mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1856: MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);
1857: MPI_Comm_rank(mumps->petsc_comm,&mumps->myid); /* so that code like "if (!myid)" still works even if mumps_comm is different */
1859: PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);
1860: if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1861: PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);
1862: if (mumps->use_petsc_omp_support) {
1863: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1864: PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);
1865: PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);
1866: #else
1867: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n");
1868: #endif
1869: } else {
1870: mumps->omp_comm = PETSC_COMM_SELF;
1871: mumps->mumps_comm = mumps->petsc_comm;
1872: mumps->is_omp_master = PETSC_TRUE;
1873: }
1874: MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);
1875: mumps->reqs = NULL;
1876: mumps->tag = 0;
1878: /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1879: if (mumps->mumps_comm != MPI_COMM_NULL) {
1880: MPI_Comm_dup(mumps->mumps_comm,&newcomm);
1881: mumps->mumps_comm = newcomm;
1882: }
1884: mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1885: mumps->id.job = JOB_INIT;
1886:   mumps->id.par = 1;  /* host participates in factorization and solve */
1887: mumps->id.sym = mumps->sym;
1889: PetscMUMPS_c(mumps);
1890: if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
1892: /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1893: For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1894: */
1895: MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm); /* see MUMPS-5.1.2 Manual Section 9 */
1896: MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);
1898: mumps->scat_rhs = NULL;
1899: mumps->scat_sol = NULL;
1901: /* set PETSc-MUMPS default options - override MUMPS default */
1902: mumps->id.ICNTL(3) = 0;
1903: mumps->id.ICNTL(4) = 0;
1904: if (mumps->petsc_size == 1) {
1905: mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
1906: mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */
1907: } else {
1908: mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
1909: mumps->id.ICNTL(21) = 1; /* distributed solution */
1910: }
1912: /* schur */
1913: mumps->id.size_schur = 0;
1914: mumps->id.listvar_schur = NULL;
1915: mumps->id.schur = NULL;
1916: mumps->sizeredrhs = 0;
1917: mumps->schur_sol = NULL;
1918: mumps->schur_sizesol = 0;
1919: return(0);
1920: }
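/* Usage note (illustrative, assuming PETSc was configured --with-openmp --download-hwloc): running, e.g.,
     mpirun -n 4 ./app -mat_mumps_use_omp_threads 2
   should make MUMPS run on 2 of the 4 MPI ranks with 2 OpenMP threads each, while the rest of PETSc stays
   flat-MPI; see the MATSOLVERMUMPS manual page for details. */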
1922: PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1923: {
1927: if (mumps->id.INFOG(1) < 0) {
1928: if (A->erroriffailure) {
1929: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1930: } else {
1931: if (mumps->id.INFOG(1) == -6) {
1932: PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1933: F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1934: } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1935:         PetscInfo2(F,"problem with workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1936: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1937:       } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* INFOG(1)=-16 with INFO(2)=0 indicates an empty matrix */
1938: PetscInfo(F,"Empty matrix\n");
1939: } else {
1940: PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1941: F->factorerrortype = MAT_FACTOR_OTHER;
1942: }
1943: }
1944: }
1945: return(0);
1946: }
1948: /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */
1949: PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1950: {
1951: Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
1953: Vec b;
1954: const PetscInt M = A->rmap->N;
1957: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
1959: /* Set MUMPS options from the options database */
1960: PetscSetMUMPSFromOptions(F,A);
1962: (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);
1963: MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);
1965: /* analysis phase */
1966: /*----------------*/
1967: mumps->id.job = JOB_FACTSYMBOLIC;
1968: mumps->id.n = M;
1969: switch (mumps->id.ICNTL(18)) {
1970: case 0: /* centralized assembled matrix input */
1971: if (!mumps->myid) {
1972: mumps->id.nnz = mumps->nnz;
1973: mumps->id.irn = mumps->irn;
1974: mumps->id.jcn = mumps->jcn;
1975: if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
1976:       if (mumps->id.ICNTL(7) == 1) { /* use the user-provided matrix ordering - assuming r = c ordering */
1977: if (!mumps->myid) {
1978: const PetscInt *idx;
1979: PetscInt i;
1981: PetscMalloc1(M,&mumps->id.perm_in);
1982: ISGetIndices(r,&idx);
1983: for (i=0; i<M; i++) {PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));} /* perm_in[]: start from 1, not 0! */
1984: ISRestoreIndices(r,&idx);
1985: }
1986: }
1987: }
1988: break;
1989: case 3: /* distributed assembled matrix input (size>1) */
1990: mumps->id.nnz_loc = mumps->nnz;
1991: mumps->id.irn_loc = mumps->irn;
1992: mumps->id.jcn_loc = mumps->jcn;
1993: if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
1994: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1995: MatCreateVecs(A,NULL,&b);
1996: VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);
1997: VecDestroy(&b);
1998: }
1999: break;
2000: }
2001: PetscMUMPS_c(mumps);
2002: MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);
2004: F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2005: F->ops->solve = MatSolve_MUMPS;
2006: F->ops->solvetranspose = MatSolveTranspose_MUMPS;
2007: F->ops->matsolve = MatMatSolve_MUMPS;
2008: F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2009: return(0);
2010: }
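/* A minimal sketch (illustrative only, error checking omitted) of the usual way the LU path above is reached
   from user code through KSP/PC; A is assumed to be an assembled MATAIJ matrix and b, x conforming vectors:

     KSP ksp;
     PC  pc;
     KSPCreate(PETSC_COMM_WORLD,&ksp);
     KSPSetOperators(ksp,A,A);
     KSPSetType(ksp,KSPPREONLY);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCLU);
     PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);   // selects MatLUFactorSymbolic_AIJMUMPS() et al.
     KSPSetFromOptions(ksp);
     KSPSolve(ksp,b,x);
     KSPDestroy(&ksp);
*/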
2012: /* Note the Petsc r and c permutations are ignored */
2013: PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
2014: {
2015: Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
2017: Vec b;
2018: const PetscInt M = A->rmap->N;
2021: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
2023: /* Set MUMPS options from the options database */
2024: PetscSetMUMPSFromOptions(F,A);
2026: (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);
2027: MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);
2029: /* analysis phase */
2030: /*----------------*/
2031: mumps->id.job = JOB_FACTSYMBOLIC;
2032: mumps->id.n = M;
2033: switch (mumps->id.ICNTL(18)) {
2034: case 0: /* centralized assembled matrix input */
2035: if (!mumps->myid) {
2036: mumps->id.nnz = mumps->nnz;
2037: mumps->id.irn = mumps->irn;
2038: mumps->id.jcn = mumps->jcn;
2039: if (mumps->id.ICNTL(6)>1) {
2040: mumps->id.a = (MumpsScalar*)mumps->val;
2041: }
2042: }
2043: break;
2044: case 3: /* distributed assembled matrix input (size>1) */
2045: mumps->id.nnz_loc = mumps->nnz;
2046: mumps->id.irn_loc = mumps->irn;
2047: mumps->id.jcn_loc = mumps->jcn;
2048: if (mumps->id.ICNTL(6)>1) {
2049: mumps->id.a_loc = (MumpsScalar*)mumps->val;
2050: }
2051: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2052: MatCreateVecs(A,NULL,&b);
2053: VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);
2054: VecDestroy(&b);
2055: }
2056: break;
2057: }
2058: PetscMUMPS_c(mumps);
2059: MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);
2061: F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2062: F->ops->solve = MatSolve_MUMPS;
2063: F->ops->solvetranspose = MatSolveTranspose_MUMPS;
2064: return(0);
2065: }
2067: /* Note the Petsc r permutation and factor info are ignored */
2068: PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
2069: {
2070: Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
2072: Vec b;
2073: const PetscInt M = A->rmap->N;
2076: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
2078: /* Set MUMPS options from the options database */
2079: PetscSetMUMPSFromOptions(F,A);
2081: (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);
2082: MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);
2084: /* analysis phase */
2085: /*----------------*/
2086: mumps->id.job = JOB_FACTSYMBOLIC;
2087: mumps->id.n = M;
2088: switch (mumps->id.ICNTL(18)) {
2089: case 0: /* centralized assembled matrix input */
2090: if (!mumps->myid) {
2091: mumps->id.nnz = mumps->nnz;
2092: mumps->id.irn = mumps->irn;
2093: mumps->id.jcn = mumps->jcn;
2094: if (mumps->id.ICNTL(6)>1) {
2095: mumps->id.a = (MumpsScalar*)mumps->val;
2096: }
2097: }
2098: break;
2099: case 3: /* distributed assembled matrix input (size>1) */
2100: mumps->id.nnz_loc = mumps->nnz;
2101: mumps->id.irn_loc = mumps->irn;
2102: mumps->id.jcn_loc = mumps->jcn;
2103: if (mumps->id.ICNTL(6)>1) {
2104: mumps->id.a_loc = (MumpsScalar*)mumps->val;
2105: }
2106: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2107: MatCreateVecs(A,NULL,&b);
2108: VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);
2109: VecDestroy(&b);
2110: }
2111: break;
2112: }
2113: PetscMUMPS_c(mumps);
2114: MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);
2116: F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2117: F->ops->solve = MatSolve_MUMPS;
2118: F->ops->solvetranspose = MatSolve_MUMPS;
2119: F->ops->matsolve = MatMatSolve_MUMPS;
2120: F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2121: #if defined(PETSC_USE_COMPLEX)
2122: F->ops->getinertia = NULL;
2123: #else
2124: F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2125: #endif
2126: return(0);
2127: }
2129: PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
2130: {
2131: PetscErrorCode ierr;
2132: PetscBool iascii;
2133: PetscViewerFormat format;
2134: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
2137: /* check if matrix is mumps type */
2138: if (A->ops->solve != MatSolve_MUMPS) return(0);
2140: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
2141: if (iascii) {
2142: PetscViewerGetFormat(viewer,&format);
2143: if (format == PETSC_VIEWER_ASCII_INFO) {
2144: PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");
2145: PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);
2146: PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);
2147: PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));
2148: PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));
2149: PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));
2150: PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));
2151: PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));
2152: PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));
2153: PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));
2154: PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));
2155: PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));
2156: PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));
2157: if (mumps->id.ICNTL(11)>0) {
2158: PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));
2159: PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));
2160: PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));
2161: PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));
2162: PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));
2163: PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));
2164: }
2165: PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));
2166: PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d \n",mumps->id.ICNTL(13));
2167: PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));
2168: /* ICNTL(15-17) not used */
2169: PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));
2170: PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));
2171: PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d \n",mumps->id.ICNTL(20));
2172: PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));
2173: PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));
2174:       PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory that can be allocated locally):%d \n",mumps->id.ICNTL(23));
2176: PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));
2177: PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));
2178: PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d \n",mumps->id.ICNTL(26));
2179: PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d \n",mumps->id.ICNTL(27));
2180: PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));
2181: PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));
2183: PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));
2184:       PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (which factors may be discarded during factorization): %d \n",mumps->id.ICNTL(31));
2185: PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));
2186: PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));
2187: PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));
2188: PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));
2190: PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));
2191: PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));
2192: PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));
2193: PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));
2194: PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));
2195: PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));
2197:       /* information local to each processor */
2198: PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");
2199: PetscViewerASCIIPushSynchronized(viewer);
2200: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));
2201: PetscViewerFlush(viewer);
2202: PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");
2203: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));
2204: PetscViewerFlush(viewer);
2205: PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");
2206: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));
2207: PetscViewerFlush(viewer);
2209:       PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n");
2210: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));
2211: PetscViewerFlush(viewer);
2213:       PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n");
2214: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));
2215: PetscViewerFlush(viewer);
2217: PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");
2218: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));
2219: PetscViewerFlush(viewer);
2221: if (mumps->ninfo && mumps->ninfo <= 80){
2222: PetscInt i;
2223: for (i=0; i<mumps->ninfo; i++){
2224: PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);
2225: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));
2226: PetscViewerFlush(viewer);
2227: }
2228: }
2229: PetscViewerASCIIPopSynchronized(viewer);
2231: if (!mumps->myid) { /* information from the host */
2232: PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));
2233: PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));
2234: PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));
2235: PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));
2237: PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));
2238: PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));
2239: PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));
2240: PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));
2241:         PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d \n",mumps->id.INFOG(7));
2242: PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));
2243: PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));
2244:         PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space to store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));
2245: PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));
2246: PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));
2247: PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));
2248:         PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compresses after factorization): %d \n",mumps->id.INFOG(14));
2249: PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));
2250: PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));
2251: PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));
2252: PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));
2253: PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));
2254: PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));
2255: PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));
2256: PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));
2257: PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));
2258: PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));
2259: PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));
2260: PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));
2261: PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));
2262: PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));
2263: PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));
2264: PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));
2265: PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));
2266: PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));
2267: PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));
2268: PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));
2269: PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));
2270: PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));
2271: }
2272: }
2273: }
2274: return(0);
2275: }
2277: PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2278: {
2279: Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2282: info->block_size = 1.0;
2283: info->nz_allocated = mumps->id.INFOG(20);
2284: info->nz_used = mumps->id.INFOG(20);
2285: info->nz_unneeded = 0.0;
2286: info->assemblies = 0.0;
2287: info->mallocs = 0.0;
2288: info->memory = 0.0;
2289: info->fill_ratio_given = 0;
2290: info->fill_ratio_needed = 0;
2291: info->factor_mallocs = 0;
2292: return(0);
2293: }
2295: /* -------------------------------------------------------------------------------------------*/
2296: PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2297: {
2298: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2299: const PetscScalar *arr;
2300: const PetscInt *idxs;
2301: PetscInt size,i;
2302: PetscErrorCode ierr;
2305: ISGetLocalSize(is,&size);
2306: if (mumps->petsc_size > 1) {
2307: PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */
2309: ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2310: MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);
2311:     if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported by PETSc\n");
2312: }
2314: /* Schur complement matrix */
2315: MatDestroy(&F->schur);
2316: MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);
2317: MatDenseGetArrayRead(F->schur,&arr);
2318: mumps->id.schur = (MumpsScalar*)arr;
2319: mumps->id.size_schur = size;
2320: mumps->id.schur_lld = size;
2321: MatDenseRestoreArrayRead(F->schur,&arr);
2322: if (mumps->sym == 1) {
2323: MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);
2324: }
2326: /* MUMPS expects Fortran style indices */
2327: PetscFree(mumps->id.listvar_schur);
2328: PetscMalloc1(size,&mumps->id.listvar_schur);
2329: ISGetIndices(is,&idxs);
2330: for (i=0; i<size; i++) {PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));}
2331: ISRestoreIndices(is,&idxs);
2332: if (mumps->petsc_size > 1) {
2333: mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
2334: } else {
2335: if (F->factortype == MAT_FACTOR_LU) {
2336: mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2337: } else {
2338: mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2339: }
2340: }
2341:   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2342: mumps->id.ICNTL(26) = -1;
2343: return(0);
2344: }
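/* A minimal sketch (illustrative only, error checking omitted) of how the Schur support above is typically
   driven from user code; ksp/pc are assumed to be a KSPPREONLY/PCLU pair already associated with A, and `is`
   lists the Schur variables owned by rank 0:

     Mat F,S;
     PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);
     PCFactorSetUpMatSolverType(pc);              // create the factor matrix F before factorization
     PCFactorGetMatrix(pc,&F);
     MatFactorSetSchurIS(F,is);                   // dispatches to MatFactorSetSchurIS_MUMPS() above
     KSPSolve(ksp,b,x);                           // analysis and factorization happen here
     MatFactorCreateSchurComplement(F,&S,NULL);   // dense copy of the Schur complement
     MatDestroy(&S);
*/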
2346: /* -------------------------------------------------------------------------------------------*/
2347: PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
2348: {
2349: Mat St;
2350: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2351: PetscScalar *array;
2352: #if defined(PETSC_USE_COMPLEX)
2353: PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0);
2354: #endif
2358: if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2359: MatCreate(PETSC_COMM_SELF,&St);
2360: MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);
2361: MatSetType(St,MATDENSE);
2362: MatSetUp(St);
2363: MatDenseGetArray(St,&array);
2364:   if (!mumps->sym) { /* MUMPS always returns a full matrix */
2365: if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2366: PetscInt i,j,N=mumps->id.size_schur;
2367: for (i=0;i<N;i++) {
2368: for (j=0;j<N;j++) {
2369: #if !defined(PETSC_USE_COMPLEX)
2370: PetscScalar val = mumps->id.schur[i*N+j];
2371: #else
2372: PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2373: #endif
2374: array[j*N+i] = val;
2375: }
2376: }
2377: } else { /* stored by columns */
2378: PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);
2379: }
2380: } else { /* either full or lower-triangular (not packed) */
2381: if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2382: PetscInt i,j,N=mumps->id.size_schur;
2383: for (i=0;i<N;i++) {
2384: for (j=i;j<N;j++) {
2385: #if !defined(PETSC_USE_COMPLEX)
2386: PetscScalar val = mumps->id.schur[i*N+j];
2387: #else
2388: PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2389: #endif
2390: array[i*N+j] = val;
2391: array[j*N+i] = val;
2392: }
2393: }
2394: } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2395: PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);
2396: } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2397: PetscInt i,j,N=mumps->id.size_schur;
2398: for (i=0;i<N;i++) {
2399: for (j=0;j<i+1;j++) {
2400: #if !defined(PETSC_USE_COMPLEX)
2401: PetscScalar val = mumps->id.schur[i*N+j];
2402: #else
2403: PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2404: #endif
2405: array[i*N+j] = val;
2406: array[j*N+i] = val;
2407: }
2408: }
2409: }
2410: }
2411: MatDenseRestoreArray(St,&array);
2412: *S = St;
2413: return(0);
2414: }
2416: /* -------------------------------------------------------------------------------------------*/
2417: PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2418: {
2420: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2423: PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));
2424: return(0);
2425: }
2427: PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2428: {
2429: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2432: *ival = mumps->id.ICNTL(icntl);
2433: return(0);
2434: }
2436: /*@
2437: MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2439: Logically Collective on Mat
2441: Input Parameters:
2442: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2443: . icntl - index of MUMPS parameter array ICNTL()
2444: - ival - value of MUMPS ICNTL(icntl)
2446: Options Database:
2447: . -mat_mumps_icntl_<icntl> <ival>
2449: Level: beginner
2451: References:
2452: . MUMPS Users' Guide
2454: .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2455: @*/
2456: PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
2457: {
2462: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2465: PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
2466: return(0);
2467: }
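/* Example (illustrative): once the factor matrix F exists (after MatGetFactor() or PCFactorGetMatrix()),
   MUMPS parameters can be set/queried programmatically instead of through the options database:

     PetscInt ival;
     MatMumpsSetIcntl(F,24,1);      // enable null-pivot detection
     MatMumpsSetCntl(F,3,1.e-12);   // threshold used by the null-pivot detection (a sample value)
     MatMumpsGetIcntl(F,24,&ival);
*/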
2469: /*@
2470: MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2472: Logically Collective on Mat
2474: Input Parameters:
2475: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2476: - icntl - index of MUMPS parameter array ICNTL()
2478: Output Parameter:
2479: . ival - value of MUMPS ICNTL(icntl)
2481: Level: beginner
2483: References:
2484: . MUMPS Users' Guide
2486: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2487: @*/
2488: PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
2489: {
2494: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2497: PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2498: return(0);
2499: }
2501: /* -------------------------------------------------------------------------------------------*/
2502: PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2503: {
2504: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2507: mumps->id.CNTL(icntl) = val;
2508: return(0);
2509: }
2511: PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2512: {
2513: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2516: *val = mumps->id.CNTL(icntl);
2517: return(0);
2518: }
2520: /*@
2521: MatMumpsSetCntl - Set MUMPS parameter CNTL()
2523: Logically Collective on Mat
2525: Input Parameters:
2526: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2527: . icntl - index of MUMPS parameter array CNTL()
2528: - val - value of MUMPS CNTL(icntl)
2530: Options Database:
2531: . -mat_mumps_cntl_<icntl> <val>
2533: Level: beginner
2535: References:
2536: . MUMPS Users' Guide
2538: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2539: @*/
2540: PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
2541: {
2546: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2549: PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));
2550: return(0);
2551: }
2553: /*@
2554: MatMumpsGetCntl - Get MUMPS parameter CNTL()
2556: Logically Collective on Mat
2558: Input Parameters:
2559: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2560: - icntl - index of MUMPS parameter array CNTL()
2562: Output Parameter:
2563: . val - value of MUMPS CNTL(icntl)
2565: Level: beginner
2567: References:
2568: . MUMPS Users' Guide
2570: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2571: @*/
2572: PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
2573: {
2578: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2581: PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2582: return(0);
2583: }
2585: PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2586: {
2587: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2590: *info = mumps->id.INFO(icntl);
2591: return(0);
2592: }
2594: PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2595: {
2596: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2599: *infog = mumps->id.INFOG(icntl);
2600: return(0);
2601: }
2603: PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2604: {
2605: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2608: *rinfo = mumps->id.RINFO(icntl);
2609: return(0);
2610: }
2612: PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2613: {
2614: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2617: *rinfog = mumps->id.RINFOG(icntl);
2618: return(0);
2619: }
2621: PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
2622: {
2624: Mat Bt = NULL,Btseq = NULL;
2625: PetscBool flg;
2626: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2627: PetscScalar *aa;
2628: PetscInt spnr,*ia,*ja,M,nrhs;
2632: PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);
2633: if (flg) {
2634: MatTransposeGetMat(spRHS,&Bt);
2635:   } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT");
2637: MatMumpsSetIcntl(F,30,1);
2639: if (mumps->petsc_size > 1) {
2640: Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
2641: Btseq = b->A;
2642: } else {
2643: Btseq = Bt;
2644: }
2646: MatGetSize(spRHS,&M,&nrhs);
2647: mumps->id.nrhs = nrhs;
2648: mumps->id.lrhs = M;
2649: mumps->id.rhs = NULL;
2651: if (!mumps->myid) {
2652: MatSeqAIJGetArray(Btseq,&aa);
2653: MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
2654: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2655: PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);
2656: mumps->id.rhs_sparse = (MumpsScalar*)aa;
2657: } else {
2658: mumps->id.irhs_ptr = NULL;
2659: mumps->id.irhs_sparse = NULL;
2660: mumps->id.nz_rhs = 0;
2661: mumps->id.rhs_sparse = NULL;
2662: }
2663: mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2664: mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
2666: /* solve phase */
2667: /*-------------*/
2668: mumps->id.job = JOB_SOLVE;
2669: PetscMUMPS_c(mumps);
2670: if (mumps->id.INFOG(1) < 0)
2671: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
2673: if (!mumps->myid) {
2674: MatSeqAIJRestoreArray(Btseq,&aa);
2675: MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
2676: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2677: }
2678: return(0);
2679: }
2681: /*@
2682: MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2684: Logically Collective on Mat
2686: Input Parameters:
2687: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2688: - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0]
2690: Output Parameter:
2691: . spRHS - requested entries of inverse of A
2693: Level: beginner
2695: References:
2696: . MUMPS Users' Guide
2698: .seealso: MatGetFactor(), MatCreateTranspose()
2699: @*/
2700: PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
2701: {
2706: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2707: PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));
2708: return(0);
2709: }
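/* Example (illustrative sketch): to request selected entries of inv(A), build a sequential sparse MATAIJ
   matrix Bt whose nonzero pattern marks the wanted entries of A^T (held on rank 0; the numerical values are
   ignored on input), wrap it with MatCreateTranspose(), and pass the wrapper here; on return Bt contains the
   requested entries of the inverse (see the manual page above):

     Mat spRHS;
     MatCreateTranspose(Bt,&spRHS);
     MatMumpsGetInverse(F,spRHS);
     MatDestroy(&spRHS);
*/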
2711: PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2712: {
2714: Mat spRHS;
2717: MatCreateTranspose(spRHST,&spRHS);
2718: MatMumpsGetInverse_MUMPS(F,spRHS);
2719: MatDestroy(&spRHS);
2720: return(0);
2721: }
2723: /*@
2724: MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2726: Logically Collective on Mat
2728: Input Parameters:
2729: + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2730: - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0]
2732: Output Parameter:
2733: . spRHST - requested entries of inverse of A^T
2735: Level: beginner
2737: References:
2738: . MUMPS Users' Guide
2740: .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
2741: @*/
2742: PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
2743: {
2745: PetscBool flg;
2749: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2750: PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);
2751:   if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be a MATAIJ matrix");
2753: PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));
2754: return(0);
2755: }
2757: /*@
2758: MatMumpsGetInfo - Get MUMPS parameter INFO()
2760: Logically Collective on Mat
2762: Input Parameters:
2763: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2764: - icntl - index of MUMPS parameter array INFO()
2766: Output Parameter:
2767: . ival - value of MUMPS INFO(icntl)
2769: Level: beginner
2771: References:
2772: . MUMPS Users' Guide
2774: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2775: @*/
2776: PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2777: {
2782: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2784: PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2785: return(0);
2786: }
2788: /*@
2789: MatMumpsGetInfog - Get MUMPS parameter INFOG()
2791: Logically Collective on Mat
2793: Input Parameters:
2794: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2795: - icntl - index of MUMPS parameter array INFOG()
2797: Output Parameter:
2798: . ival - value of MUMPS INFOG(icntl)
2800: Level: beginner
2802: References:
2803: . MUMPS Users' Guide
2805: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2806: @*/
2807: PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2808: {
2813: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2815: PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2816: return(0);
2817: }
2819: /*@
2820: MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2822: Logically Collective on Mat
2824: Input Parameters:
2825: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2826: - icntl - index of MUMPS parameter array RINFO()
2828: Output Parameter:
2829: . val - value of MUMPS RINFO(icntl)
2831: Level: beginner
2833: References:
2834: . MUMPS Users' Guide
2836: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
2837: @*/
2838: PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2839: {
2844: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2846: PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2847: return(0);
2848: }
2850: /*@
2851: MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2853: Logically Collective on Mat
2855: Input Parameters:
2856: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2857: - icntl - index of MUMPS parameter array RINFOG()
2859: Output Parameter:
2860: . val - value of MUMPS RINFOG(icntl)
2862: Level: beginner
2864: References:
2865: . MUMPS Users' Guide
2867: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
2868: @*/
2869: PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2870: {
2875: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2877: PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2878: return(0);
2879: }
2881: /*MC
2882: MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
2883: distributed and sequential matrices via the external package MUMPS.
2885: Works with MATAIJ and MATSBAIJ matrices
2887: Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2889:   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode while the non-MUMPS part of the code runs in flat-MPI mode. See details below.
2891:   Use -pc_type lu or -pc_type cholesky together with -pc_factor_mat_solver_type mumps to use this direct solver
2893: Options Database Keys:
2894: + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2895: .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
2896: . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
2897: . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
2898: .  -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales the matrix (0 to 7)
2899: .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 1=PETSc (sequential only), 3=Scotch, 4=PORD, 5=Metis
2900: . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
2901: .  -mat_mumps_icntl_10 - ICNTL(10): maximum number of steps of iterative refinement
2902: . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2903: . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2904: . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2905: . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
2906: . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
2907: .  -mat_mumps_icntl_20 - ICNTL(20): gives MUMPS a centralized (0) or distributed (10) dense RHS
2908: . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2909: .  -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (MB) that MUMPS can allocate per processor
2910: . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
2911: .  -mat_mumps_icntl_25 - ICNTL(25): computes a solution of a deficient matrix and a null space basis
2912: .  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase when a Schur complement matrix is present
2913: .  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2914: . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2915: .  -mat_mumps_icntl_30 - ICNTL(30): computes a user-specified set of entries of inv(A)
2916: . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2917: .  -mat_mumps_icntl_33 - ICNTL(33): computes the determinant
2918: . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2919: . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2920: . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2921: . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
2922: . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
2923: . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2924: . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2925: . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2926: . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2927: -  -mat_mumps_use_omp_threads [m] - runs MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
2928:                                    The default for m is typically the number of cores per CPU package (socket), as reported by hwloc and suggested by the MUMPS manual.
2930:   If run sequentially, one can use the PETSc-provided ordering with the option -mat_mumps_icntl_7 1; a sample invocation is shown below.
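   For example, a typical invocation might look like the following sketch (the executable name ./ex2 and the option values are only placeholders):
$     mpirun -n 4 ./ex2 -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_4 2 -mat_mumps_icntl_14 30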
2932: Level: beginner
2934: Notes:
2935:     MUMPS Cholesky does not handle (complex) Hermitian matrices (see http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix generates an error.
2937: When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2938: $ KSPGetPC(ksp,&pc);
2939: $ PCFactorGetMatrix(pc,&mat);
2940: $ MatMumpsGetInfo(mat,....);
2941: $ MatMumpsGetInfog(mat,....); etc.
2942:    Alternatively, run with -ksp_error_if_not_converged; the program then stops and the MUMPS information is printed in the error message.
2944:    There are two modes to run MUMPS/PETSc with OpenMP:
2946: $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2947: $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2949: $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2950: $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2952: To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2953: (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2954: (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2955:    libraries, to get good performance you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2956:    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2958:    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain the MPI
2959:    processes on each compute node. Listing the processes in ascending rank order, we split the processes on a node into consecutive groups of
2960:    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
2961:    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (as may be set
2962:    by CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on those CPUs.
2963:    On a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose each compute node has two sockets.
2964:    If you interleave MPI ranks on the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind
2965:    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores in socket 0 and half of the
2966:    cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the
2967:    problem does not occur. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
2968:    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2969:    examine the mapping result, as in the sketch below.
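   A sketch of such a Slurm invocation, reusing the two-node/64-rank scenario above:
$     srun --cpu-bind=verbose -m block:block -n 64 ./test -mat_mumps_use_omp_threads 16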
2971:    PETSc does not control thread binding in MUMPS. So to get the best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2972:    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread (see the sketch below). One does not need to export OMP_NUM_THREADS=m in job scripts, as PETSc
2973:    calls omp_set_num_threads(m) internally before calling MUMPS.
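   For example, a job script might contain (a sketch based on the settings suggested above):
$     export OMP_PLACES=threads
$     export OMP_PROC_BIND=spread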
2975: References:
2976: +   1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2977: - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2979: .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix()
2981: M*/
2983: static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
2984: {
2986: *type = MATSOLVERMUMPS;
2987: return(0);
2988: }
2990: /* MatGetFactor for Seq and MPI AIJ matrices */
2991: static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
2992: {
2993: Mat B;
2995: Mat_MUMPS *mumps;
2996: PetscBool isSeqAIJ;
2997: PetscMPIInt size;
3000: #if defined(PETSC_USE_COMPLEX)
3001: if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3002: #endif
3003: /* Create the factorization matrix */
3004: PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);
3005: MatCreate(PetscObjectComm((PetscObject)A),&B);
3006: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3007: PetscStrallocpy("mumps",&((PetscObject)B)->type_name);
3008: MatSetUp(B);
3010: PetscNewLog(B,&mumps);
3012: B->ops->view = MatView_MUMPS;
3013: B->ops->getinfo = MatGetInfo_MUMPS;
3015: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);
3016: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);
3017: PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);
3018: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);
3019: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);
3020: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);
3021: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);
3022: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);
3023: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);
3024: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);
3025: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);
3026: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);
3027: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);
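  /* MUMPS SYM parameter set below: 0 = unsymmetric, 1 = symmetric positive definite, 2 = general symmetric */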
3029: if (ftype == MAT_FACTOR_LU) {
3030: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3031: B->factortype = MAT_FACTOR_LU;
3032: if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3033: else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3034: mumps->sym = 0;
3035: } else {
3036: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3037: B->factortype = MAT_FACTOR_CHOLESKY;
3038: if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3039: else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3040: #if defined(PETSC_USE_COMPLEX)
3041: mumps->sym = 2;
3042: #else
3043: if (A->spd_set && A->spd) mumps->sym = 1;
3044: else mumps->sym = 2;
3045: #endif
3046: }
3048: /* set solvertype */
3049: PetscFree(B->solvertype);
3050: PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);
3051: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3052: if (size == 1) {
3053:     /* MUMPS can use the PETSc ordering with "-mat_mumps_icntl_7 1" when sequential, so generate the ordering even if it may not be used */
3054: B->useordering = PETSC_TRUE;
3055: }
3057: B->ops->destroy = MatDestroy_MUMPS;
3058: B->data = (void*)mumps;
3060: PetscInitializeMUMPS(A,mumps);
3062: *F = B;
3063: return(0);
3064: }
3066: /* MatGetFactor for Seq and MPI SBAIJ matrices */
3067: static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
3068: {
3069: Mat B;
3071: Mat_MUMPS *mumps;
3072: PetscBool isSeqSBAIJ;
3073: PetscMPIInt size;
3076: #if defined(PETSC_USE_COMPLEX)
3077: if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3078: #endif
3079: MatCreate(PetscObjectComm((PetscObject)A),&B);
3080: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3081: PetscStrallocpy("mumps",&((PetscObject)B)->type_name);
3082: MatSetUp(B);
3084: PetscNewLog(B,&mumps);
3085: PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);
3086: if (isSeqSBAIJ) {
3087: mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3088: } else {
3089: mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3090: }
3092: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3093: B->ops->view = MatView_MUMPS;
3094: B->ops->getinfo = MatGetInfo_MUMPS;
3096: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);
3097: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);
3098: PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);
3099: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);
3100: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);
3101: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);
3102: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);
3103: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);
3104: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);
3105: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);
3106: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);
3107: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);
3108: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);
3110: B->factortype = MAT_FACTOR_CHOLESKY;
3111: #if defined(PETSC_USE_COMPLEX)
3112: mumps->sym = 2;
3113: #else
3114: if (A->spd_set && A->spd) mumps->sym = 1;
3115: else mumps->sym = 2;
3116: #endif
3118: /* set solvertype */
3119: PetscFree(B->solvertype);
3120: PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);
3121: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3122: if (size == 1) {
3123:     /* MUMPS can use the PETSc ordering with "-mat_mumps_icntl_7 1" when sequential, so generate the ordering even if it may not be used */
3124: B->useordering = PETSC_TRUE;
3125: }
3127: B->ops->destroy = MatDestroy_MUMPS;
3128: B->data = (void*)mumps;
3130: PetscInitializeMUMPS(A,mumps);
3132: *F = B;
3133: return(0);
3134: }
3136: static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
3137: {
3138: Mat B;
3140: Mat_MUMPS *mumps;
3141: PetscBool isSeqBAIJ;
3142: PetscMPIInt size;
3145: /* Create the factorization matrix */
3146: PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);
3147: MatCreate(PetscObjectComm((PetscObject)A),&B);
3148: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3149: PetscStrallocpy("mumps",&((PetscObject)B)->type_name);
3150: MatSetUp(B);
3152: PetscNewLog(B,&mumps);
3153: if (ftype == MAT_FACTOR_LU) {
3154: B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3155: B->factortype = MAT_FACTOR_LU;
3156: if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3157: else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3158: mumps->sym = 0;
3159:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3161: B->ops->view = MatView_MUMPS;
3162: B->ops->getinfo = MatGetInfo_MUMPS;
3164: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);
3165: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);
3166: PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);
3167: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);
3168: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);
3169: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);
3170: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);
3171: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);
3172: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);
3173: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);
3174: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);
3175: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);
3176: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);
3178: /* set solvertype */
3179: PetscFree(B->solvertype);
3180: PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);
3181: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3182: if (size == 1) {
3183:     /* MUMPS can use the PETSc ordering with "-mat_mumps_icntl_7 1" when sequential, so generate the ordering even if it may not be used */
3184: B->useordering = PETSC_TRUE;
3185: }
3187: B->ops->destroy = MatDestroy_MUMPS;
3188: B->data = (void*)mumps;
3190: PetscInitializeMUMPS(A,mumps);
3192: *F = B;
3193: return(0);
3194: }
3196: /* MatGetFactor for Seq and MPI SELL matrices */
3197: static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
3198: {
3199: Mat B;
3201: Mat_MUMPS *mumps;
3202: PetscBool isSeqSELL;
3203: PetscMPIInt size;
3206: /* Create the factorization matrix */
3207: PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);
3208: MatCreate(PetscObjectComm((PetscObject)A),&B);
3209: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3210: PetscStrallocpy("mumps",&((PetscObject)B)->type_name);
3211: MatSetUp(B);
3213: PetscNewLog(B,&mumps);
3215: B->ops->view = MatView_MUMPS;
3216: B->ops->getinfo = MatGetInfo_MUMPS;
3218: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);
3219: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);
3220: PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);
3221: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);
3222: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);
3223: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);
3224: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);
3225: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);
3226: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);
3227: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);
3228: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);
3230: if (ftype == MAT_FACTOR_LU) {
3231: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3232: B->factortype = MAT_FACTOR_LU;
3233: if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3234: else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3235: mumps->sym = 0;
3236: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3238: /* set solvertype */
3239: PetscFree(B->solvertype);
3240: PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);
3241: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3242: if (size == 1) {
3243:     /* MUMPS can use the PETSc ordering with "-mat_mumps_icntl_7 1" when sequential, so generate the ordering even if it may not be used */
3244: B->useordering = PETSC_TRUE;
3245: }
3247: B->ops->destroy = MatDestroy_MUMPS;
3248: B->data = (void*)mumps;
3250: PetscInitializeMUMPS(A,mumps);
3252: *F = B;
3253: return(0);
3254: }
3256: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3257: {
3261: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);
3262: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);
3263: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);
3264: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);
3265: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);
3266: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);
3267: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);
3268: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);
3269: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);
3270: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);
3271: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);
3272: return(0);
3273: }