Actual source code: superlu_dist.c
petsc-3.3-p7 2013-05-11
2: /*
3: Provides an interface to the SuperLU_DIST_2.2 sparse solver
4: */
6: #include <../src/mat/impls/aij/seq/aij.h>
7: #include <../src/mat/impls/aij/mpi/mpiaij.h>
8: #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
9: #include <stdlib.h>
10: #endif
12: #if defined(PETSC_USE_64BIT_INDICES)
13: /* ugly SuperLU_Dist variable telling it to use long long int */
14: #define _LONGINT
15: #endif
17: EXTERN_C_BEGIN
18: #if defined(PETSC_USE_COMPLEX)
19: #include <superlu_zdefs.h>
20: #else
21: #include <superlu_ddefs.h>
22: #endif
23: EXTERN_C_END
25: /*
26: GLOBAL - The sparse matrix and right hand side are all stored initially on process 0. Should be called centralized
27: DISTRIBUTED - The sparse matrix and right hand size are initially stored across the entire MPI communicator.
28: */
29: typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
30: const char *SuperLU_MatInputModes[] = {"GLOBAL","DISTRIBUTED","SuperLU_MatInputMode","PETSC_",0};
32: typedef struct {
33: int_t nprow,npcol,*row,*col;
34: gridinfo_t grid;
35: superlu_options_t options;
36: SuperMatrix A_sup;
37: ScalePermstruct_t ScalePermstruct;
38: LUstruct_t LUstruct;
39: int StatPrint;
40: SuperLU_MatInputMode MatInputMode;
41: SOLVEstruct_t SOLVEstruct;
42: fact_t FactPattern;
43: MPI_Comm comm_superlu;
44: #if defined(PETSC_USE_COMPLEX)
45: doublecomplex *val;
46: #else
47: double *val;
48: #endif
49: PetscBool matsolve_iscalled,matmatsolve_iscalled;
50: PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
51: } Mat_SuperLU_DIST;
53: extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
54: extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,Mat,const MatFactorInfo *);
55: extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
56: extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
57: extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
58: extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,Mat,IS,IS,const MatFactorInfo *);
59: extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
63: PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
64: {
65: PetscErrorCode ierr;
66: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
67: PetscBool flg;
70: if (lu && lu->CleanUpSuperLU_Dist) {
71: /* Deallocate SuperLU_DIST storage */
72: if (lu->MatInputMode == GLOBAL) {
73: Destroy_CompCol_Matrix_dist(&lu->A_sup);
74: } else {
75: Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
76: if ( lu->options.SolveInitialized ) {
77: #if defined(PETSC_USE_COMPLEX)
78: zSolveFinalize(&lu->options, &lu->SOLVEstruct);
79: #else
80: dSolveFinalize(&lu->options, &lu->SOLVEstruct);
81: #endif
82: }
83: }
84: Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct);
85: ScalePermstructFree(&lu->ScalePermstruct);
86: LUstructFree(&lu->LUstruct);
88: /* Release the SuperLU_DIST process grid. */
89: superlu_gridexit(&lu->grid);
91: MPI_Comm_free(&(lu->comm_superlu));
92: }
93: PetscFree(A->spptr);
95: PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&flg);
96: if (flg) {
97: MatDestroy_SeqAIJ(A);
98: } else {
99: MatDestroy_MPIAIJ(A);
100: }
101: return(0);
102: }
106: PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
107: {
108: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
109: PetscErrorCode ierr;
110: PetscMPIInt size;
111: PetscInt m=A->rmap->n,M=A->rmap->N,N=A->cmap->N;
112: SuperLUStat_t stat;
113: double berr[1];
114: PetscScalar *bptr;
115: PetscInt nrhs=1;
116: Vec x_seq;
117: IS iden;
118: VecScatter scat;
119: int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
122: MPI_Comm_size(((PetscObject)A)->comm,&size);
123: if (size > 1 && lu->MatInputMode == GLOBAL) {
124: /* global mat input, convert b to x_seq */
125: VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);
126: ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);
127: VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);
128: ISDestroy(&iden);
130: VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
131: VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
132: VecGetArray(x_seq,&bptr);
133: } else { /* size==1 || distributed mat input */
134: if (lu->options.SolveInitialized && !lu->matsolve_iscalled){
135: /* see comments in MatMatSolve() */
136: #if defined(PETSC_USE_COMPLEX)
137: zSolveFinalize(&lu->options, &lu->SOLVEstruct);
138: #else
139: dSolveFinalize(&lu->options, &lu->SOLVEstruct);
140: #endif
141: lu->options.SolveInitialized = NO;
142: }
143: VecCopy(b_mpi,x);
144: VecGetArray(x,&bptr);
145: }
146:
147: if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
149: PStatInit(&stat); /* Initialize the statistics variables. */
150: if (lu->MatInputMode == GLOBAL) {
151: #if defined(PETSC_USE_COMPLEX)
152: pzgssvx_ABglobal(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,M,nrhs,
153: &lu->grid,&lu->LUstruct,berr,&stat,&info);
154: #else
155: pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr,M,nrhs,
156: &lu->grid,&lu->LUstruct,berr,&stat,&info);
157: #endif
158: } else { /* distributed mat input */
159: #if defined(PETSC_USE_COMPLEX)
160: pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid,
161: &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
162: #else
163: pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,
164: &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
165: #endif
166: }
167: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
169: if (lu->options.PrintStat) {
170: PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
171: }
172: PStatFree(&stat);
173:
174: if (size > 1 && lu->MatInputMode == GLOBAL) {
175: /* convert seq x to mpi x */
176: VecRestoreArray(x_seq,&bptr);
177: VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
178: VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
179: VecScatterDestroy(&scat);
180: VecDestroy(&x_seq);
181: } else {
182: VecRestoreArray(x,&bptr);
183: lu->matsolve_iscalled = PETSC_TRUE;
184: lu->matmatsolve_iscalled = PETSC_FALSE;
185: }
186: return(0);
187: }
191: PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X)
192: {
193: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
194: PetscErrorCode ierr;
195: PetscMPIInt size;
196: PetscInt M=A->rmap->N,m=A->rmap->n,nrhs;
197: SuperLUStat_t stat;
198: double berr[1];
199: PetscScalar *bptr;
200: int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
201: PetscBool flg;
204: if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
205: PetscObjectTypeCompareAny((PetscObject)B_mpi,&flg,MATSEQDENSE,MATMPIDENSE,PETSC_NULL);
206: if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
207: PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,PETSC_NULL);
208: if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
209:
210: MPI_Comm_size(((PetscObject)A)->comm,&size);
211: if (size > 1 && lu->MatInputMode == GLOBAL) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatInputMode=GLOBAL for nproc %d>1 is not supported",size);
212: /* size==1 or distributed mat input */
213: if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled){
214: /* communication pattern of SOLVEstruct is unlikely created for matmatsolve,
215: thus destroy it and create a new SOLVEstruct.
216: Otherwise it may result in memory corruption or incorrect solution
217: See src/mat/examples/tests/ex125.c */
218: #if defined(PETSC_USE_COMPLEX)
219: zSolveFinalize(&lu->options, &lu->SOLVEstruct);
220: #else
221: dSolveFinalize(&lu->options, &lu->SOLVEstruct);
222: #endif
223: lu->options.SolveInitialized = NO;
224: }
225: MatCopy(B_mpi,X,SAME_NONZERO_PATTERN);
227: MatGetSize(B_mpi,PETSC_NULL,&nrhs);
228:
229: PStatInit(&stat); /* Initialize the statistics variables. */
230: MatGetArray(X,&bptr);
231: if (lu->MatInputMode == GLOBAL) { /* size == 1 */
232: #if defined(PETSC_USE_COMPLEX)
233: pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, M, nrhs,
234: &lu->grid, &lu->LUstruct, berr, &stat, &info);
235: #else
236: pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, M, nrhs,
237: &lu->grid, &lu->LUstruct, berr, &stat, &info);
238: #endif
239: } else { /* distributed mat input */
240: #if defined(PETSC_USE_COMPLEX)
241: pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid,
242: &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
243: #else
244: pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,
245: &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
246: #endif
247: }
248: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
249: MatRestoreArray(X,&bptr);
251: if (lu->options.PrintStat) {
252: PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
253: }
254: PStatFree(&stat);
255: lu->matsolve_iscalled = PETSC_FALSE;
256: lu->matmatsolve_iscalled = PETSC_TRUE;
257: return(0);
258: }
263: PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
264: {
265: Mat *tseq,A_seq = PETSC_NULL;
266: Mat_SeqAIJ *aa,*bb;
267: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
268: PetscErrorCode ierr;
269: PetscInt M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
270: m=A->rmap->n, colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
271: int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
272: PetscMPIInt size;
273: SuperLUStat_t stat;
274: double *berr=0;
275: IS isrow;
276: PetscLogDouble time0,time,time_min,time_max;
277: Mat F_diag=PETSC_NULL;
278: #if defined(PETSC_USE_COMPLEX)
279: doublecomplex *av, *bv;
280: #else
281: double *av, *bv;
282: #endif
285: MPI_Comm_size(((PetscObject)A)->comm,&size);
286:
287: if (lu->options.PrintStat) { /* collect time for mat conversion */
288: MPI_Barrier(((PetscObject)A)->comm);
289: PetscGetTime(&time0);
290: }
292: if (lu->MatInputMode == GLOBAL) { /* global mat input */
293: if (size > 1) { /* convert mpi A to seq mat A */
294: ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);
295: MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);
296: ISDestroy(&isrow);
297:
298: A_seq = *tseq;
299: PetscFree(tseq);
300: aa = (Mat_SeqAIJ*)A_seq->data;
301: } else {
302: PetscBool flg;
303: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);
304: if (flg) {
305: Mat_MPIAIJ *At = (Mat_MPIAIJ*)A->data;
306: A = At->A;
307: }
308: aa = (Mat_SeqAIJ*)A->data;
309: }
311: /* Convert Petsc NR matrix to SuperLU_DIST NC.
312: Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
313: if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
314: Destroy_CompCol_Matrix_dist(&lu->A_sup);
315: if (lu->FactPattern == SamePattern_SameRowPerm){
316: lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
317: } else { /* lu->FactPattern == SamePattern */
318: Destroy_LU(N, &lu->grid, &lu->LUstruct);
319: lu->options.Fact = SamePattern;
320: }
321: }
322: #if defined(PETSC_USE_COMPLEX)
323: zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
324: #else
325: dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
326: #endif
328: /* Create compressed column matrix A_sup. */
329: #if defined(PETSC_USE_COMPLEX)
330: zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
331: #else
332: dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
333: #endif
334: } else { /* distributed mat input */
335: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
336: aa=(Mat_SeqAIJ*)(mat->A)->data;
337: bb=(Mat_SeqAIJ*)(mat->B)->data;
338: ai=aa->i; aj=aa->j;
339: bi=bb->i; bj=bb->j;
340: #if defined(PETSC_USE_COMPLEX)
341: av=(doublecomplex*)aa->a;
342: bv=(doublecomplex*)bb->a;
343: #else
344: av=aa->a;
345: bv=bb->a;
346: #endif
347: rstart = A->rmap->rstart;
348: nz = aa->nz + bb->nz;
349: garray = mat->garray;
350:
351: if (lu->options.Fact == DOFACT) {/* first numeric factorization */
352: #if defined(PETSC_USE_COMPLEX)
353: zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
354: #else
355: dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
356: #endif
357: } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
358: /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* this leads to crash! However, see SuperLU_DIST_2.5/EXAMPLE/pzdrive2.c */
359: if (lu->FactPattern == SamePattern_SameRowPerm){
360: lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
361: } else {
362: Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
363: lu->options.Fact = SamePattern;
364: }
365: }
366: nz = 0;
367: for ( i=0; i<m; i++ ) {
368: lu->row[i] = nz;
369: countA = ai[i+1] - ai[i];
370: countB = bi[i+1] - bi[i];
371: ajj = aj + ai[i]; /* ptr to the beginning of this row */
372: bjj = bj + bi[i];
374: /* B part, smaller col index */
375: colA_start = rstart + ajj[0]; /* the smallest global col index of A */
376: jB = 0;
377: for (j=0; j<countB; j++){
378: jcol = garray[bjj[j]];
379: if (jcol > colA_start) {
380: jB = j;
381: break;
382: }
383: lu->col[nz] = jcol;
384: lu->val[nz++] = *bv++;
385: if (j==countB-1) jB = countB;
386: }
388: /* A part */
389: for (j=0; j<countA; j++){
390: lu->col[nz] = rstart + ajj[j];
391: lu->val[nz++] = *av++;
392: }
394: /* B part, larger col index */
395: for (j=jB; j<countB; j++){
396: lu->col[nz] = garray[bjj[j]];
397: lu->val[nz++] = *bv++;
398: }
399: }
400: lu->row[m] = nz;
401: #if defined(PETSC_USE_COMPLEX)
402: zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
403: #else
404: dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
405: #endif
406: }
407: if (lu->options.PrintStat) {
408: PetscGetTime(&time);
409: time0 = time - time0;
410: }
412: /* Factor the matrix. */
413: PStatInit(&stat); /* Initialize the statistics variables. */
414: if (lu->MatInputMode == GLOBAL) { /* global mat input */
415: #if defined(PETSC_USE_COMPLEX)
416: pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,&lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
417: #else
418: pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,&lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
419: #endif
420: } else { /* distributed mat input */
421: #if defined(PETSC_USE_COMPLEX)
422: pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,&lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
423: if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
424: #else
425: pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,&lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
426: if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
427: #endif
428: }
430: if (lu->MatInputMode == GLOBAL && size > 1){
431: MatDestroy(&A_seq);
432: }
434: if (lu->options.PrintStat) {
435: MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
436: MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
437: MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
438: time = time/size; /* average time */
439: PetscPrintf(((PetscObject)A)->comm, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n %g / %g / %g\n",time_max,time_min,time);
440: PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
441: }
442: PStatFree(&stat);
443: if (size > 1){
444: F_diag = ((Mat_MPIAIJ *)(F)->data)->A;
445: F_diag->assembled = PETSC_TRUE;
446: }
447: (F)->assembled = PETSC_TRUE;
448: (F)->preallocated = PETSC_TRUE;
449: lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
450: return(0);
451: }
453: /* Note the Petsc r and c permutations are ignored */
456: PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
457: {
458: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->spptr;
459: PetscInt M=A->rmap->N,N=A->cmap->N;
462: /* Initialize the SuperLU process grid. */
463: superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
465: /* Initialize ScalePermstruct and LUstruct. */
466: ScalePermstructInit(M, N, &lu->ScalePermstruct);
467: LUstructInit(M, N, &lu->LUstruct);
468: F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
469: F->ops->solve = MatSolve_SuperLU_DIST;
470: F->ops->matsolve = MatMatSolve_SuperLU_DIST;
471: lu->CleanUpSuperLU_Dist = PETSC_TRUE;
472: return(0);
473: }
475: EXTERN_C_BEGIN
478: PetscErrorCode MatFactorGetSolverPackage_aij_superlu_dist(Mat A,const MatSolverPackage *type)
479: {
481: *type = MATSOLVERSUPERLU_DIST;
482: return(0);
483: }
484: EXTERN_C_END
488: PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
489: {
490: Mat B;
491: Mat_SuperLU_DIST *lu;
492: PetscErrorCode ierr;
493: PetscInt M=A->rmap->N,N=A->cmap->N,indx;
494: PetscMPIInt size;
495: superlu_options_t options;
496: PetscBool flg;
497: const char *colperm[] = {"NATURAL","MMD_AT_PLUS_A","MMD_ATA","METIS_AT_PLUS_A","PARMETIS"};
498: const char *rowperm[] = {"LargeDiag","NATURAL"};
499: const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
502: /* Create the factorization matrix */
503: MatCreate(((PetscObject)A)->comm,&B);
504: MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);
505: MatSetType(B,((PetscObject)A)->type_name);
506: MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
507: MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);
509: B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
510: B->ops->view = MatView_SuperLU_DIST;
511: B->ops->destroy = MatDestroy_SuperLU_DIST;
512: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_aij_superlu_dist",MatFactorGetSolverPackage_aij_superlu_dist);
513: B->factortype = MAT_FACTOR_LU;
515: PetscNewLog(B,Mat_SuperLU_DIST,&lu);
516: B->spptr = lu;
518: /* Set the default input options:
519: options.Fact = DOFACT;
520: options.Equil = YES;
521: options.ParSymbFact = NO;
522: options.ColPerm = METIS_AT_PLUS_A;
523: options.RowPerm = LargeDiag;
524: options.ReplaceTinyPivot = YES;
525: options.IterRefine = DOUBLE;
526: options.Trans = NOTRANS;
527: options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve()
528: options.RefineInitialized = NO;
529: options.PrintStat = YES;
530: */
531: set_default_options_dist(&options);
533: MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));
534: MPI_Comm_size(((PetscObject)A)->comm,&size);
535: /* Default num of process columns and rows */
536: lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + PetscSqrtReal((PetscReal)size)));
537: if (!lu->npcol) lu->npcol = 1;
538: while (lu->npcol > 0) {
539: lu->nprow = PetscMPIIntCast(size/lu->npcol);
540: if (size == lu->nprow * lu->npcol) break;
541: lu->npcol --;
542: }
543:
544: PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");
545: PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);
546: PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);
547: if (size != lu->nprow * lu->npcol) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
548:
549: lu->MatInputMode = DISTRIBUTED;
550: PetscOptionsEnum("-mat_superlu_dist_matinput","Matrix input mode (global or distributed)","None",SuperLU_MatInputModes,(PetscEnum)lu->MatInputMode,(PetscEnum*)&lu->MatInputMode,PETSC_NULL);
551: if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
553: PetscOptionsBool("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);
554: if (!flg) {
555: options.Equil = NO;
556: }
558: PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",rowperm,2,rowperm[0],&indx,&flg);
559: if (flg) {
560: switch (indx) {
561: case 0:
562: options.RowPerm = LargeDiag;
563: break;
564: case 1:
565: options.RowPerm = NOROWPERM;
566: break;
567: }
568: }
570: PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",colperm,5,colperm[3],&indx,&flg);
571: if (flg) {
572: switch (indx) {
573: case 0:
574: options.ColPerm = NATURAL;
575: break;
576: case 1:
577: options.ColPerm = MMD_AT_PLUS_A;
578: break;
579: case 2:
580: options.ColPerm = MMD_ATA;
581: break;
582: case 3:
583: options.ColPerm = METIS_AT_PLUS_A;
584: break;
585: case 4:
586: options.ColPerm = PARMETIS; /* only works for np>1 */
587: break;
588: default:
589: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation");
590: }
591: }
593: PetscOptionsBool("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);
594: if (!flg) {
595: options.ReplaceTinyPivot = NO;
596: }
598: options.ParSymbFact = NO;
599: PetscOptionsBool("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);
600: if (flg){
601: #ifdef PETSC_HAVE_PARMETIS
602: options.ParSymbFact = YES;
603: options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
604: #else
605: printf("parsymbfact needs PARMETIS");
606: #endif
607: }
609: lu->FactPattern = SamePattern_SameRowPerm;
610: PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[1],&indx,&flg);
611: if (flg) {
612: switch (indx) {
613: case 0:
614: lu->FactPattern = SamePattern;
615: break;
616: case 1:
617: lu->FactPattern = SamePattern_SameRowPerm;
618: break;
619: }
620: }
621:
622: options.IterRefine = NOREFINE;
623: PetscOptionsBool("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);
624: if (flg) {
625: options.IterRefine = SLU_DOUBLE;
626: }
628: if (PetscLogPrintInfo) {
629: options.PrintStat = YES;
630: } else {
631: options.PrintStat = NO;
632: }
633: PetscOptionsBool("-mat_superlu_dist_statprint","Print factorization information","None",
634: (PetscBool)options.PrintStat,(PetscBool*)&options.PrintStat,0);
635: PetscOptionsEnd();
637: lu->options = options;
638: lu->options.Fact = DOFACT;
639: lu->matsolve_iscalled = PETSC_FALSE;
640: lu->matmatsolve_iscalled = PETSC_FALSE;
641: *F = B;
642: return(0);
643: }
645: EXTERN_C_BEGIN
648: PetscErrorCode MatGetFactor_seqaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
649: {
653: MatGetFactor_aij_superlu_dist(A,ftype,F);
654: return(0);
655: }
656: EXTERN_C_END
658: EXTERN_C_BEGIN
661: PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
662: {
666: MatGetFactor_aij_superlu_dist(A,ftype,F);
667: return(0);
668: }
669: EXTERN_C_END
673: PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
674: {
675: Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr;
676: superlu_options_t options;
677: PetscErrorCode ierr;
680: /* check if matrix is superlu_dist type */
681: if (A->ops->solve != MatSolve_SuperLU_DIST) return(0);
683: options = lu->options;
684: PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");
685: PetscViewerASCIIPrintf(viewer," Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);
686: PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscBools[options.Equil != NO]);
687: PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);
688: PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscBools[options.ReplaceTinyPivot != NO]);
689: PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscBools[options.IterRefine == SLU_DOUBLE]);
690: PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);
691: PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");
692:
693: switch(options.ColPerm){
694: case NATURAL:
695: PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");
696: break;
697: case MMD_AT_PLUS_A:
698: PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");
699: break;
700: case MMD_ATA:
701: PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");
702: break;
703: case METIS_AT_PLUS_A:
704: PetscViewerASCIIPrintf(viewer," Column permutation METIS_AT_PLUS_A\n");
705: break;
706: case PARMETIS:
707: PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n");
708: break;
709: default:
710: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation");
711: }
713: PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscBools[options.ParSymbFact != NO]);
714:
715: if (lu->FactPattern == SamePattern){
716: PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");
717: } else {
718: PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");
719: }
720: return(0);
721: }
725: PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
726: {
727: PetscErrorCode ierr;
728: PetscBool iascii;
729: PetscViewerFormat format;
732: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
733: if (iascii) {
734: PetscViewerGetFormat(viewer,&format);
735: if (format == PETSC_VIEWER_ASCII_INFO) {
736: MatFactorInfo_SuperLU_DIST(A,viewer);
737: }
738: }
739: return(0);
740: }
743: /*MC
744: MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization
746: Works with AIJ matrices
748: Options Database Keys:
749: + -mat_superlu_dist_r <n> - number of rows in processor partition
750: . -mat_superlu_dist_c <n> - number of columns in processor partition
751: . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
752: . -mat_superlu_dist_equil - equilibrate the matrix
753: . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
754: . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
755: . -mat_superlu_dist_replacetinypivot - replace tiny pivots
756: . -mat_superlu_dist_fact <SamePattern> - (choose one of) SamePattern SamePattern_SameRowPerm
757: . -mat_superlu_dist_iterrefine - use iterative refinement
758: - -mat_superlu_dist_statprint - print factorization information
760: Level: beginner
762: .seealso: PCLU
764: .seealso: PCFactorSetMatSolverPackage(), MatSolverPackage
766: M*/