Actual source code: superlu_dist.c
/*
  Provides an interface to the SuperLU_DIST sparse solver
*/

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscpkg_version.h>

PETSC_PRAGMA_DIAGNOSTIC_IGNORED_BEGIN("-Wundef")
EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#define CASTDOUBLECOMPLEX (doublecomplex *)
#define CASTDOUBLECOMPLEXSTAR (doublecomplex **)
#include <superlu_zdefs.h>
#define LUstructInit zLUstructInit
#define ScalePermstructInit zScalePermstructInit
#define ScalePermstructFree zScalePermstructFree
#define LUstructFree zLUstructFree
#define Destroy_LU zDestroy_LU
#define ScalePermstruct_t zScalePermstruct_t
#define LUstruct_t zLUstruct_t
#define SOLVEstruct_t zSOLVEstruct_t
#define SolveFinalize zSolveFinalize
#define pGetDiagU pzGetDiagU
#define pgssvx pzgssvx
#define allocateA_dist zallocateA_dist
#define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist
#define SLU SLU_Z
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
#define DeAllocLlu_3d zDeAllocLlu_3d
#define DeAllocGlu_3d zDeAllocGlu_3d
#define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d
#define pgssvx3d pzgssvx3d
#endif
#elif defined(PETSC_USE_REAL_SINGLE)
#define CASTDOUBLECOMPLEX
#define CASTDOUBLECOMPLEXSTAR
#include <superlu_sdefs.h>
#define LUstructInit sLUstructInit
#define ScalePermstructInit sScalePermstructInit
#define ScalePermstructFree sScalePermstructFree
#define LUstructFree sLUstructFree
#define Destroy_LU sDestroy_LU
#define ScalePermstruct_t sScalePermstruct_t
#define LUstruct_t sLUstruct_t
#define SOLVEstruct_t sSOLVEstruct_t
#define SolveFinalize sSolveFinalize
#define pGetDiagU psGetDiagU
#define pgssvx psgssvx
#define allocateA_dist sallocateA_dist
#define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist
#define SLU SLU_S
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
#define DeAllocLlu_3d sDeAllocLlu_3d
#define DeAllocGlu_3d sDeAllocGlu_3d
#define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d
#define pgssvx3d psgssvx3d
#endif
#else
#define CASTDOUBLECOMPLEX
#define CASTDOUBLECOMPLEXSTAR
#include <superlu_ddefs.h>
#define LUstructInit dLUstructInit
#define ScalePermstructInit dScalePermstructInit
#define ScalePermstructFree dScalePermstructFree
#define LUstructFree dLUstructFree
#define Destroy_LU dDestroy_LU
#define ScalePermstruct_t dScalePermstruct_t
#define LUstruct_t dLUstruct_t
#define SOLVEstruct_t dSOLVEstruct_t
#define SolveFinalize dSolveFinalize
#define pGetDiagU pdGetDiagU
#define pgssvx pdgssvx
#define allocateA_dist dallocateA_dist
#define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist
#define SLU SLU_D
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
#define DeAllocLlu_3d dDeAllocLlu_3d
#define DeAllocGlu_3d dDeAllocGlu_3d
#define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d
#define pgssvx3d pdgssvx3d
#endif
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
#include <superlu_sdefs.h>
#endif
EXTERN_C_END
PETSC_PRAGMA_DIAGNOSTIC_IGNORED_END()

typedef struct {
  int_t nprow, npcol, *row, *col;
  gridinfo_t grid;
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
  PetscBool use3d;
  int_t npdep; /* replication factor, must be power of two */
  gridinfo3d_t grid3d;
#endif
  superlu_dist_options_t options;
  SuperMatrix A_sup;
  ScalePermstruct_t ScalePermstruct;
  LUstruct_t LUstruct;
  int StatPrint;
  SOLVEstruct_t SOLVEstruct;
  fact_t FactPattern;
  MPI_Comm comm_superlu;
  PetscScalar *val;
  PetscBool matsolve_iscalled, matmatsolve_iscalled;
  PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  sScalePermstruct_t sScalePermstruct;
  sLUstruct_t sLUstruct;
  sSOLVEstruct_t sSOLVEstruct;
  float *sval;
  PetscBool singleprecision; /* use single precision SuperLU_DIST from double precision PETSc */
  float *sbptr;
#endif
} Mat_SuperLU_DIST;

static PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;

  PetscFunctionBegin;
  PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU)
{
  PetscFunctionBegin;

  PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU));
  PetscFunctionReturn(PETSC_SUCCESS);
}

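/*
  Usage sketch (illustrative, mirroring MatGetInertia_SuperLU_DIST() below): allocate an array of
  the global dimension of F and retrieve the diagonal of the U factor:

    PetscInt     M;
    PetscScalar *diagU;
    PetscCall(MatGetSize(F, &M, NULL));
    PetscCall(PetscMalloc1(M, &diagU));
    PetscCall(MatSuperluDistGetDiagU(F, diagU));
    PetscCall(PetscFree(diagU));
*/
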
/* This allows reusing the SuperLU_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */
typedef struct {
  MPI_Comm comm;
  PetscBool busy;
  gridinfo_t grid;
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
  PetscBool use3d;
  gridinfo3d_t grid3d;
#endif
} PetscSuperLU_DIST;

static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID;

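/*
  Sketch of the reuse protocol (see MatLUFactorSymbolic_SuperLU_DIST() and MatDestroy_SuperLU_DIST()
  below): the first factorization on a communicator caches a duplicated communicator and grid in this
  attribute; a later factorization on the same communicator reuses them while they are not busy:

    PetscSuperLU_DIST *context;
    PetscMPIInt        flg;
    PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &flg));
    if (flg && !context->busy) { context->busy = PETSC_TRUE; lu->grid = context->grid; }
*/
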
PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state)
{
  PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val;

  PetscFunctionBegin;
  if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval");
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
  if (context->use3d) {
    PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d));
  } else
#endif
    PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid));
  PetscCallMPI(MPI_Comm_free(&context->comm));
  PetscCall(PetscFree(context));
  PetscFunctionReturn(MPI_SUCCESS);
}

/*
  Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for
  users who do not destroy all PETSc objects before PetscFinalize().

  The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn()
  can still check that the keyval associated with the MPI communicator is correct when the MPI
  communicator is destroyed.

  This is called in PetscFinalize()
*/
static PetscErrorCode Petsc_Superlu_dist_keyval_free(void)
{
  PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  PetscCall(PetscFree(lu->sbptr));
#endif
  if (lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
    if (lu->options.SolveInitialized) {
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
      if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct));
      else
#endif
        PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
    }
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    if (lu->use3d) {
      if (lu->grid3d.zscp.Iam == 0) {
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
        if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct));
        else
#endif
          PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
      } else {
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
        if (lu->singleprecision) {
          PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", sDeAllocLlu_3d(lu->A_sup.ncol, &lu->sLUstruct, &lu->grid3d));
          PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", sDeAllocGlu_3d(&lu->sLUstruct));
        } else
#endif
        {
          PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
          PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct));
        }
      }
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
      if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", sDestroy_A3d_gathered_on_2d(&lu->sSOLVEstruct, &lu->grid3d));
      else
#endif
        PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d));
    } else
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
      if (lu->singleprecision) {
        PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid, &lu->sLUstruct));
        PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", sScalePermstructFree(&lu->sScalePermstruct));
        PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", sLUstructFree(&lu->sLUstruct));
      } else
#endif
      {
        PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct));
        PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct));
        PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct));
      }
    /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is, it is not stored in the communicator attribute context */
    if (lu->comm_superlu) {
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
      if (lu->use3d) {
        PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d));
      } else
#endif
        PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid));
    }
  }
  /*
    We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist().
    lu->CleanUpSuperLU_Dist is turned on in MatLUFactorSymbolic_SuperLU_DIST(). In some use cases a
    matrix is created but never used in a solve; then lu->CleanUpSuperLU_Dist is off and the old code
    neither released the communicator nor marked it as "not busy". Here we try to release the
    communicator regardless.
  */
  if (lu->comm_superlu) {
    PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
  } else {
    PetscSuperLU_DIST *context;
    MPI_Comm comm;
    PetscMPIInt flg;

    PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
    PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &flg));
    if (flg) context->busy = PETSC_FALSE;
  }

  PetscCall(PetscFree(A->data));
  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL));

  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
  PetscInt m = A->rmap->n;
  SuperLUStat_t stat;
  PetscReal berr[1];
  PetscScalar *bptr = NULL;
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  float sberr[1];
#endif
  int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  static PetscBool cite = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
  PetscCall(PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM "
                                   "Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",
                                   &cite));

  if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
    /* see comments in MatMatSolve() */
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
    if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct));
    else
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
    lu->options.SolveInitialized = NO;
  }
  PetscCall(VecCopy(b_mpi, x));
  PetscCall(VecGetArray(x, &bptr));
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  if (lu->singleprecision) {
    PetscInt n;
    PetscCall(VecGetLocalSize(x, &n));
    if (!lu->sbptr) PetscCall(PetscMalloc1(n, &lu->sbptr));
    for (PetscInt i = 0; i < n; i++) lu->sbptr[i] = PetscRealPart(bptr[i]); /* PetscRealPart() is a no-op to allow compiling with complex */
  }
#endif

  PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
  if (lu->use3d) {
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
    if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, m, 1, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info));
    else
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
  } else
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
    if (lu->singleprecision)
      PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, m, 1, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info));
    else
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
  PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "p*gssvx fails, info: %d", info);

  if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
  PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));

#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  if (lu->singleprecision) {
    PetscInt n;
    PetscCall(VecGetLocalSize(x, &n));
    for (PetscInt i = 0; i < n; i++) bptr[i] = lu->sbptr[i];
  }
#endif
  PetscCall(VecRestoreArray(x, &bptr));
  lu->matsolve_iscalled = PETSC_TRUE;
  lu->matmatsolve_iscalled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
  PetscInt m = A->rmap->n, nrhs;
  SuperLUStat_t stat;
  PetscReal berr[1];
  PetscScalar *bptr;
  int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  PetscBool flg;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for -pc_precision single");
#endif
  PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
  PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
  if (X != B_mpi) {
    PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
  }

  if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
    /* The communication pattern in SOLVEstruct was likely not created for MatMatSolve(),
       so destroy it and create a new SOLVEstruct; otherwise the solve may corrupt memory
       or produce an incorrect solution. See src/mat/tests/ex125.c */
    PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
    lu->options.SolveInitialized = NO;
  }
  if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN));

  PetscCall(MatGetSize(B_mpi, NULL, &nrhs));

  PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
  PetscCall(MatDenseGetArray(X, &bptr));

#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
  if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
  else
#endif
    PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));

  PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "p*gssvx fails, info: %d", info);
  PetscCall(MatDenseRestoreArray(X, &bptr));

  if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
  PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
  lu->matsolve_iscalled = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  input:
    F:     numeric Cholesky factor
  output:
    nneg:  total number of negative pivots
    nzero: total number of zero pivots
    npos:  (global dimension of F) - nneg - nzero
*/
static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
  PetscScalar *diagU = NULL;
  PetscInt M, i, neg = 0, zero = 0, pos = 0;
  PetscReal r;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Not for -pc_precision single");
#endif
  PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled");
  PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM");
  PetscCall(MatGetSize(F, &M, NULL));
  PetscCall(PetscMalloc1(M, &diagU));
  PetscCall(MatSuperluDistGetDiagU(F, diagU));
  for (i = 0; i < M; i++) {
#if defined(PETSC_USE_COMPLEX)
    r = PetscImaginaryPart(diagU[i]) / 10.0;
    PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)(r * 10.0));
    r = PetscRealPart(diagU[i]);
#else
    r = diagU[i];
#endif
    if (r > 0) {
      pos++;
    } else if (r < 0) {
      neg++;
    } else zero++;
  }

  PetscCall(PetscFree(diagU));
  if (nneg) *nneg = neg;
  if (nzero) *nzero = zero;
  if (npos) *npos = pos;
  PetscFunctionReturn(PETSC_SUCCESS);
}

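/*
  Usage sketch (illustrative): MatGetInertia() dispatches here through F->ops->getinertia, which is
  set in MatLUFactorSymbolic_SuperLU_DIST() for symmetric or Hermitian matrices; the factorization
  must use -mat_superlu_dist_rowperm NOROWPERM:

    PetscInt nneg, nzero, npos;
    PetscCall(MatGetInertia(F, &nneg, &nzero, &npos));
*/
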
static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
  Mat Aloc;
  const PetscScalar *av;
  const PetscInt *ai = NULL, *aj = NULL;
  PetscInt nz, dummy;
  int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
  SuperLUStat_t stat;
  PetscReal *berr = 0;
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  float *sberr = 0;
#endif
  PetscBool ismpiaij, isseqaij, flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij));
  if (ismpiaij) {
    PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc));
  } else if (isseqaij) {
    PetscCall(PetscObjectReference((PetscObject)A));
    Aloc = A;
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);

  PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed");
  PetscCall(MatSeqAIJGetArrayRead(Aloc, &av));
  nz = ai[Aloc->rmap->n];

  /* Allocations for A_sup */
  if (lu->options.Fact == DOFACT) { /* first numeric factorization */
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
    if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row));
    else
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
  } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
    if (lu->FactPattern == SamePattern_SameRowPerm) {
      lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
    } else if (lu->FactPattern == SamePattern) {
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
      if (lu->use3d) {
        if (lu->grid3d.zscp.Iam == 0) {
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
          if (lu->singleprecision) {
            PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct));
            PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct));
          } else
#endif
          {
            PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
            PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
          }
        } else {
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
          if (lu->singleprecision) {
            PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", sDeAllocLlu_3d(lu->A_sup.ncol, &lu->sLUstruct, &lu->grid3d));
            PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", sDeAllocGlu_3d(&lu->sLUstruct));
          } else
#endif
          {
            PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
            PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct));
          }
        }
      } else
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
        if (lu->singleprecision)
          PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct));
        else
#endif
          PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
      lu->options.Fact = SamePattern;
    } else if (lu->FactPattern == DOFACT) {
      PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
      if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct));
      else
#endif
        PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
      lu->options.Fact = DOFACT;
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
      if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row));
      else
#endif
        PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT");
  }
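  /* At this point lu->options.Fact is DOFACT (first factorization, or FactPattern == DOFACT),
     SamePattern, or SamePattern_SameRowPerm, telling SuperLU_DIST how much of the previous
     factorization to reuse */
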
  /* Copy AIJ matrix to superlu_dist matrix */
  PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1));
  PetscCall(PetscArraycpy(lu->col, aj, nz));
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  if (lu->singleprecision)
    for (PetscInt i = 0; i < nz; i++) lu->sval[i] = PetscRealPart(av[i]); /* PetscRealPart() is a no-op to allow compiling with complex */
  else
#endif
    PetscCall(PetscArraycpy(lu->val, av, nz));
  PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed");
  PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av));
  PetscCall(MatDestroy(&Aloc));

  /* Create and setup A_sup */
  if (lu->options.Fact == DOFACT) {
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
    if (lu->singleprecision)
      PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", sCreate_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->sval, lu->col, lu->row, SLU_NR_loc, SLU_S, SLU_GE));
    else
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE));
  }

  /* Factor the matrix. */
  PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
  if (lu->use3d) {
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
    if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo));
    else
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
  } else
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
    if (lu->singleprecision)
      PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo));
    else
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
  if (sinfo > 0) {
    PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo);
    if (sinfo <= lu->A_sup.ncol) {
      F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
      PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo));
    } else if (sinfo > lu->A_sup.ncol) {
      /*
        number of bytes allocated when memory allocation
        failure occurred, plus A->ncol.
      */
      F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo));
    }
  } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo);

  if (lu->options.PrintStat) { PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ }
  PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
  F->assembled = PETSC_TRUE;
  F->preallocated = PETSC_TRUE;
  lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Note: the PETSc r and c permutations are ignored */
static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
  PetscInt M = A->rmap->N, N = A->cmap->N, indx;
  PetscMPIInt size, mpiflg;
  PetscBool flg, set;
  const char *colperm[] = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"};
  const char *rowperm[] = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"};
  const char *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"};
  MPI_Comm comm;
  PetscSuperLU_DIST *context = NULL;

  PetscFunctionBegin;
  /* Set options to F */
  PetscCall(PetscObjectGetComm((PetscObject)F, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat");
  PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
  if (set && !flg) lu->options.Equil = NO;

  PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg));
  if (flg) {
    switch (indx) {
    case 0:
      lu->options.RowPerm = NOROWPERM;
      break;
    case 1:
      lu->options.RowPerm = LargeDiag_MC64;
      break;
    case 2:
      lu->options.RowPerm = LargeDiag_AWPM;
      break;
    case 3:
      lu->options.RowPerm = MY_PERMR;
      break;
    default:
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation");
    }
  }

  PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg));
  if (flg) {
    switch (indx) {
    case 0:
      lu->options.ColPerm = NATURAL;
      break;
    case 1:
      lu->options.ColPerm = MMD_AT_PLUS_A;
      break;
    case 2:
      lu->options.ColPerm = MMD_ATA;
      break;
    case 3:
      lu->options.ColPerm = METIS_AT_PLUS_A;
      break;
    case 4:
      lu->options.ColPerm = PARMETIS; /* only works for np>1 */
      break;
    default:
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
    }
  }

  lu->options.ReplaceTinyPivot = NO;
  PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
  if (set && flg) lu->options.ReplaceTinyPivot = YES;

  lu->options.ParSymbFact = NO;
  PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set));
  if (set && flg && size > 1) {
#if defined(PETSC_HAVE_PARMETIS)
    lu->options.ParSymbFact = YES;
    lu->options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
#else
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS");
#endif
  }

  lu->FactPattern = SamePattern;
  PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg));
  if (flg) {
    switch (indx) {
    case 0:
      lu->FactPattern = SamePattern;
      break;
    case 1:
      lu->FactPattern = SamePattern_SameRowPerm;
      break;
    case 2:
      lu->FactPattern = DOFACT;
      break;
    }
  }

  lu->options.IterRefine = NOREFINE;
  PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set));
  if (set) {
    if (flg) lu->options.IterRefine = SLU_DOUBLE;
    else lu->options.IterRefine = NOREFINE;
  }

  if (PetscLogPrintInfo) lu->options.PrintStat = YES;
  else lu->options.PrintStat = NO;
  PetscCall(PetscOptionsDeprecated("-mat_superlu_dist_statprint", "-mat_superlu_dist_printstat", "3.19", NULL));
  PetscCall(PetscOptionsBool("-mat_superlu_dist_printstat", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL));

  PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &mpiflg));
  if (!mpiflg || context->busy) { /* additional options */
    if (!mpiflg) {
      PetscCall(PetscNew(&context));
      context->busy = PETSC_TRUE;
      PetscCallMPI(MPI_Comm_dup(comm, &context->comm));
      PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context));
    } else {
      PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
    }

    /* Default number of process columns and rows */
    lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size));
    if (!lu->nprow) lu->nprow = 1;
    while (lu->nprow > 0) {
      lu->npcol = (int_t)(size / lu->nprow);
      if (size == lu->nprow * lu->npcol) break;
      lu->nprow--;
    }
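    /* For example, size = 6 starts from nprow = (int_t)(0.5 + sqrt(6)) = 2 and finds npcol = 3;
       the loop decreases nprow until size factors exactly as nprow * npcol */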
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    lu->use3d = PETSC_FALSE;
    lu->npdep = 1;
#endif

#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL));
    PetscCheck(!PetscDefined(MISSING_GETLINE) || !lu->use3d, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP_SYS, "-mat_superlu_dist_3d requires a system with a getline() implementation");
    if (lu->use3d) {
      PetscInt t;
      PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL));
      t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep);
      PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep);
      if (lu->npdep > 1) {
        lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep)));
        if (!lu->nprow) lu->nprow = 1;
        while (lu->nprow > 0) {
          lu->npcol = (int_t)(size / (lu->npdep * lu->nprow));
          if (size == lu->nprow * lu->npcol * lu->npdep) break;
          lu->nprow--;
        }
      }
    }
#endif
    PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL));
    PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL));
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep);
#else
    PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol);
#endif
    /* end of adding additional options */

#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    if (lu->use3d) {
      PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, lu->npdep, &lu->grid3d));
      if (context) {
        context->grid3d = lu->grid3d;
        context->use3d = lu->use3d;
      }
    } else {
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid));
      if (context) context->grid = lu->grid;
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    }
#endif
    PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n"));
    if (mpiflg) {
      PetscCall(PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute\n"));
    } else {
      PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n"));
    }
  } else { /* (mpiflg && !context->busy) */
    PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.\n"));
    context->busy = PETSC_TRUE;
    lu->grid = context->grid;
  }
  PetscOptionsEnd();

  /* Initialize ScalePermstruct and LUstruct. */
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  if (lu->singleprecision) {
    PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", sScalePermstructInit(M, N, &lu->sScalePermstruct));
    PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", sLUstructInit(N, &lu->sLUstruct));
  } else
#endif
  {
    PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct));
    PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct));
  }
  F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
  F->ops->solve = MatSolve_SuperLU_DIST;
  F->ops->matsolve = MatMatSolve_SuperLU_DIST;
  F->ops->getinertia = NULL;

  if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST;
  lu->CleanUpSuperLU_Dist = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info));
  F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSUPERLU_DIST;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
  superlu_dist_options_t options;

  PetscFunctionBegin;
  /* check if matrix is superlu_dist type */
  if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(PETSC_SUCCESS);

  options = lu->options;
  PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n"));
  /* would love to use the SuperLU 'IFMT' macro, but it looks like it is inconsistently applied:
     the format spec for int64_t is set to %d for whatever reason */
  PetscCall(PetscViewerASCIIPrintf(viewer, " Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol));
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
  if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, " Using 3d decomposition with npdep %lld \n", (long long)lu->npdep));
#endif

  PetscCall(PetscViewerASCIIPrintf(viewer, " Equilibrate matrix %s \n", PetscBools[options.Equil != NO]));
  PetscCall(PetscViewerASCIIPrintf(viewer, " Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO]));
  PetscCall(PetscViewerASCIIPrintf(viewer, " Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE]));
  PetscCall(PetscViewerASCIIPrintf(viewer, " Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol));

  switch (options.RowPerm) {
  case NOROWPERM:
    PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation NOROWPERM\n"));
    break;
  case LargeDiag_MC64:
    PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_MC64\n"));
    break;
  case LargeDiag_AWPM:
    PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_AWPM\n"));
    break;
  case MY_PERMR:
    PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation MY_PERMR\n"));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation");
  }

  switch (options.ColPerm) {
  case NATURAL:
    PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation NATURAL\n"));
    break;
  case MMD_AT_PLUS_A:
    PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_AT_PLUS_A\n"));
    break;
  case MMD_ATA:
    PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_ATA\n"));
    break;
  /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */
  case METIS_AT_PLUS_A:
    PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation METIS_AT_PLUS_A\n"));
    break;
  case PARMETIS:
    PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation PARMETIS\n"));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
  }

  PetscCall(PetscViewerASCIIPrintf(viewer, " Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO]));

  if (lu->FactPattern == SamePattern) {
    PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern\n"));
  } else if (lu->FactPattern == SamePattern_SameRowPerm) {
    PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern_SameRowPerm\n"));
  } else if (lu->FactPattern == DOFACT) {
    PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization DOFACT\n"));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern");
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  if (lu->singleprecision) PetscCall(PetscViewerASCIIPrintf(viewer, " Using SuperLU_DIST in single precision\n"));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer)
{
  PetscBool iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F)
{
  Mat B;
  Mat_SuperLU_DIST *lu;
  PetscInt M = A->rmap->N, N = A->cmap->N;
  PetscMPIInt size;
  superlu_dist_options_t options;
  PetscBool flg;
  char string[16];

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N));
  PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));
  B->ops->getinfo = MatGetInfo_External;
  B->ops->view = MatView_SuperLU_DIST;
  B->ops->destroy = MatDestroy_SuperLU_DIST;

  /* Set the default input options:
       options.Fact              = DOFACT;
       options.Equil             = YES;
       options.ParSymbFact       = NO;
       options.ColPerm           = METIS_AT_PLUS_A;
       options.RowPerm           = LargeDiag_MC64;
       options.ReplaceTinyPivot  = YES;
       options.IterRefine        = DOUBLE;
       options.Trans             = NOTRANS;
       options.SolveInitialized  = NO; - holds the communication pattern used by MatSolve() and MatMatSolve()
       options.RefineInitialized = NO;
       options.PrintStat         = YES;
       options.SymPattern        = NO;
  */
  set_default_options_dist(&options);

  B->trivialsymbolic = PETSC_TRUE;
  if (ftype == MAT_FACTOR_LU) {
    B->factortype = MAT_FACTOR_LU;
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
  } else {
    B->factortype = MAT_FACTOR_CHOLESKY;
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST;
    options.SymPattern = YES;
  }

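  /* The Cholesky case reuses the LU factorization kernels (see MatCholeskyFactorSymbolic_SuperLU_DIST()
     above); SymPattern = YES is a hint that lets SuperLU_DIST exploit the symmetric nonzero pattern */
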
  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype));

  PetscCall(PetscNew(&lu));
  B->data = lu;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));

  lu->options = options;
  lu->options.Fact = DOFACT;
  lu->matsolve_iscalled = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_FALSE;

  PetscCall(PetscOptionsGetString(NULL, NULL, "-pc_precision", string, sizeof(string), &flg));
  if (flg) {
    PetscCall(PetscStrcasecmp(string, "single", &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_USER_INPUT, "-pc_precision only accepts single as option for SuperLU_DIST");
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
    lu->singleprecision = PETSC_TRUE;
#endif
  }

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST));

  *F = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
  PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
  PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
  PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
  if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) {
    PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_Delete_Fn, &Petsc_Superlu_dist_keyval, NULL));
    PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization

  Use `./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch` to have PETSc installed with SuperLU_DIST

  Use `-pc_type lu` `-pc_factor_mat_solver_type superlu_dist` to use this direct solver

  Works with `MATAIJ` matrices

  Options Database Keys:
+ -mat_superlu_dist_r <n> - number of rows in processor partition
. -mat_superlu_dist_c <n> - number of columns in processor partition
. -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later
. -mat_superlu_dist_d <n> - depth in 3d partition (valid only if `-mat_superlu_dist_3d` is provided)
. -mat_superlu_dist_equil - equilibrate the matrix
. -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation
. -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation
. -mat_superlu_dist_replacetinypivot - replace tiny pivots
. -mat_superlu_dist_fact <SamePattern> - (choose one of) `SamePattern`, `SamePattern_SameRowPerm`, `DOFACT`
. -mat_superlu_dist_iterrefine - use iterative refinement
. -mat_superlu_dist_printstat - print factorization information
- -pc_precision single - use SuperLU_DIST single precision with PETSc double precision. This option currently does not accept an options prefix,
                         so it must be given without a prefix regardless of the `PC` prefix

  Level: beginner

  Note:
  If PETSc was configured with `--with-cuda` then this solver will automatically use the GPUs.

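  Example Usage:
  A typical command line selecting this solver (illustrative; `./your_app` stands in for any PETSc-based executable):
.vb
  mpiexec -n 4 ./your_app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type superlu_dist
.ve
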
.seealso: [](ch_matrices), `Mat`, `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()`
M*/