Actual source code: pcmpi.c
1: /*
2: This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3: It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
5: That program may use OpenMP to compute the right hand side and matrix for the linear system.
7: The code uses MPI_COMM_WORLD below, but perhaps it should use PETSC_COMM_WORLD.
9: The resulting KSP and PC can only be controlled via the options database, though some common commands
10: could be passed through the server.
12: */
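/*
   Illustrative usage sketch (not part of this file; the application name 'myapp', the sizes, and the
   assembly details are placeholders). The user writes an ordinary sequential-looking PETSc program;
   when it is launched as

       mpiexec -n 4 ./myapp -mpi_linear_solver_server

   only rank 0 runs the user code below, while the other ranks wait in PCMPIServerBegin() to perform the solves.

       Mat A;  Vec b, x;  KSP ksp;
       PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
       PetscCall(MatCreateSeqAIJ(PETSC_COMM_WORLD, n, n, 5, NULL, &A)); // PETSC_COMM_WORLD is PETSC_COMM_SELF on this rank
       // ... assemble A, create and fill the right hand side b, create x ...
       PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
       PetscCall(KSPSetOperators(ksp, A, A));
       PetscCall(KSPSetFromOptions(ksp));
       PetscCall(KSPSolve(ksp, b, x)); // runs in parallel on the server ranks when the system is large enough
       PetscCall(KSPDestroy(&ksp));
       PetscCall(PetscFinalize());
*/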
13: #include <petsc/private/pcimpl.h>
14: #include <petsc/private/kspimpl.h>
15: #include <petsc.h>
17: #define PC_MPI_MAX_RANKS 256
18: #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
20: typedef struct {
21: KSP ksps[PC_MPI_MAX_RANKS]; /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
22: PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
23: PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS]; /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
24: PetscInt mincntperrank; /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
25: PetscBool alwaysuseserver; /* for debugging, use the server infrastructure even if only one MPI rank is used for the solve */
26: } PC_MPI;
28: typedef enum {
29: PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
30: PCMPI_CREATE,
31: PCMPI_SET_MAT, /* set original matrix (or one with different nonzero pattern) */
32: PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
33: PCMPI_SOLVE,
34: PCMPI_VIEW,
35: PCMPI_DESTROY /* destroy a KSP that is no longer needed */
36: } PCMPICommand;
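/*
   Command protocol sketch (see PCSetUp_MPI(), PCApply_MPI(), and PCMPIServerBegin() below): rank 0
   broadcasts one of the commands above and then calls the matching PCMPIXxx() routine with its PC,
   while the server ranks call the same routine with a NULL argument from their event loop, e.g.

       request = PCMPI_SET_MAT;
       PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
       PetscCall(PCMPISetMat(pc)); // the server ranks execute PCMPISetMat(NULL)
*/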
38: static MPI_Comm PCMPIComms[PC_MPI_MAX_RANKS];
39: static PetscBool PCMPICommSet = PETSC_FALSE;
40: static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
41: static PetscInt PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
43: static PetscErrorCode PCMPICommsCreate(void)
44: {
45: MPI_Comm comm = PC_MPI_COMM_WORLD;
46: PetscMPIInt size, rank, i;
48: PetscFunctionBegin;
49: PetscCallMPI(MPI_Comm_size(comm, &size));
50: PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
51: PetscCallMPI(MPI_Comm_rank(comm, &rank));
52: /* comm for size 1 is useful only for debugging */
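/* PCMPIComms[i] contains MPI ranks 0 through i; e.g., with 4 ranks: PCMPIComms[0] = {0}, PCMPIComms[1] = {0,1}, PCMPIComms[2] = {0,1,2}, PCMPIComms[3] = all ranks. Ranks not in a given comm receive MPI_COMM_NULL. */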
53: for (i = 0; i < size; i++) {
54: PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
55: PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
56: PCMPISolveCounts[i] = 0;
57: PCMPIKSPCounts[i] = 0;
58: PCMPIIterations[i] = 0;
59: PCMPISizes[i] = 0;
60: }
61: PCMPICommSet = PETSC_TRUE;
62: PetscFunctionReturn(PETSC_SUCCESS);
63: }
65: PetscErrorCode PCMPICommsDestroy(void)
66: {
67: MPI_Comm comm = PC_MPI_COMM_WORLD;
68: PetscMPIInt size, rank, i;
70: PetscFunctionBegin;
71: if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
72: PetscCallMPI(MPI_Comm_size(comm, &size));
73: PetscCallMPI(MPI_Comm_rank(comm, &rank));
74: for (i = 0; i < size; i++) {
75: if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
76: }
77: PCMPICommSet = PETSC_FALSE;
78: PetscFunctionReturn(PETSC_SUCCESS);
79: }
81: static PetscErrorCode PCMPICreate(PC pc)
82: {
83: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
84: MPI_Comm comm = PC_MPI_COMM_WORLD;
85: KSP ksp;
86: PetscInt N[2], mincntperrank = 0;
87: PetscMPIInt size;
88: Mat sA;
89: char *cprefix = NULL;
90: PetscMPIInt len = 0;
92: PetscFunctionBegin;
93: if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
94: PetscCallMPI(MPI_Comm_size(comm, &size));
95: if (pc) {
96: if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient than not using this type\n"));
97: PetscCall(PCGetOperators(pc, &sA, &sA));
98: PetscCall(MatGetSize(sA, &N[0], &N[1]));
99: }
100: PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
102: /* choose a suitable sized MPI_Comm for the problem to be solved on */
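/* the number of active ranks is min(size, max(1, nrows/mincntperrank)); PCMPIComms[k-1] contains ranks 0..k-1, and ranks outside that comm return immediately with a NULL ksp */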
103: if (km) mincntperrank = km->mincntperrank;
104: PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
105: comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
106: if (comm == MPI_COMM_NULL) {
107: ksp = NULL;
108: PetscFunctionReturn(PETSC_SUCCESS);
109: }
110: PetscCall(PetscLogStagePush(PCMPIStage));
111: PetscCall(KSPCreate(comm, &ksp));
112: PetscCall(KSPSetNestLevel(ksp, 1));
113: PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
114: PetscCall(PetscLogStagePop());
115: PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
116: if (pc) {
117: size_t slen;
118: const char *prefix = NULL;
119: char *found = NULL;
121: PetscCallMPI(MPI_Comm_size(comm, &size));
122: PCMPIKSPCounts[size - 1]++;
123: /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
124: PetscCall(PCGetOptionsPrefix(pc, &prefix));
125: PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
126: PetscCall(PetscStrallocpy(prefix, &cprefix));
127: PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
128: PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
129: *found = 0;
130: PetscCall(PetscStrlen(cprefix, &slen));
131: len = (PetscMPIInt)slen;
132: }
133: PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
134: if (len) {
135: if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
136: PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
137: PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
138: }
139: PetscCall(PetscFree(cprefix));
140: PetscFunctionReturn(PETSC_SUCCESS);
141: }
143: static PetscErrorCode PCMPISetMat(PC pc)
144: {
145: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
146: Mat A;
147: PetscInt m, n, *ia, *ja, j, bs;
148: Mat sA;
149: MPI_Comm comm = PC_MPI_COMM_WORLD;
150: KSP ksp;
151: PetscLayout layout;
152: const PetscInt *IA = NULL, *JA = NULL;
153: const PetscInt *range;
154: PetscMPIInt *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
155: PetscScalar *a;
156: const PetscScalar *sa = NULL;
157: PetscInt matproperties[8] = {0, 0, 0, 0, 0, 0, 0, 0};
158: char *cprefix;
160: PetscFunctionBegin;
161: PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
162: if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
163: PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
164: if (pc) {
165: PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
166: const char *prefix;
167: size_t clen;
169: PetscCallMPI(MPI_Comm_size(comm, &size));
170: PCMPIMatCounts[size - 1]++;
171: PetscCall(PCGetOperators(pc, &sA, &sA));
172: PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
173: PetscCall(MatGetBlockSize(sA, &bs));
174: matproperties[2] = bs;
175: PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
176: matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
177: PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
178: matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
179: PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
180: matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
181: PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
182: matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
183: /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
184: PetscCall(MatGetOptionsPrefix(sA, &prefix));
185: PetscCall(PetscStrallocpy(prefix, &cprefix));
186: PetscCall(PetscStrlen(cprefix, &clen));
187: matproperties[7] = (PetscInt)clen;
188: }
189: PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));
191: /* determine ownership ranges of matrix columns */
192: PetscCall(PetscLayoutCreate(comm, &layout));
193: PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
194: PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
195: PetscCall(PetscLayoutSetUp(layout));
196: PetscCall(PetscLayoutGetLocalSize(layout, &n));
197: PetscCall(PetscLayoutDestroy(&layout));
199: /* determine ownership ranges of matrix rows */
200: PetscCall(PetscLayoutCreate(comm, &layout));
201: PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
202: PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
203: PetscCall(PetscLayoutSetUp(layout));
204: PetscCall(PetscLayoutGetLocalSize(layout, &m));
206: /* copy over the matrix nonzero structure and values */
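/* rank 0 computes, for every active rank, the number of row pointers (local rows + 1) and the number of nonzeros it will own, then scatters the CSR arrays ia, ja and the values below */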
207: if (pc) {
208: NZ = km->NZ;
209: NZdispl = km->NZdispl;
210: PetscCall(PetscLayoutGetRanges(layout, &range));
211: PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
212: for (i = 0; i < size; i++) {
213: sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
214: NZ[i] = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
215: }
216: displi[0] = 0;
217: NZdispl[0] = 0;
218: for (j = 1; j < size; j++) {
219: displi[j] = displi[j - 1] + sendcounti[j - 1] - 1;
220: NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
221: }
222: PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
223: }
224: PetscCall(PetscLayoutDestroy(&layout));
225: PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
227: PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
228: PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
229: PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
230: PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
232: if (pc) {
233: PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
234: PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
235: }
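/* the received row pointers are a slice of the global IA array; shift them so each rank's local CSR block starts at zero */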
237: for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
238: ia[0] = 0;
239: PetscCall(PetscLogStagePush(PCMPIStage));
240: PetscCall(MatCreate(comm, &A));
241: if (matproperties[7] > 0) {
242: if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
243: PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm));
244: PetscCall(MatSetOptionsPrefix(A, cprefix));
245: PetscCall(PetscFree(cprefix));
246: }
247: PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
248: PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
249: PetscCall(MatSetType(A, MATMPIAIJ));
250: PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
251: PetscCall(MatSetBlockSize(A, matproperties[2]));
253: if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
254: if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
255: if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
256: if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));
258: PetscCall(PetscFree3(ia, ja, a));
259: PetscCall(KSPSetOperators(ksp, A, A));
260: if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
261: PetscCall(PetscLogStagePop());
262: if (pc) { /* needed for scatterv/gatherv of rhs and solution */
263: const PetscInt *range;
265: PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
266: for (i = 0; i < size; i++) {
267: km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
268: km->displ[i] = (PetscMPIInt)range[i];
269: }
270: }
271: PetscCall(MatDestroy(&A));
272: PetscCall(KSPSetFromOptions(ksp));
273: PetscFunctionReturn(PETSC_SUCCESS);
274: }
276: static PetscErrorCode PCMPIUpdateMatValues(PC pc)
277: {
278: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
279: KSP ksp;
280: Mat sA, A;
281: MPI_Comm comm = PC_MPI_COMM_WORLD;
282: PetscScalar *a;
283: PetscCount nz;
284: const PetscScalar *sa = NULL;
285: PetscMPIInt size;
286: PetscInt matproperties[4] = {0, 0, 0, 0};
288: PetscFunctionBegin;
289: if (pc) {
290: PetscCall(PCGetOperators(pc, &sA, &sA));
291: PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
292: }
293: PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
294: if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
295: PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
296: PetscCallMPI(MPI_Comm_size(comm, &size));
297: PCMPIMatCounts[size - 1]++;
298: PetscCall(KSPGetOperators(ksp, NULL, &A));
299: PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
300: PetscCall(PetscMalloc1(nz, &a));
301: PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
302: if (pc) {
303: PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
305: PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
307: PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
308: matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
309: PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
310: matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
311: PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
312: matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
313: PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
314: matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
315: }
316: PetscCall(MatUpdateMPIAIJWithArray(A, a));
317: PetscCall(PetscFree(a));
318: PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
319: /* if any of these properties was previously set and is now not set, this will result in incorrect properties in A, since there is no way to unset a property */
320: if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
321: if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
322: if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
323: if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
324: PetscFunctionReturn(PETSC_SUCCESS);
325: }
327: static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
328: {
329: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
330: KSP ksp;
331: MPI_Comm comm = PC_MPI_COMM_WORLD;
332: const PetscScalar *sb = NULL, *x;
333: PetscScalar *b, *sx = NULL;
334: PetscInt its, n;
335: PetscMPIInt size;
337: PetscFunctionBegin;
338: PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
339: if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
340: PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
342: /* TODO: optimize code to not require building counts/displ every time */
344: /* scatterv rhs */
345: PetscCallMPI(MPI_Comm_size(comm, &size));
346: if (pc) {
347: PetscInt N;
349: PCMPISolveCounts[size - 1]++;
350: PetscCall(MatGetSize(pc->pmat, &N, NULL));
351: PCMPISizes[size - 1] += N;
352: PetscCall(VecGetArrayRead(B, &sb));
353: }
354: PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
355: PetscCall(VecGetArray(ksp->vec_rhs, &b));
356: PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
357: PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
358: if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
360: PetscCall(PetscLogStagePush(PCMPIStage));
361: PetscCall(KSPSolve(ksp, NULL, NULL));
362: PetscCall(PetscLogStagePop());
363: PetscCall(KSPGetIterationNumber(ksp, &its));
364: PCMPIIterations[size - 1] += its;
366: /* gather solution */
367: PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
368: if (pc) PetscCall(VecGetArray(X, &sx));
369: PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
370: if (pc) PetscCall(VecRestoreArray(X, &sx));
371: PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
372: PetscFunctionReturn(PETSC_SUCCESS);
373: }
375: static PetscErrorCode PCMPIDestroy(PC pc)
376: {
377: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
378: KSP ksp;
379: MPI_Comm comm = PC_MPI_COMM_WORLD;
381: PetscFunctionBegin;
382: PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
383: if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
384: PetscCall(PetscLogStagePush(PCMPIStage));
385: PetscCall(KSPDestroy(&ksp));
386: PetscCall(PetscLogStagePop());
387: PetscFunctionReturn(PETSC_SUCCESS);
388: }
390: PetscBool PCMPIServerActive = PETSC_FALSE;
392: /*@C
393: PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
394: parallel `KSP` solves and management of parallel `KSP` objects.
396: Logically Collective on all MPI processes except rank 0
398: Options Database Keys:
399: + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
400: - -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
402: Level: developer
404: Note:
405: This is normally started automatically in `PetscInitialize()` when the option is provided
407: See `PCMPI` for information on using the solver with a `KSP` object
409: Developer Notes:
410: When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
411: written with `PETSC_COMM_WORLD` to run correctly on that single rank while all the other ranks
412: (that would normally share `PETSC_COMM_WORLD`) run the solver server.
414: Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
416: Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage
418: This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object
420: The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
421: nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.
423: The concept could also be extended for users' callbacks for `SNES`, `TS`, and `Tao` where, for example, `SNESSolve()` runs on
424: all MPI processes but the user callback only runs on one MPI process per node.
426: PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
427: the `MPI_Comm` argument from PETSc calls.
429: .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
430: @*/
431: PetscErrorCode PCMPIServerBegin(void)
432: {
433: PetscMPIInt rank;
435: PetscFunctionBegin;
436: PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
437: if (PetscDefined(USE_SINGLE_LIBRARY)) {
438: PetscCall(VecInitializePackage());
439: PetscCall(MatInitializePackage());
440: PetscCall(DMInitializePackage());
441: PetscCall(PCInitializePackage());
442: PetscCall(KSPInitializePackage());
443: PetscCall(SNESInitializePackage());
444: PetscCall(TSInitializePackage());
445: PetscCall(TaoInitializePackage());
446: }
447: PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
449: PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
450: if (rank == 0) {
451: PETSC_COMM_WORLD = PETSC_COMM_SELF;
452: PCMPIServerActive = PETSC_TRUE;
453: PetscFunctionReturn(PETSC_SUCCESS);
454: }
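/* the remaining ranks loop here for the life of the program: wait for a command broadcast from rank 0 and dispatch to the matching PCMPIXxx(NULL) routine until PCMPI_EXIT arrives */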
456: while (PETSC_TRUE) {
457: PCMPICommand request = PCMPI_CREATE;
458: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
459: switch (request) {
460: case PCMPI_CREATE:
461: PetscCall(PCMPICreate(NULL));
462: break;
463: case PCMPI_SET_MAT:
464: PetscCall(PCMPISetMat(NULL));
465: break;
466: case PCMPI_UPDATE_MAT_VALUES:
467: PetscCall(PCMPIUpdateMatValues(NULL));
468: break;
469: case PCMPI_VIEW:
470: // PetscCall(PCMPIView(NULL));
471: break;
472: case PCMPI_SOLVE:
473: PetscCall(PCMPISolve(NULL, NULL, NULL));
474: break;
475: case PCMPI_DESTROY:
476: PetscCall(PCMPIDestroy(NULL));
477: break;
478: case PCMPI_EXIT:
479: PetscCall(PetscFinalize());
480: exit(0); /* not sure if this is a good idea, but cannot return because it would run the user's main program */
481: break;
482: default:
483: break;
484: }
485: }
486: PetscFunctionReturn(PETSC_SUCCESS);
487: }
489: /*@C
490: PCMPIServerEnd - ends a server that runs on the `rank != 0` MPI processes waiting to process requests for
491: parallel `KSP` solves and management of parallel `KSP` objects.
493: Logically Collective on all MPI ranks except 0
495: Level: developer
497: Note:
498: This is normally ended automatically in `PetscFinalize()` when the option is provided
500: .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
501: @*/
502: PetscErrorCode PCMPIServerEnd(void)
503: {
504: PCMPICommand request = PCMPI_EXIT;
506: PetscFunctionBegin;
507: if (PetscGlobalRank == 0) {
508: PetscViewer viewer = NULL;
509: PetscViewerFormat format;
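/* tell the server ranks to leave their event loop; they then call PetscFinalize() and exit */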
511: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
512: PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
513: PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
514: PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
515: PetscOptionsEnd();
516: if (viewer) {
517: PetscBool isascii;
519: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
520: if (isascii) {
521: PetscMPIInt size;
522: PetscMPIInt i;
524: PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
525: PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
526: PetscCall(PetscViewerASCIIPrintf(viewer, " Ranks KSPSolve()s Mats KSPs Avg. Size Avg. Its\n"));
527: if (PCMPIKSPCountsSeq) {
528: PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
529: }
530: for (i = 0; i < size; i++) {
531: if (PCMPIKSPCounts[i]) {
532: PetscCall(PetscViewerASCIIPrintf(viewer, " %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
533: }
534: }
535: }
536: PetscCall(PetscViewerDestroy(&viewer));
537: }
538: }
539: PetscCall(PCMPICommsDestroy());
540: PCMPIServerActive = PETSC_FALSE;
541: PetscFunctionReturn(PETSC_SUCCESS);
542: }
544: /*
545: This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
546: because, for example, the problem is small. This version is more efficient because it does not require copying any data
547: */
548: static PetscErrorCode PCSetUp_Seq(PC pc)
549: {
550: PC_MPI *km = (PC_MPI *)pc->data;
551: Mat sA;
552: const char *prefix;
553: char *found = NULL, *cprefix;
555: PetscFunctionBegin;
556: PetscCall(PCGetOperators(pc, NULL, &sA));
557: PetscCall(PCGetOptionsPrefix(pc, &prefix));
558: PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
559: PetscCall(KSPSetNestLevel(km->ksps[0], 1));
560: PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));
562: /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
563: PetscCall(PCGetOptionsPrefix(pc, &prefix));
564: PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
565: PetscCall(PetscStrallocpy(prefix, &cprefix));
566: PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
567: PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
568: *found = 0;
569: PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
570: PetscCall(PetscFree(cprefix));
572: PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
573: PetscCall(KSPSetFromOptions(km->ksps[0]));
574: PetscCall(KSPSetUp(km->ksps[0]));
575: PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
576: PCMPIKSPCountsSeq++;
577: PetscFunctionReturn(PETSC_SUCCESS);
578: }
580: static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
581: {
582: PC_MPI *km = (PC_MPI *)pc->data;
583: PetscInt its, n;
584: Mat A;
586: PetscFunctionBegin;
587: PetscCall(KSPSolve(km->ksps[0], b, x));
588: PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
589: PCMPISolveCountsSeq++;
590: PCMPIIterationsSeq += its;
591: PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
592: PetscCall(MatGetSize(A, &n, NULL));
593: PCMPISizesSeq += n;
594: PetscFunctionReturn(PETSC_SUCCESS);
595: }
597: static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
598: {
599: PC_MPI *km = (PC_MPI *)pc->data;
601: PetscFunctionBegin;
602: PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
603: PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
604: PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
605: PetscFunctionReturn(PETSC_SUCCESS);
606: }
608: static PetscErrorCode PCDestroy_Seq(PC pc)
609: {
610: PC_MPI *km = (PC_MPI *)pc->data;
612: PetscFunctionBegin;
613: PetscCall(KSPDestroy(&km->ksps[0]));
614: PetscCall(PetscFree(pc->data));
615: PetscFunctionReturn(PETSC_SUCCESS);
616: }
618: /*
619: PCSetUp_MPI - Triggers the creation of the MPI parallel PC and copies parts of the matrix and
620: right hand side to the parallel PC
621: */
622: static PetscErrorCode PCSetUp_MPI(PC pc)
623: {
624: PC_MPI *km = (PC_MPI *)pc->data;
625: PetscMPIInt rank, size;
626: PCMPICommand request;
627: PetscBool newmatrix = PETSC_FALSE;
629: PetscFunctionBegin;
630: PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
631: PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
632: PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
634: if (!pc->setupcalled) {
635: if (!km->alwaysuseserver) {
636: PetscInt n;
637: Mat sA;
638: /* short circuit for small systems */
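/* when the matrix has fewer than roughly 2*mincntperrank rows, or only one MPI rank exists, solve sequentially on rank 0 via PCSetUp_Seq()/PCApply_Seq() and never contact the server */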
639: PetscCall(PCGetOperators(pc, &sA, &sA));
640: PetscCall(MatGetSize(sA, &n, NULL));
641: if (n < 2 * km->mincntperrank - 1 || size == 1) {
642: pc->ops->setup = NULL;
643: pc->ops->apply = PCApply_Seq;
644: pc->ops->destroy = PCDestroy_Seq;
645: pc->ops->view = PCView_Seq;
646: PetscCall(PCSetUp_Seq(pc));
647: PetscFunctionReturn(PETSC_SUCCESS);
648: }
649: }
651: request = PCMPI_CREATE;
652: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
653: PetscCall(PCMPICreate(pc));
654: newmatrix = PETSC_TRUE;
655: }
656: if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
658: if (newmatrix) {
659: PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
660: request = PCMPI_SET_MAT;
661: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
662: PetscCall(PCMPISetMat(pc));
663: } else {
664: PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
665: request = PCMPI_UPDATE_MAT_VALUES;
666: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
667: PetscCall(PCMPIUpdateMatValues(pc));
668: }
669: PetscFunctionReturn(PETSC_SUCCESS);
670: }
672: static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
673: {
674: PCMPICommand request = PCMPI_SOLVE;
676: PetscFunctionBegin;
677: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
678: PetscCall(PCMPISolve(pc, b, x));
679: PetscFunctionReturn(PETSC_SUCCESS);
680: }
682: static PetscErrorCode PCDestroy_MPI(PC pc)
683: {
684: PCMPICommand request = PCMPI_DESTROY;
686: PetscFunctionBegin;
687: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
688: PetscCall(PCMPIDestroy(pc));
689: PetscCall(PetscFree(pc->data));
690: PetscFunctionReturn(PETSC_SUCCESS);
691: }
693: /*
694: PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
695: */
696: static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
697: {
698: PC_MPI *km = (PC_MPI *)pc->data;
699: MPI_Comm comm;
700: PetscMPIInt size;
702: PetscFunctionBegin;
703: PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
704: PetscCallMPI(MPI_Comm_size(comm, &size));
705: PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
706: PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
707: PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to view statistics on all the solves ***\n"));
708: PetscFunctionReturn(PETSC_SUCCESS);
709: }
711: static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
712: {
713: PC_MPI *km = (PC_MPI *)pc->data;
715: PetscFunctionBegin;
716: PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
717: PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
718: PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
719: PetscOptionsHeadEnd();
720: PetscFunctionReturn(PETSC_SUCCESS);
721: }
723: /*MC
724: PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
726: Options Database Keys for the Server:
727: + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
728: - -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
730: Options Database Keys for a specific `KSP` object:
731: + -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
732: - -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only solved on the process (for debugging and testing purposes)
734: Level: developer
736: Notes:
737: The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.
739: It can be particularly useful for user OpenMP code or potentially user GPU code.
741: When the program is running with a single MPI process then it directly uses the provided matrix and right hand side
742: and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.
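
   For example, a sequential application (here called myapp, a placeholder name) whose `KSP` has no options prefix might be launched as
.vb
   mpiexec -n 4 ./myapp -mpi_linear_solver_server -mpi_linear_solver_server_view -mpi_linear_solver_server_minimum_count_per_rank 5000
.ve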
744: The solver options for the actual solving `KSP` and `PC` must be controlled via the options database; calls to set options directly on the user-level `KSP` and `PC` have no effect
745: because they are not the actual solver objects.
747: When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
748: stages will be confusing since the event times are only recorded on the 0th MPI rank, so the percentages of time in the events will be misleading.
750: Developer Note:
751: This `PCType` is never directly selected by the user; it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outermost nesting of
752: a `KSP`. The outermost `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.
754: .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
755: M*/
756: PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
757: {
758: PC_MPI *km;
759: char *found = NULL;
761: PetscFunctionBegin;
762: PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
763: PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");
765: /* material from PCSetType() */
766: PetscTryTypeMethod(pc, destroy);
767: pc->ops->destroy = NULL;
768: pc->data = NULL;
770: PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
771: PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
772: pc->modifysubmatrices = NULL;
773: pc->modifysubmatricesP = NULL;
774: pc->setupcalled = 0;
776: PetscCall(PetscNew(&km));
777: pc->data = (void *)km;
779: km->mincntperrank = 10000;
781: pc->ops->setup = PCSetUp_MPI;
782: pc->ops->apply = PCApply_MPI;
783: pc->ops->destroy = PCDestroy_MPI;
784: pc->ops->view = PCView_MPI;
785: pc->ops->setfromoptions = PCSetFromOptions_MPI;
786: PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
787: PetscFunctionReturn(PETSC_SUCCESS);
788: }