Actual source code: pcmpi.c
1: /*
2: This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
3: It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.
5:   That program may use OpenMP to compute the right-hand side and matrix for the linear system.
7:   The code uses MPI_COMM_WORLD below, but perhaps it should use PETSC_COMM_WORLD.
9: The resulting KSP and PC can only be controlled via the options database, though some common commands
10: could be passed through the server.
12: */
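/*
   A minimal sketch of how a sequential application might use the server; it assumes a standard KSP
   workflow, and the objects ksp, A, b, x and the executable name ./app are illustrative. The user code
   below runs only on MPI rank 0 while the server is active; PETSC_COMM_WORLD is remapped to
   PETSC_COMM_SELF there.
.vb
   PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
   PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));   // a one-process communicator while the server is active
   PetscCall(KSPSetOperators(ksp, A, A));          // A is a sequential MATSEQAIJ matrix
   PetscCall(KSPSetFromOptions(ksp));
   PetscCall(KSPSolve(ksp, b, x));                 // solved in parallel by the server when the system is large enough
   PetscCall(PetscFinalize());
.ve
   launched, for example, with
.vb
   mpiexec -n 4 ./app -mpi_linear_solver_server -mpi_linear_solver_server_view
.ve
*/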
13: #include <petsc/private/pcimpl.h>
14: #include <petsc/private/kspimpl.h>
15: #include <petscts.h>
16: #include <petsctao.h>
18: #define PC_MPI_MAX_RANKS 256
19: #define PC_MPI_COMM_WORLD MPI_COMM_WORLD
21: typedef struct {
22: KSP ksps[PC_MPI_MAX_RANKS]; /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
23: PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
24:   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS]; /* For scatter of nonzero values in matrix (and of the nonzero column indices initially) */
25: PetscInt mincntperrank; /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
26: PetscBool alwaysuseserver; /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
27: } PC_MPI;
29: typedef enum {
30: PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */
31: PCMPI_CREATE,
32: PCMPI_SET_MAT, /* set original matrix (or one with different nonzero pattern) */
33: PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
34: PCMPI_SOLVE,
35: PCMPI_VIEW,
36: PCMPI_DESTROY /* destroy a KSP that is no longer needed */
37: } PCMPICommand;
39: static MPI_Comm PCMPIComms[PC_MPI_MAX_RANKS];
40: static PetscBool PCMPICommSet = PETSC_FALSE;
41: static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
42: static PetscInt PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;
44: static PetscErrorCode PCMPICommsCreate(void)
45: {
46: MPI_Comm comm = PC_MPI_COMM_WORLD;
47: PetscMPIInt size, rank, i;
49: PetscFunctionBegin;
50: PetscCallMPI(MPI_Comm_size(comm, &size));
51: PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
52: PetscCallMPI(MPI_Comm_rank(comm, &rank));
53: /* comm for size 1 is useful only for debugging */
54: for (i = 0; i < size; i++) {
55: PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
56: PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
57: PCMPISolveCounts[i] = 0;
58: PCMPIKSPCounts[i] = 0;
59: PCMPIIterations[i] = 0;
60: PCMPISizes[i] = 0;
61: }
62: PCMPICommSet = PETSC_TRUE;
63: PetscFunctionReturn(PETSC_SUCCESS);
64: }
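/*
   The split above gives PCMPIComms[i] the ranks 0 through i, so index i selects a solver communicator
   with i + 1 processes; ranks outside that set receive MPI_COMM_NULL. A small illustration, assuming a
   4-rank PC_MPI_COMM_WORLD:
.vb
   PCMPIComms[0] : {0}          ranks 1,2,3 hold MPI_COMM_NULL
   PCMPIComms[1] : {0,1}        ranks 2,3   hold MPI_COMM_NULL
   PCMPIComms[2] : {0,1,2}      rank  3     holds MPI_COMM_NULL
   PCMPIComms[3] : {0,1,2,3}
.ve
*/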
66: PetscErrorCode PCMPICommsDestroy(void)
67: {
68: MPI_Comm comm = PC_MPI_COMM_WORLD;
69: PetscMPIInt size, rank, i;
71: PetscFunctionBegin;
72: if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
73: PetscCallMPI(MPI_Comm_size(comm, &size));
74: PetscCallMPI(MPI_Comm_rank(comm, &rank));
75: for (i = 0; i < size; i++) {
76: if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
77: }
78: PCMPICommSet = PETSC_FALSE;
79: PetscFunctionReturn(PETSC_SUCCESS);
80: }
82: static PetscErrorCode PCMPICreate(PC pc)
83: {
84: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
85: MPI_Comm comm = PC_MPI_COMM_WORLD;
86: KSP ksp;
87: PetscInt N[2], mincntperrank = 0;
88: PetscMPIInt size;
89: Mat sA;
90: char *cprefix = NULL;
91: PetscMPIInt len = 0;
93: PetscFunctionBegin;
94: if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
95: PetscCallMPI(MPI_Comm_size(comm, &size));
96: if (pc) {
97:     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running a KSP of type MPI on a one-rank MPI run; this will be less efficient than not using this type\n"));
98: PetscCall(PCGetOperators(pc, &sA, &sA));
99: PetscCall(MatGetSize(sA, &N[0], &N[1]));
100: }
101: PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));
103: /* choose a suitable sized MPI_Comm for the problem to be solved on */
104: if (km) mincntperrank = km->mincntperrank;
105: PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
106: comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
107: if (comm == MPI_COMM_NULL) {
108: ksp = NULL;
109: PetscFunctionReturn(PETSC_SUCCESS);
110: }
111: PetscCall(PetscLogStagePush(PCMPIStage));
112: PetscCall(KSPCreate(comm, &ksp));
113: PetscCall(KSPSetNestLevel(ksp, 1));
114: PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
115: PetscCall(PetscLogStagePop());
116: PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
117: if (pc) {
118: size_t slen;
119: const char *prefix = NULL;
120: char *found = NULL;
122: PetscCallMPI(MPI_Comm_size(comm, &size));
123: PCMPIKSPCounts[size - 1]++;
124: /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
125: PetscCall(PCGetOptionsPrefix(pc, &prefix));
126: PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
127: PetscCall(PetscStrallocpy(prefix, &cprefix));
128: PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
129: PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
130: *found = 0;
131: PetscCall(PetscStrlen(cprefix, &slen));
132: len = (PetscMPIInt)slen;
133: }
134: PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
135: if (len) {
136: if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
137: PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
138: PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
139: }
140: PetscCall(PetscFree(cprefix));
141: PetscFunctionReturn(PETSC_SUCCESS);
142: }
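/*
   A rough worked example of the communicator selection in PCMPICreate() above,
   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1], using the default
   mincntperrank of 10000 set in PCCreate_MPI():
.vb
   N[0] = 25000 rows,   size = 64:  PetscMin(64, PetscMax(1, 25000/10000)) - 1 = 1   ->  a 2-rank communicator
   N[0] = 1000000 rows, size = 64:  PetscMin(64, PetscMax(1, 100)) - 1       = 63  ->  all 64 ranks
.ve
*/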
144: static PetscErrorCode PCMPISetMat(PC pc)
145: {
146: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
147: Mat A;
148: PetscInt m, n, *ia, *ja, j, bs;
149: Mat sA;
150: MPI_Comm comm = PC_MPI_COMM_WORLD;
151: KSP ksp;
152: PetscLayout layout;
153: const PetscInt *IA = NULL, *JA = NULL;
154: const PetscInt *range;
155: PetscMPIInt *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
156: PetscScalar *a;
157: const PetscScalar *sa = NULL;
158: PetscInt matproperties[8] = {0, 0, 0, 0, 0, 0, 0, 0};
159: char *cprefix;
161: PetscFunctionBegin;
162: PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
163: if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
164: PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
165: if (pc) {
166: PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
167: const char *prefix;
168: size_t clen;
170: PetscCallMPI(MPI_Comm_size(comm, &size));
171: PCMPIMatCounts[size - 1]++;
172: PetscCall(PCGetOperators(pc, &sA, &sA));
173: PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
174: PetscCall(MatGetBlockSize(sA, &bs));
175: matproperties[2] = bs;
176: PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
177: matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
178: PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
179: matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
180: PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
181: matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
182: PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
183: matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
184: /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
185: PetscCall(MatGetOptionsPrefix(sA, &prefix));
186: PetscCall(PetscStrallocpy(prefix, &cprefix));
187: PetscCall(PetscStrlen(cprefix, &clen));
188: matproperties[7] = (PetscInt)clen;
189: }
190: PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));
192: /* determine ownership ranges of matrix columns */
193: PetscCall(PetscLayoutCreate(comm, &layout));
194: PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
195: PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
196: PetscCall(PetscLayoutSetUp(layout));
197: PetscCall(PetscLayoutGetLocalSize(layout, &n));
198: PetscCall(PetscLayoutDestroy(&layout));
200: /* determine ownership ranges of matrix rows */
201: PetscCall(PetscLayoutCreate(comm, &layout));
202: PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
203: PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
204: PetscCall(PetscLayoutSetUp(layout));
205: PetscCall(PetscLayoutGetLocalSize(layout, &m));
207: /* copy over the matrix nonzero structure and values */
208: if (pc) {
209: NZ = km->NZ;
210: NZdispl = km->NZdispl;
211: PetscCall(PetscLayoutGetRanges(layout, &range));
212: PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
213: for (i = 0; i < size; i++) {
214: sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
215: NZ[i] = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
216: }
217: displi[0] = 0;
218: NZdispl[0] = 0;
219: for (j = 1; j < size; j++) {
220: displi[j] = displi[j - 1] + sendcounti[j - 1] - 1;
221: NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
222: }
223: PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
224: }
225: PetscCall(PetscLayoutDestroy(&layout));
226: PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));
228: PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
229: PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
230: PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
231: PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
233: if (pc) {
234: PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
235: PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
236: }
238: for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
239: ia[0] = 0;
240: PetscCall(PetscLogStagePush(PCMPIStage));
241: PetscCall(MatCreate(comm, &A));
242: if (matproperties[7] > 0) {
243: if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
244: PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm));
245: PetscCall(MatSetOptionsPrefix(A, cprefix));
246: PetscCall(PetscFree(cprefix));
247: }
248: PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
249: PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
250: PetscCall(MatSetType(A, MATMPIAIJ));
251: PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
252: PetscCall(MatSetBlockSize(A, matproperties[2]));
254: if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
255: if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
256: if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
257: if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));
259: PetscCall(PetscFree3(ia, ja, a));
260: PetscCall(KSPSetOperators(ksp, A, A));
261: if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
262: PetscCall(PetscLogStagePop());
263: if (pc) { /* needed for scatterv/gatherv of rhs and solution */
264: const PetscInt *range;
266: PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
267: for (i = 0; i < size; i++) {
268: km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
269: km->displ[i] = (PetscMPIInt)range[i];
270: }
271: }
272: PetscCall(MatDestroy(&A));
273: PetscCall(KSPSetFromOptions(ksp));
274: PetscFunctionReturn(PETSC_SUCCESS);
275: }
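/*
   A small sketch of how PCMPISetMat() splits the CSR arrays of the sequential matrix, assuming a
   4 x 4 matrix with 9 nonzeros solved on 2 ranks that own 2 rows each (range = {0,2,4}):
.vb
   global row pointers  IA = {0,2,4,7,9}
   rank 0 receives IA[0..2] = {0,2,4},  NZ[0] = IA[2] - IA[0] = 4 nonzeros
   rank 1 receives IA[2..4] = {4,7,9},  NZ[1] = IA[4] - IA[2] = 5 nonzeros
                                        after the shift ia[j] -= ia[0]:  ia = {0,3,5}
   sendcounti = {3,3}, displi = {0,2}, NZdispl = {0,4}
.ve
   The column indices JA and the numerical values are scattered with the same counts NZ and
   displacements NZdispl.
*/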
277: static PetscErrorCode PCMPIUpdateMatValues(PC pc)
278: {
279: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
280: KSP ksp;
281: Mat sA, A;
282: MPI_Comm comm = PC_MPI_COMM_WORLD;
283: PetscScalar *a;
284: PetscCount nz;
285: const PetscScalar *sa = NULL;
286: PetscMPIInt size;
287: PetscInt matproperties[4] = {0, 0, 0, 0};
289: PetscFunctionBegin;
290: if (pc) {
291: PetscCall(PCGetOperators(pc, &sA, &sA));
292: PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
293: }
294: PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
295: if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
296: PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
297: PetscCallMPI(MPI_Comm_size(comm, &size));
298: PCMPIMatCounts[size - 1]++;
299: PetscCall(KSPGetOperators(ksp, NULL, &A));
300: PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
301: PetscCall(PetscMalloc1(nz, &a));
302: PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
303: if (pc) {
304: PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
306: PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
308: PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
309: matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
310: PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
311: matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
312: PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
313: matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
314: PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
315: matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
316: }
317: PetscCall(MatUpdateMPIAIJWithArray(A, a));
318: PetscCall(PetscFree(a));
319: PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
320:   /* if any of these properties was previously set and is now not set, this will result in incorrect properties in A since there is no way to unset a property */
321: if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
322: if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
323: if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
324: if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
325: PetscFunctionReturn(PETSC_SUCCESS);
326: }
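/*
   The three-valued encoding used for the matrix properties broadcast above and in PCMPISetMat():
.vb
   0 : property not known (isset == PETSC_FALSE)  -> the option on A is left untouched
   1 : property known to hold                     -> MatSetOption(A, ..., PETSC_TRUE)
   2 : property known not to hold                 -> MatSetOption(A, ..., PETSC_FALSE)
.ve
*/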
328: static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
329: {
330: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
331: KSP ksp;
332: MPI_Comm comm = PC_MPI_COMM_WORLD;
333: const PetscScalar *sb = NULL, *x;
334: PetscScalar *b, *sx = NULL;
335: PetscInt its, n;
336: PetscMPIInt size;
338: PetscFunctionBegin;
339: PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
340: if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
341: PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
343: /* TODO: optimize code to not require building counts/displ every time */
345: /* scatterv rhs */
346: PetscCallMPI(MPI_Comm_size(comm, &size));
347: if (pc) {
348: PetscInt N;
350: PCMPISolveCounts[size - 1]++;
351: PetscCall(MatGetSize(pc->pmat, &N, NULL));
352: PCMPISizes[size - 1] += N;
353: PetscCall(VecGetArrayRead(B, &sb));
354: }
355: PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
356: PetscCall(VecGetArray(ksp->vec_rhs, &b));
357: PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
358: PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
359: if (pc) PetscCall(VecRestoreArrayRead(B, &sb));
361: PetscCall(PetscLogStagePush(PCMPIStage));
362: PetscCall(KSPSolve(ksp, NULL, NULL));
363: PetscCall(PetscLogStagePop());
364: PetscCall(KSPGetIterationNumber(ksp, &its));
365: PCMPIIterations[size - 1] += its;
367: /* gather solution */
368: PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
369: if (pc) PetscCall(VecGetArray(X, &sx));
370: PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
371: if (pc) PetscCall(VecRestoreArray(X, &sx));
372: PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
373: PetscFunctionReturn(PETSC_SUCCESS);
374: }
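/*
   The counts and displacements used for the MPI_Scatterv()/MPI_Gatherv() of the right-hand side and
   solution above come from the ownership ranges stored by PCMPISetMat(). For example, with 10 rows
   split over 3 ranks as range = {0,4,7,10}:
.vb
   km->sendcount = {4,3,3}   (local vector lengths)
   km->displ     = {0,4,7}   (offsets into the sequential vector on rank 0)
.ve
*/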
376: static PetscErrorCode PCMPIDestroy(PC pc)
377: {
378: PC_MPI *km = pc ? (PC_MPI *)pc->data : NULL;
379: KSP ksp;
380: MPI_Comm comm = PC_MPI_COMM_WORLD;
382: PetscFunctionBegin;
383: PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
384: if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
385: PetscCall(PetscLogStagePush(PCMPIStage));
386: PetscCall(KSPDestroy(&ksp));
387: PetscCall(PetscLogStagePop());
388: PetscFunctionReturn(PETSC_SUCCESS);
389: }
391: PetscBool PCMPIServerActive = PETSC_FALSE;
393: /*@C
394: PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
395: parallel `KSP` solves and management of parallel `KSP` objects.
397: Logically Collective on all MPI processes except rank 0
399: Options Database Keys:
400: + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
401: - -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program
403: Level: developer
405: Note:
406:   This is normally started automatically in `PetscInitialize()` when the option `-mpi_linear_solver_server` is provided
408: See `PCMPI` for information on using the solver with a `KSP` object
410: Developer Notes:
411:   When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` so that a main program
412:   written with `PETSC_COMM_WORLD` runs correctly on that single rank, while all the other ranks
413:   (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.
415: Can this be integrated into the `PetscDevice` abstraction that is currently being developed?
417: Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage
419: This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object
421: The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
422: nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.
424:   The concept could also be extended to user callbacks for `SNES`, `TS`, and `Tao`, where, for example, `SNESSolve()` runs on
425:   all MPI processes but the user callback only runs on one MPI process per node.
427:   PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
428:   the `MPI_Comm` argument from PETSc calls.
430: .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
431: @*/
432: PetscErrorCode PCMPIServerBegin(void)
433: {
434: PetscMPIInt rank;
436: PetscFunctionBegin;
437: PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
438: if (PetscDefined(USE_SINGLE_LIBRARY)) {
439: PetscCall(VecInitializePackage());
440: PetscCall(MatInitializePackage());
441: PetscCall(DMInitializePackage());
442: PetscCall(PCInitializePackage());
443: PetscCall(KSPInitializePackage());
444: PetscCall(SNESInitializePackage());
445: PetscCall(TSInitializePackage());
446: PetscCall(TaoInitializePackage());
447: }
448: PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));
450: PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
451: if (rank == 0) {
452: PETSC_COMM_WORLD = PETSC_COMM_SELF;
453: PCMPIServerActive = PETSC_TRUE;
454: PetscFunctionReturn(PETSC_SUCCESS);
455: }
457: while (PETSC_TRUE) {
458: PCMPICommand request = PCMPI_CREATE;
459: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
460: switch (request) {
461: case PCMPI_CREATE:
462: PetscCall(PCMPICreate(NULL));
463: break;
464: case PCMPI_SET_MAT:
465: PetscCall(PCMPISetMat(NULL));
466: break;
467: case PCMPI_UPDATE_MAT_VALUES:
468: PetscCall(PCMPIUpdateMatValues(NULL));
469: break;
470: case PCMPI_VIEW:
471: // PetscCall(PCMPIView(NULL));
472: break;
473: case PCMPI_SOLVE:
474: PetscCall(PCMPISolve(NULL, NULL, NULL));
475: break;
476: case PCMPI_DESTROY:
477: PetscCall(PCMPIDestroy(NULL));
478: break;
479: case PCMPI_EXIT:
480: PetscCall(PetscFinalize());
481:       exit(0); /* not sure if this is a good idea, but cannot return because it would then run the user's main program */
482: break;
483: default:
484: break;
485: }
486: }
487: PetscFunctionReturn(PETSC_SUCCESS);
488: }
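/*
   Each server action is driven by a PCMPICommand broadcast from rank 0 followed by a call to the matching
   PCMPIXxx() routine; the loop above receives the broadcast on the other ranks and makes the same call with
   a NULL PC. A sketch of the client side, mirroring PCApply_MPI() below:
.vb
   PCMPICommand request = PCMPI_SOLVE;
   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
   PetscCall(PCMPISolve(pc, b, x));   // the server ranks concurrently execute PCMPISolve(NULL, NULL, NULL)
.ve
*/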
490: /*@C
491:   PCMPIServerEnd - ends a server that runs on the `rank != 0` MPI processes waiting to process requests for
492:   parallel `KSP` solves and management of parallel `KSP` objects.
494: Logically Collective on all MPI ranks except 0
496: Level: developer
498: Note:
499:   This is normally ended automatically in `PetscFinalize()` when the option `-mpi_linear_solver_server` is provided
501: .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
502: @*/
503: PetscErrorCode PCMPIServerEnd(void)
504: {
505: PCMPICommand request = PCMPI_EXIT;
507: PetscFunctionBegin;
508: if (PetscGlobalRank == 0) {
509: PetscViewer viewer = NULL;
510: PetscViewerFormat format;
512: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
513: PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
514: PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
515: PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
516: PetscOptionsEnd();
517: if (viewer) {
518: PetscBool isascii;
520: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
521: if (isascii) {
522: PetscMPIInt size;
523: PetscMPIInt i;
525: PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
526: PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
527: PetscCall(PetscViewerASCIIPrintf(viewer, " Ranks KSPSolve()s Mats KSPs Avg. Size Avg. Its\n"));
528: if (PCMPIKSPCountsSeq) {
529: PetscCall(PetscViewerASCIIPrintf(viewer, " Sequential %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
530: }
531: for (i = 0; i < size; i++) {
532: if (PCMPIKSPCounts[i]) {
533: PetscCall(PetscViewerASCIIPrintf(viewer, " %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
534: }
535: }
536: }
537: PetscCall(PetscOptionsRestoreViewer(&viewer));
538: }
539: }
540: PetscCall(PCMPICommsDestroy());
541: PCMPIServerActive = PETSC_FALSE;
542: PetscFunctionReturn(PETSC_SUCCESS);
543: }
545: /*
546: This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
547: because, for example, the problem is small. This version is more efficient because it does not require copying any data
548: */
549: static PetscErrorCode PCSetUp_Seq(PC pc)
550: {
551: PC_MPI *km = (PC_MPI *)pc->data;
552: Mat sA;
553: const char *prefix;
554: char *found = NULL, *cprefix;
556: PetscFunctionBegin;
557: PetscCall(PCGetOperators(pc, NULL, &sA));
558: PetscCall(PCGetOptionsPrefix(pc, &prefix));
559: PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
560: PetscCall(KSPSetNestLevel(km->ksps[0], 1));
561: PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));
563: /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
564: PetscCall(PCGetOptionsPrefix(pc, &prefix));
565: PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
566: PetscCall(PetscStrallocpy(prefix, &cprefix));
567: PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
568: PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
569: *found = 0;
570: PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
571: PetscCall(PetscFree(cprefix));
573: PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
574: PetscCall(KSPSetFromOptions(km->ksps[0]));
575: PetscCall(KSPSetUp(km->ksps[0]));
576: PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
577: PCMPIKSPCountsSeq++;
578: PetscFunctionReturn(PETSC_SUCCESS);
579: }
581: static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
582: {
583: PC_MPI *km = (PC_MPI *)pc->data;
584: PetscInt its, n;
585: Mat A;
587: PetscFunctionBegin;
588: PetscCall(KSPSolve(km->ksps[0], b, x));
589: PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
590: PCMPISolveCountsSeq++;
591: PCMPIIterationsSeq += its;
592: PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
593: PetscCall(MatGetSize(A, &n, NULL));
594: PCMPISizesSeq += n;
595: PetscFunctionReturn(PETSC_SUCCESS);
596: }
598: static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
599: {
600: PC_MPI *km = (PC_MPI *)pc->data;
602: PetscFunctionBegin;
603: PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
604: PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
605: PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n"));
606: PetscFunctionReturn(PETSC_SUCCESS);
607: }
609: static PetscErrorCode PCDestroy_Seq(PC pc)
610: {
611: PC_MPI *km = (PC_MPI *)pc->data;
613: PetscFunctionBegin;
614: PetscCall(KSPDestroy(&km->ksps[0]));
615: PetscCall(PetscFree(pc->data));
616: PetscFunctionReturn(PETSC_SUCCESS);
617: }
619: /*
620: PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
621: right-hand side to the parallel PC
622: */
623: static PetscErrorCode PCSetUp_MPI(PC pc)
624: {
625: PC_MPI *km = (PC_MPI *)pc->data;
626: PetscMPIInt rank, size;
627: PCMPICommand request;
628: PetscBool newmatrix = PETSC_FALSE;
630: PetscFunctionBegin;
631: PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
632: PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
633: PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
635: if (!pc->setupcalled) {
636: if (!km->alwaysuseserver) {
637: PetscInt n;
638: Mat sA;
639: /* short circuit for small systems */
640: PetscCall(PCGetOperators(pc, &sA, &sA));
641: PetscCall(MatGetSize(sA, &n, NULL));
642: if (n < 2 * km->mincntperrank - 1 || size == 1) {
643: pc->ops->setup = NULL;
644: pc->ops->apply = PCApply_Seq;
645: pc->ops->destroy = PCDestroy_Seq;
646: pc->ops->view = PCView_Seq;
647: PetscCall(PCSetUp_Seq(pc));
648: PetscFunctionReturn(PETSC_SUCCESS);
649: }
650: }
652: request = PCMPI_CREATE;
653: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
654: PetscCall(PCMPICreate(pc));
655: newmatrix = PETSC_TRUE;
656: }
657: if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;
659: if (newmatrix) {
660: PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
661: request = PCMPI_SET_MAT;
662: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
663: PetscCall(PCMPISetMat(pc));
664: } else {
665: PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
666: request = PCMPI_UPDATE_MAT_VALUES;
667: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
668: PetscCall(PCMPIUpdateMatValues(pc));
669: }
670: PetscFunctionReturn(PETSC_SUCCESS);
671: }
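/*
   With the default mincntperrank of 10000 the short circuit in PCSetUp_MPI() above selects the sequential
   path whenever the system has fewer than 2*10000 - 1 = 19999 rows (or the run uses a single MPI rank), so
   small problems never pay the cost of distributing the matrix.
*/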
673: static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
674: {
675: PCMPICommand request = PCMPI_SOLVE;
677: PetscFunctionBegin;
678: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
679: PetscCall(PCMPISolve(pc, b, x));
680: PetscFunctionReturn(PETSC_SUCCESS);
681: }
683: static PetscErrorCode PCDestroy_MPI(PC pc)
684: {
685: PCMPICommand request = PCMPI_DESTROY;
687: PetscFunctionBegin;
688: PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
689: PetscCall(PCMPIDestroy(pc));
690: PetscCall(PetscFree(pc->data));
691: PetscFunctionReturn(PETSC_SUCCESS);
692: }
694: /*
695: PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
696: */
697: static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
698: {
699: PC_MPI *km = (PC_MPI *)pc->data;
700: MPI_Comm comm;
701: PetscMPIInt size;
703: PetscFunctionBegin;
704: PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
705: PetscCallMPI(MPI_Comm_size(comm, &size));
706: PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
707: PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
708: PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to statistics on all the solves ***\n"));
709: PetscFunctionReturn(PETSC_SUCCESS);
710: }
712: static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
713: {
714: PC_MPI *km = (PC_MPI *)pc->data;
716: PetscFunctionBegin;
717: PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
718: PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
719: PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
720: PetscOptionsHeadEnd();
721: PetscFunctionReturn(PETSC_SUCCESS);
722: }
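/*
   Because the PC carries the mpi_linear_solver_server_ prefix, these options appear on the command line
   with that prefix prepended (plus any user KSP prefix). For example, with no user prefix:
.vb
   -mpi_linear_solver_server_minimum_count_per_rank 5000
   -mpi_linear_solver_server_always_use_server
.ve
*/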
724: /*MC
725: PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process
727: Options Database Keys for the Server:
728: + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
729: - -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server
731:   Options Database Keys for a specific `KSP` object:
732: + -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
733: - -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is only being solved on one process (for debugging and testing purposes)
735: Level: developer
737: Notes:
738:   The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.
740:   This approach can be particularly useful for user code that is parallelized with OpenMP, or potentially for user GPU code.
742: When the program is running with a single MPI process then it directly uses the provided matrix and right-hand side
743: and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.
745:   The options for the actual solver `KSP` and `PC` must be controlled via the options database; calls that set options directly on the user-level `KSP` and `PC` have no effect
746:   because those are not the actual solver objects.
748:   When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
749:   stages will be confusing since the event times are only recorded on the 0th MPI rank, so the percentage of time spent in those events will be misleading.
751: Developer Note:
752:   This `PCType` is never directly selected by the user; it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outermost nesting of
753:   a `KSP`. The outermost `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.
755: .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
756: M*/
757: PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
758: {
759: PC_MPI *km;
760: char *found = NULL;
762: PetscFunctionBegin;
763: PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
764: PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");
766: /* material from PCSetType() */
767: PetscTryTypeMethod(pc, destroy);
768: pc->ops->destroy = NULL;
769: pc->data = NULL;
771: PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
772: PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
773: pc->modifysubmatrices = NULL;
774: pc->modifysubmatricesP = NULL;
775: pc->setupcalled = 0;
777: PetscCall(PetscNew(&km));
778: pc->data = (void *)km;
780: km->mincntperrank = 10000;
782: pc->ops->setup = PCSetUp_MPI;
783: pc->ops->apply = PCApply_MPI;
784: pc->ops->destroy = PCDestroy_MPI;
785: pc->ops->view = PCView_MPI;
786: pc->ops->setfromoptions = PCSetFromOptions_MPI;
787: PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
788: PetscFunctionReturn(PETSC_SUCCESS);
789: }