Actual source code: pcmpi.c

  1: /*
  2:     This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0.
  3:     It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes.

  5:     Such a program may use OpenMP to compute the right hand side and matrix for the linear system

  7:     The code uses MPI_COMM_WORLD below, but perhaps it should use PETSC_COMM_WORLD

  9:     The resulting KSP and PC can only be controlled via the options database, though some common commands
 10:     could be passed through the server.

 12: */
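
/*
   A minimal usage sketch (a hypothetical stand-alone application, not part of this file). The application is written as
   an ordinary sequential PETSc code; when it is launched on several MPI ranks with -mpi_linear_solver_server, rank 0
   runs main() below while the remaining ranks run the solver server started inside PetscInitialize(). The executable
   name ./app and the 1D Laplacian system are made up for illustration.

     #include <petscksp.h>

     int main(int argc, char **argv)
     {
       Mat      A;
       Vec      b, x;
       KSP      ksp;
       PetscInt i, n = 100;

       PetscCall(PetscInitialize(&argc, &argv, NULL, NULL)); // with -mpi_linear_solver_server this also starts the server
       // only rank 0 reaches this point; PETSC_COMM_WORLD has been set to PETSC_COMM_SELF here
       PetscCall(MatCreateSeqAIJ(PETSC_COMM_WORLD, n, n, 3, NULL, &A));
       for (i = 0; i < n; i++) {
         if (i > 0) PetscCall(MatSetValue(A, i, i - 1, -1.0, INSERT_VALUES));
         if (i < n - 1) PetscCall(MatSetValue(A, i, i + 1, -1.0, INSERT_VALUES));
         PetscCall(MatSetValue(A, i, i, 2.0, INSERT_VALUES));
       }
       PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
       PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
       PetscCall(MatCreateVecs(A, &x, &b));
       PetscCall(VecSet(b, 1.0));
       PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
       PetscCall(KSPSetOperators(ksp, A, A));
       PetscCall(KSPSetFromOptions(ksp)); // picks up PCMPI when the server option is active
       PetscCall(KSPSolve(ksp, b, x));    // forwarded to the server ranks when the system is large enough
       PetscCall(KSPDestroy(&ksp));
       PetscCall(VecDestroy(&b));
       PetscCall(VecDestroy(&x));
       PetscCall(MatDestroy(&A));
       PetscCall(PetscFinalize()); // ends the server on the other ranks
       return 0;
     }

   Run, for example, with:  mpiexec -n 4 ./app -mpi_linear_solver_server -mpi_linear_solver_server_view
*/
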
 13: #include <petsc/private/pcimpl.h>
 14: #include <petsc/private/kspimpl.h>
 15: #include <petsc.h>

 17: #define PC_MPI_MAX_RANKS  256
 18: #define PC_MPI_COMM_WORLD MPI_COMM_WORLD

 20: typedef struct {
 21:   KSP         ksps[PC_MPI_MAX_RANKS];                               /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */
 22:   PetscMPIInt sendcount[PC_MPI_MAX_RANKS], displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */
 23:   PetscMPIInt NZ[PC_MPI_MAX_RANKS], NZdispl[PC_MPI_MAX_RANKS];      /* For scatter of nonzero values in matrix (and nonzero column indices initially) */
 24:   PetscInt    mincntperrank;                                        /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */
 25:   PetscBool   alwaysuseserver;                                      /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */
 26: } PC_MPI;

 28: typedef enum {
 29:   PCMPI_EXIT, /* exit the PC server loop, meaning the controlling sequential program is done */
 30:   PCMPI_CREATE,
 31:   PCMPI_SET_MAT,           /* set original matrix (or one with different nonzero pattern) */
 32:   PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */
 33:   PCMPI_SOLVE,
 34:   PCMPI_VIEW,
 35:   PCMPI_DESTROY /* destroy a KSP that is no longer needed */
 36: } PCMPICommand;
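
/*
   The request protocol, as used by the routines below (a summary sketch, not additional functionality): MPI rank 0
   broadcasts one of the commands above and then calls the matching PCMPIXxx() routine with its PC, while the server
   ranks, waiting in the loop inside PCMPIServerBegin(), receive the broadcast and call the same routine with a NULL PC.
   For example, to ship a new matrix to the server:

     PCMPICommand request = PCMPI_SET_MAT;
     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
     PetscCall(PCMPISetMat(pc)); // pc on rank 0; the server loop passes NULL
*/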

 38: static MPI_Comm  PCMPIComms[PC_MPI_MAX_RANKS];
 39: static PetscBool PCMPICommSet = PETSC_FALSE;
 40: static PetscInt  PCMPISolveCounts[PC_MPI_MAX_RANKS], PCMPIKSPCounts[PC_MPI_MAX_RANKS], PCMPIMatCounts[PC_MPI_MAX_RANKS], PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0;
 41: static PetscInt  PCMPIIterations[PC_MPI_MAX_RANKS], PCMPISizes[PC_MPI_MAX_RANKS], PCMPIIterationsSeq = 0, PCMPISizesSeq = 0;

 43: static PetscErrorCode PCMPICommsCreate(void)
 44: {
 45:   MPI_Comm    comm = PC_MPI_COMM_WORLD;
 46:   PetscMPIInt size, rank, i;

 48:   PetscFunctionBegin;
 49:   PetscCallMPI(MPI_Comm_size(comm, &size));
 50:   PetscCheck(size <= PC_MPI_MAX_RANKS, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve");
 51:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
 52:   /* comm for size 1 is useful only for debugging */
 53:   for (i = 0; i < size; i++) {
 54:     PetscMPIInt color = rank < i + 1 ? 0 : MPI_UNDEFINED;
 55:     PetscCallMPI(MPI_Comm_split(comm, color, 0, &PCMPIComms[i]));
 56:     PCMPISolveCounts[i] = 0;
 57:     PCMPIKSPCounts[i]   = 0;
 58:     PCMPIIterations[i]  = 0;
 59:     PCMPISizes[i]       = 0;
 60:   }
 61:   PCMPICommSet = PETSC_TRUE;
 62:   PetscFunctionReturn(PETSC_SUCCESS);
 63: }

 65: PetscErrorCode PCMPICommsDestroy(void)
 66: {
 67:   MPI_Comm    comm = PC_MPI_COMM_WORLD;
 68:   PetscMPIInt size, rank, i;

 70:   PetscFunctionBegin;
 71:   if (!PCMPICommSet) PetscFunctionReturn(PETSC_SUCCESS);
 72:   PetscCallMPI(MPI_Comm_size(comm, &size));
 73:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
 74:   for (i = 0; i < size; i++) {
 75:     if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i]));
 76:   }
 77:   PCMPICommSet = PETSC_FALSE;
 78:   PetscFunctionReturn(PETSC_SUCCESS);
 79: }

 81: static PetscErrorCode PCMPICreate(PC pc)
 82: {
 83:   PC_MPI     *km   = pc ? (PC_MPI *)pc->data : NULL;
 84:   MPI_Comm    comm = PC_MPI_COMM_WORLD;
 85:   KSP         ksp;
 86:   PetscInt    N[2], mincntperrank = 0;
 87:   PetscMPIInt size;
 88:   Mat         sA;
 89:   char       *cprefix = NULL;
 90:   PetscMPIInt len     = 0;

 92:   PetscFunctionBegin;
 93:   if (!PCMPICommSet) PetscCall(PCMPICommsCreate());
 94:   PetscCallMPI(MPI_Comm_size(comm, &size));
 95:   if (pc) {
 96:     if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient than not using this type\n"));
 97:     PetscCall(PCGetOperators(pc, &sA, &sA));
 98:     PetscCall(MatGetSize(sA, &N[0], &N[1]));
 99:   }
100:   PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm));

102:   /* choose a suitable sized MPI_Comm for the problem to be solved on */
103:   if (km) mincntperrank = km->mincntperrank;
104:   PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm));
105:   comm = PCMPIComms[PetscMin(size, PetscMax(1, N[0] / mincntperrank)) - 1];
106:   if (comm == MPI_COMM_NULL) {
107:     ksp = NULL;
108:     PetscFunctionReturn(PETSC_SUCCESS);
109:   }
110:   PetscCall(PetscLogStagePush(PCMPIStage));
111:   PetscCall(KSPCreate(comm, &ksp));
112:   PetscCall(KSPSetNestLevel(ksp, 1));
113:   PetscCall(PetscObjectSetTabLevel((PetscObject)ksp, 1));
114:   PetscCall(PetscLogStagePop());
115:   PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm));
116:   if (pc) {
117:     size_t      slen;
118:     const char *prefix = NULL;
119:     char       *found  = NULL;

121:     PetscCallMPI(MPI_Comm_size(comm, &size));
122:     PCMPIKSPCounts[size - 1]++;
123:     /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
124:     PetscCall(PCGetOptionsPrefix(pc, &prefix));
125:     PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
126:     PetscCall(PetscStrallocpy(prefix, &cprefix));
127:     PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
128:     PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
129:     *found = 0;
130:     PetscCall(PetscStrlen(cprefix, &slen));
131:     len = (PetscMPIInt)slen;
132:   }
133:   PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm));
134:   if (len) {
135:     if (!pc) PetscCall(PetscMalloc1(len + 1, &cprefix));
136:     PetscCallMPI(MPI_Bcast(cprefix, len + 1, MPI_CHAR, 0, comm));
137:     PetscCall(KSPSetOptionsPrefix(ksp, cprefix));
138:   }
139:   PetscCall(PetscFree(cprefix));
140:   PetscFunctionReturn(PETSC_SUCCESS);
141: }

143: static PetscErrorCode PCMPISetMat(PC pc)
144: {
145:   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
146:   Mat                A;
147:   PetscInt           m, n, *ia, *ja, j, bs;
148:   Mat                sA;
149:   MPI_Comm           comm = PC_MPI_COMM_WORLD;
150:   KSP                ksp;
151:   PetscLayout        layout;
152:   const PetscInt    *IA = NULL, *JA = NULL;
153:   const PetscInt    *range;
154:   PetscMPIInt       *NZ = NULL, sendcounti[PC_MPI_MAX_RANKS], displi[PC_MPI_MAX_RANKS], *NZdispl = NULL, nz, size, i;
155:   PetscScalar       *a;
156:   const PetscScalar *sa               = NULL;
157:   PetscInt           matproperties[8] = {0, 0, 0, 0, 0, 0, 0, 0};
158:   char              *cprefix;

160:   PetscFunctionBegin;
161:   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
162:   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
163:   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
164:   if (pc) {
165:     PetscBool   isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;
166:     const char *prefix;
167:     size_t      clen;

169:     PetscCallMPI(MPI_Comm_size(comm, &size));
170:     PCMPIMatCounts[size - 1]++;
171:     PetscCall(PCGetOperators(pc, &sA, &sA));
172:     PetscCall(MatGetSize(sA, &matproperties[0], &matproperties[1]));
173:     PetscCall(MatGetBlockSize(sA, &bs));
174:     matproperties[2] = bs;
175:     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
176:     matproperties[3] = !isset ? 0 : (issymmetric ? 1 : 2);
177:     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
178:     matproperties[4] = !isset ? 0 : (ishermitian ? 1 : 2);
179:     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
180:     matproperties[5] = !isset ? 0 : (isspd ? 1 : 2);
181:     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
182:     matproperties[6] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
183:     /* Created Mat gets prefix of input Mat PLUS the mpi_linear_solver_server_ portion */
184:     PetscCall(MatGetOptionsPrefix(sA, &prefix));
185:     PetscCall(PetscStrallocpy(prefix, &cprefix));
186:     PetscCall(PetscStrlen(cprefix, &clen));
187:     matproperties[7] = (PetscInt)clen;
188:   }
189:   PetscCallMPI(MPI_Bcast(matproperties, PETSC_STATIC_ARRAY_LENGTH(matproperties), MPIU_INT, 0, comm));

191:   /* determine ownership ranges of matrix columns */
192:   PetscCall(PetscLayoutCreate(comm, &layout));
193:   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
194:   PetscCall(PetscLayoutSetSize(layout, matproperties[1]));
195:   PetscCall(PetscLayoutSetUp(layout));
196:   PetscCall(PetscLayoutGetLocalSize(layout, &n));
197:   PetscCall(PetscLayoutDestroy(&layout));

199:   /* determine ownership ranges of matrix rows */
200:   PetscCall(PetscLayoutCreate(comm, &layout));
201:   PetscCall(PetscLayoutSetBlockSize(layout, matproperties[2]));
202:   PetscCall(PetscLayoutSetSize(layout, matproperties[0]));
203:   PetscCall(PetscLayoutSetUp(layout));
204:   PetscCall(PetscLayoutGetLocalSize(layout, &m));

206:   /* copy over the matrix nonzero structure and values */
207:   if (pc) {
208:     NZ      = km->NZ;
209:     NZdispl = km->NZdispl;
210:     PetscCall(PetscLayoutGetRanges(layout, &range));
211:     PetscCall(MatGetRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
212:     for (i = 0; i < size; i++) {
213:       sendcounti[i] = (PetscMPIInt)(1 + range[i + 1] - range[i]);
214:       NZ[i]         = (PetscMPIInt)(IA[range[i + 1]] - IA[range[i]]);
215:     }
216:     displi[0]  = 0;
217:     NZdispl[0] = 0;
218:     for (j = 1; j < size; j++) {
219:       displi[j]  = displi[j - 1] + sendcounti[j - 1] - 1;
220:       NZdispl[j] = NZdispl[j - 1] + NZ[j - 1];
221:     }
222:     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
223:   }
224:   PetscCall(PetscLayoutDestroy(&layout));
225:   PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm));

227:   PetscCall(PetscMalloc3(n + 1, &ia, nz, &ja, nz, &a));
228:   PetscCallMPI(MPI_Scatterv(IA, sendcounti, displi, MPIU_INT, ia, n + 1, MPIU_INT, 0, comm));
229:   PetscCallMPI(MPI_Scatterv(JA, NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0, comm));
230:   PetscCallMPI(MPI_Scatterv(sa, NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));

232:   if (pc) {
233:     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));
234:     PetscCall(MatRestoreRowIJ(sA, 0, PETSC_FALSE, PETSC_FALSE, NULL, &IA, &JA, NULL));
235:   }

237:   for (j = 1; j < n + 1; j++) ia[j] -= ia[0];
238:   ia[0] = 0;
239:   PetscCall(PetscLogStagePush(PCMPIStage));
240:   PetscCall(MatCreate(comm, &A));
241:   if (matproperties[7] > 0) {
242:     if (!pc) PetscCall(PetscMalloc1(matproperties[7] + 1, &cprefix));
243:     PetscCallMPI(MPI_Bcast(cprefix, matproperties[7] + 1, MPI_CHAR, 0, comm));
244:     PetscCall(MatSetOptionsPrefix(A, cprefix));
245:     PetscCall(PetscFree(cprefix));
246:   }
247:   PetscCall(MatAppendOptionsPrefix(A, "mpi_linear_solver_server_"));
248:   PetscCall(MatSetSizes(A, m, n, matproperties[0], matproperties[1]));
249:   PetscCall(MatSetType(A, MATMPIAIJ));
250:   PetscCall(MatMPIAIJSetPreallocationCSR(A, ia, ja, a));
251:   PetscCall(MatSetBlockSize(A, matproperties[2]));

253:   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
254:   if (matproperties[4]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[4] == 1 ? PETSC_TRUE : PETSC_FALSE));
255:   if (matproperties[5]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[5] == 1 ? PETSC_TRUE : PETSC_FALSE));
256:   if (matproperties[6]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[6] == 1 ? PETSC_TRUE : PETSC_FALSE));

258:   PetscCall(PetscFree3(ia, ja, a));
259:   PetscCall(KSPSetOperators(ksp, A, A));
260:   if (!ksp->vec_sol) PetscCall(MatCreateVecs(A, &ksp->vec_sol, &ksp->vec_rhs));
261:   PetscCall(PetscLogStagePop());
262:   if (pc) { /* needed for scatterv/gatherv of rhs and solution */
263:     const PetscInt *range;

265:     PetscCall(VecGetOwnershipRanges(ksp->vec_sol, &range));
266:     for (i = 0; i < size; i++) {
267:       km->sendcount[i] = (PetscMPIInt)(range[i + 1] - range[i]);
268:       km->displ[i]     = (PetscMPIInt)range[i];
269:     }
270:   }
271:   PetscCall(MatDestroy(&A));
272:   PetscCall(KSPSetFromOptions(ksp));
273:   PetscFunctionReturn(PETSC_SUCCESS);
274: }

276: static PetscErrorCode PCMPIUpdateMatValues(PC pc)
277: {
278:   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
279:   KSP                ksp;
280:   Mat                sA, A;
281:   MPI_Comm           comm = PC_MPI_COMM_WORLD;
282:   PetscScalar       *a;
283:   PetscCount         nz;
284:   const PetscScalar *sa = NULL;
285:   PetscMPIInt        size;
286:   PetscInt           matproperties[4] = {0, 0, 0, 0};

288:   PetscFunctionBegin;
289:   if (pc) {
290:     PetscCall(PCGetOperators(pc, &sA, &sA));
291:     PetscCall(MatSeqAIJGetArrayRead(sA, &sa));
292:   }
293:   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
294:   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
295:   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));
296:   PetscCallMPI(MPI_Comm_size(comm, &size));
297:   PCMPIMatCounts[size - 1]++;
298:   PetscCall(KSPGetOperators(ksp, NULL, &A));
299:   PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
300:   PetscCall(PetscMalloc1(nz, &a));
301:   PetscCallMPI(MPI_Scatterv(sa, pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0, comm));
302:   if (pc) {
303:     PetscBool isset, issymmetric, ishermitian, isspd, isstructurallysymmetric;

305:     PetscCall(MatSeqAIJRestoreArrayRead(sA, &sa));

307:     PetscCall(MatIsSymmetricKnown(sA, &isset, &issymmetric));
308:     matproperties[0] = !isset ? 0 : (issymmetric ? 1 : 2);
309:     PetscCall(MatIsHermitianKnown(sA, &isset, &ishermitian));
310:     matproperties[1] = !isset ? 0 : (ishermitian ? 1 : 2);
311:     PetscCall(MatIsSPDKnown(sA, &isset, &isspd));
312:     matproperties[2] = !isset ? 0 : (isspd ? 1 : 2);
313:     PetscCall(MatIsStructurallySymmetricKnown(sA, &isset, &isstructurallysymmetric));
314:     matproperties[3] = !isset ? 0 : (isstructurallysymmetric ? 1 : 2);
315:   }
316:   PetscCall(MatUpdateMPIAIJWithArray(A, a));
317:   PetscCall(PetscFree(a));
318:   PetscCallMPI(MPI_Bcast(matproperties, 4, MPIU_INT, 0, comm));
319:   /* if any of these properties was previously set and is now not set this will result in incorrect properties in A since there is no way to unset a property */
320:   if (matproperties[0]) PetscCall(MatSetOption(A, MAT_SYMMETRIC, matproperties[0] == 1 ? PETSC_TRUE : PETSC_FALSE));
321:   if (matproperties[1]) PetscCall(MatSetOption(A, MAT_HERMITIAN, matproperties[1] == 1 ? PETSC_TRUE : PETSC_FALSE));
322:   if (matproperties[2]) PetscCall(MatSetOption(A, MAT_SPD, matproperties[2] == 1 ? PETSC_TRUE : PETSC_FALSE));
323:   if (matproperties[3]) PetscCall(MatSetOption(A, MAT_STRUCTURALLY_SYMMETRIC, matproperties[3] == 1 ? PETSC_TRUE : PETSC_FALSE));
324:   PetscFunctionReturn(PETSC_SUCCESS);
325: }

327: static PetscErrorCode PCMPISolve(PC pc, Vec B, Vec X)
328: {
329:   PC_MPI            *km = pc ? (PC_MPI *)pc->data : NULL;
330:   KSP                ksp;
331:   MPI_Comm           comm = PC_MPI_COMM_WORLD;
332:   const PetscScalar *sb   = NULL, *x;
333:   PetscScalar       *b, *sx = NULL;
334:   PetscInt           its, n;
335:   PetscMPIInt        size;

337:   PetscFunctionBegin;
338:   PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
339:   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
340:   PetscCall(PetscObjectGetComm((PetscObject)ksp, &comm));

342:   /* TODO: optimize code to not require building counts/displ every time */

344:   /* scatterv rhs */
345:   PetscCallMPI(MPI_Comm_size(comm, &size));
346:   if (pc) {
347:     PetscInt N;

349:     PCMPISolveCounts[size - 1]++;
350:     PetscCall(MatGetSize(pc->pmat, &N, NULL));
351:     PCMPISizes[size - 1] += N;
352:     PetscCall(VecGetArrayRead(B, &sb));
353:   }
354:   PetscCall(VecGetLocalSize(ksp->vec_rhs, &n));
355:   PetscCall(VecGetArray(ksp->vec_rhs, &b));
356:   PetscCallMPI(MPI_Scatterv(sb, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0, comm));
357:   PetscCall(VecRestoreArray(ksp->vec_rhs, &b));
358:   if (pc) PetscCall(VecRestoreArrayRead(B, &sb));

360:   PetscCall(PetscLogStagePush(PCMPIStage));
361:   PetscCall(KSPSolve(ksp, NULL, NULL));
362:   PetscCall(PetscLogStagePop());
363:   PetscCall(KSPGetIterationNumber(ksp, &its));
364:   PCMPIIterations[size - 1] += its;

366:   /* gather solution */
367:   PetscCall(VecGetArrayRead(ksp->vec_sol, &x));
368:   if (pc) PetscCall(VecGetArray(X, &sx));
369:   PetscCallMPI(MPI_Gatherv(x, n, MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0, comm));
370:   if (pc) PetscCall(VecRestoreArray(X, &sx));
371:   PetscCall(VecRestoreArrayRead(ksp->vec_sol, &x));
372:   PetscFunctionReturn(PETSC_SUCCESS);
373: }

375: static PetscErrorCode PCMPIDestroy(PC pc)
376: {
377:   PC_MPI  *km = pc ? (PC_MPI *)pc->data : NULL;
378:   KSP      ksp;
379:   MPI_Comm comm = PC_MPI_COMM_WORLD;

381:   PetscFunctionBegin;
382:   PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm));
383:   if (!ksp) PetscFunctionReturn(PETSC_SUCCESS);
384:   PetscCall(PetscLogStagePush(PCMPIStage));
385:   PetscCall(KSPDestroy(&ksp));
386:   PetscCall(PetscLogStagePop());
387:   PetscFunctionReturn(PETSC_SUCCESS);
388: }

390: PetscBool PCMPIServerActive = PETSC_FALSE;

392: /*@C
393:   PCMPIServerBegin - starts a server that runs on the `rank != 0` MPI processes waiting to process requests for
394:   parallel `KSP` solves and management of parallel `KSP` objects.

396:   Logically Collective on all MPI processes except rank 0

398:   Options Database Keys:
399: + -mpi_linear_solver_server      - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
400: - -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server at the conclusion of the program

402:   Level: developer

404:   Note:
405:   This is normally started automatically in `PetscInitialize()` when the option is provided

407:   See `PCMPI` for information on using the solver with a `KSP` object
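
  A simplified sketch of where these calls occur (the real calls are made inside `PetscInitialize()` and `PetscFinalize()` when the option is given):
.vb
  PCMPIServerBegin(); // MPI processes with rank != 0 block here processing requests; only rank 0 returns
  // ... rank 0 runs the user's sequential program, solving linear systems through the server ...
  PCMPIServerEnd();   // rank 0 tells the server processes to finalize and exit
.ve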

409:   Developer Notes:
410:   When called on MPI rank 0 this sets `PETSC_COMM_WORLD` to `PETSC_COMM_SELF` to allow a main program
411:   written with `PETSC_COMM_WORLD` to run correctly on the single rank while all the other ranks
412:   (that would normally be sharing `PETSC_COMM_WORLD`) run the solver server.

414:   Can this be integrated into the `PetscDevice` abstraction that is currently being developed?

416:   Conceivably `PCREDISTRIBUTE` could be organized in a similar manner to simplify its usage

418:   This could be implemented directly at the `KSP` level instead of using the `PCMPI` wrapper object

420:   The code could be extended to allow an MPI + OpenMP application to use the linear solver server concept across all shared-memory
421:   nodes with a single MPI process per node for the user application but multiple MPI processes per node for the linear solver.

423:   The concept could also be extended for users' callbacks for `SNES`, `TS`, and `Tao` where, for example, `SNESSolve()` runs on
424:   all MPI processes but the user callback only runs on one MPI process per node.

426:   PETSc could also be extended with an MPI-less API that provides access to PETSc's solvers without any reference to MPI, essentially removing
427:   the `MPI_Comm` argument from PETSc calls.

429: .seealso: [](sec_pcmpi), `PCMPIServerEnd()`, `PCMPI`, `KSPCheckPCMPI()`
430: @*/
431: PetscErrorCode PCMPIServerBegin(void)
432: {
433:   PetscMPIInt rank;

435:   PetscFunctionBegin;
436:   PetscCall(PetscInfo(NULL, "Starting MPI Linear Solver Server\n"));
437:   if (PetscDefined(USE_SINGLE_LIBRARY)) {
438:     PetscCall(VecInitializePackage());
439:     PetscCall(MatInitializePackage());
440:     PetscCall(DMInitializePackage());
441:     PetscCall(PCInitializePackage());
442:     PetscCall(KSPInitializePackage());
443:     PetscCall(SNESInitializePackage());
444:     PetscCall(TSInitializePackage());
445:     PetscCall(TaoInitializePackage());
446:   }
447:   PetscCall(PetscLogStageRegister("PCMPI", &PCMPIStage));

449:   PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD, &rank));
450:   if (rank == 0) {
451:     PETSC_COMM_WORLD  = PETSC_COMM_SELF;
452:     PCMPIServerActive = PETSC_TRUE;
453:     PetscFunctionReturn(PETSC_SUCCESS);
454:   }

456:   while (PETSC_TRUE) {
457:     PCMPICommand request = PCMPI_CREATE;
458:     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
459:     switch (request) {
460:     case PCMPI_CREATE:
461:       PetscCall(PCMPICreate(NULL));
462:       break;
463:     case PCMPI_SET_MAT:
464:       PetscCall(PCMPISetMat(NULL));
465:       break;
466:     case PCMPI_UPDATE_MAT_VALUES:
467:       PetscCall(PCMPIUpdateMatValues(NULL));
468:       break;
469:     case PCMPI_VIEW:
470:       // PetscCall(PCMPIView(NULL));
471:       break;
472:     case PCMPI_SOLVE:
473:       PetscCall(PCMPISolve(NULL, NULL, NULL));
474:       break;
475:     case PCMPI_DESTROY:
476:       PetscCall(PCMPIDestroy(NULL));
477:       break;
478:     case PCMPI_EXIT:
479:       PetscCall(PetscFinalize());
480:       exit(0); /* not sure if this is a good idea, but cannot return because it will run the user's main program */
481:       break;
482:     default:
483:       break;
484:     }
485:   }
486:   PetscFunctionReturn(PETSC_SUCCESS);
487: }

489: /*@C
490:   PCMPIServerEnd - ends a server that runs on the `rank != 0` MPI processes waiting to process requests for
491:   parallel `KSP` solves and management of parallel `KSP` objects.

493:   Logically Collective on all MPI ranks except 0

495:   Level: developer

497:   Note:
498:   This is normally ended automatically in `PetscFinalize()` when the option `-mpi_linear_solver_server` is provided

500: .seealso: [](sec_pcmpi), `PCMPIServerBegin()`, `PCMPI`, `KSPCheckPCMPI()`
501: @*/
502: PetscErrorCode PCMPIServerEnd(void)
503: {
504:   PCMPICommand request = PCMPI_EXIT;

506:   PetscFunctionBegin;
507:   if (PetscGlobalRank == 0) {
508:     PetscViewer       viewer = NULL;
509:     PetscViewerFormat format;

511:     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD));
512:     PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */
513:     PetscOptionsBegin(PETSC_COMM_SELF, NULL, "MPI linear solver server options", NULL);
514:     PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view", "View information about system solved with the server", "PCMPI", &viewer, &format, NULL));
515:     PetscOptionsEnd();
516:     if (viewer) {
517:       PetscBool isascii;

519:       PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
520:       if (isascii) {
521:         PetscMPIInt size;
522:         PetscMPIInt i;

524:         PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
525:         PetscCall(PetscViewerASCIIPrintf(viewer, "MPI linear solver server statistics:\n"));
526:         PetscCall(PetscViewerASCIIPrintf(viewer, "    Ranks        KSPSolve()s     Mats        KSPs       Avg. Size      Avg. Its\n"));
527:         if (PCMPIKSPCountsSeq) {
528:           PetscCall(PetscViewerASCIIPrintf(viewer, "  Sequential         %" PetscInt_FMT "                         %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "\n", PCMPISolveCountsSeq, PCMPIKSPCountsSeq, PCMPISizesSeq / PCMPISolveCountsSeq, PCMPIIterationsSeq / PCMPISolveCountsSeq));
529:         }
530:         for (i = 0; i < size; i++) {
531:           if (PCMPIKSPCounts[i]) {
532:             PetscCall(PetscViewerASCIIPrintf(viewer, "     %d               %" PetscInt_FMT "            %" PetscInt_FMT "           %" PetscInt_FMT "            %" PetscInt_FMT "            %" PetscInt_FMT "\n", i + 1, PCMPISolveCounts[i], PCMPIMatCounts[i], PCMPIKSPCounts[i], PCMPISizes[i] / PCMPISolveCounts[i], PCMPIIterations[i] / PCMPISolveCounts[i]));
533:           }
534:         }
535:       }
536:       PetscCall(PetscViewerDestroy(&viewer));
537:     }
538:   }
539:   PetscCall(PCMPICommsDestroy());
540:   PCMPIServerActive = PETSC_FALSE;
541:   PetscFunctionReturn(PETSC_SUCCESS);
542: }

544: /*
545:     This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0
546:     because, for example, the problem is small. It is more efficient because it does not require copying any data
547: */
548: static PetscErrorCode PCSetUp_Seq(PC pc)
549: {
550:   PC_MPI     *km = (PC_MPI *)pc->data;
551:   Mat         sA;
552:   const char *prefix;
553:   char       *found = NULL, *cprefix;

555:   PetscFunctionBegin;
556:   PetscCall(PCGetOperators(pc, NULL, &sA));
557:   PetscCall(PCGetOptionsPrefix(pc, &prefix));
558:   PetscCall(KSPCreate(PETSC_COMM_SELF, &km->ksps[0]));
559:   PetscCall(KSPSetNestLevel(km->ksps[0], 1));
560:   PetscCall(PetscObjectSetTabLevel((PetscObject)km->ksps[0], 1));

562:   /* Created KSP gets prefix of PC minus the mpi_linear_solver_server_ portion */
563:   PetscCall(PCGetOptionsPrefix(pc, &prefix));
564:   PetscCheck(prefix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing required prefix");
565:   PetscCall(PetscStrallocpy(prefix, &cprefix));
566:   PetscCall(PetscStrstr(cprefix, "mpi_linear_solver_server_", &found));
567:   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI missing mpi_linear_solver_server_ portion of prefix");
568:   *found = 0;
569:   PetscCall(KSPSetOptionsPrefix(km->ksps[0], cprefix));
570:   PetscCall(PetscFree(cprefix));

572:   PetscCall(KSPSetOperators(km->ksps[0], sA, sA));
573:   PetscCall(KSPSetFromOptions(km->ksps[0]));
574:   PetscCall(KSPSetUp(km->ksps[0]));
575:   PetscCall(PetscInfo((PetscObject)pc, "MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"));
576:   PCMPIKSPCountsSeq++;
577:   PetscFunctionReturn(PETSC_SUCCESS);
578: }

580: static PetscErrorCode PCApply_Seq(PC pc, Vec b, Vec x)
581: {
582:   PC_MPI  *km = (PC_MPI *)pc->data;
583:   PetscInt its, n;
584:   Mat      A;

586:   PetscFunctionBegin;
587:   PetscCall(KSPSolve(km->ksps[0], b, x));
588:   PetscCall(KSPGetIterationNumber(km->ksps[0], &its));
589:   PCMPISolveCountsSeq++;
590:   PCMPIIterationsSeq += its;
591:   PetscCall(KSPGetOperators(km->ksps[0], NULL, &A));
592:   PetscCall(MatGetSize(A, &n, NULL));
593:   PCMPISizesSeq += n;
594:   PetscFunctionReturn(PETSC_SUCCESS);
595: }

597: static PetscErrorCode PCView_Seq(PC pc, PetscViewer viewer)
598: {
599:   PC_MPI *km = (PC_MPI *)pc->data;

601:   PetscFunctionBegin;
602:   PetscCall(PetscViewerASCIIPrintf(viewer, "Running MPI linear solver server directly on rank 0 due to its small size\n"));
603:   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
604:   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to display statistics on all the solves ***\n"));
605:   PetscFunctionReturn(PETSC_SUCCESS);
606: }

608: static PetscErrorCode PCDestroy_Seq(PC pc)
609: {
610:   PC_MPI *km = (PC_MPI *)pc->data;

612:   PetscFunctionBegin;
613:   PetscCall(KSPDestroy(&km->ksps[0]));
614:   PetscCall(PetscFree(pc->data));
615:   PetscFunctionReturn(PETSC_SUCCESS);
616: }

618: /*
619:      PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and
620:      right hand side to the parallel PC
621: */
622: static PetscErrorCode PCSetUp_MPI(PC pc)
623: {
624:   PC_MPI      *km = (PC_MPI *)pc->data;
625:   PetscMPIInt  rank, size;
626:   PCMPICommand request;
627:   PetscBool    newmatrix = PETSC_FALSE;

629:   PetscFunctionBegin;
630:   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
631:   PetscCheck(rank == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?");
632:   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));

634:   if (!pc->setupcalled) {
635:     if (!km->alwaysuseserver) {
636:       PetscInt n;
637:       Mat      sA;
638:       /* short circuit for small systems */
639:       PetscCall(PCGetOperators(pc, &sA, &sA));
640:       PetscCall(MatGetSize(sA, &n, NULL));
641:       if (n < 2 * km->mincntperrank - 1 || size == 1) {
642:         pc->ops->setup   = NULL;
643:         pc->ops->apply   = PCApply_Seq;
644:         pc->ops->destroy = PCDestroy_Seq;
645:         pc->ops->view    = PCView_Seq;
646:         PetscCall(PCSetUp_Seq(pc));
647:         PetscFunctionReturn(PETSC_SUCCESS);
648:       }
649:     }

651:     request = PCMPI_CREATE;
652:     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
653:     PetscCall(PCMPICreate(pc));
654:     newmatrix = PETSC_TRUE;
655:   }
656:   if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE;

658:   if (newmatrix) {
659:     PetscCall(PetscInfo((PetscObject)pc, "New matrix or matrix has changed nonzero structure\n"));
660:     request = PCMPI_SET_MAT;
661:     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
662:     PetscCall(PCMPISetMat(pc));
663:   } else {
664:     PetscCall(PetscInfo((PetscObject)pc, "Matrix has only changed nonzero values\n"));
665:     request = PCMPI_UPDATE_MAT_VALUES;
666:     PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
667:     PetscCall(PCMPIUpdateMatValues(pc));
668:   }
669:   PetscFunctionReturn(PETSC_SUCCESS);
670: }

672: static PetscErrorCode PCApply_MPI(PC pc, Vec b, Vec x)
673: {
674:   PCMPICommand request = PCMPI_SOLVE;

676:   PetscFunctionBegin;
677:   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
678:   PetscCall(PCMPISolve(pc, b, x));
679:   PetscFunctionReturn(PETSC_SUCCESS);
680: }

682: static PetscErrorCode PCDestroy_MPI(PC pc)
683: {
684:   PCMPICommand request = PCMPI_DESTROY;

686:   PetscFunctionBegin;
687:   PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD));
688:   PetscCall(PCMPIDestroy(pc));
689:   PetscCall(PetscFree(pc->data));
690:   PetscFunctionReturn(PETSC_SUCCESS);
691: }

693: /*
694:      PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer
695: */
696: static PetscErrorCode PCView_MPI(PC pc, PetscViewer viewer)
697: {
698:   PC_MPI     *km = (PC_MPI *)pc->data;
699:   MPI_Comm    comm;
700:   PetscMPIInt size;

702:   PetscFunctionBegin;
703:   PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0], &comm));
704:   PetscCallMPI(MPI_Comm_size(comm, &size));
705:   PetscCall(PetscViewerASCIIPrintf(viewer, "Size of MPI communicator used for MPI parallel KSP solve %d\n", size));
706:   PetscCall(PetscViewerASCIIPrintf(viewer, "Desired minimum number of nonzeros per rank for MPI parallel solve %d\n", (int)km->mincntperrank));
707:   PetscCall(PetscViewerASCIIPrintf(viewer, "*** Use -mpi_linear_solver_server_view to display statistics on all the solves ***\n"));
708:   PetscFunctionReturn(PETSC_SUCCESS);
709: }

711: static PetscErrorCode PCSetFromOptions_MPI(PC pc, PetscOptionItems *PetscOptionsObject)
712: {
713:   PC_MPI *km = (PC_MPI *)pc->data;

715:   PetscFunctionBegin;
716:   PetscOptionsHeadBegin(PetscOptionsObject, "MPI linear solver server options");
717:   PetscCall(PetscOptionsInt("-minimum_count_per_rank", "Desired minimum number of nonzeros per rank", "None", km->mincntperrank, &km->mincntperrank, NULL));
718:   PetscCall(PetscOptionsBool("-always_use_server", "Use the server even if only one rank is used for the solve (for debugging)", "None", km->alwaysuseserver, &km->alwaysuseserver, NULL));
719:   PetscOptionsHeadEnd();
720:   PetscFunctionReturn(PETSC_SUCCESS);
721: }

723: /*MC
724:      PCMPI - Calls an MPI parallel `KSP` to solve a linear system from user code running on one process

726:    Options Database Keys for the Server:
727: +  -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code
728: -  -mpi_linear_solver_server_view - displays information about all the linear systems solved by the MPI linear solver server

730:    Options Database Keys for a specific `KSP` object
731: +  -[any_ksp_prefix]_mpi_linear_solver_server_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for
732: -  -[any_ksp_prefix]_mpi_linear_solver_server_always_use_server - use the server solver code even if the particular system is solved on only one process (for debugging and testing purposes)

734:    Level: developer

736:    Notes:
737:    The options database prefix for the actual solver is any prefix provided before use to the original `KSP` with `KSPSetOptionsPrefix()`; most commonly no prefix is used.
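
   For example (the prefix "sub_" here is purely illustrative), if the application sets
.vb
   KSPSetOptionsPrefix(ksp, "sub_");
.ve
   then the server-side solver is controlled with options such as -sub_ksp_type, -sub_pc_type, and
   -sub_mpi_linear_solver_server_minimum_count_per_rank.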

739:    It can be particularly useful for user OpenMP code or potentially user GPU code.

741:    When the program is running with a single MPI process, it directly uses the provided matrix and right hand side
742:    and does not need to distribute the matrix and vector to the various MPI processes; thus it incurs no extra overhead over just using the `KSP` directly.

744:    The options for the actual solver `KSP` and `PC` must be controlled via the options database; calls to set options directly on the user-level `KSP` and `PC` have no effect
745:    because they are not the actual solver objects.

747:    When `-log_view` is used with this solver the events within the parallel solve are logged in their own stage. Some of the logging in the other
748:    stages will be confusing since the event times are only recorded on the 0th MPI rank, so the percentages of time reported for those events will be misleading.

750:    Developer Note:
751:    This `PCType` is never directly selected by the user; it is set when the option `-mpi_linear_solver_server` is used and the `PC` is at the outermost nesting of
752:    a `KSP`. The outermost `KSP` object is automatically set to `KSPPREONLY` and thus is not directly visible to the user.

754: .seealso: [](sec_pcmpi), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()`, `KSPCheckPCMPI()`
755: M*/
756: PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc)
757: {
758:   PC_MPI *km;
759:   char   *found = NULL;

761:   PetscFunctionBegin;
762:   PetscCall(PetscStrstr(((PetscObject)pc)->prefix, "mpi_linear_solver_server_", &found));
763:   PetscCheck(found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCMPI object prefix does not have mpi_linear_solver_server_");

765:   /* material from PCSetType() */
766:   PetscTryTypeMethod(pc, destroy);
767:   pc->ops->destroy = NULL;
768:   pc->data         = NULL;

770:   PetscCall(PetscFunctionListDestroy(&((PetscObject)pc)->qlist));
771:   PetscCall(PetscMemzero(pc->ops, sizeof(struct _PCOps)));
772:   pc->modifysubmatrices  = NULL;
773:   pc->modifysubmatricesP = NULL;
774:   pc->setupcalled        = 0;

776:   PetscCall(PetscNew(&km));
777:   pc->data = (void *)km;

779:   km->mincntperrank = 10000;

781:   pc->ops->setup          = PCSetUp_MPI;
782:   pc->ops->apply          = PCApply_MPI;
783:   pc->ops->destroy        = PCDestroy_MPI;
784:   pc->ops->view           = PCView_MPI;
785:   pc->ops->setfromoptions = PCSetFromOptions_MPI;
786:   PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCMPI));
787:   PetscFunctionReturn(PETSC_SUCCESS);
788: }