Actual source code: mpidense.c
1: /*
2: Basic functions for parallel dense matrices.
3: Portions of this code are under:
4: Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
5: */
7: #include <../src/mat/impls/dense/mpi/mpidense.h>
8: #include <../src/mat/impls/aij/mpi/mpiaij.h>
9: #include <petscblaslapack.h>
11: /*@
12: MatDenseGetLocalMatrix - For a `MATMPIDENSE` or `MATSEQDENSE` matrix, returns the sequential
13: matrix that represents the operator. For a sequential matrix it returns the matrix itself.
15: Input Parameter:
16: . A - the sequential or MPI `MATDENSE` matrix
18: Output Parameter:
19: . B - the inner matrix
21: Level: intermediate
23: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MATMPIDENSE`, `MATSEQDENSE`
24: @*/
25: PetscErrorCode MatDenseGetLocalMatrix(Mat A, Mat *B)
26: {
27: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
28: PetscBool flg;
30: PetscFunctionBegin;
32: PetscAssertPointer(B, 2);
33: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIDENSE, &flg));
34: if (flg) *B = mat->A;
35: else {
36: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQDENSE, &flg));
37: PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)A)->type_name);
38: *B = A;
39: }
40: PetscFunctionReturn(PETSC_SUCCESS);
41: }
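/*
   Illustrative usage sketch (not part of the original source): obtain the local
   sequential block of a MATMPIDENSE matrix and query its leading dimension. The
   matrix A is assumed to have been created and assembled by the caller.

     Mat      Aloc;
     PetscInt lda;

     PetscCall(MatDenseGetLocalMatrix(A, &Aloc));   // mat->A for MATMPIDENSE, A itself for MATSEQDENSE
     PetscCall(MatDenseGetLDA(Aloc, &lda));         // leading dimension of the local column-major array
*/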
43: static PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
44: {
45: Mat_MPIDense *Amat = (Mat_MPIDense *)A->data;
46: Mat_MPIDense *Bmat = (Mat_MPIDense *)B->data;
48: PetscFunctionBegin;
49: PetscCall(MatCopy(Amat->A, Bmat->A, s));
50: PetscFunctionReturn(PETSC_SUCCESS);
51: }
53: PetscErrorCode MatShift_MPIDense(Mat A, PetscScalar alpha)
54: {
55: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
56: PetscInt j, lda, rstart = A->rmap->rstart, rend = A->rmap->rend, rend2;
57: PetscScalar *v;
59: PetscFunctionBegin;
60: PetscCall(MatDenseGetArray(mat->A, &v));
61: PetscCall(MatDenseGetLDA(mat->A, &lda));
62: rend2 = PetscMin(rend, A->cmap->N);
63: if (rend2 > rstart) {
64: for (j = rstart; j < rend2; j++) v[j - rstart + j * lda] += alpha;
65: PetscCall(PetscLogFlops(rend2 - rstart));
66: }
67: PetscCall(MatDenseRestoreArray(mat->A, &v));
68: PetscFunctionReturn(PETSC_SUCCESS);
69: }
71: static PetscErrorCode MatGetRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
72: {
73: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
74: PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
76: PetscFunctionBegin;
77: PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
78: lrow = row - rstart;
79: PetscCall(MatGetRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
80: PetscFunctionReturn(PETSC_SUCCESS);
81: }
83: static PetscErrorCode MatRestoreRow_MPIDense(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
84: {
85: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
86: PetscInt lrow, rstart = A->rmap->rstart, rend = A->rmap->rend;
88: PetscFunctionBegin;
89: PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "only local rows");
90: lrow = row - rstart;
91: PetscCall(MatRestoreRow(mat->A, lrow, nz, (const PetscInt **)idx, (const PetscScalar **)v));
92: PetscFunctionReturn(PETSC_SUCCESS);
93: }
95: static PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A, Mat *a)
96: {
97: Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
98: PetscInt m = A->rmap->n, rstart = A->rmap->rstart;
99: PetscScalar *array;
100: MPI_Comm comm;
101: PetscBool flg;
102: Mat B;
104: PetscFunctionBegin;
105: PetscCall(MatHasCongruentLayouts(A, &flg));
106: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only square matrices supported.");
107: PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B));
108: if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
109: #if PetscDefined(HAVE_CUDA)
110: PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSECUDA, &flg));
111: PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSECUDA);
112: #elif PetscDefined(HAVE_HIP)
113: PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A, MATSEQDENSEHIP, &flg));
114: PetscCheck(!flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature", MATSEQDENSEHIP);
115: #endif
116: PetscCall(PetscObjectGetComm((PetscObject)(mdn->A), &comm));
117: PetscCall(MatCreate(comm, &B));
118: PetscCall(MatSetSizes(B, m, m, m, m));
119: PetscCall(MatSetType(B, ((PetscObject)mdn->A)->type_name));
120: PetscCall(MatDenseGetArrayRead(mdn->A, (const PetscScalar **)&array));
121: PetscCall(MatSeqDenseSetPreallocation(B, array + m * rstart));
122: PetscCall(MatDenseRestoreArrayRead(mdn->A, (const PetscScalar **)&array));
123: PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
124: *a = B;
125: PetscCall(MatDestroy(&B));
126: } else *a = B;
127: PetscFunctionReturn(PETSC_SUCCESS);
128: }
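/*
   Illustrative usage sketch (not part of the original source): fetch the
   on-process diagonal block of a square MATMPIDENSE matrix. The block is
   cached on A (composed as "DiagonalBlock" above) and is owned by A, so the
   caller must not destroy it.

     Mat diag;

     PetscCall(MatGetDiagonalBlock(A, &diag));
     PetscCall(MatView(diag, PETSC_VIEWER_STDOUT_SELF));
*/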
130: static PetscErrorCode MatSetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], const PetscScalar v[], InsertMode addv)
131: {
132: Mat_MPIDense *A = (Mat_MPIDense *)mat->data;
133: PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
134: PetscBool roworiented = A->roworiented;
136: PetscFunctionBegin;
137: for (i = 0; i < m; i++) {
138: if (idxm[i] < 0) continue;
139: PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
140: if (idxm[i] >= rstart && idxm[i] < rend) {
141: row = idxm[i] - rstart;
142: if (roworiented) {
143: PetscCall(MatSetValues(A->A, 1, &row, n, idxn, v + i * n, addv));
144: } else {
145: for (j = 0; j < n; j++) {
146: if (idxn[j] < 0) continue;
147: PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
148: PetscCall(MatSetValues(A->A, 1, &row, 1, &idxn[j], v + i + j * m, addv));
149: }
150: }
151: } else if (!A->donotstash) {
152: mat->assembled = PETSC_FALSE;
153: if (roworiented) {
154: PetscCall(MatStashValuesRow_Private(&mat->stash, idxm[i], n, idxn, v + i * n, PETSC_FALSE));
155: } else {
156: PetscCall(MatStashValuesCol_Private(&mat->stash, idxm[i], n, idxn, v + i, m, PETSC_FALSE));
157: }
158: }
159: }
160: PetscFunctionReturn(PETSC_SUCCESS);
161: }
163: static PetscErrorCode MatGetValues_MPIDense(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
164: {
165: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
166: PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend, row;
168: PetscFunctionBegin;
169: for (i = 0; i < m; i++) {
170: if (idxm[i] < 0) continue; /* negative row */
171: PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large");
172: if (idxm[i] >= rstart && idxm[i] < rend) {
173: row = idxm[i] - rstart;
174: for (j = 0; j < n; j++) {
175: if (idxn[j] < 0) continue; /* negative column */
176: PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large");
177: PetscCall(MatGetValues(mdn->A, 1, &row, 1, &idxn[j], v + i * n + j));
178: }
179: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
180: }
181: PetscFunctionReturn(PETSC_SUCCESS);
182: }
184: static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A, PetscInt *lda)
185: {
186: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
188: PetscFunctionBegin;
189: PetscCall(MatDenseGetLDA(a->A, lda));
190: PetscFunctionReturn(PETSC_SUCCESS);
191: }
193: static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A, PetscInt lda)
194: {
195: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
196: MatType mtype = MATSEQDENSE;
198: PetscFunctionBegin;
199: if (!a->A) {
200: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
201: PetscCall(PetscLayoutSetUp(A->rmap));
202: PetscCall(PetscLayoutSetUp(A->cmap));
203: PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
204: PetscCall(MatSetSizes(a->A, A->rmap->n, A->cmap->N, A->rmap->n, A->cmap->N));
205: #if PetscDefined(HAVE_CUDA)
206: PetscBool iscuda;
207: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSECUDA, &iscuda));
208: if (iscuda) mtype = MATSEQDENSECUDA;
209: #elif PetscDefined(HAVE_HIP)
210: PetscBool iship;
211: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIDENSEHIP, &iship));
212: if (iship) mtype = MATSEQDENSEHIP;
213: #endif
214: PetscCall(MatSetType(a->A, mtype));
215: }
216: PetscCall(MatDenseSetLDA(a->A, lda));
217: PetscFunctionReturn(PETSC_SUCCESS);
218: }
220: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A, PetscScalar **array)
221: {
222: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
224: PetscFunctionBegin;
225: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
226: PetscCall(MatDenseGetArray(a->A, array));
227: PetscFunctionReturn(PETSC_SUCCESS);
228: }
230: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A, const PetscScalar **array)
231: {
232: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
234: PetscFunctionBegin;
235: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
236: PetscCall(MatDenseGetArrayRead(a->A, array));
237: PetscFunctionReturn(PETSC_SUCCESS);
238: }
240: static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A, PetscScalar **array)
241: {
242: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
244: PetscFunctionBegin;
245: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
246: PetscCall(MatDenseGetArrayWrite(a->A, array));
247: PetscFunctionReturn(PETSC_SUCCESS);
248: }
250: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A, const PetscScalar *array)
251: {
252: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
254: PetscFunctionBegin;
255: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
256: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
257: PetscCall(MatDensePlaceArray(a->A, array));
258: PetscFunctionReturn(PETSC_SUCCESS);
259: }
261: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
262: {
263: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
265: PetscFunctionBegin;
266: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
267: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
268: PetscCall(MatDenseResetArray(a->A));
269: PetscFunctionReturn(PETSC_SUCCESS);
270: }
272: static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A, const PetscScalar *array)
273: {
274: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
276: PetscFunctionBegin;
277: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
278: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
279: PetscCall(MatDenseReplaceArray(a->A, array));
280: PetscFunctionReturn(PETSC_SUCCESS);
281: }
283: static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
284: {
285: Mat_MPIDense *mat = (Mat_MPIDense *)A->data, *newmatd;
286: PetscInt lda, i, j, rstart, rend, nrows, ncols, Ncols, nlrows, nlcols;
287: const PetscInt *irow, *icol;
288: const PetscScalar *v;
289: PetscScalar *bv;
290: Mat newmat;
291: IS iscol_local;
292: MPI_Comm comm_is, comm_mat;
294: PetscFunctionBegin;
295: PetscCall(PetscObjectGetComm((PetscObject)A, &comm_mat));
296: PetscCall(PetscObjectGetComm((PetscObject)iscol, &comm_is));
297: PetscCheck(comm_mat == comm_is, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "IS communicator must match matrix communicator");
299: PetscCall(ISAllGather(iscol, &iscol_local));
300: PetscCall(ISGetIndices(isrow, &irow));
301: PetscCall(ISGetIndices(iscol_local, &icol));
302: PetscCall(ISGetLocalSize(isrow, &nrows));
303: PetscCall(ISGetLocalSize(iscol, &ncols));
304: PetscCall(ISGetSize(iscol, &Ncols)); /* global number of columns, size of iscol_local */
306: /* No parallel redistribution is currently supported. Each index set should really be checked
307: to confirm that it is OK. ... Currently only submatrices with the same row partitioning as the
308: original matrix are supported. */
310: PetscCall(MatGetLocalSize(A, &nlrows, &nlcols));
311: PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
313: /* Check submatrix call */
314: if (scall == MAT_REUSE_MATRIX) {
315: /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
316: /* Really need to test the row and column sizes! */
317: newmat = *B;
318: } else {
319: /* Create and fill new matrix */
320: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &newmat));
321: PetscCall(MatSetSizes(newmat, nrows, ncols, PETSC_DECIDE, Ncols));
322: PetscCall(MatSetType(newmat, ((PetscObject)A)->type_name));
323: PetscCall(MatMPIDenseSetPreallocation(newmat, NULL));
324: }
326: /* Now extract the data pointers and do the copy, column at a time */
327: newmatd = (Mat_MPIDense *)newmat->data;
328: PetscCall(MatDenseGetArray(newmatd->A, &bv));
329: PetscCall(MatDenseGetArrayRead(mat->A, &v));
330: PetscCall(MatDenseGetLDA(mat->A, &lda));
331: for (i = 0; i < Ncols; i++) {
332: const PetscScalar *av = v + lda * icol[i];
333: for (j = 0; j < nrows; j++) *bv++ = av[irow[j] - rstart];
334: }
335: PetscCall(MatDenseRestoreArrayRead(mat->A, &v));
336: PetscCall(MatDenseRestoreArray(newmatd->A, &bv));
338: /* Assemble the matrices so that the correct flags are set */
339: PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
340: PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
342: /* Free work space */
343: PetscCall(ISRestoreIndices(isrow, &irow));
344: PetscCall(ISRestoreIndices(iscol_local, &icol));
345: PetscCall(ISDestroy(&iscol_local));
346: *B = newmat;
347: PetscFunctionReturn(PETSC_SUCCESS);
348: }
350: static PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A, PetscScalar **array)
351: {
352: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
354: PetscFunctionBegin;
355: PetscCall(MatDenseRestoreArray(a->A, array));
356: PetscFunctionReturn(PETSC_SUCCESS);
357: }
359: static PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A, const PetscScalar **array)
360: {
361: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
363: PetscFunctionBegin;
364: PetscCall(MatDenseRestoreArrayRead(a->A, array));
365: PetscFunctionReturn(PETSC_SUCCESS);
366: }
368: static PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A, PetscScalar **array)
369: {
370: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
372: PetscFunctionBegin;
373: PetscCall(MatDenseRestoreArrayWrite(a->A, array));
374: PetscFunctionReturn(PETSC_SUCCESS);
375: }
377: static PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat, MatAssemblyType mode)
378: {
379: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
380: PetscInt nstash, reallocs;
382: PetscFunctionBegin;
383: if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
385: PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
386: PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
387: PetscCall(PetscInfo(mdn->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
388: PetscFunctionReturn(PETSC_SUCCESS);
389: }
391: static PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat, MatAssemblyType mode)
392: {
393: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
394: PetscInt i, *row, *col, flg, j, rstart, ncols;
395: PetscMPIInt n;
396: PetscScalar *val;
398: PetscFunctionBegin;
399: if (!mdn->donotstash && !mat->nooffprocentries) {
400: /* wait on receives */
401: while (1) {
402: PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
403: if (!flg) break;
405: for (i = 0; i < n;) {
406: /* Now identify the consecutive vals belonging to the same row */
407: for (j = i, rstart = row[j]; j < n; j++) {
408: if (row[j] != rstart) break;
409: }
410: if (j < n) ncols = j - i;
411: else ncols = n - i;
412: /* Now assemble all these values with a single function call */
413: PetscCall(MatSetValues_MPIDense(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
414: i = j;
415: }
416: }
417: PetscCall(MatStashScatterEnd_Private(&mat->stash));
418: }
420: PetscCall(MatAssemblyBegin(mdn->A, mode));
421: PetscCall(MatAssemblyEnd(mdn->A, mode));
422: PetscFunctionReturn(PETSC_SUCCESS);
423: }
425: static PetscErrorCode MatZeroEntries_MPIDense(Mat A)
426: {
427: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
429: PetscFunctionBegin;
430: PetscCall(MatZeroEntries(l->A));
431: PetscFunctionReturn(PETSC_SUCCESS);
432: }
434: static PetscErrorCode MatZeroRows_MPIDense(Mat A, PetscInt n, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
435: {
436: Mat_MPIDense *l = (Mat_MPIDense *)A->data;
437: PetscInt i, len, *lrows;
439: PetscFunctionBegin;
440: /* get locally owned rows */
441: PetscCall(PetscLayoutMapLocal(A->rmap, n, rows, &len, &lrows, NULL));
442: /* fix right hand side if needed */
443: if (x && b) {
444: const PetscScalar *xx;
445: PetscScalar *bb;
447: PetscCall(VecGetArrayRead(x, &xx));
448: PetscCall(VecGetArrayWrite(b, &bb));
449: for (i = 0; i < len; ++i) bb[lrows[i]] = diag * xx[lrows[i]];
450: PetscCall(VecRestoreArrayRead(x, &xx));
451: PetscCall(VecRestoreArrayWrite(b, &bb));
452: }
453: PetscCall(MatZeroRows(l->A, len, lrows, 0.0, NULL, NULL));
454: if (diag != 0.0) {
455: Vec d;
457: PetscCall(MatCreateVecs(A, NULL, &d));
458: PetscCall(VecSet(d, diag));
459: PetscCall(MatDiagonalSet(A, d, INSERT_VALUES));
460: PetscCall(VecDestroy(&d));
461: }
462: PetscCall(PetscFree(lrows));
463: PetscFunctionReturn(PETSC_SUCCESS);
464: }
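/*
   Illustrative usage sketch (not part of the original source): zero two global
   rows, put 1.0 on their diagonal entries, and update the right-hand side so
   that b[i] = diag * x[i] for each zeroed row i. The vectors x and b are
   assumed to be layout-compatible with A.

     PetscInt rows[] = {0, 5};

     PetscCall(MatZeroRows(A, 2, rows, 1.0, x, b));
*/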
466: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat, Vec, Vec);
467: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat, Vec, Vec, Vec);
468: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat, Vec, Vec);
469: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat, Vec, Vec, Vec);
471: static PetscErrorCode MatMult_MPIDense(Mat mat, Vec xx, Vec yy)
472: {
473: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
474: const PetscScalar *ax;
475: PetscScalar *ay;
476: PetscMemType axmtype, aymtype;
478: PetscFunctionBegin;
479: if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
480: PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
481: PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
482: PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
483: PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
484: PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
485: PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
486: PetscCall((*mdn->A->ops->mult)(mdn->A, mdn->lvec, yy));
487: PetscFunctionReturn(PETSC_SUCCESS);
488: }
490: static PetscErrorCode MatMultAdd_MPIDense(Mat mat, Vec xx, Vec yy, Vec zz)
491: {
492: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
493: const PetscScalar *ax;
494: PetscScalar *ay;
495: PetscMemType axmtype, aymtype;
497: PetscFunctionBegin;
498: if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
499: PetscCall(VecGetArrayReadAndMemType(xx, &ax, &axmtype));
500: PetscCall(VecGetArrayAndMemType(mdn->lvec, &ay, &aymtype));
501: PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPI_REPLACE));
502: PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ax, ay, MPI_REPLACE));
503: PetscCall(VecRestoreArrayAndMemType(mdn->lvec, &ay));
504: PetscCall(VecRestoreArrayReadAndMemType(xx, &ax));
505: PetscCall((*mdn->A->ops->multadd)(mdn->A, mdn->lvec, yy, zz));
506: PetscFunctionReturn(PETSC_SUCCESS);
507: }
509: static PetscErrorCode MatMultTranspose_MPIDense(Mat A, Vec xx, Vec yy)
510: {
511: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
512: const PetscScalar *ax;
513: PetscScalar *ay;
514: PetscMemType axmtype, aymtype;
516: PetscFunctionBegin;
517: if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
518: PetscCall(VecSet(yy, 0.0));
519: PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
520: PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
521: PetscCall(VecGetArrayAndMemType(yy, &ay, &aymtype));
522: PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
523: PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
524: PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
525: PetscCall(VecRestoreArrayAndMemType(yy, &ay));
526: PetscFunctionReturn(PETSC_SUCCESS);
527: }
529: static PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A, Vec xx, Vec yy, Vec zz)
530: {
531: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
532: const PetscScalar *ax;
533: PetscScalar *ay;
534: PetscMemType axmtype, aymtype;
536: PetscFunctionBegin;
537: if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
538: PetscCall(VecCopy(yy, zz));
539: PetscCall((*a->A->ops->multtranspose)(a->A, xx, a->lvec));
540: PetscCall(VecGetArrayReadAndMemType(a->lvec, &ax, &axmtype));
541: PetscCall(VecGetArrayAndMemType(zz, &ay, &aymtype));
542: PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx, MPIU_SCALAR, axmtype, ax, aymtype, ay, MPIU_SUM));
543: PetscCall(PetscSFReduceEnd(a->Mvctx, MPIU_SCALAR, ax, ay, MPIU_SUM));
544: PetscCall(VecRestoreArrayReadAndMemType(a->lvec, &ax));
545: PetscCall(VecRestoreArrayAndMemType(zz, &ay));
546: PetscFunctionReturn(PETSC_SUCCESS);
547: }
549: PetscErrorCode MatGetDiagonal_MPIDense(Mat A, Vec v)
550: {
551: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
552: PetscInt lda, len, i, nl, ng, m = A->rmap->n, radd;
553: PetscScalar *x;
554: const PetscScalar *av;
556: PetscFunctionBegin;
557: PetscCall(VecGetArray(v, &x));
558: PetscCall(VecGetSize(v, &ng));
559: PetscCheck(ng == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming mat and vec");
560: PetscCall(VecGetLocalSize(v, &nl));
561: len = PetscMin(a->A->rmap->n, a->A->cmap->n);
562: radd = A->rmap->rstart * m;
563: PetscCall(MatDenseGetArrayRead(a->A, &av));
564: PetscCall(MatDenseGetLDA(a->A, &lda));
565: for (i = 0; i < len; i++) x[i] = av[radd + i * lda + i];
566: PetscCall(MatDenseRestoreArrayRead(a->A, &av));
567: PetscCall(PetscArrayzero(x + i, nl - i));
568: PetscCall(VecRestoreArray(v, &x));
569: PetscFunctionReturn(PETSC_SUCCESS);
570: }
572: static PetscErrorCode MatDestroy_MPIDense(Mat mat)
573: {
574: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
576: PetscFunctionBegin;
577: PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
578: PetscCall(MatStashDestroy_Private(&mat->stash));
579: PetscCheck(!mdn->vecinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
580: PetscCheck(!mdn->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
581: PetscCall(MatDestroy(&mdn->A));
582: PetscCall(VecDestroy(&mdn->lvec));
583: PetscCall(PetscSFDestroy(&mdn->Mvctx));
584: PetscCall(VecDestroy(&mdn->cvec));
585: PetscCall(MatDestroy(&mdn->cmat));
587: PetscCall(PetscFree(mat->data));
588: PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
590: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", NULL));
591: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", NULL));
592: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", NULL));
593: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", NULL));
594: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", NULL));
595: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", NULL));
596: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", NULL));
597: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", NULL));
598: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", NULL));
599: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", NULL));
600: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", NULL));
601: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
602: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", NULL));
603: #if defined(PETSC_HAVE_ELEMENTAL)
604: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", NULL));
605: #endif
606: #if defined(PETSC_HAVE_SCALAPACK)
607: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", NULL));
608: #endif
609: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", NULL));
610: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", NULL));
611: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", NULL));
612: #if defined(PETSC_HAVE_CUDA)
613: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", NULL));
614: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", NULL));
615: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", NULL));
616: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensecuda_mpidense_C", NULL));
617: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensecuda_C", NULL));
618: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C", NULL));
619: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaij_C", NULL));
620: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C", NULL));
621: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArray_C", NULL));
622: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayRead_C", NULL));
623: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAGetArrayWrite_C", NULL));
624: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArray_C", NULL));
625: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayRead_C", NULL));
626: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDARestoreArrayWrite_C", NULL));
627: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAPlaceArray_C", NULL));
628: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAResetArray_C", NULL));
629: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDAReplaceArray_C", NULL));
630: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseCUDASetPreallocation_C", NULL));
631: #endif
632: #if defined(PETSC_HAVE_HIP)
633: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", NULL));
634: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", NULL));
635: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", NULL));
636: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidensehip_mpidense_C", NULL));
637: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidensehip_C", NULL));
638: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidensehip_C", NULL));
639: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaij_C", NULL));
640: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidensehip_mpiaijhipsparse_C", NULL));
641: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArray_C", NULL));
642: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayRead_C", NULL));
643: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPGetArrayWrite_C", NULL));
644: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArray_C", NULL));
645: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayRead_C", NULL));
646: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPRestoreArrayWrite_C", NULL));
647: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPPlaceArray_C", NULL));
648: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPResetArray_C", NULL));
649: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPReplaceArray_C", NULL));
650: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseHIPSetPreallocation_C", NULL));
651: #endif
652: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", NULL));
653: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", NULL));
654: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", NULL));
655: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", NULL));
656: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", NULL));
657: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", NULL));
658: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", NULL));
659: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", NULL));
660: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", NULL));
661: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", NULL));
663: PetscCall(PetscObjectCompose((PetscObject)mat, "DiagonalBlock", NULL));
664: PetscFunctionReturn(PETSC_SUCCESS);
665: }
667: PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat, PetscViewer);
669: #include <petscdraw.h>
670: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
671: {
672: Mat_MPIDense *mdn = (Mat_MPIDense *)mat->data;
673: PetscMPIInt rank;
674: PetscViewerType vtype;
675: PetscBool iascii, isdraw;
676: PetscViewer sviewer;
677: PetscViewerFormat format;
679: PetscFunctionBegin;
680: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
681: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
682: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
683: if (iascii) {
684: PetscCall(PetscViewerGetType(viewer, &vtype));
685: PetscCall(PetscViewerGetFormat(viewer, &format));
686: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
687: MatInfo info;
688: PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
689: PetscCall(PetscViewerASCIIPushSynchronized(viewer));
690: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
691: (PetscInt)info.memory));
692: PetscCall(PetscViewerFlush(viewer));
693: PetscCall(PetscViewerASCIIPopSynchronized(viewer));
694: if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx, viewer));
695: PetscFunctionReturn(PETSC_SUCCESS);
696: } else if (format == PETSC_VIEWER_ASCII_INFO) {
697: PetscFunctionReturn(PETSC_SUCCESS);
698: }
699: } else if (isdraw) {
700: PetscDraw draw;
701: PetscBool isnull;
703: PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
704: PetscCall(PetscDrawIsNull(draw, &isnull));
705: if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
706: }
708: {
709: /* assemble the entire matrix onto the first processor. */
710: Mat A;
711: PetscInt M = mat->rmap->N, N = mat->cmap->N, m, row, i, nz;
712: PetscInt *cols;
713: PetscScalar *vals;
715: PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
716: if (rank == 0) {
717: PetscCall(MatSetSizes(A, M, N, M, N));
718: } else {
719: PetscCall(MatSetSizes(A, 0, 0, M, N));
720: }
721: /* Since this is a temporary matrix, using MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
722: PetscCall(MatSetType(A, MATMPIDENSE));
723: PetscCall(MatMPIDenseSetPreallocation(A, NULL));
725: /* Copy the matrix ... This isn't the most efficient means,
726: but it's quick for now */
727: A->insertmode = INSERT_VALUES;
729: row = mat->rmap->rstart;
730: m = mdn->A->rmap->n;
731: for (i = 0; i < m; i++) {
732: PetscCall(MatGetRow_MPIDense(mat, row, &nz, &cols, &vals));
733: PetscCall(MatSetValues_MPIDense(A, 1, &row, nz, cols, vals, INSERT_VALUES));
734: PetscCall(MatRestoreRow_MPIDense(mat, row, &nz, &cols, &vals));
735: row++;
736: }
738: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
739: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
740: PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
741: if (rank == 0) {
742: PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense *)(A->data))->A, ((PetscObject)mat)->name));
743: PetscCall(MatView_SeqDense(((Mat_MPIDense *)(A->data))->A, sviewer));
744: }
745: PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
746: PetscCall(PetscViewerFlush(viewer));
747: PetscCall(MatDestroy(&A));
748: }
749: PetscFunctionReturn(PETSC_SUCCESS);
750: }
752: static PetscErrorCode MatView_MPIDense(Mat mat, PetscViewer viewer)
753: {
754: PetscBool iascii, isbinary, isdraw, issocket;
756: PetscFunctionBegin;
757: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
758: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
759: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
760: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
762: if (iascii || issocket || isdraw) {
763: PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat, viewer));
764: } else if (isbinary) PetscCall(MatView_Dense_Binary(mat, viewer));
765: PetscFunctionReturn(PETSC_SUCCESS);
766: }
768: static PetscErrorCode MatGetInfo_MPIDense(Mat A, MatInfoType flag, MatInfo *info)
769: {
770: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
771: Mat mdn = mat->A;
772: PetscLogDouble isend[5], irecv[5];
774: PetscFunctionBegin;
775: info->block_size = 1.0;
777: PetscCall(MatGetInfo(mdn, MAT_LOCAL, info));
779: isend[0] = info->nz_used;
780: isend[1] = info->nz_allocated;
781: isend[2] = info->nz_unneeded;
782: isend[3] = info->memory;
783: isend[4] = info->mallocs;
784: if (flag == MAT_LOCAL) {
785: info->nz_used = isend[0];
786: info->nz_allocated = isend[1];
787: info->nz_unneeded = isend[2];
788: info->memory = isend[3];
789: info->mallocs = isend[4];
790: } else if (flag == MAT_GLOBAL_MAX) {
791: PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
793: info->nz_used = irecv[0];
794: info->nz_allocated = irecv[1];
795: info->nz_unneeded = irecv[2];
796: info->memory = irecv[3];
797: info->mallocs = irecv[4];
798: } else if (flag == MAT_GLOBAL_SUM) {
799: PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
801: info->nz_used = irecv[0];
802: info->nz_allocated = irecv[1];
803: info->nz_unneeded = irecv[2];
804: info->memory = irecv[3];
805: info->mallocs = irecv[4];
806: }
807: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
808: info->fill_ratio_needed = 0;
809: info->factor_mallocs = 0;
810: PetscFunctionReturn(PETSC_SUCCESS);
811: }
813: static PetscErrorCode MatSetOption_MPIDense(Mat A, MatOption op, PetscBool flg)
814: {
815: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
817: PetscFunctionBegin;
818: switch (op) {
819: case MAT_NEW_NONZERO_LOCATIONS:
820: case MAT_NEW_NONZERO_LOCATION_ERR:
821: case MAT_NEW_NONZERO_ALLOCATION_ERR:
822: MatCheckPreallocated(A, 1);
823: PetscCall(MatSetOption(a->A, op, flg));
824: break;
825: case MAT_ROW_ORIENTED:
826: MatCheckPreallocated(A, 1);
827: a->roworiented = flg;
828: PetscCall(MatSetOption(a->A, op, flg));
829: break;
830: case MAT_FORCE_DIAGONAL_ENTRIES:
831: case MAT_KEEP_NONZERO_PATTERN:
832: case MAT_USE_HASH_TABLE:
833: case MAT_SORTED_FULL:
834: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
835: break;
836: case MAT_IGNORE_OFF_PROC_ENTRIES:
837: a->donotstash = flg;
838: break;
839: case MAT_SYMMETRIC:
840: case MAT_STRUCTURALLY_SYMMETRIC:
841: case MAT_HERMITIAN:
842: case MAT_SYMMETRY_ETERNAL:
843: case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
844: case MAT_SPD:
845: case MAT_IGNORE_LOWER_TRIANGULAR:
846: case MAT_IGNORE_ZERO_ENTRIES:
847: case MAT_SPD_ETERNAL:
848: /* if the diagonal matrix is square it inherits some of the properties above */
849: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
850: break;
851: default:
852: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
853: }
854: PetscFunctionReturn(PETSC_SUCCESS);
855: }
857: static PetscErrorCode MatDiagonalScale_MPIDense(Mat A, Vec ll, Vec rr)
858: {
859: Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
860: const PetscScalar *l;
861: PetscScalar x, *v, *vv, *r;
862: PetscInt i, j, s2a, s3a, s2, s3, m = mdn->A->rmap->n, n = mdn->A->cmap->n, lda;
864: PetscFunctionBegin;
865: PetscCall(MatDenseGetArray(mdn->A, &vv));
866: PetscCall(MatDenseGetLDA(mdn->A, &lda));
867: PetscCall(MatGetLocalSize(A, &s2, &s3));
868: if (ll) {
869: PetscCall(VecGetLocalSize(ll, &s2a));
870: PetscCheck(s2a == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
871: PetscCall(VecGetArrayRead(ll, &l));
872: for (i = 0; i < m; i++) {
873: x = l[i];
874: v = vv + i;
875: for (j = 0; j < n; j++) {
876: (*v) *= x;
877: v += lda;
878: }
879: }
880: PetscCall(VecRestoreArrayRead(ll, &l));
881: PetscCall(PetscLogFlops(1.0 * n * m));
882: }
883: if (rr) {
884: const PetscScalar *ar;
886: PetscCall(VecGetLocalSize(rr, &s3a));
887: PetscCheck(s3a == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Right scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s3a, s3);
888: PetscCall(VecGetArrayRead(rr, &ar));
889: if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
890: PetscCall(VecGetArray(mdn->lvec, &r));
891: PetscCall(PetscSFBcastBegin(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
892: PetscCall(PetscSFBcastEnd(mdn->Mvctx, MPIU_SCALAR, ar, r, MPI_REPLACE));
893: PetscCall(VecRestoreArrayRead(rr, &ar));
894: for (i = 0; i < n; i++) {
895: x = r[i];
896: v = vv + i * lda;
897: for (j = 0; j < m; j++) (*v++) *= x;
898: }
899: PetscCall(VecRestoreArray(mdn->lvec, &r));
900: PetscCall(PetscLogFlops(1.0 * n * m));
901: }
902: PetscCall(MatDenseRestoreArray(mdn->A, &vv));
903: PetscFunctionReturn(PETSC_SUCCESS);
904: }
906: static PetscErrorCode MatNorm_MPIDense(Mat A, NormType type, PetscReal *nrm)
907: {
908: Mat_MPIDense *mdn = (Mat_MPIDense *)A->data;
909: PetscInt i, j;
910: PetscMPIInt size;
911: PetscReal sum = 0.0;
912: const PetscScalar *av, *v;
914: PetscFunctionBegin;
915: PetscCall(MatDenseGetArrayRead(mdn->A, &av));
916: v = av;
917: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
918: if (size == 1) {
919: PetscCall(MatNorm(mdn->A, type, nrm));
920: } else {
921: if (type == NORM_FROBENIUS) {
922: for (i = 0; i < mdn->A->cmap->n * mdn->A->rmap->n; i++) {
923: sum += PetscRealPart(PetscConj(*v) * (*v));
924: v++;
925: }
926: PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
927: *nrm = PetscSqrtReal(*nrm);
928: PetscCall(PetscLogFlops(2.0 * mdn->A->cmap->n * mdn->A->rmap->n));
929: } else if (type == NORM_1) {
930: PetscReal *tmp, *tmp2;
931: PetscCall(PetscCalloc2(A->cmap->N, &tmp, A->cmap->N, &tmp2));
932: *nrm = 0.0;
933: v = av;
934: for (j = 0; j < mdn->A->cmap->n; j++) {
935: for (i = 0; i < mdn->A->rmap->n; i++) {
936: tmp[j] += PetscAbsScalar(*v);
937: v++;
938: }
939: }
940: PetscCall(MPIU_Allreduce(tmp, tmp2, A->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
941: for (j = 0; j < A->cmap->N; j++) {
942: if (tmp2[j] > *nrm) *nrm = tmp2[j];
943: }
944: PetscCall(PetscFree2(tmp, tmp2));
945: PetscCall(PetscLogFlops(A->cmap->n * A->rmap->n));
946: } else if (type == NORM_INFINITY) { /* max row norm */
947: PetscReal ntemp;
948: PetscCall(MatNorm(mdn->A, type, &ntemp));
949: PetscCall(MPIU_Allreduce(&ntemp, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
950: } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for two norm");
951: }
952: PetscCall(MatDenseRestoreArrayRead(mdn->A, &av));
953: PetscFunctionReturn(PETSC_SUCCESS);
954: }
956: static PetscErrorCode MatTranspose_MPIDense(Mat A, MatReuse reuse, Mat *matout)
957: {
958: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
959: Mat B;
960: PetscInt M = A->rmap->N, N = A->cmap->N, m, n, *rwork, rstart = A->rmap->rstart;
961: PetscInt j, i, lda;
962: PetscScalar *v;
964: PetscFunctionBegin;
965: if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
966: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
967: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
968: PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
969: PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
970: PetscCall(MatMPIDenseSetPreallocation(B, NULL));
971: } else B = *matout;
973: m = a->A->rmap->n;
974: n = a->A->cmap->n;
975: PetscCall(MatDenseGetArrayRead(a->A, (const PetscScalar **)&v));
976: PetscCall(MatDenseGetLDA(a->A, &lda));
977: PetscCall(PetscMalloc1(m, &rwork));
978: for (i = 0; i < m; i++) rwork[i] = rstart + i;
979: for (j = 0; j < n; j++) {
980: PetscCall(MatSetValues(B, 1, &j, m, rwork, v, INSERT_VALUES));
981: v += lda;
982: }
983: PetscCall(MatDenseRestoreArrayRead(a->A, (const PetscScalar **)&v));
984: PetscCall(PetscFree(rwork));
985: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
986: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
987: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
988: *matout = B;
989: } else {
990: PetscCall(MatHeaderMerge(A, &B));
991: }
992: PetscFunctionReturn(PETSC_SUCCESS);
993: }
995: static PetscErrorCode MatDuplicate_MPIDense(Mat, MatDuplicateOption, Mat *);
996: PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat, PetscScalar);
998: static PetscErrorCode MatSetUp_MPIDense(Mat A)
999: {
1000: PetscFunctionBegin;
1001: PetscCall(PetscLayoutSetUp(A->rmap));
1002: PetscCall(PetscLayoutSetUp(A->cmap));
1003: if (!A->preallocated) PetscCall(MatMPIDenseSetPreallocation(A, NULL));
1004: PetscFunctionReturn(PETSC_SUCCESS);
1005: }
1007: static PetscErrorCode MatAXPY_MPIDense(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
1008: {
1009: Mat_MPIDense *A = (Mat_MPIDense *)Y->data, *B = (Mat_MPIDense *)X->data;
1011: PetscFunctionBegin;
1012: PetscCall(MatAXPY(A->A, alpha, B->A, str));
1013: PetscFunctionReturn(PETSC_SUCCESS);
1014: }
1016: static PetscErrorCode MatConjugate_MPIDense(Mat mat)
1017: {
1018: Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1020: PetscFunctionBegin;
1021: PetscCall(MatConjugate(a->A));
1022: PetscFunctionReturn(PETSC_SUCCESS);
1023: }
1025: static PetscErrorCode MatRealPart_MPIDense(Mat A)
1026: {
1027: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1029: PetscFunctionBegin;
1030: PetscCall(MatRealPart(a->A));
1031: PetscFunctionReturn(PETSC_SUCCESS);
1032: }
1034: static PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1035: {
1036: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1038: PetscFunctionBegin;
1039: PetscCall(MatImaginaryPart(a->A));
1040: PetscFunctionReturn(PETSC_SUCCESS);
1041: }
1043: static PetscErrorCode MatGetColumnVector_MPIDense(Mat A, Vec v, PetscInt col)
1044: {
1045: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1047: PetscFunctionBegin;
1048: PetscCheck(a->A, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing local matrix");
1049: PetscCheck(a->A->ops->getcolumnvector, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Missing get column operation");
1050: PetscCall((*a->A->ops->getcolumnvector)(a->A, v, col));
1051: PetscFunctionReturn(PETSC_SUCCESS);
1052: }
1054: PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat, PetscInt, PetscReal *);
1056: static PetscErrorCode MatGetColumnReductions_MPIDense(Mat A, PetscInt type, PetscReal *reductions)
1057: {
1058: PetscInt i, m, n;
1059: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1060: PetscReal *work;
1062: PetscFunctionBegin;
1063: PetscCall(MatGetSize(A, &m, &n));
1064: PetscCall(PetscMalloc1(n, &work));
1065: if (type == REDUCTION_MEAN_REALPART) {
1066: PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_REALPART, work));
1067: } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1068: PetscCall(MatGetColumnReductions_SeqDense(a->A, (PetscInt)REDUCTION_SUM_IMAGINARYPART, work));
1069: } else {
1070: PetscCall(MatGetColumnReductions_SeqDense(a->A, type, work));
1071: }
1072: if (type == NORM_2) {
1073: for (i = 0; i < n; i++) work[i] *= work[i];
1074: }
1075: if (type == NORM_INFINITY) {
1076: PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, A->hdr.comm));
1077: } else {
1078: PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, A->hdr.comm));
1079: }
1080: PetscCall(PetscFree(work));
1081: if (type == NORM_2) {
1082: for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1083: } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1084: for (i = 0; i < n; i++) reductions[i] /= m;
1085: }
1086: PetscFunctionReturn(PETSC_SUCCESS);
1087: }
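/*
   Note on the reduction above (added commentary, not part of the original source):
   for NORM_2 each rank squares its local column norms, the squares are combined
   with MPI_SUM, and the square root is taken after the Allreduce, i.e.
   ||a_j||_2 = sqrt(sum over ranks of sum_i |a_ij|^2). Means are formed by summing
   the real or imaginary parts and dividing by the global row count m.
*/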
1089: static PetscErrorCode MatSetRandom_MPIDense(Mat x, PetscRandom rctx)
1090: {
1091: Mat_MPIDense *d = (Mat_MPIDense *)x->data;
1093: PetscFunctionBegin;
1094: PetscCall(MatSetRandom(d->A, rctx));
1095: #if defined(PETSC_HAVE_DEVICE)
1096: x->offloadmask = d->A->offloadmask;
1097: #endif
1098: PetscFunctionReturn(PETSC_SUCCESS);
1099: }
1101: static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A, PetscBool *missing, PetscInt *d)
1102: {
1103: PetscFunctionBegin;
1104: *missing = PETSC_FALSE;
1105: PetscFunctionReturn(PETSC_SUCCESS);
1106: }
1108: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1109: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1110: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat, Mat, PetscReal, Mat);
1111: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat, Mat, Mat);
1112: static PetscErrorCode MatEqual_MPIDense(Mat, Mat, PetscBool *);
1113: static PetscErrorCode MatLoad_MPIDense(Mat, PetscViewer);
1115: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1116: MatGetRow_MPIDense,
1117: MatRestoreRow_MPIDense,
1118: MatMult_MPIDense,
1119: /* 4*/ MatMultAdd_MPIDense,
1120: MatMultTranspose_MPIDense,
1121: MatMultTransposeAdd_MPIDense,
1122: NULL,
1123: NULL,
1124: NULL,
1125: /* 10*/ NULL,
1126: NULL,
1127: NULL,
1128: NULL,
1129: MatTranspose_MPIDense,
1130: /* 15*/ MatGetInfo_MPIDense,
1131: MatEqual_MPIDense,
1132: MatGetDiagonal_MPIDense,
1133: MatDiagonalScale_MPIDense,
1134: MatNorm_MPIDense,
1135: /* 20*/ MatAssemblyBegin_MPIDense,
1136: MatAssemblyEnd_MPIDense,
1137: MatSetOption_MPIDense,
1138: MatZeroEntries_MPIDense,
1139: /* 24*/ MatZeroRows_MPIDense,
1140: NULL,
1141: NULL,
1142: NULL,
1143: NULL,
1144: /* 29*/ MatSetUp_MPIDense,
1145: NULL,
1146: NULL,
1147: MatGetDiagonalBlock_MPIDense,
1148: NULL,
1149: /* 34*/ MatDuplicate_MPIDense,
1150: NULL,
1151: NULL,
1152: NULL,
1153: NULL,
1154: /* 39*/ MatAXPY_MPIDense,
1155: MatCreateSubMatrices_MPIDense,
1156: NULL,
1157: MatGetValues_MPIDense,
1158: MatCopy_MPIDense,
1159: /* 44*/ NULL,
1160: MatScale_MPIDense,
1161: MatShift_MPIDense,
1162: NULL,
1163: NULL,
1164: /* 49*/ MatSetRandom_MPIDense,
1165: NULL,
1166: NULL,
1167: NULL,
1168: NULL,
1169: /* 54*/ NULL,
1170: NULL,
1171: NULL,
1172: NULL,
1173: NULL,
1174: /* 59*/ MatCreateSubMatrix_MPIDense,
1175: MatDestroy_MPIDense,
1176: MatView_MPIDense,
1177: NULL,
1178: NULL,
1179: /* 64*/ NULL,
1180: NULL,
1181: NULL,
1182: NULL,
1183: NULL,
1184: /* 69*/ NULL,
1185: NULL,
1186: NULL,
1187: NULL,
1188: NULL,
1189: /* 74*/ NULL,
1190: NULL,
1191: NULL,
1192: NULL,
1193: NULL,
1194: /* 79*/ NULL,
1195: NULL,
1196: NULL,
1197: NULL,
1198: /* 83*/ MatLoad_MPIDense,
1199: NULL,
1200: NULL,
1201: NULL,
1202: NULL,
1203: NULL,
1204: /* 89*/ NULL,
1205: NULL,
1206: NULL,
1207: NULL,
1208: NULL,
1209: /* 94*/ NULL,
1210: NULL,
1211: MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1212: MatMatTransposeMultNumeric_MPIDense_MPIDense,
1213: NULL,
1214: /* 99*/ MatProductSetFromOptions_MPIDense,
1215: NULL,
1216: NULL,
1217: MatConjugate_MPIDense,
1218: NULL,
1219: /*104*/ NULL,
1220: MatRealPart_MPIDense,
1221: MatImaginaryPart_MPIDense,
1222: NULL,
1223: NULL,
1224: /*109*/ NULL,
1225: NULL,
1226: NULL,
1227: MatGetColumnVector_MPIDense,
1228: MatMissingDiagonal_MPIDense,
1229: /*114*/ NULL,
1230: NULL,
1231: NULL,
1232: NULL,
1233: NULL,
1234: /*119*/ NULL,
1235: NULL,
1236: NULL,
1237: NULL,
1238: NULL,
1239: /*124*/ NULL,
1240: MatGetColumnReductions_MPIDense,
1241: NULL,
1242: NULL,
1243: NULL,
1244: /*129*/ NULL,
1245: NULL,
1246: MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1247: MatTransposeMatMultNumeric_MPIDense_MPIDense,
1248: NULL,
1249: /*134*/ NULL,
1250: NULL,
1251: NULL,
1252: NULL,
1253: NULL,
1254: /*139*/ NULL,
1255: NULL,
1256: NULL,
1257: NULL,
1258: NULL,
1259: MatCreateMPIMatConcatenateSeqMat_MPIDense,
1260: /*145*/ NULL,
1261: NULL,
1262: NULL,
1263: NULL,
1264: NULL,
1265: /*150*/ NULL,
1266: NULL};
1268: static PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat, PetscScalar *data)
1269: {
1270: Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1271: MatType mtype = MATSEQDENSE;
1273: PetscFunctionBegin;
1274: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)mat), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1275: PetscCall(PetscLayoutSetUp(mat->rmap));
1276: PetscCall(PetscLayoutSetUp(mat->cmap));
1277: if (!a->A) {
1278: PetscCall(MatCreate(PETSC_COMM_SELF, &a->A));
1279: PetscCall(MatSetSizes(a->A, mat->rmap->n, mat->cmap->N, mat->rmap->n, mat->cmap->N));
1280: }
1281: #if defined(PETSC_HAVE_CUDA)
1282: PetscBool iscuda;
1283: PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSECUDA, &iscuda));
1284: if (iscuda) mtype = MATSEQDENSECUDA;
1285: #endif
1286: #if defined(PETSC_HAVE_HIP)
1287: PetscBool iship;
1288: PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPIDENSEHIP, &iship));
1289: if (iship) mtype = MATSEQDENSEHIP;
1290: #endif
1291: PetscCall(MatSetType(a->A, mtype));
1292: PetscCall(MatSeqDenseSetPreallocation(a->A, data));
1293: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1294: mat->offloadmask = a->A->offloadmask;
1295: #endif
1296: mat->preallocated = PETSC_TRUE;
1297: mat->assembled = PETSC_TRUE;
1298: PetscFunctionReturn(PETSC_SUCCESS);
1299: }
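/*
   Illustrative creation sketch (not part of the original source): create a
   parallel dense matrix and let PETSc allocate the local column-major storage.
   M and N stand for assumed global dimensions.

     Mat A;

     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
     PetscCall(MatSetType(A, MATMPIDENSE));
     PetscCall(MatMPIDenseSetPreallocation(A, NULL));   // NULL: PETSc allocates the array
*/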
1301: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1302: {
1303: Mat B, C;
1305: PetscFunctionBegin;
1306: PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &C));
1307: PetscCall(MatConvert_SeqAIJ_SeqDense(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &B));
1308: PetscCall(MatDestroy(&C));
1309: if (reuse == MAT_REUSE_MATRIX) {
1310: C = *newmat;
1311: } else C = NULL;
1312: PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1313: PetscCall(MatDestroy(&B));
1314: if (reuse == MAT_INPLACE_MATRIX) {
1315: PetscCall(MatHeaderReplace(A, &C));
1316: } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1317: PetscFunctionReturn(PETSC_SUCCESS);
1318: }
1320: static PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1321: {
1322: Mat B, C;
1324: PetscFunctionBegin;
1325: PetscCall(MatDenseGetLocalMatrix(A, &C));
1326: PetscCall(MatConvert_SeqDense_SeqAIJ(C, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
1327: if (reuse == MAT_REUSE_MATRIX) {
1328: C = *newmat;
1329: } else C = NULL;
1330: PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A), B, A->cmap->n, !C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX, &C));
1331: PetscCall(MatDestroy(&B));
1332: if (reuse == MAT_INPLACE_MATRIX) {
1333: PetscCall(MatHeaderReplace(A, &C));
1334: } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1335: PetscFunctionReturn(PETSC_SUCCESS);
1336: }
1338: #if defined(PETSC_HAVE_ELEMENTAL)
1339: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1340: {
1341: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1342: Mat mat_elemental;
1343: PetscScalar *v;
1344: PetscInt m = A->rmap->n, N = A->cmap->N, rstart = A->rmap->rstart, i, *rows, *cols, lda;
1346: PetscFunctionBegin;
1347: if (reuse == MAT_REUSE_MATRIX) {
1348: mat_elemental = *newmat;
1349: PetscCall(MatZeroEntries(*newmat));
1350: } else {
1351: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1352: PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
1353: PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1354: PetscCall(MatSetUp(mat_elemental));
1355: PetscCall(MatSetOption(mat_elemental, MAT_ROW_ORIENTED, PETSC_FALSE));
1356: }
1358: PetscCall(PetscMalloc2(m, &rows, N, &cols));
1359: for (i = 0; i < N; i++) cols[i] = i;
1360: for (i = 0; i < m; i++) rows[i] = rstart + i;
1362: /* The PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
1363: PetscCall(MatDenseGetArray(A, &v));
1364: PetscCall(MatDenseGetLDA(a->A, &lda));
1365: if (lda == m) PetscCall(MatSetValues(mat_elemental, m, rows, N, cols, v, ADD_VALUES));
1366: else {
1367: for (i = 0; i < N; i++) PetscCall(MatSetValues(mat_elemental, m, rows, 1, &i, v + lda * i, ADD_VALUES));
1368: }
1369: PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1370: PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1371: PetscCall(MatDenseRestoreArray(A, &v));
1372: PetscCall(PetscFree2(rows, cols));
1374: if (reuse == MAT_INPLACE_MATRIX) {
1375: PetscCall(MatHeaderReplace(A, &mat_elemental));
1376: } else {
1377: *newmat = mat_elemental;
1378: }
1379: PetscFunctionReturn(PETSC_SUCCESS);
1380: }
1381: #endif
1383: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A, PetscInt col, PetscScalar **vals)
1384: {
1385: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1387: PetscFunctionBegin;
1388: PetscCall(MatDenseGetColumn(mat->A, col, vals));
1389: PetscFunctionReturn(PETSC_SUCCESS);
1390: }
1392: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A, PetscScalar **vals)
1393: {
1394: Mat_MPIDense *mat = (Mat_MPIDense *)A->data;
1396: PetscFunctionBegin;
1397: PetscCall(MatDenseRestoreColumn(mat->A, vals));
1398: PetscFunctionReturn(PETSC_SUCCESS);
1399: }
1401: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
1402: {
1403: Mat_MPIDense *mat;
1404: PetscInt m, nloc, N;
1406: PetscFunctionBegin;
1407: PetscCall(MatGetSize(inmat, &m, &N));
1408: PetscCall(MatGetLocalSize(inmat, NULL, &nloc));
1409: if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1410: PetscInt sum;
1412: if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
1413: /* Check sum(n) = N */
1414: PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
1415: PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
1417: PetscCall(MatCreateDense(comm, m, n, PETSC_DETERMINE, N, NULL, outmat));
1418: PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
1419: }
1421: /* numeric phase */
1422: mat = (Mat_MPIDense *)(*outmat)->data;
1423: PetscCall(MatCopy(inmat, mat->A, SAME_NONZERO_PATTERN));
1424: PetscFunctionReturn(PETSC_SUCCESS);
1425: }
1427: PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1428: {
1429: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1430: PetscInt lda;
1432: PetscFunctionBegin;
1433: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1434: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1435: if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1436: a->vecinuse = col + 1;
1437: PetscCall(MatDenseGetLDA(a->A, &lda));
1438: PetscCall(MatDenseGetArray(a->A, (PetscScalar **)&a->ptrinuse));
1439: PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1440: *v = a->cvec;
1441: PetscFunctionReturn(PETSC_SUCCESS);
1442: }
1444: PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A, PetscInt col, Vec *v)
1445: {
1446: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1448: PetscFunctionBegin;
1449: PetscCheck(a->vecinuse, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1450: PetscCheck(a->cvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing internal column vector");
1451: a->vecinuse = 0;
1452: PetscCall(MatDenseRestoreArray(a->A, (PetscScalar **)&a->ptrinuse));
1453: PetscCall(VecResetArray(a->cvec));
1454: if (v) *v = NULL;
1455: PetscFunctionReturn(PETSC_SUCCESS);
1456: }
1458: PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1459: {
1460: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1461: PetscInt lda;
1463: PetscFunctionBegin;
1464: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1465: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1466: if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1467: a->vecinuse = col + 1;
1468: PetscCall(MatDenseGetLDA(a->A, &lda));
1469: PetscCall(MatDenseGetArrayRead(a->A, &a->ptrinuse));
1470: PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1471: PetscCall(VecLockReadPush(a->cvec));
1472: *v = a->cvec;
1473: PetscFunctionReturn(PETSC_SUCCESS);
1474: }
1476: PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A, PetscInt col, Vec *v)
1477: {
1478: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1480: PetscFunctionBegin;
1481: PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1482: PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1483: a->vecinuse = 0;
1484: PetscCall(MatDenseRestoreArrayRead(a->A, &a->ptrinuse));
1485: PetscCall(VecLockReadPop(a->cvec));
1486: PetscCall(VecResetArray(a->cvec));
1487: if (v) *v = NULL;
1488: PetscFunctionReturn(PETSC_SUCCESS);
1489: }
1491: PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1492: {
1493: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1494: PetscInt lda;
1496: PetscFunctionBegin;
1497: PetscCheck(!a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1498: PetscCheck(!a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1499: if (!a->cvec) PetscCall(MatDenseCreateColumnVec_Private(A, &a->cvec));
1500: a->vecinuse = col + 1;
1501: PetscCall(MatDenseGetLDA(a->A, &lda));
1502: PetscCall(MatDenseGetArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1503: PetscCall(VecPlaceArray(a->cvec, a->ptrinuse + (size_t)col * (size_t)lda));
1504: *v = a->cvec;
1505: PetscFunctionReturn(PETSC_SUCCESS);
1506: }
1508: PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A, PetscInt col, Vec *v)
1509: {
1510: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1512: PetscFunctionBegin;
1513: PetscCheck(a->vecinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetColumnVec() first");
1514: PetscCheck(a->cvec, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal column vector");
1515: a->vecinuse = 0;
1516: PetscCall(MatDenseRestoreArrayWrite(a->A, (PetscScalar **)&a->ptrinuse));
1517: PetscCall(VecResetArray(a->cvec));
1518: if (v) *v = NULL;
1519: PetscFunctionReturn(PETSC_SUCCESS);
1520: }
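/*
   The routines above back the public MatDenseGetColumnVec()/MatDenseRestoreColumnVec()
   family (and its Read/Write variants) for MATMPIDENSE. A minimal usage sketch of the
   read-only variant; A is an assembled MATMPIDENSE matrix and col a valid global column
   index, both supplied by the caller:

      Vec       v;
      PetscReal nrm;

      PetscCall(MatDenseGetColumnVecRead(A, col, &v));    // borrow column col as a parallel Vec, no copy
      PetscCall(VecNorm(v, NORM_2, &nrm));
      PetscCall(MatDenseRestoreColumnVecRead(A, col, &v)); // each get must be matched by a restore
*/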
1522: static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *v)
1523: {
1524: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1525: Mat_MPIDense *c;
1526: MPI_Comm comm;
1527: PetscInt pbegin, pend;
1529: PetscFunctionBegin;
1530: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1531: PetscCheck(!a->vecinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreColumnVec() first");
1532: PetscCheck(!a->matinuse, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1533: pbegin = PetscMax(0, PetscMin(A->rmap->rend, rbegin) - A->rmap->rstart);
1534: pend = PetscMin(A->rmap->n, PetscMax(0, rend - A->rmap->rstart));
1535: if (!a->cmat) {
1536: PetscCall(MatCreate(comm, &a->cmat));
1537: PetscCall(MatSetType(a->cmat, ((PetscObject)A)->type_name));
1538: if (rend - rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap, &a->cmat->rmap));
1539: else {
1540: PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1541: PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1542: PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1543: }
1544: PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1545: PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1546: } else {
1547: PetscBool same = (PetscBool)(rend - rbegin == a->cmat->rmap->N);
1548: if (same && a->cmat->rmap->N != A->rmap->N) {
1549: same = (PetscBool)(pend - pbegin == a->cmat->rmap->n);
1550: PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1551: }
1552: if (!same) {
1553: PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1554: PetscCall(PetscLayoutCreate(comm, &a->cmat->rmap));
1555: PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap, pend - pbegin));
1556: PetscCall(PetscLayoutSetSize(a->cmat->rmap, rend - rbegin));
1557: PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1558: }
1559: if (cend - cbegin != a->cmat->cmap->N) {
1560: PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1561: PetscCall(PetscLayoutCreate(comm, &a->cmat->cmap));
1562: PetscCall(PetscLayoutSetSize(a->cmat->cmap, cend - cbegin));
1563: PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1564: }
1565: }
1566: c = (Mat_MPIDense *)a->cmat->data;
1567: PetscCheck(!c->A, comm, PETSC_ERR_ORDER, "Need to call MatDenseRestoreSubMatrix() first");
1568: PetscCall(MatDenseGetSubMatrix(a->A, pbegin, pend, cbegin, cend, &c->A));
1570: a->cmat->preallocated = PETSC_TRUE;
1571: a->cmat->assembled = PETSC_TRUE;
1572: #if defined(PETSC_HAVE_DEVICE)
1573: a->cmat->offloadmask = c->A->offloadmask;
1574: #endif
1575: a->matinuse = cbegin + 1;
1576: *v = a->cmat;
1577: PetscFunctionReturn(PETSC_SUCCESS);
1578: }
1580: static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A, Mat *v)
1581: {
1582: Mat_MPIDense *a = (Mat_MPIDense *)A->data;
1583: Mat_MPIDense *c;
1585: PetscFunctionBegin;
1586: PetscCheck(a->matinuse, PetscObjectComm((PetscObject)A), PETSC_ERR_ORDER, "Need to call MatDenseGetSubMatrix() first");
1587: PetscCheck(a->cmat, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing internal matrix");
1588: PetscCheck(*v == a->cmat, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Not the matrix obtained from MatDenseGetSubMatrix()");
1589: a->matinuse = 0;
1590: c = (Mat_MPIDense *)a->cmat->data;
1591: PetscCall(MatDenseRestoreSubMatrix(a->A, &c->A));
1592: if (v) *v = NULL;
1593: #if defined(PETSC_HAVE_DEVICE)
1594: A->offloadmask = a->A->offloadmask;
1595: #endif
1596: PetscFunctionReturn(PETSC_SUCCESS);
1597: }
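/*
   Usage sketch for the sub-matrix view implemented above; only one view may be
   outstanding at a time, and rbegin/rend/cbegin/cend are global index bounds chosen
   by the caller:

      Mat Asub;

      PetscCall(MatDenseGetSubMatrix(A, rbegin, rend, cbegin, cend, &Asub)); // borrow a dense block of A, no copy
      PetscCall(MatScale(Asub, 2.0));                                        // changes are made in place in A
      PetscCall(MatDenseRestoreSubMatrix(A, &Asub));                         // return the view before using A again
*/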
1599: /*MC
1600: MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1602: Options Database Key:
1603: . -mat_type mpidense - sets the matrix type to `MATMPIDENSE` during a call to `MatSetFromOptions()`
1605: Level: beginner
1607: .seealso: [](ch_matrices), `Mat`, `MatCreateDense()`, `MATSEQDENSE`, `MATDENSE`
1608: M*/
1609: PetscErrorCode MatCreate_MPIDense(Mat mat)
1610: {
1611: Mat_MPIDense *a;
1613: PetscFunctionBegin;
1614: PetscCall(PetscNew(&a));
1615: mat->data = (void *)a;
1616: mat->ops[0] = MatOps_Values;
1618: mat->insertmode = NOT_SET_VALUES;
1620: /* build cache (stash) for off-process entries generated during assembly */
1621: a->donotstash = PETSC_FALSE;
1623: PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat), 1, &mat->stash));
1625: /* data used for matrix-vector multiplication */
1626: a->lvec = NULL;
1627: a->Mvctx = NULL;
1628: a->roworiented = PETSC_TRUE;
1630: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetLDA_C", MatDenseGetLDA_MPIDense));
1631: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseSetLDA_C", MatDenseSetLDA_MPIDense));
1632: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArray_C", MatDenseGetArray_MPIDense));
1633: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArray_C", MatDenseRestoreArray_MPIDense));
1634: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayRead_C", MatDenseGetArrayRead_MPIDense));
1635: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayRead_C", MatDenseRestoreArrayRead_MPIDense));
1636: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetArrayWrite_C", MatDenseGetArrayWrite_MPIDense));
1637: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreArrayWrite_C", MatDenseRestoreArrayWrite_MPIDense));
1638: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDensePlaceArray_C", MatDensePlaceArray_MPIDense));
1639: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseResetArray_C", MatDenseResetArray_MPIDense));
1640: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseReplaceArray_C", MatDenseReplaceArray_MPIDense));
1641: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVec_C", MatDenseGetColumnVec_MPIDense));
1642: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVec_C", MatDenseRestoreColumnVec_MPIDense));
1643: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecRead_C", MatDenseGetColumnVecRead_MPIDense));
1644: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecRead_C", MatDenseRestoreColumnVecRead_MPIDense));
1645: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumnVecWrite_C", MatDenseGetColumnVecWrite_MPIDense));
1646: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumnVecWrite_C", MatDenseRestoreColumnVecWrite_MPIDense));
1647: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetSubMatrix_C", MatDenseGetSubMatrix_MPIDense));
1648: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreSubMatrix_C", MatDenseRestoreSubMatrix_MPIDense));
1649: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
1650: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpiaij_C", MatConvert_MPIDense_MPIAIJ));
1651: #if defined(PETSC_HAVE_ELEMENTAL)
1652: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_elemental_C", MatConvert_MPIDense_Elemental));
1653: #endif
1654: #if defined(PETSC_HAVE_SCALAPACK)
1655: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_scalapack_C", MatConvert_Dense_ScaLAPACK));
1656: #endif
1657: #if defined(PETSC_HAVE_CUDA)
1658: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensecuda_C", MatConvert_MPIDense_MPIDenseCUDA));
1659: #endif
1660: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIDenseSetPreallocation_C", MatMPIDenseSetPreallocation_MPIDense));
1661: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1662: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1663: #if defined(PETSC_HAVE_CUDA)
1664: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijcusparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1665: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijcusparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1666: #endif
1667: #if defined(PETSC_HAVE_HIP)
1668: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpidense_mpidensehip_C", MatConvert_MPIDense_MPIDenseHIP));
1669: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaijhipsparse_mpidense_C", MatProductSetFromOptions_MPIAIJ_MPIDense));
1670: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpidense_mpiaijhipsparse_C", MatProductSetFromOptions_MPIDense_MPIAIJ));
1671: #endif
1672: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseGetColumn_C", MatDenseGetColumn_MPIDense));
1673: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDenseRestoreColumn_C", MatDenseRestoreColumn_MPIDense));
1674: PetscCall(PetscObjectChangeTypeName((PetscObject)mat, MATMPIDENSE));
1675: PetscFunctionReturn(PETSC_SUCCESS);
1676: }
1678: /*MC
1679: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1681: This matrix type is identical to `MATSEQDENSE` when constructed with a single process communicator,
1682: and `MATMPIDENSE` otherwise.
1684: Options Database Key:
1685: . -mat_type dense - sets the matrix type to `MATDENSE` during a call to `MatSetFromOptions()`
1687: Level: beginner
1689: .seealso: [](ch_matrices), `Mat`, `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`, `MATDENSEHIP`
1690: M*/
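/*
   A minimal sketch of selecting the dense type at runtime through the options database;
   m and n are placeholder local sizes chosen by the caller:

      Mat A;

      PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
      PetscCall(MatSetSizes(A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
      PetscCall(MatSetFromOptions(A)); // honors -mat_type dense
      PetscCall(MatSetUp(A));
*/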
1692: /*@C
1693: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1695: Collective
1697: Input Parameters:
1698: + B - the matrix
1699: - data - optional location of matrix data. Set to `NULL` for PETSc
1700: to control all matrix memory allocation.
1702: Level: intermediate
1704: Notes:
1705: The dense format is fully compatible with standard Fortran
1706: storage by columns.
1708: The data input variable is intended primarily for Fortran programmers
1709: who wish to allocate their own matrix memory space. Most users should
1710: set `data` to `NULL`.
1712: .seealso: [](ch_matrices), `Mat`, `MATMPIDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1713: @*/
1714: PetscErrorCode MatMPIDenseSetPreallocation(Mat B, PetscScalar *data)
1715: {
1716: PetscFunctionBegin;
1718: PetscTryMethod(B, "MatMPIDenseSetPreallocation_C", (Mat, PetscScalar *), (B, data));
1719: PetscFunctionReturn(PETSC_SUCCESS);
1720: }
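/*
   A usage sketch for the preallocation routine above; m and n are placeholder local
   sizes, and passing NULL for data lets PETSc allocate the storage:

      Mat A;

      PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
      PetscCall(MatSetSizes(A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
      PetscCall(MatSetType(A, MATMPIDENSE));
      PetscCall(MatMPIDenseSetPreallocation(A, NULL));
*/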
1722: /*@
1723: MatDensePlaceArray - Allows one to replace the array in a `MATDENSE` matrix with an
1724: array provided by the user. This is useful to avoid copying an array
1725: into a matrix.
1727: Not Collective
1729: Input Parameters:
1730: + mat - the matrix
1731: - array - the array in column major order
1733: Level: developer
1735: Note:
1736: You can return to the original array with a call to `MatDenseResetArray()`. The user is responsible for freeing this array; it will not be
1737: freed when the matrix is destroyed.
1739: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`,
1740: `MatDenseReplaceArray()`
1741: @*/
1742: PetscErrorCode MatDensePlaceArray(Mat mat, const PetscScalar *array)
1743: {
1744: PetscFunctionBegin;
1746: PetscUseMethod(mat, "MatDensePlaceArray_C", (Mat, const PetscScalar *), (mat, array));
1747: PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1748: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1749: mat->offloadmask = PETSC_OFFLOAD_CPU;
1750: #endif
1751: PetscFunctionReturn(PETSC_SUCCESS);
1752: }
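/*
   Sketch of the place/reset pairing documented above; nlocalrows and Nglobalcols are
   placeholders for the number of local rows and global columns of A, and the buffer
   remains owned (and eventually freed) by the caller:

      PetscScalar *myvals; // column-major, local rows by global columns

      PetscCall(PetscMalloc1(nlocalrows * Nglobalcols, &myvals));
      PetscCall(MatDensePlaceArray(A, myvals)); // A now wraps myvals, no copy is made
      // ... work with A ...
      PetscCall(MatDenseResetArray(A));         // A returns to its original storage
      PetscCall(PetscFree(myvals));             // the caller still owns myvals
*/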
1754: /*@
1755: MatDenseResetArray - Resets the matrix array to the array that it previously had before the call to `MatDensePlaceArray()`
1757: Not Collective
1759: Input Parameter:
1760: . mat - the matrix
1762: Level: developer
1764: Note:
1765: You can only call this after a call to `MatDensePlaceArray()`.
1767: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
1768: @*/
1769: PetscErrorCode MatDenseResetArray(Mat mat)
1770: {
1771: PetscFunctionBegin;
1773: PetscUseMethod(mat, "MatDenseResetArray_C", (Mat), (mat));
1774: PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1775: PetscFunctionReturn(PETSC_SUCCESS);
1776: }
1778: /*@
1779: MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1780: array provided by the user. This is useful to avoid copying an array
1781: into a matrix.
1783: Not Collective
1785: Input Parameters:
1786: + mat - the matrix
1787: - array - the array in column major order
1789: Level: developer
1791: Note:
1792: The memory passed in MUST be obtained with `PetscMalloc()` and CANNOT be
1793: freed by the user. It will be freed when the matrix is destroyed.
1795: .seealso: [](ch_matrices), `Mat`, `MatDensePlaceArray()`, `MatDenseGetArray()`, `VecReplaceArray()`
1796: @*/
1797: PetscErrorCode MatDenseReplaceArray(Mat mat, const PetscScalar *array)
1798: {
1799: PetscFunctionBegin;
1801: PetscUseMethod(mat, "MatDenseReplaceArray_C", (Mat, const PetscScalar *), (mat, array));
1802: PetscCall(PetscObjectStateIncrease((PetscObject)mat));
1803: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
1804: mat->offloadmask = PETSC_OFFLOAD_CPU;
1805: #endif
1806: PetscFunctionReturn(PETSC_SUCCESS);
1807: }
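/*
   Sketch for MatDenseReplaceArray(), contrasting with MatDensePlaceArray() above: the
   replacement buffer must come from PetscMalloc() and ownership passes to the matrix,
   so the caller must not free it. nlocalrows and Nglobalcols are placeholders:

      PetscScalar *newvals;

      PetscCall(PetscMalloc1(nlocalrows * Nglobalcols, &newvals));
      PetscCall(MatDenseReplaceArray(A, newvals)); // A adopts newvals permanently
      // newvals is freed automatically when A is destroyed
*/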
1809: /*@C
1810: MatCreateDense - Creates a matrix in `MATDENSE` format.
1812: Collective
1814: Input Parameters:
1815: + comm - MPI communicator
1816: . m - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
1817: . n - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
1818: . M - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
1819: . N - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
1820: - data - optional location of matrix data. Set data to `NULL` (`PETSC_NULL_SCALAR` for Fortran users) for PETSc
1821: to control all matrix memory allocation.
1823: Output Parameter:
1824: . A - the matrix
1826: Level: intermediate
1828: Notes:
1829: The dense format is fully compatible with standard Fortran
1830: storage by columns.
1832: Although local portions of the matrix are stored in column-major
1833: order, the matrix is partitioned across MPI ranks by row.
1835: The data input variable is intended primarily for Fortran programmers
1836: who wish to allocate their own matrix memory space. Most users should
1837: set `data` to `NULL` (`PETSC_NULL_SCALAR` for Fortran users).
1839: The user MUST specify either the local or global matrix dimensions
1840: (possibly both).
1842: .seealso: [](ch_matrices), `Mat`, `MATDENSE`, `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1843: @*/
1844: PetscErrorCode MatCreateDense(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
1845: {
1846: PetscFunctionBegin;
1847: PetscCall(MatCreate(comm, A));
1848: PetscCall(MatSetSizes(*A, m, n, M, N));
1849: PetscCall(MatSetType(*A, MATDENSE));
1850: PetscCall(MatSeqDenseSetPreallocation(*A, data));
1851: PetscCall(MatMPIDenseSetPreallocation(*A, data));
1852: PetscFunctionReturn(PETSC_SUCCESS);
1853: }
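/*
   A minimal creation sketch using the routine above with PETSc-controlled layout and
   storage (data == NULL); the global sizes are illustrative:

      Mat      A;
      PetscInt M = 100, N = 10, rstart, rend;

      PetscCall(MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, M, N, NULL, &A));
      PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
      for (PetscInt i = rstart; i < rend && i < N; i++) PetscCall(MatSetValue(A, i, i, 1.0, INSERT_VALUES)); // locally owned diagonal entries
      PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
      PetscCall(MatDestroy(&A));
*/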
1855: static PetscErrorCode MatDuplicate_MPIDense(Mat A, MatDuplicateOption cpvalues, Mat *newmat)
1856: {
1857: Mat mat;
1858: Mat_MPIDense *a, *oldmat = (Mat_MPIDense *)A->data;
1860: PetscFunctionBegin;
1861: *newmat = NULL;
1862: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat));
1863: PetscCall(MatSetSizes(mat, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1864: PetscCall(MatSetType(mat, ((PetscObject)A)->type_name));
1865: a = (Mat_MPIDense *)mat->data;
1867: mat->factortype = A->factortype;
1868: mat->assembled = PETSC_TRUE;
1869: mat->preallocated = PETSC_TRUE;
1871: mat->insertmode = NOT_SET_VALUES;
1872: a->donotstash = oldmat->donotstash;
1874: PetscCall(PetscLayoutReference(A->rmap, &mat->rmap));
1875: PetscCall(PetscLayoutReference(A->cmap, &mat->cmap));
1877: PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
1879: *newmat = mat;
1880: PetscFunctionReturn(PETSC_SUCCESS);
1881: }
1883: static PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1884: {
1885: PetscBool isbinary;
1886: #if defined(PETSC_HAVE_HDF5)
1887: PetscBool ishdf5;
1888: #endif
1890: PetscFunctionBegin;
1893: /* force binary viewer to load .info file if it has not yet done so */
1894: PetscCall(PetscViewerSetUp(viewer));
1895: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1896: #if defined(PETSC_HAVE_HDF5)
1897: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
1898: #endif
1899: if (isbinary) {
1900: PetscCall(MatLoad_Dense_Binary(newMat, viewer));
1901: #if defined(PETSC_HAVE_HDF5)
1902: } else if (ishdf5) {
1903: PetscCall(MatLoad_Dense_HDF5(newMat, viewer));
1904: #endif
1905: } else SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
1906: PetscFunctionReturn(PETSC_SUCCESS);
1907: }
1909: static PetscErrorCode MatEqual_MPIDense(Mat A, Mat B, PetscBool *flag)
1910: {
1911: Mat_MPIDense *matB = (Mat_MPIDense *)B->data, *matA = (Mat_MPIDense *)A->data;
1912: Mat a, b;
1914: PetscFunctionBegin;
1915: a = matA->A;
1916: b = matB->A;
1917: PetscCall(MatEqual(a, b, flag));
1918: PetscCall(MPIU_Allreduce(MPI_IN_PLACE, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1919: PetscFunctionReturn(PETSC_SUCCESS);
1920: }
1922: static PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
1923: {
1924: Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
1926: PetscFunctionBegin;
1927: PetscCall(PetscFree2(atb->sendbuf, atb->recvcounts));
1928: PetscCall(MatDestroy(&atb->atb));
1929: PetscCall(PetscFree(atb));
1930: PetscFunctionReturn(PETSC_SUCCESS);
1931: }
1933: static PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
1934: {
1935: Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
1937: PetscFunctionBegin;
1938: PetscCall(PetscFree2(abt->buf[0], abt->buf[1]));
1939: PetscCall(PetscFree2(abt->recvcounts, abt->recvdispls));
1940: PetscCall(PetscFree(abt));
1941: PetscFunctionReturn(PETSC_SUCCESS);
1942: }
1944: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
1945: {
1946: Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
1947: Mat_TransMatMultDense *atb;
1948: MPI_Comm comm;
1949: PetscMPIInt size, *recvcounts;
1950: PetscScalar *carray, *sendbuf;
1951: const PetscScalar *atbarray;
1952: PetscInt i, cN = C->cmap->N, proc, k, j, lda;
1953: const PetscInt *ranges;
1955: PetscFunctionBegin;
1956: MatCheckProduct(C, 3);
1957: PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
1958: atb = (Mat_TransMatMultDense *)C->product->data;
1959: recvcounts = atb->recvcounts;
1960: sendbuf = atb->sendbuf;
1962: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1963: PetscCallMPI(MPI_Comm_size(comm, &size));
1965: /* compute atbarray = aseq^T * bseq */
1966: PetscCall(MatTransposeMatMult(a->A, b->A, atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX, PETSC_DEFAULT, &atb->atb));
1968: PetscCall(MatGetOwnershipRanges(C, &ranges));
1970: /* arrange atbarray into sendbuf */
1971: PetscCall(MatDenseGetArrayRead(atb->atb, &atbarray));
1972: PetscCall(MatDenseGetLDA(atb->atb, &lda));
1973: for (proc = 0, k = 0; proc < size; proc++) {
1974: for (j = 0; j < cN; j++) {
1975: for (i = ranges[proc]; i < ranges[proc + 1]; i++) sendbuf[k++] = atbarray[i + j * lda];
1976: }
1977: }
1978: PetscCall(MatDenseRestoreArrayRead(atb->atb, &atbarray));
1980: /* sum all atbarray to local values of C */
1981: PetscCall(MatDenseGetArrayWrite(c->A, &carray));
1982: PetscCallMPI(MPI_Reduce_scatter(sendbuf, carray, recvcounts, MPIU_SCALAR, MPIU_SUM, comm));
1983: PetscCall(MatDenseRestoreArrayWrite(c->A, &carray));
1984: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1985: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1986: PetscFunctionReturn(PETSC_SUCCESS);
1987: }
1989: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
1990: {
1991: MPI_Comm comm;
1992: PetscMPIInt size;
1993: PetscInt cm = A->cmap->n, cM, cN = B->cmap->N;
1994: Mat_TransMatMultDense *atb;
1995: PetscBool cisdense = PETSC_FALSE;
1996: PetscInt i;
1997: const PetscInt *ranges;
1999: PetscFunctionBegin;
2000: MatCheckProduct(C, 4);
2001: PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2002: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2003: if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2004: SETERRQ(comm, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2005: }
2007: /* create matrix product C */
2008: PetscCall(MatSetSizes(C, cm, B->cmap->n, A->cmap->N, B->cmap->N));
2009: #if defined(PETSC_HAVE_CUDA)
2010: PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
2011: #endif
2012: #if defined(PETSC_HAVE_HIP)
2013: PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSEHIP, ""));
2014: #endif
2015: if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
2016: PetscCall(MatSetUp(C));
2018: /* create data structure for reuse C */
2019: PetscCallMPI(MPI_Comm_size(comm, &size));
2020: PetscCall(PetscNew(&atb));
2021: cM = C->rmap->N;
2022: PetscCall(PetscMalloc2(cM * cN, &atb->sendbuf, size, &atb->recvcounts));
2023: PetscCall(MatGetOwnershipRanges(C, &ranges));
2024: for (i = 0; i < size; i++) atb->recvcounts[i] = (ranges[i + 1] - ranges[i]) * cN;
2026: C->product->data = atb;
2027: C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2028: PetscFunctionReturn(PETSC_SUCCESS);
2029: }
2031: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2032: {
2033: MPI_Comm comm;
2034: PetscMPIInt i, size;
2035: PetscInt maxRows, bufsiz;
2036: PetscMPIInt tag;
2037: PetscInt alg;
2038: Mat_MatTransMultDense *abt;
2039: Mat_Product *product = C->product;
2040: PetscBool flg;
2042: PetscFunctionBegin;
2043: MatCheckProduct(C, 4);
2044: PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2045: /* check local size of A and B */
2046: PetscCheck(A->cmap->n == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")", A->cmap->n, B->cmap->n);
2048: PetscCall(PetscStrcmp(product->alg, "allgatherv", &flg));
2049: alg = flg ? 0 : 1;
2051: /* setup matrix product C */
2052: PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, A->rmap->N, B->rmap->N));
2053: PetscCall(MatSetType(C, MATMPIDENSE));
2054: PetscCall(MatSetUp(C));
2055: PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag));
2057: /* create data structure for reuse C */
2058: PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2059: PetscCallMPI(MPI_Comm_size(comm, &size));
2060: PetscCall(PetscNew(&abt));
2061: abt->tag = tag;
2062: abt->alg = alg;
2063: switch (alg) {
2064: case 1: /* alg: "cyclic" */
2065: for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2066: bufsiz = A->cmap->N * maxRows;
2067: PetscCall(PetscMalloc2(bufsiz, &(abt->buf[0]), bufsiz, &(abt->buf[1])));
2068: break;
2069: default: /* alg: "allgatherv" */
2070: PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
2071: PetscCall(PetscMalloc2(size, &(abt->recvcounts), size + 1, &(abt->recvdispls)));
2072: for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2073: for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2074: break;
2075: }
2077: C->product->data = abt;
2078: C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2079: C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2080: PetscFunctionReturn(PETSC_SUCCESS);
2081: }
2083: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2084: {
2085: Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2086: Mat_MatTransMultDense *abt;
2087: MPI_Comm comm;
2088: PetscMPIInt rank, size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2089: PetscScalar *sendbuf, *recvbuf = NULL, *cv;
2090: PetscInt i, cK = A->cmap->N, k, j, bn;
2091: PetscScalar _DOne = 1.0, _DZero = 0.0;
2092: const PetscScalar *av, *bv;
2093: PetscBLASInt cm, cn, ck, alda, blda = 0, clda;
2094: MPI_Request reqs[2];
2095: const PetscInt *ranges;
2097: PetscFunctionBegin;
2098: MatCheckProduct(C, 3);
2099: PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2100: abt = (Mat_MatTransMultDense *)C->product->data;
2101: PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
2102: PetscCallMPI(MPI_Comm_rank(comm, &rank));
2103: PetscCallMPI(MPI_Comm_size(comm, &size));
2104: PetscCall(MatDenseGetArrayRead(a->A, &av));
2105: PetscCall(MatDenseGetArrayRead(b->A, &bv));
2106: PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2107: PetscCall(MatDenseGetLDA(a->A, &i));
2108: PetscCall(PetscBLASIntCast(i, &alda));
2109: PetscCall(MatDenseGetLDA(b->A, &i));
2110: PetscCall(PetscBLASIntCast(i, &blda));
2111: PetscCall(MatDenseGetLDA(c->A, &i));
2112: PetscCall(PetscBLASIntCast(i, &clda));
2113: PetscCall(MatGetOwnershipRanges(B, &ranges));
2114: bn = B->rmap->n;
2115: if (blda == bn) {
2116: sendbuf = (PetscScalar *)bv;
2117: } else {
2118: sendbuf = abt->buf[0];
2119: for (k = 0, i = 0; i < cK; i++) {
2120: for (j = 0; j < bn; j++, k++) sendbuf[k] = bv[i * blda + j];
2121: }
2122: }
2123: if (size > 1) {
2124: sendto = (rank + size - 1) % size;
2125: recvfrom = (rank + size + 1) % size;
2126: } else {
2127: sendto = recvfrom = 0;
2128: }
2129: PetscCall(PetscBLASIntCast(cK, &ck));
2130: PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2131: recvisfrom = rank;
2132: for (i = 0; i < size; i++) {
2133: /* we have finished receiving and sending; the buffers can now be read/modified */
2134: PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate from */
2135: PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2137: if (nextrecvisfrom != rank) {
2138: /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2139: sendsiz = cK * bn;
2140: recvsiz = cK * nextbn;
2141: recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2142: PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2143: PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2144: }
2146: /* local aseq * sendbuf^T */
2147: PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2148: if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &cm, &cn, &ck, &_DOne, av, &alda, sendbuf, &cn, &_DZero, cv + clda * ranges[recvisfrom], &clda));
2150: if (nextrecvisfrom != rank) {
2151: /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2152: PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2153: }
2154: bn = nextbn;
2155: recvisfrom = nextrecvisfrom;
2156: sendbuf = recvbuf;
2157: }
2158: PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2159: PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2160: PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2161: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2162: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2163: PetscFunctionReturn(PETSC_SUCCESS);
2164: }
2166: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2167: {
2168: Mat_MPIDense *a = (Mat_MPIDense *)A->data, *b = (Mat_MPIDense *)B->data, *c = (Mat_MPIDense *)C->data;
2169: Mat_MatTransMultDense *abt;
2170: MPI_Comm comm;
2171: PetscMPIInt size;
2172: PetscScalar *cv, *sendbuf, *recvbuf;
2173: const PetscScalar *av, *bv;
2174: PetscInt blda, i, cK = A->cmap->N, k, j, bn;
2175: PetscScalar _DOne = 1.0, _DZero = 0.0;
2176: PetscBLASInt cm, cn, ck, alda, clda;
2178: PetscFunctionBegin;
2179: MatCheckProduct(C, 3);
2180: PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2181: abt = (Mat_MatTransMultDense *)C->product->data;
2182: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2183: PetscCallMPI(MPI_Comm_size(comm, &size));
2184: PetscCall(MatDenseGetArrayRead(a->A, &av));
2185: PetscCall(MatDenseGetArrayRead(b->A, &bv));
2186: PetscCall(MatDenseGetArrayWrite(c->A, &cv));
2187: PetscCall(MatDenseGetLDA(a->A, &i));
2188: PetscCall(PetscBLASIntCast(i, &alda));
2189: PetscCall(MatDenseGetLDA(b->A, &blda));
2190: PetscCall(MatDenseGetLDA(c->A, &i));
2191: PetscCall(PetscBLASIntCast(i, &clda));
2192: /* copy transpose of B into buf[0] */
2193: bn = B->rmap->n;
2194: sendbuf = abt->buf[0];
2195: recvbuf = abt->buf[1];
2196: for (k = 0, j = 0; j < bn; j++) {
2197: for (i = 0; i < cK; i++, k++) sendbuf[k] = bv[i * blda + j];
2198: }
2199: PetscCall(MatDenseRestoreArrayRead(b->A, &bv));
2200: PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2201: PetscCall(PetscBLASIntCast(cK, &ck));
2202: PetscCall(PetscBLASIntCast(c->A->rmap->n, &cm));
2203: PetscCall(PetscBLASIntCast(c->A->cmap->n, &cn));
2204: if (cm && cn && ck) PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &cm, &cn, &ck, &_DOne, av, &alda, recvbuf, &ck, &_DZero, cv, &clda));
2205: PetscCall(MatDenseRestoreArrayRead(a->A, &av));
2207: PetscCall(MatDenseRestoreArrayWrite(c->A, &cv));
2208: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
2209: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
2210: PetscFunctionReturn(PETSC_SUCCESS);
2211: }
2213: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2214: {
2215: Mat_MatTransMultDense *abt;
2217: PetscFunctionBegin;
2218: MatCheckProduct(C, 3);
2219: PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
2220: abt = (Mat_MatTransMultDense *)C->product->data;
2221: switch (abt->alg) {
2222: case 1:
2223: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2224: break;
2225: default:
2226: PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2227: break;
2228: }
2229: PetscFunctionReturn(PETSC_SUCCESS);
2230: }
2232: #if defined(PETSC_HAVE_ELEMENTAL)
2233: static PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2234: {
2235: Mat_MatMultDense *ab = (Mat_MatMultDense *)data;
2237: PetscFunctionBegin;
2238: PetscCall(MatDestroy(&ab->Ce));
2239: PetscCall(MatDestroy(&ab->Ae));
2240: PetscCall(MatDestroy(&ab->Be));
2241: PetscCall(PetscFree(ab));
2242: PetscFunctionReturn(PETSC_SUCCESS);
2243: }
2245: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2246: {
2247: Mat_MatMultDense *ab;
2249: PetscFunctionBegin;
2250: MatCheckProduct(C, 3);
2251: PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Missing product data");
2252: ab = (Mat_MatMultDense *)C->product->data;
2253: PetscCall(MatConvert_MPIDense_Elemental(A, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Ae));
2254: PetscCall(MatConvert_MPIDense_Elemental(B, MATELEMENTAL, MAT_REUSE_MATRIX, &ab->Be));
2255: PetscCall(MatMatMultNumeric_Elemental(ab->Ae, ab->Be, ab->Ce));
2256: PetscCall(MatConvert(ab->Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
2257: PetscFunctionReturn(PETSC_SUCCESS);
2258: }
2260: static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2261: {
2262: Mat Ae, Be, Ce;
2263: Mat_MatMultDense *ab;
2265: PetscFunctionBegin;
2266: MatCheckProduct(C, 4);
2267: PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
2268: /* check local size of A and B */
2269: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2270: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
2271: }
2273: /* create elemental matrices Ae and Be */
2274: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2275: PetscCall(MatSetSizes(Ae, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
2276: PetscCall(MatSetType(Ae, MATELEMENTAL));
2277: PetscCall(MatSetUp(Ae));
2278: PetscCall(MatSetOption(Ae, MAT_ROW_ORIENTED, PETSC_FALSE));
2280: PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2281: PetscCall(MatSetSizes(Be, PETSC_DECIDE, PETSC_DECIDE, B->rmap->N, B->cmap->N));
2282: PetscCall(MatSetType(Be, MATELEMENTAL));
2283: PetscCall(MatSetUp(Be));
2284: PetscCall(MatSetOption(Be, MAT_ROW_ORIENTED, PETSC_FALSE));
2286: /* compute symbolic Ce = Ae*Be */
2287: PetscCall(MatCreate(PetscObjectComm((PetscObject)C), &Ce));
2288: PetscCall(MatMatMultSymbolic_Elemental(Ae, Be, fill, Ce));
2290: /* setup C */
2291: PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
2292: PetscCall(MatSetType(C, MATDENSE));
2293: PetscCall(MatSetUp(C));
2295: /* create data structure for reuse Cdense */
2296: PetscCall(PetscNew(&ab));
2297: ab->Ae = Ae;
2298: ab->Be = Be;
2299: ab->Ce = Ce;
2301: C->product->data = ab;
2302: C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2303: C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2304: PetscFunctionReturn(PETSC_SUCCESS);
2305: }
2306: #endif
2308: #if defined(PETSC_HAVE_ELEMENTAL)
2309: static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2310: {
2311: PetscFunctionBegin;
2312: C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2313: C->ops->productsymbolic = MatProductSymbolic_AB;
2314: PetscFunctionReturn(PETSC_SUCCESS);
2315: }
2316: #endif
2318: static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2319: {
2320: Mat_Product *product = C->product;
2321: Mat A = product->A, B = product->B;
2323: PetscFunctionBegin;
2324: if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2325: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, B->rmap->rstart, B->rmap->rend);
2326: C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2327: C->ops->productsymbolic = MatProductSymbolic_AtB;
2328: PetscFunctionReturn(PETSC_SUCCESS);
2329: }
2331: static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2332: {
2333: Mat_Product *product = C->product;
2334: const char *algTypes[2] = {"allgatherv", "cyclic"};
2335: PetscInt alg, nalg = 2;
2336: PetscBool flg = PETSC_FALSE;
2338: PetscFunctionBegin;
2339: /* Set default algorithm */
2340: alg = 0; /* default is allgatherv */
2341: PetscCall(PetscStrcmp(product->alg, "default", &flg));
2342: if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2344: /* Get runtime option */
2345: if (product->api_user) {
2346: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2347: PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2348: PetscOptionsEnd();
2349: } else {
2350: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2351: PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2352: PetscOptionsEnd();
2353: }
2354: if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2356: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2357: C->ops->productsymbolic = MatProductSymbolic_ABt;
2358: PetscFunctionReturn(PETSC_SUCCESS);
2359: }
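/*
   Sketch of how the C = A*B^T path above is reached from user code; A and B are
   MATMPIDENSE matrices supplied by the caller, and the algorithm ("allgatherv" or
   "cyclic") can be switched with the options read above, e.g.
   -matmattransmult_mpidense_mpidense_via cyclic:

      Mat C;

      PetscCall(MatMatTransposeMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));
      PetscCall(MatMatTransposeMult(A, B, MAT_REUSE_MATRIX, PETSC_DEFAULT, &C)); // reuses the symbolic data
      PetscCall(MatDestroy(&C));
*/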
2361: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2362: {
2363: Mat_Product *product = C->product;
2365: PetscFunctionBegin;
2366: switch (product->type) {
2367: #if defined(PETSC_HAVE_ELEMENTAL)
2368: case MATPRODUCT_AB:
2369: PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2370: break;
2371: #endif
2372: case MATPRODUCT_AtB:
2373: PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2374: break;
2375: case MATPRODUCT_ABt:
2376: PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2377: break;
2378: default:
2379: break;
2380: }
2381: PetscFunctionReturn(PETSC_SUCCESS);
2382: }