Actual source code: axpy.c
1: #include <petsc/private/matimpl.h>
3: static PetscErrorCode MatTransposeAXPY_Private(Mat Y, PetscScalar a, Mat X, MatStructure str, Mat T)
4: {
5: Mat A, F;
6: PetscScalar vshift, vscale;
7: PetscErrorCode (*f)(Mat, Mat *);
9: PetscFunctionBegin;
10: if (T == X) PetscCall(MatShellGetScalingShifts(T, &vshift, &vscale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
11: else {
12: vshift = 0.0;
13: vscale = 1.0;
14: }
15: PetscCall(PetscObjectQueryFunction((PetscObject)T, "MatTransposeGetMat_C", &f));
16: if (f) {
17: PetscCall(MatTransposeGetMat(T, &A));
18: if (T == X) {
19: PetscCall(PetscInfo(NULL, "Explicitly transposing X of type MATTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
20: PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &F));
21: A = Y;
22: } else {
23: PetscCall(PetscInfo(NULL, "Transposing X because Y of type MATTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
24: PetscCall(MatTranspose(X, MAT_INITIAL_MATRIX, &F));
25: }
26: } else {
27: PetscCall(MatHermitianTransposeGetMat(T, &A));
28: if (T == X) {
29: PetscCall(PetscInfo(NULL, "Explicitly Hermitian transposing X of type MATHERMITIANTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
30: PetscCall(MatHermitianTranspose(A, MAT_INITIAL_MATRIX, &F));
31: A = Y;
32: } else {
33: PetscCall(PetscInfo(NULL, "Hermitian transposing X because Y of type MATHERMITIANTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
34: PetscCall(MatHermitianTranspose(X, MAT_INITIAL_MATRIX, &F));
35: }
36: }
37: PetscCall(MatAXPY(A, a * vscale, F, str));
38: PetscCall(MatShift(A, a * vshift));
39: PetscCall(MatDestroy(&F));
40: PetscFunctionReturn(PETSC_SUCCESS);
41: }
43: /*@
44: MatAXPY - Computes Y = a*X + Y.
46: Logically Collective
48: Input Parameters:
49: + a - the scalar multiplier
50: . X - the first matrix
51: . Y - the second matrix
52: - str - either `SAME_NONZERO_PATTERN`, `DIFFERENT_NONZERO_PATTERN`, `UNKNOWN_NONZERO_PATTERN`, or `SUBSET_NONZERO_PATTERN` (nonzeros of `X` is a subset of `Y`'s)
54: Level: intermediate
56: .seealso: [](ch_matrices), `Mat`, `MatAYPX()`
57: @*/
58: PetscErrorCode MatAXPY(Mat Y, PetscScalar a, Mat X, MatStructure str)
59: {
60: PetscInt M1, M2, N1, N2;
61: PetscInt m1, m2, n1, n2;
62: PetscBool sametype, transpose;
64: PetscFunctionBegin;
68: PetscCheckSameComm(Y, 1, X, 3);
69: PetscCall(MatGetSize(X, &M1, &N1));
70: PetscCall(MatGetSize(Y, &M2, &N2));
71: PetscCall(MatGetLocalSize(X, &m1, &n1));
72: PetscCall(MatGetLocalSize(Y, &m2, &n2));
73: PetscCheck(M1 == M2 && N1 == N2, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_SIZ, "Non conforming matrix add: global sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT, M1, N1, M2, N2);
74: PetscCheck(m1 == m2 && n1 == n2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Non conforming matrix add: local sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT, m1, n1, m2, n2);
75: PetscCheck(Y->assembled, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix (Y)");
76: PetscCheck(X->assembled, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix (X)");
77: if (a == (PetscScalar)0.0) PetscFunctionReturn(PETSC_SUCCESS);
78: if (Y == X) {
79: PetscCall(MatScale(Y, 1.0 + a));
80: PetscFunctionReturn(PETSC_SUCCESS);
81: }
82: PetscCall(PetscObjectObjectTypeCompare((PetscObject)X, (PetscObject)Y, &sametype));
83: PetscCall(PetscLogEventBegin(MAT_AXPY, Y, 0, 0, 0));
84: if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
85: PetscUseTypeMethod(Y, axpy, a, X, str);
86: } else {
87: PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &transpose, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, ""));
88: if (transpose) {
89: PetscCall(MatTransposeAXPY_Private(Y, a, X, str, X));
90: } else {
91: PetscCall(PetscObjectTypeCompareAny((PetscObject)Y, &transpose, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, ""));
92: if (transpose) {
93: PetscCall(MatTransposeAXPY_Private(Y, a, X, str, Y));
94: } else {
95: PetscCall(MatAXPY_Basic(Y, a, X, str));
96: }
97: }
98: }
99: PetscCall(PetscLogEventEnd(MAT_AXPY, Y, 0, 0, 0));
100: PetscFunctionReturn(PETSC_SUCCESS);
101: }
103: PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
104: {
105: PetscErrorCode (*preall)(Mat, Mat, Mat *) = NULL;
107: PetscFunctionBegin;
108: /* look for any available faster alternative to the general preallocator */
109: PetscCall(PetscObjectQueryFunction((PetscObject)Y, "MatAXPYGetPreallocation_C", &preall));
110: if (preall) {
111: PetscCall((*preall)(Y, X, B));
112: } else { /* Use MatPrellocator, assumes same row-col distribution */
113: Mat preallocator;
114: PetscInt r, rstart, rend;
115: PetscInt m, n, M, N;
117: PetscCall(MatGetRowUpperTriangular(Y));
118: PetscCall(MatGetRowUpperTriangular(X));
119: PetscCall(MatGetSize(Y, &M, &N));
120: PetscCall(MatGetLocalSize(Y, &m, &n));
121: PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &preallocator));
122: PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
123: PetscCall(MatSetLayouts(preallocator, Y->rmap, Y->cmap));
124: PetscCall(MatSetUp(preallocator));
125: PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
126: for (r = rstart; r < rend; ++r) {
127: PetscInt ncols;
128: const PetscInt *row;
129: const PetscScalar *vals;
131: PetscCall(MatGetRow(Y, r, &ncols, &row, &vals));
132: PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
133: PetscCall(MatRestoreRow(Y, r, &ncols, &row, &vals));
134: PetscCall(MatGetRow(X, r, &ncols, &row, &vals));
135: PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
136: PetscCall(MatRestoreRow(X, r, &ncols, &row, &vals));
137: }
138: PetscCall(MatSetOption(preallocator, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
139: PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
140: PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
141: PetscCall(MatRestoreRowUpperTriangular(Y));
142: PetscCall(MatRestoreRowUpperTriangular(X));
144: PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), B));
145: PetscCall(MatSetType(*B, ((PetscObject)Y)->type_name));
146: PetscCall(MatSetLayouts(*B, Y->rmap, Y->cmap));
147: PetscCall(MatPreallocatorPreallocate(preallocator, PETSC_FALSE, *B));
148: PetscCall(MatDestroy(&preallocator));
149: }
150: PetscFunctionReturn(PETSC_SUCCESS);
151: }
153: PetscErrorCode MatAXPY_Basic(Mat Y, PetscScalar a, Mat X, MatStructure str)
154: {
155: PetscBool isshell, isdense, isnest;
157: PetscFunctionBegin;
158: PetscCall(MatIsShell(Y, &isshell));
159: if (isshell) { /* MatShell has special support for AXPY */
160: PetscErrorCode (*f)(Mat, PetscScalar, Mat, MatStructure);
162: PetscCall(MatGetOperation(Y, MATOP_AXPY, (void (**)(void)) & f));
163: if (f) {
164: PetscCall((*f)(Y, a, X, str));
165: PetscFunctionReturn(PETSC_SUCCESS);
166: }
167: }
168: /* no need to preallocate if Y is dense */
169: PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)Y, &isdense, MATSEQDENSE, MATMPIDENSE, ""));
170: if (isdense) {
171: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATNEST, &isnest));
172: if (isnest) {
173: PetscCall(MatAXPY_Dense_Nest(Y, a, X));
174: PetscFunctionReturn(PETSC_SUCCESS);
175: }
176: if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
177: }
178: if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
179: PetscInt i, start, end, j, ncols, m, n;
180: const PetscInt *row;
181: PetscScalar *val;
182: const PetscScalar *vals;
184: PetscCall(MatGetSize(X, &m, &n));
185: PetscCall(MatGetOwnershipRange(X, &start, &end));
186: PetscCall(MatGetRowUpperTriangular(X));
187: if (a == 1.0) {
188: for (i = start; i < end; i++) {
189: PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
190: PetscCall(MatSetValues(Y, 1, &i, ncols, row, vals, ADD_VALUES));
191: PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
192: }
193: } else {
194: PetscInt vs = 100;
195: /* realloc if needed, as this function may be used in parallel */
196: PetscCall(PetscMalloc1(vs, &val));
197: for (i = start; i < end; i++) {
198: PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
199: if (vs < ncols) {
200: vs = PetscMin(2 * ncols, n);
201: PetscCall(PetscRealloc(vs * sizeof(*val), &val));
202: }
203: for (j = 0; j < ncols; j++) val[j] = a * vals[j];
204: PetscCall(MatSetValues(Y, 1, &i, ncols, row, val, ADD_VALUES));
205: PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
206: }
207: PetscCall(PetscFree(val));
208: }
209: PetscCall(MatRestoreRowUpperTriangular(X));
210: PetscCall(MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY));
211: PetscCall(MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY));
212: } else {
213: Mat B;
215: PetscCall(MatAXPY_Basic_Preallocate(Y, X, &B));
216: PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
217: PetscCall(MatHeaderMerge(Y, &B));
218: }
219: PetscFunctionReturn(PETSC_SUCCESS);
220: }
222: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B, Mat Y, PetscScalar a, Mat X, MatStructure str)
223: {
224: PetscInt i, start, end, j, ncols, m, n;
225: const PetscInt *row;
226: PetscScalar *val;
227: const PetscScalar *vals;
229: PetscFunctionBegin;
230: PetscCall(MatGetSize(X, &m, &n));
231: PetscCall(MatGetOwnershipRange(X, &start, &end));
232: PetscCall(MatGetRowUpperTriangular(Y));
233: PetscCall(MatGetRowUpperTriangular(X));
234: if (a == 1.0) {
235: for (i = start; i < end; i++) {
236: PetscCall(MatGetRow(Y, i, &ncols, &row, &vals));
237: PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
238: PetscCall(MatRestoreRow(Y, i, &ncols, &row, &vals));
240: PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
241: PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
242: PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
243: }
244: } else {
245: PetscInt vs = 100;
246: /* realloc if needed, as this function may be used in parallel */
247: PetscCall(PetscMalloc1(vs, &val));
248: for (i = start; i < end; i++) {
249: PetscCall(MatGetRow(Y, i, &ncols, &row, &vals));
250: PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
251: PetscCall(MatRestoreRow(Y, i, &ncols, &row, &vals));
253: PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
254: if (vs < ncols) {
255: vs = PetscMin(2 * ncols, n);
256: PetscCall(PetscRealloc(vs * sizeof(*val), &val));
257: }
258: for (j = 0; j < ncols; j++) val[j] = a * vals[j];
259: PetscCall(MatSetValues(B, 1, &i, ncols, row, val, ADD_VALUES));
260: PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
261: }
262: PetscCall(PetscFree(val));
263: }
264: PetscCall(MatRestoreRowUpperTriangular(Y));
265: PetscCall(MatRestoreRowUpperTriangular(X));
266: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
267: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
268: PetscFunctionReturn(PETSC_SUCCESS);
269: }
271: /*@
272: MatShift - Computes `Y = Y + a I`, where `a` is a `PetscScalar`
274: Neighbor-wise Collective
276: Input Parameters:
277: + Y - the matrix
278: - a - the `PetscScalar`
280: Level: intermediate
282: Notes:
283: If `Y` is a rectangular matrix, the shift is done on the main diagonal of the matrix (https://en.wikipedia.org/wiki/Main_diagonal)
285: If the matrix `Y` is missing some diagonal entries this routine can be very slow. To make it fast one should initially
286: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
287: entry). No operation is performed when a is zero.
289: To form Y = Y + diag(V) use `MatDiagonalSet()`
291: .seealso: [](ch_matrices), `Mat`, `MatDiagonalSet()`, `MatScale()`, `MatDiagonalScale()`
292: @*/
293: PetscErrorCode MatShift(Mat Y, PetscScalar a)
294: {
295: PetscFunctionBegin;
297: PetscCheck(Y->assembled, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix");
298: PetscCheck(!Y->factortype, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
299: MatCheckPreallocated(Y, 1);
300: if (a == 0.0) PetscFunctionReturn(PETSC_SUCCESS);
302: if (Y->ops->shift) PetscUseTypeMethod(Y, shift, a);
303: else PetscCall(MatShift_Basic(Y, a));
305: PetscCall(PetscObjectStateIncrease((PetscObject)Y));
306: PetscFunctionReturn(PETSC_SUCCESS);
307: }
309: PetscErrorCode MatDiagonalSet_Default(Mat Y, Vec D, InsertMode is)
310: {
311: PetscInt i, start, end;
312: const PetscScalar *v;
314: PetscFunctionBegin;
315: PetscCall(MatGetOwnershipRange(Y, &start, &end));
316: PetscCall(VecGetArrayRead(D, &v));
317: for (i = start; i < end; i++) PetscCall(MatSetValues(Y, 1, &i, 1, &i, v + i - start, is));
318: PetscCall(VecRestoreArrayRead(D, &v));
319: PetscCall(MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY));
320: PetscCall(MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY));
321: PetscFunctionReturn(PETSC_SUCCESS);
322: }
324: /*@
325: MatDiagonalSet - Computes `Y` = `Y` + `D`, where `D` is a diagonal matrix
326: that is represented as a vector. Or Y[i,i] = D[i] if `InsertMode` is
327: `INSERT_VALUES`.
329: Neighbor-wise Collective
331: Input Parameters:
332: + Y - the input matrix
333: . D - the diagonal matrix, represented as a vector
334: - is - `INSERT_VALUES` or `ADD_VALUES`
336: Level: intermediate
338: Note:
339: If the matrix `Y` is missing some diagonal entries this routine can be very slow. To make it fast one should initially
340: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
341: entry).
343: .seealso: [](ch_matrices), `Mat`, `MatShift()`, `MatScale()`, `MatDiagonalScale()`
344: @*/
345: PetscErrorCode MatDiagonalSet(Mat Y, Vec D, InsertMode is)
346: {
347: PetscInt matlocal, veclocal;
349: PetscFunctionBegin;
352: MatCheckPreallocated(Y, 1);
353: PetscCall(MatGetLocalSize(Y, &matlocal, NULL));
354: PetscCall(VecGetLocalSize(D, &veclocal));
355: PetscCheck(matlocal == veclocal, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number local rows of matrix %" PetscInt_FMT " does not match that of vector for diagonal %" PetscInt_FMT, matlocal, veclocal);
356: if (Y->ops->diagonalset) PetscUseTypeMethod(Y, diagonalset, D, is);
357: else PetscCall(MatDiagonalSet_Default(Y, D, is));
358: PetscCall(PetscObjectStateIncrease((PetscObject)Y));
359: PetscFunctionReturn(PETSC_SUCCESS);
360: }
362: /*@
363: MatAYPX - Computes Y = a*Y + X.
365: Logically Collective
367: Input Parameters:
368: + a - the `PetscScalar` multiplier
369: . Y - the first matrix
370: . X - the second matrix
371: - str - either `SAME_NONZERO_PATTERN`, `DIFFERENT_NONZERO_PATTERN`, `UNKNOWN_NONZERO_PATTERN`, or `SUBSET_NONZERO_PATTERN` (nonzeros of `X` is a subset of `Y`'s)
373: Level: intermediate
375: .seealso: [](ch_matrices), `Mat`, `MatAXPY()`
376: @*/
377: PetscErrorCode MatAYPX(Mat Y, PetscScalar a, Mat X, MatStructure str)
378: {
379: PetscFunctionBegin;
380: PetscCall(MatScale(Y, a));
381: PetscCall(MatAXPY(Y, 1.0, X, str));
382: PetscFunctionReturn(PETSC_SUCCESS);
383: }
385: /*@C
386: MatComputeOperator - Computes the explicit matrix
388: Collective
390: Input Parameters:
391: + inmat - the matrix
392: - mattype - the matrix type for the explicit operator
394: Output Parameter:
395: . mat - the explicit operator
397: Level: advanced
399: Note:
400: This computation is done by applying the operator to columns of the identity matrix.
401: This routine is costly in general, and is recommended for use only with relatively small systems.
402: Currently, this routine uses a dense matrix format if `mattype` == `NULL`.
404: .seealso: [](ch_matrices), `Mat`, `MatConvert()`, `MatMult()`, `MatComputeOperatorTranspose()`
405: @*/
406: PetscErrorCode MatComputeOperator(Mat inmat, MatType mattype, Mat *mat)
407: {
408: PetscFunctionBegin;
410: PetscAssertPointer(mat, 3);
411: PetscCall(MatConvert_Shell(inmat, mattype ? mattype : MATDENSE, MAT_INITIAL_MATRIX, mat));
412: PetscFunctionReturn(PETSC_SUCCESS);
413: }
415: /*@C
416: MatComputeOperatorTranspose - Computes the explicit matrix representation of
417: a give matrix that can apply `MatMultTranspose()`
419: Collective
421: Input Parameters:
422: + inmat - the matrix
423: - mattype - the matrix type for the explicit operator
425: Output Parameter:
426: . mat - the explicit operator transposed
428: Level: advanced
430: Note:
431: This computation is done by applying the transpose of the operator to columns of the identity matrix.
432: This routine is costly in general, and is recommended for use only with relatively small systems.
433: Currently, this routine uses a dense matrix format if `mattype` == `NULL`.
435: .seealso: [](ch_matrices), `Mat`, `MatConvert()`, `MatMult()`, `MatComputeOperator()`
436: @*/
437: PetscErrorCode MatComputeOperatorTranspose(Mat inmat, MatType mattype, Mat *mat)
438: {
439: Mat A;
441: PetscFunctionBegin;
443: PetscAssertPointer(mat, 3);
444: PetscCall(MatCreateTranspose(inmat, &A));
445: PetscCall(MatConvert_Shell(A, mattype ? mattype : MATDENSE, MAT_INITIAL_MATRIX, mat));
446: PetscCall(MatDestroy(&A));
447: PetscFunctionReturn(PETSC_SUCCESS);
448: }
450: /*@
451: MatFilter - Set all values in the matrix with an absolute value less than or equal to the tolerance to zero, and optionally compress the underlying storage
453: Input Parameters:
454: + A - The matrix
455: . tol - The zero tolerance
456: . compress - Whether the storage from the input matrix `A` should be compressed once values less than or equal to `tol` are set to zero
457: - keep - If `compress` is true and for a given row of `A`, the diagonal coefficient is less than or equal to `tol`, indicates whether it should be left in the structure or eliminated as well
459: Level: intermediate
461: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatZeroEntries()`, `MatEliminateZeros()`, `VecFilter()`
462: @*/
463: PetscErrorCode MatFilter(Mat A, PetscReal tol, PetscBool compress, PetscBool keep)
464: {
465: Mat a;
466: PetscScalar *newVals;
467: PetscInt *newCols, rStart, rEnd, maxRows, r, colMax = 0, nnz0 = 0, nnz1 = 0;
468: PetscBool flg;
470: PetscFunctionBegin;
471: PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, ""));
472: if (flg) {
473: PetscCall(MatDenseGetLocalMatrix(A, &a));
474: PetscCall(MatDenseGetLDA(a, &r));
475: PetscCall(MatGetSize(a, &rStart, &rEnd));
476: PetscCall(MatDenseGetArray(a, &newVals));
477: for (; colMax < rEnd; ++colMax) {
478: for (maxRows = 0; maxRows < rStart; ++maxRows) newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) <= tol ? 0.0 : newVals[maxRows + colMax * r];
479: }
480: PetscCall(MatDenseRestoreArray(a, &newVals));
481: } else {
482: const PetscInt *ranges;
483: PetscMPIInt rank, size;
485: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
486: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
487: PetscCall(MatGetOwnershipRanges(A, &ranges));
488: rStart = ranges[rank];
489: rEnd = ranges[rank + 1];
490: PetscCall(MatGetRowUpperTriangular(A));
491: for (r = rStart; r < rEnd; ++r) {
492: PetscInt ncols;
494: PetscCall(MatGetRow(A, r, &ncols, NULL, NULL));
495: colMax = PetscMax(colMax, ncols);
496: PetscCall(MatRestoreRow(A, r, &ncols, NULL, NULL));
497: }
498: maxRows = 0;
499: for (r = 0; r < size; ++r) maxRows = PetscMax(maxRows, ranges[r + 1] - ranges[r]);
500: PetscCall(PetscCalloc2(colMax, &newCols, colMax, &newVals));
501: PetscCall(MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg)); /* cache user-defined value */
502: PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
503: /* short-circuit code in MatAssemblyBegin() and MatAssemblyEnd() */
504: /* that are potentially called many times depending on the distribution of A */
505: for (r = rStart; r < rStart + maxRows; ++r) {
506: if (r < rEnd) {
507: const PetscScalar *vals;
508: const PetscInt *cols;
509: PetscInt ncols, newcols = 0, c;
511: PetscCall(MatGetRow(A, r, &ncols, &cols, &vals));
512: nnz0 += ncols - 1;
513: for (c = 0; c < ncols; ++c) {
514: if (PetscUnlikely(PetscAbsScalar(vals[c]) <= tol)) newCols[newcols++] = cols[c];
515: }
516: nnz1 += ncols - newcols - 1;
517: PetscCall(MatRestoreRow(A, r, &ncols, &cols, &vals));
518: PetscCall(MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES));
519: }
520: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
521: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
522: }
523: PetscCall(MatRestoreRowUpperTriangular(A));
524: PetscCall(PetscFree2(newCols, newVals));
525: PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg)); /* reset option to its user-defined value */
526: if (nnz0 > 0) PetscCall(PetscInfo(NULL, "Filtering left %g%% edges in graph\n", 100 * (double)nnz1 / (double)nnz0));
527: else PetscCall(PetscInfo(NULL, "Warning: %" PetscInt_FMT " edges to filter with %" PetscInt_FMT " rows\n", nnz0, maxRows));
528: }
529: if (compress && A->ops->eliminatezeros) {
530: Mat B;
531: PetscBool flg;
533: PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
534: if (!flg) {
535: PetscCall(MatEliminateZeros(A, keep));
536: PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
537: PetscCall(MatHeaderReplace(A, &B));
538: }
539: }
540: PetscFunctionReturn(PETSC_SUCCESS);
541: }