Actual source code: axpy.c
petsc-3.13.6 2020-09-29
2: #include <petsc/private/matimpl.h>
4: static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T)
5: {
6: PetscErrorCode ierr,(*f)(Mat,Mat*);
7: Mat A,F;
10: PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f);
11: if (f) {
12: MatTransposeGetMat(T,&A);
13: if (T == X) {
14: PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
15: MatTranspose(A,MAT_INITIAL_MATRIX,&F);
16: A = Y;
17: } else {
18: PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
19: MatTranspose(X,MAT_INITIAL_MATRIX,&F);
20: }
21: } else {
22: MatHermitianTransposeGetMat(T,&A);
23: if (T == X) {
24: PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
25: MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F);
26: A = Y;
27: } else {
28: PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
29: MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F);
30: }
31: }
32: MatAXPY(A,a,F,str);
33: MatDestroy(&F);
34: return(0);
35: }
37: /*@
38: MatAXPY - Computes Y = a*X + Y.
40: Logically Collective on Mat
42: Input Parameters:
43: + a - the scalar multiplier
44: . X - the first matrix
45: . Y - the second matrix
46: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
47: or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
49: Notes: No operation is performed when a is zero.
51: Level: intermediate
53: .seealso: MatAYPX()
54: @*/
55: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
56: {
58: PetscInt M1,M2,N1,N2;
59: PetscInt m1,m2,n1,n2;
60: MatType t1,t2;
61: PetscBool sametype,transpose;
68: MatGetSize(X,&M1,&N1);
69: MatGetSize(Y,&M2,&N2);
70: MatGetLocalSize(X,&m1,&n1);
71: MatGetLocalSize(Y,&m2,&n2);
72: if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes %D x %D, %D x %D",M1,M2,N1,N2);
73: if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes %D x %D, %D x %D",m1,m2,n1,n2);
74: if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (Y)");
75: if (!X->assembled) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (X)");
76: if (a == (PetscScalar)0.0) return(0);
77: if (Y == X) {
78: MatScale(Y,1.0 + a);
79: return(0);
80: }
81: MatGetType(X,&t1);
82: MatGetType(Y,&t2);
83: PetscStrcmp(t1,t2,&sametype);
84: PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
85: if (Y->ops->axpy && sametype) {
86: (*Y->ops->axpy)(Y,a,X,str);
87: } else {
88: PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose);
89: if (transpose) {
90: MatTransposeAXPY_Private(Y,a,X,str,X);
91: } else {
92: PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose);
93: if (transpose) {
94: MatTransposeAXPY_Private(Y,a,X,str,Y);
95: } else {
96: MatAXPY_Basic(Y,a,X,str);
97: }
98: }
99: }
100: PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
101: return(0);
102: }
104: PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
105: {
107: PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;
110: /* look for any available faster alternative to the general preallocator */
111: PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);
112: if (preall) {
113: (*preall)(Y,X,B);
114: } else { /* Use MatPrellocator, assumes same row-col distribution */
115: Mat preallocator;
116: PetscInt r,rstart,rend;
117: PetscInt m,n,M,N;
119: MatGetRowUpperTriangular(Y);
120: MatGetRowUpperTriangular(X);
121: MatGetSize(Y,&M,&N);
122: MatGetLocalSize(Y,&m,&n);
123: MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);
124: MatSetType(preallocator,MATPREALLOCATOR);
125: MatSetSizes(preallocator,m,n,M,N);
126: MatSetUp(preallocator);
127: MatGetOwnershipRange(preallocator,&rstart,&rend);
128: for (r = rstart; r < rend; ++r) {
129: PetscInt ncols;
130: const PetscInt *row;
131: const PetscScalar *vals;
133: MatGetRow(Y,r,&ncols,&row,&vals);
134: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
135: MatRestoreRow(Y,r,&ncols,&row,&vals);
136: MatGetRow(X,r,&ncols,&row,&vals);
137: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
138: MatRestoreRow(X,r,&ncols,&row,&vals);
139: }
140: MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
141: MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);
142: MatRestoreRowUpperTriangular(Y);
143: MatRestoreRowUpperTriangular(X);
145: MatCreate(PetscObjectComm((PetscObject)Y),B);
146: MatSetType(*B,((PetscObject)Y)->type_name);
147: MatSetSizes(*B,m,n,M,N);
148: MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);
149: MatDestroy(&preallocator);
150: }
151: return(0);
152: }
154: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
155: {
157: PetscBool isshell;
160: PetscObjectTypeCompare((PetscObject)Y,MATSHELL,&isshell);
161: if (isshell) { /* MatShell has special support for AXPY */
162: PetscErrorCode (*f)(Mat,PetscScalar,Mat,MatStructure);
164: MatGetOperation(Y,MATOP_AXPY,(void (**)(void))&f);
165: if (f) {
166: (*f)(Y,a,X,str);
167: return(0);
168: }
169: }
170: if (str != DIFFERENT_NONZERO_PATTERN) {
171: PetscInt i,start,end,j,ncols,m,n;
172: const PetscInt *row;
173: PetscScalar *val;
174: const PetscScalar *vals;
176: MatGetSize(X,&m,&n);
177: MatGetOwnershipRange(X,&start,&end);
178: MatGetRowUpperTriangular(X);
179: if (a == 1.0) {
180: for (i = start; i < end; i++) {
181: MatGetRow(X,i,&ncols,&row,&vals);
182: MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
183: MatRestoreRow(X,i,&ncols,&row,&vals);
184: }
185: } else {
186: PetscInt vs = 100;
187: /* realloc if needed, as this function may be used in parallel */
188: PetscMalloc1(vs,&val);
189: for (i=start; i<end; i++) {
190: MatGetRow(X,i,&ncols,&row,&vals);
191: if (vs < ncols) {
192: vs = PetscMin(2*ncols,n);
193: PetscRealloc(vs*sizeof(*val),&val);
194: }
195: for (j=0; j<ncols; j++) val[j] = a*vals[j];
196: MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
197: MatRestoreRow(X,i,&ncols,&row,&vals);
198: }
199: PetscFree(val);
200: }
201: MatRestoreRowUpperTriangular(X);
202: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
203: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
204: } else {
205: Mat B;
207: MatAXPY_Basic_Preallocate(Y,X,&B);
208: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
209: MatHeaderReplace(Y,&B);
210: }
211: return(0);
212: }
214: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
215: {
216: PetscInt i,start,end,j,ncols,m,n;
217: PetscErrorCode ierr;
218: const PetscInt *row;
219: PetscScalar *val;
220: const PetscScalar *vals;
223: MatGetSize(X,&m,&n);
224: MatGetOwnershipRange(X,&start,&end);
225: MatGetRowUpperTriangular(Y);
226: MatGetRowUpperTriangular(X);
227: if (a == 1.0) {
228: for (i = start; i < end; i++) {
229: MatGetRow(Y,i,&ncols,&row,&vals);
230: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
231: MatRestoreRow(Y,i,&ncols,&row,&vals);
233: MatGetRow(X,i,&ncols,&row,&vals);
234: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
235: MatRestoreRow(X,i,&ncols,&row,&vals);
236: }
237: } else {
238: PetscInt vs = 100;
239: /* realloc if needed, as this function may be used in parallel */
240: PetscMalloc1(vs,&val);
241: for (i=start; i<end; i++) {
242: MatGetRow(Y,i,&ncols,&row,&vals);
243: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
244: MatRestoreRow(Y,i,&ncols,&row,&vals);
246: MatGetRow(X,i,&ncols,&row,&vals);
247: if (vs < ncols) {
248: vs = PetscMin(2*ncols,n);
249: PetscRealloc(vs*sizeof(*val),&val);
250: }
251: for (j=0; j<ncols; j++) val[j] = a*vals[j];
252: MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
253: MatRestoreRow(X,i,&ncols,&row,&vals);
254: }
255: PetscFree(val);
256: }
257: MatRestoreRowUpperTriangular(Y);
258: MatRestoreRowUpperTriangular(X);
259: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
260: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
261: return(0);
262: }
264: /*@
265: MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.
267: Neighbor-wise Collective on Mat
269: Input Parameters:
270: + Y - the matrices
271: - a - the PetscScalar
273: Level: intermediate
275: Notes:
276: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
277: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
278: entry). No operation is performed when a is zero.
280: To form Y = Y + diag(V) use MatDiagonalSet()
282: .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
283: @*/
284: PetscErrorCode MatShift(Mat Y,PetscScalar a)
285: {
290: if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
291: if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
292: MatCheckPreallocated(Y,1);
293: if (a == 0.0) return(0);
295: if (Y->ops->shift) {
296: (*Y->ops->shift)(Y,a);
297: } else {
298: MatShift_Basic(Y,a);
299: }
301: PetscObjectStateIncrease((PetscObject)Y);
302: return(0);
303: }
305: PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
306: {
308: PetscInt i,start,end;
309: PetscScalar *v;
312: MatGetOwnershipRange(Y,&start,&end);
313: VecGetArray(D,&v);
314: for (i=start; i<end; i++) {
315: MatSetValues(Y,1,&i,1,&i,v+i-start,is);
316: }
317: VecRestoreArray(D,&v);
318: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
319: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
320: return(0);
321: }
323: /*@
324: MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
325: that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
326: INSERT_VALUES.
328: Input Parameters:
329: + Y - the input matrix
330: . D - the diagonal matrix, represented as a vector
331: - i - INSERT_VALUES or ADD_VALUES
333: Neighbor-wise Collective on Mat
335: Notes:
336: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
337: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
338: entry).
340: Level: intermediate
342: .seealso: MatShift(), MatScale(), MatDiagonalScale()
343: @*/
344: PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is)
345: {
347: PetscInt matlocal,veclocal;
352: MatGetLocalSize(Y,&matlocal,NULL);
353: VecGetLocalSize(D,&veclocal);
354: if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
355: if (Y->ops->diagonalset) {
356: (*Y->ops->diagonalset)(Y,D,is);
357: } else {
358: MatDiagonalSet_Default(Y,D,is);
359: }
360: PetscObjectStateIncrease((PetscObject)Y);
361: return(0);
362: }
364: /*@
365: MatAYPX - Computes Y = a*Y + X.
367: Logically on Mat
369: Input Parameters:
370: + a - the PetscScalar multiplier
371: . Y - the first matrix
372: . X - the second matrix
373: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
375: Level: intermediate
377: .seealso: MatAXPY()
378: @*/
379: PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
380: {
381: PetscScalar one = 1.0;
383: PetscInt mX,mY,nX,nY;
389: MatGetSize(X,&mX,&nX);
390: MatGetSize(X,&mY,&nY);
391: if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
392: MatScale(Y,a);
393: MatAXPY(Y,one,X,str);
394: return(0);
395: }
397: /*@
398: MatComputeOperator - Computes the explicit matrix
400: Collective on Mat
402: Input Parameter:
403: + inmat - the matrix
404: - mattype - the matrix type for the explicit operator
406: Output Parameter:
407: . mat - the explict operator
409: Notes:
410: This computation is done by applying the operators to columns of the identity matrix.
411: This routine is costly in general, and is recommended for use only with relatively small systems.
412: Currently, this routine uses a dense matrix format if mattype == NULL.
414: Level: advanced
416: @*/
417: PetscErrorCode MatComputeOperator(Mat inmat,MatType mattype,Mat *mat)
418: {
424: MatConvert_Shell(inmat,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
425: return(0);
426: }
428: /*@
429: MatComputeOperatorTranspose - Computes the explicit matrix representation of
430: a give matrix that can apply MatMultTranspose()
432: Collective on Mat
434: Input Parameter:
435: . inmat - the matrix
437: Output Parameter:
438: . mat - the explict operator transposed
440: Notes:
441: This computation is done by applying the transpose of the operator to columns of the identity matrix.
442: This routine is costly in general, and is recommended for use only with relatively small systems.
443: Currently, this routine uses a dense matrix format if mattype == NULL.
445: Level: advanced
447: @*/
448: PetscErrorCode MatComputeOperatorTranspose(Mat inmat,MatType mattype,Mat *mat)
449: {
450: Mat A;
456: MatCreateTranspose(inmat,&A);
457: MatConvert_Shell(A,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
458: MatDestroy(&A);
459: return(0);
460: }
462: /*@
463: MatChop - Set all values in the matrix less than the tolerance to zero
465: Input Parameters:
466: + A - The matrix
467: - tol - The zero tolerance
469: Output Parameters:
470: . A - The chopped matrix
472: Level: intermediate
474: .seealso: MatCreate(), MatZeroEntries()
475: @*/
476: PetscErrorCode MatChop(Mat A, PetscReal tol)
477: {
478: PetscScalar *newVals;
479: PetscInt *newCols;
480: PetscInt rStart, rEnd, numRows, maxRows, r, colMax = 0;
484: MatGetOwnershipRange(A, &rStart, &rEnd);
485: MatGetRowUpperTriangular(A);
486: for (r = rStart; r < rEnd; ++r) {
487: PetscInt ncols;
489: MatGetRow(A, r, &ncols, NULL, NULL);
490: colMax = PetscMax(colMax, ncols);
491: MatRestoreRow(A, r, &ncols, NULL, NULL);
492: }
493: numRows = rEnd - rStart;
494: MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
495: PetscMalloc2(colMax,&newCols,colMax,&newVals);
496: for (r = rStart; r < rStart+maxRows; ++r) {
497: const PetscScalar *vals;
498: const PetscInt *cols;
499: PetscInt ncols, newcols, c;
501: if (r < rEnd) {
502: MatGetRow(A, r, &ncols, &cols, &vals);
503: for (c = 0; c < ncols; ++c) {
504: newCols[c] = cols[c];
505: newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
506: }
507: newcols = ncols;
508: MatRestoreRow(A, r, &ncols, &cols, &vals);
509: MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
510: }
511: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
512: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
513: }
514: MatRestoreRowUpperTriangular(A);
515: PetscFree2(newCols,newVals);
516: return(0);
517: }