Actual source code: axpy.c
petsc-3.12.5 2020-03-29
2: #include <petsc/private/matimpl.h>
4: static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T)
5: {
6: PetscErrorCode ierr,(*f)(Mat,Mat*);
7: Mat A,F;
10: PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f);
11: if (f) {
12: MatTransposeGetMat(T,&A);
13: if (T == X) {
14: PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
15: MatTranspose(A,MAT_INITIAL_MATRIX,&F);
16: A = Y;
17: } else {
18: PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
19: MatTranspose(X,MAT_INITIAL_MATRIX,&F);
20: }
21: } else {
22: MatHermitianTransposeGetMat(T,&A);
23: if (T == X) {
24: PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
25: MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F);
26: A = Y;
27: } else {
28: PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
29: MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F);
30: }
31: }
32: MatAXPY(A,a,F,str);
33: MatDestroy(&F);
34: return(0);
35: }
37: /*@
38: MatAXPY - Computes Y = a*X + Y.
40: Logically Collective on Mat
42: Input Parameters:
43: + a - the scalar multiplier
44: . X - the first matrix
45: . Y - the second matrix
46: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
47: or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
49: Level: intermediate
51: .seealso: MatAYPX()
52: @*/
53: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
54: {
56: PetscInt M1,M2,N1,N2;
57: PetscInt m1,m2,n1,n2;
58: MatType t1,t2;
59: PetscBool sametype,transpose;
66: MatGetSize(X,&M1,&N1);
67: MatGetSize(Y,&M2,&N2);
68: MatGetLocalSize(X,&m1,&n1);
69: MatGetLocalSize(Y,&m2,&n2);
70: if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes %D x %D, %D x %D",M1,M2,N1,N2);
71: if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes %D x %D, %D x %D",m1,m2,n1,n2);
72: if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (Y)");
73: if (!X->assembled) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (X)");
75: MatGetType(X,&t1);
76: MatGetType(Y,&t2);
77: PetscStrcmp(t1,t2,&sametype);
78: PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
79: if (Y->ops->axpy && sametype) {
80: (*Y->ops->axpy)(Y,a,X,str);
81: } else {
82: PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose);
83: if (transpose) {
84: MatTransposeAXPY_Private(Y,a,X,str,X);
85: } else {
86: PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose);
87: if (transpose) {
88: MatTransposeAXPY_Private(Y,a,X,str,Y);
89: } else {
90: if (str != DIFFERENT_NONZERO_PATTERN) {
91: MatAXPY_Basic(Y,a,X,str);
92: } else {
93: Mat B;
95: MatAXPY_Basic_Preallocate(Y,X,&B);
96: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
97: MatHeaderReplace(Y,&B);
98: }
99: }
100: }
101: }
102: PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
103: return(0);
104: }
106: PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
107: {
109: PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;
112: /* look for any available faster alternative to the general preallocator */
113: PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);
114: if (preall) {
115: (*preall)(Y,X,B);
116: } else { /* Use MatPrellocator, assumes same row-col distribution */
117: Mat preallocator;
118: PetscInt r,rstart,rend;
119: PetscInt m,n,M,N;
121: MatGetRowUpperTriangular(Y);
122: MatGetRowUpperTriangular(X);
123: MatGetSize(Y,&M,&N);
124: MatGetLocalSize(Y,&m,&n);
125: MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);
126: MatSetType(preallocator,MATPREALLOCATOR);
127: MatSetSizes(preallocator,m,n,M,N);
128: MatSetUp(preallocator);
129: MatGetOwnershipRange(preallocator,&rstart,&rend);
130: for (r = rstart; r < rend; ++r) {
131: PetscInt ncols;
132: const PetscInt *row;
133: const PetscScalar *vals;
135: MatGetRow(Y,r,&ncols,&row,&vals);
136: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
137: MatRestoreRow(Y,r,&ncols,&row,&vals);
138: MatGetRow(X,r,&ncols,&row,&vals);
139: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
140: MatRestoreRow(X,r,&ncols,&row,&vals);
141: }
142: MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
143: MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);
144: MatRestoreRowUpperTriangular(Y);
145: MatRestoreRowUpperTriangular(X);
147: MatCreate(PetscObjectComm((PetscObject)Y),B);
148: MatSetType(*B,((PetscObject)Y)->type_name);
149: MatSetSizes(*B,m,n,M,N);
150: MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);
151: MatDestroy(&preallocator);
152: }
153: return(0);
154: }
156: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
157: {
158: PetscInt i,start,end,j,ncols,m,n;
159: PetscErrorCode ierr;
160: const PetscInt *row;
161: PetscScalar *val;
162: const PetscScalar *vals;
165: MatGetSize(X,&m,&n);
166: MatGetOwnershipRange(X,&start,&end);
167: MatGetRowUpperTriangular(X);
168: if (a == 1.0) {
169: for (i = start; i < end; i++) {
170: MatGetRow(X,i,&ncols,&row,&vals);
171: MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
172: MatRestoreRow(X,i,&ncols,&row,&vals);
173: }
174: } else {
175: PetscInt vs = 100;
176: /* realloc if needed, as this function may be used in parallel */
177: PetscMalloc1(vs,&val);
178: for (i=start; i<end; i++) {
179: MatGetRow(X,i,&ncols,&row,&vals);
180: if (vs < ncols) {
181: vs = PetscMin(2*ncols,n);
182: PetscRealloc(vs*sizeof(*val),&val);
183: }
184: for (j=0; j<ncols; j++) val[j] = a*vals[j];
185: MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
186: MatRestoreRow(X,i,&ncols,&row,&vals);
187: }
188: PetscFree(val);
189: }
190: MatRestoreRowUpperTriangular(X);
191: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
192: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
193: return(0);
194: }
196: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
197: {
198: PetscInt i,start,end,j,ncols,m,n;
199: PetscErrorCode ierr;
200: const PetscInt *row;
201: PetscScalar *val;
202: const PetscScalar *vals;
205: MatGetSize(X,&m,&n);
206: MatGetOwnershipRange(X,&start,&end);
207: MatGetRowUpperTriangular(Y);
208: MatGetRowUpperTriangular(X);
209: if (a == 1.0) {
210: for (i = start; i < end; i++) {
211: MatGetRow(Y,i,&ncols,&row,&vals);
212: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
213: MatRestoreRow(Y,i,&ncols,&row,&vals);
215: MatGetRow(X,i,&ncols,&row,&vals);
216: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
217: MatRestoreRow(X,i,&ncols,&row,&vals);
218: }
219: } else {
220: PetscInt vs = 100;
221: /* realloc if needed, as this function may be used in parallel */
222: PetscMalloc1(vs,&val);
223: for (i=start; i<end; i++) {
224: MatGetRow(Y,i,&ncols,&row,&vals);
225: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
226: MatRestoreRow(Y,i,&ncols,&row,&vals);
228: MatGetRow(X,i,&ncols,&row,&vals);
229: if (vs < ncols) {
230: vs = PetscMin(2*ncols,n);
231: PetscRealloc(vs*sizeof(*val),&val);
232: }
233: for (j=0; j<ncols; j++) val[j] = a*vals[j];
234: MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
235: MatRestoreRow(X,i,&ncols,&row,&vals);
236: }
237: PetscFree(val);
238: }
239: MatRestoreRowUpperTriangular(Y);
240: MatRestoreRowUpperTriangular(X);
241: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
242: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
243: return(0);
244: }
246: /*@
247: MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.
249: Neighbor-wise Collective on Mat
251: Input Parameters:
252: + Y - the matrices
253: - a - the PetscScalar
255: Level: intermediate
257: Notes:
258: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
259: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
260: entry).
262: To form Y = Y + diag(V) use MatDiagonalSet()
264: Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
265: preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.
267: .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
268: @*/
269: PetscErrorCode MatShift(Mat Y,PetscScalar a)
270: {
275: if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
276: if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
277: MatCheckPreallocated(Y,1);
278: if (a == 0.0) return(0);
280: if (Y->ops->shift) {
281: (*Y->ops->shift)(Y,a);
282: } else {
283: MatShift_Basic(Y,a);
284: }
286: PetscObjectStateIncrease((PetscObject)Y);
287: return(0);
288: }
290: PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
291: {
293: PetscInt i,start,end;
294: PetscScalar *v;
297: MatGetOwnershipRange(Y,&start,&end);
298: VecGetArray(D,&v);
299: for (i=start; i<end; i++) {
300: MatSetValues(Y,1,&i,1,&i,v+i-start,is);
301: }
302: VecRestoreArray(D,&v);
303: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
304: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
305: return(0);
306: }
308: /*@
309: MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
310: that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
311: INSERT_VALUES.
313: Input Parameters:
314: + Y - the input matrix
315: . D - the diagonal matrix, represented as a vector
316: - i - INSERT_VALUES or ADD_VALUES
318: Neighbor-wise Collective on Mat
320: Notes:
321: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
322: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
323: entry).
325: Level: intermediate
327: .seealso: MatShift(), MatScale(), MatDiagonalScale()
328: @*/
329: PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is)
330: {
332: PetscInt matlocal,veclocal;
337: MatGetLocalSize(Y,&matlocal,NULL);
338: VecGetLocalSize(D,&veclocal);
339: if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
340: if (Y->ops->diagonalset) {
341: (*Y->ops->diagonalset)(Y,D,is);
342: } else {
343: MatDiagonalSet_Default(Y,D,is);
344: }
345: PetscObjectStateIncrease((PetscObject)Y);
346: return(0);
347: }
349: /*@
350: MatAYPX - Computes Y = a*Y + X.
352: Logically on Mat
354: Input Parameters:
355: + a - the PetscScalar multiplier
356: . Y - the first matrix
357: . X - the second matrix
358: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
360: Level: intermediate
362: .seealso: MatAXPY()
363: @*/
364: PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
365: {
366: PetscScalar one = 1.0;
368: PetscInt mX,mY,nX,nY;
374: MatGetSize(X,&mX,&nX);
375: MatGetSize(X,&mY,&nY);
376: if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
377: MatScale(Y,a);
378: MatAXPY(Y,one,X,str);
379: return(0);
380: }
382: /*@
383: MatComputeOperator - Computes the explicit matrix
385: Collective on Mat
387: Input Parameter:
388: + inmat - the matrix
389: - mattype - the matrix type for the explicit operator
391: Output Parameter:
392: . mat - the explict operator
394: Notes:
395: This computation is done by applying the operators to columns of the identity matrix.
396: This routine is costly in general, and is recommended for use only with relatively small systems.
397: Currently, this routine uses a dense matrix format if mattype == NULL.
399: Level: advanced
401: @*/
402: PetscErrorCode MatComputeOperator(Mat inmat,MatType mattype,Mat *mat)
403: {
409: MatConvert_Shell(inmat,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
410: return(0);
411: }
413: /*@
414: MatComputeOperatorTranspose - Computes the explicit matrix representation of
415: a give matrix that can apply MatMultTranspose()
417: Collective on Mat
419: Input Parameter:
420: . inmat - the matrix
422: Output Parameter:
423: . mat - the explict operator transposed
425: Notes:
426: This computation is done by applying the transpose of the operator to columns of the identity matrix.
427: This routine is costly in general, and is recommended for use only with relatively small systems.
428: Currently, this routine uses a dense matrix format if mattype == NULL.
430: Level: advanced
432: @*/
433: PetscErrorCode MatComputeOperatorTranspose(Mat inmat,MatType mattype,Mat *mat)
434: {
435: Mat A;
441: MatCreateTranspose(inmat,&A);
442: MatConvert_Shell(A,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
443: MatDestroy(&A);
444: return(0);
445: }
447: /*@
448: MatChop - Set all values in the matrix less than the tolerance to zero
450: Input Parameters:
451: + A - The matrix
452: - tol - The zero tolerance
454: Output Parameters:
455: . A - The chopped matrix
457: Level: intermediate
459: .seealso: MatCreate(), MatZeroEntries()
460: @*/
461: PetscErrorCode MatChop(Mat A, PetscReal tol)
462: {
463: PetscScalar *newVals;
464: PetscInt *newCols;
465: PetscInt rStart, rEnd, numRows, maxRows, r, colMax = 0;
469: MatGetOwnershipRange(A, &rStart, &rEnd);
470: for (r = rStart; r < rEnd; ++r) {
471: PetscInt ncols;
473: MatGetRow(A, r, &ncols, NULL, NULL);
474: colMax = PetscMax(colMax, ncols);
475: MatRestoreRow(A, r, &ncols, NULL, NULL);
476: }
477: numRows = rEnd - rStart;
478: MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
479: PetscMalloc2(colMax,&newCols,colMax,&newVals);
480: for (r = rStart; r < rStart+maxRows; ++r) {
481: const PetscScalar *vals;
482: const PetscInt *cols;
483: PetscInt ncols, newcols, c;
485: if (r < rEnd) {
486: MatGetRow(A, r, &ncols, &cols, &vals);
487: for (c = 0; c < ncols; ++c) {
488: newCols[c] = cols[c];
489: newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
490: }
491: newcols = ncols;
492: MatRestoreRow(A, r, &ncols, &cols, &vals);
493: MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
494: }
495: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
496: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
497: }
498: PetscFree2(newCols,newVals);
499: return(0);
500: }