Actual source code: axpy.c
petsc-3.14.6 2021-03-30
2: #include <petsc/private/matimpl.h>
4: static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T)
5: {
6: PetscErrorCode ierr,(*f)(Mat,Mat*);
7: Mat A,F;
10: PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f);
11: if (f) {
12: MatTransposeGetMat(T,&A);
13: if (T == X) {
14: PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
15: MatTranspose(A,MAT_INITIAL_MATRIX,&F);
16: A = Y;
17: } else {
18: PetscInfo(NULL,"Transposing X because Y is of type MATTRANSPOSEMAT to perform MatAXPY()\n");
19: MatTranspose(X,MAT_INITIAL_MATRIX,&F);
20: }
21: } else {
22: MatHermitianTransposeGetMat(T,&A);
23: if (T == X) {
24: PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
25: MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F);
26: A = Y;
27: } else {
28: PetscInfo(NULL,"Hermitian transposing X because Y is of type MATTRANSPOSEMAT to perform MatAXPY()\n");
29: MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F);
30: }
31: }
32: MatAXPY(A,a,F,str);
33: MatDestroy(&F);
34: return(0);
35: }
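/*
   Illustrative sketch, not part of the original source: when X is a MATTRANSPOSE wrapper
   created with MatCreateTranspose(), MatAXPY() is routed through MatTransposeAXPY_Private(),
   so Y ends up as Y + 2*A^T. The helper name ExampleTransposeAXPY() is hypothetical; error
   checking is omitted for brevity.
*/
static PetscErrorCode ExampleTransposeAXPY(Mat A,Mat Y)
{
  Mat T;

  MatCreateTranspose(A,&T);                    /* T applies A^T without forming it explicitly */
  MatAXPY(Y,2.0,T,DIFFERENT_NONZERO_PATTERN);
  MatDestroy(&T);
  return(0);
}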
37: /*@
38: MatAXPY - Computes Y = a*X + Y.
40: Logically Collective on Mat
42: Input Parameters:
43: + a - the scalar multiplier
44: . X - the first matrix
45: . Y - the second matrix
46: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
47: or SUBSET_NONZERO_PATTERN (the nonzero pattern of X is a subset of that of Y)
49: Notes: No operation is performed when a is zero.
51: Level: intermediate
53: .seealso: MatAYPX()
54: @*/
55: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
56: {
58: PetscInt M1,M2,N1,N2;
59: PetscInt m1,m2,n1,n2;
60: MatType t1,t2;
61: PetscBool sametype,transpose;
68: MatGetSize(X,&M1,&N1);
69: MatGetSize(Y,&M2,&N2);
70: MatGetLocalSize(X,&m1,&n1);
71: MatGetLocalSize(Y,&m2,&n2);
72: if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes X %D x %D, Y %D x %D",M1,N1,M2,N2);
73: if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes X %D x %D, Y %D x %D",m1,n1,m2,n2);
74: if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (Y)");
75: if (!X->assembled) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (X)");
76: if (a == (PetscScalar)0.0) return(0);
77: if (Y == X) {
78: MatScale(Y,1.0 + a);
79: return(0);
80: }
81: MatGetType(X,&t1);
82: MatGetType(Y,&t2);
83: PetscStrcmp(t1,t2,&sametype);
84: PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
85: if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
86: (*Y->ops->axpy)(Y,a,X,str);
87: } else {
88: PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose);
89: if (transpose) {
90: MatTransposeAXPY_Private(Y,a,X,str,X);
91: } else {
92: PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose);
93: if (transpose) {
94: MatTransposeAXPY_Private(Y,a,X,str,Y);
95: } else {
96: MatAXPY_Basic(Y,a,X,str);
97: }
98: }
99: }
100: PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
101: return(0);
102: }
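/*
   Illustrative usage sketch, not part of the original source: Y = Y + 0.5*X for two assembled
   matrices with the same parallel layout. ExampleAXPYUsage() is a hypothetical name; error
   checking is omitted for brevity.
*/
static PetscErrorCode ExampleAXPYUsage(Mat X,Mat Y)
{
  /* SAME_NONZERO_PATTERN is the fastest path; use SUBSET_NONZERO_PATTERN when the pattern of X
     is contained in that of Y, and DIFFERENT_NONZERO_PATTERN otherwise */
  MatAXPY(Y,0.5,X,SAME_NONZERO_PATTERN);
  return(0);
}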
104: PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
105: {
107: PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;
110: /* look for any available faster alternative to the general preallocator */
111: PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);
112: if (preall) {
113: (*preall)(Y,X,B);
114: } else { /* Use MATPREALLOCATOR; assumes the same row/column distribution */
115: Mat preallocator;
116: PetscInt r,rstart,rend;
117: PetscInt m,n,M,N;
119: MatGetRowUpperTriangular(Y);
120: MatGetRowUpperTriangular(X);
121: MatGetSize(Y,&M,&N);
122: MatGetLocalSize(Y,&m,&n);
123: MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);
124: MatSetType(preallocator,MATPREALLOCATOR);
125: MatSetSizes(preallocator,m,n,M,N);
126: MatSetUp(preallocator);
127: MatGetOwnershipRange(preallocator,&rstart,&rend);
128: for (r = rstart; r < rend; ++r) {
129: PetscInt ncols;
130: const PetscInt *row;
131: const PetscScalar *vals;
133: MatGetRow(Y,r,&ncols,&row,&vals);
134: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
135: MatRestoreRow(Y,r,&ncols,&row,&vals);
136: MatGetRow(X,r,&ncols,&row,&vals);
137: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
138: MatRestoreRow(X,r,&ncols,&row,&vals);
139: }
140: MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
141: MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);
142: MatRestoreRowUpperTriangular(Y);
143: MatRestoreRowUpperTriangular(X);
145: MatCreate(PetscObjectComm((PetscObject)Y),B);
146: MatSetType(*B,((PetscObject)Y)->type_name);
147: MatSetSizes(*B,m,n,M,N);
148: MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);
149: MatDestroy(&preallocator);
150: }
151: return(0);
152: }
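/*
   Note, not part of the original source: MATPREALLOCATOR only records which (row,column)
   locations are touched by MatSetValues(); the numerical values passed above are ignored.
   The recorded pattern, i.e. the union of the patterns of Y and X, is then used by
   MatPreallocatorPreallocate() to size *B before the actual summation takes place.
*/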
154: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
155: {
157: PetscBool isshell,isdense;
160: PetscObjectTypeCompare((PetscObject)Y,MATSHELL,&isshell);
161: if (isshell) { /* MatShell has special support for AXPY */
162: PetscErrorCode (*f)(Mat,PetscScalar,Mat,MatStructure);
164: MatGetOperation(Y,MATOP_AXPY,(void (**)(void))&f);
165: if (f) {
166: (*f)(Y,a,X,str);
167: return(0);
168: }
169: }
170: /* no need to preallocate if Y is dense */
171: PetscObjectBaseTypeCompareAny((PetscObject)Y,&isdense,MATSEQDENSE,MATMPIDENSE,"");
172: if (isdense && str == DIFFERENT_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
173: if (str != DIFFERENT_NONZERO_PATTERN) {
174: PetscInt i,start,end,j,ncols,m,n;
175: const PetscInt *row;
176: PetscScalar *val;
177: const PetscScalar *vals;
179: MatGetSize(X,&m,&n);
180: MatGetOwnershipRange(X,&start,&end);
181: MatGetRowUpperTriangular(X);
182: if (a == 1.0) {
183: for (i = start; i < end; i++) {
184: MatGetRow(X,i,&ncols,&row,&vals);
185: MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
186: MatRestoreRow(X,i,&ncols,&row,&vals);
187: }
188: } else {
189: PetscInt vs = 100;
190: /* scratch buffer for the scaled row values; grown on demand since a row may hold more nonzeros than the initial guess */
191: PetscMalloc1(vs,&val);
192: for (i=start; i<end; i++) {
193: MatGetRow(X,i,&ncols,&row,&vals);
194: if (vs < ncols) {
195: vs = PetscMin(2*ncols,n);
196: PetscRealloc(vs*sizeof(*val),&val);
197: }
198: for (j=0; j<ncols; j++) val[j] = a*vals[j];
199: MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
200: MatRestoreRow(X,i,&ncols,&row,&vals);
201: }
202: PetscFree(val);
203: }
204: MatRestoreRowUpperTriangular(X);
205: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
206: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
207: } else {
208: Mat B;
210: MatAXPY_Basic_Preallocate(Y,X,&B);
211: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
212: /* TODO mat_tests-ex37_nsize-1_mat_type-baij_mat_block_size-2 fails with MatHeaderMerge */
213: MatHeaderReplace(Y,&B);
214: }
215: return(0);
216: }
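/*
   Illustrative sketch, not part of the original source: when Y is a MATSHELL, the
   MatGetOperation(Y,MATOP_AXPY,...) query above picks up a user-registered AXPY kernel.
   MyShellAXPY() and ExampleRegisterShellAXPY() are hypothetical, and the shell context is
   assumed to hold an assembled Mat; error checking is omitted for brevity.
*/
static PetscErrorCode MyShellAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  Mat A;

  MatShellGetContext(Y,&A);   /* assumed: the shell context is the matrix the shell wraps */
  MatAXPY(A,a,X,str);
  return(0);
}

static PetscErrorCode ExampleRegisterShellAXPY(Mat shellY)
{
  MatShellSetOperation(shellY,MATOP_AXPY,(void (*)(void))MyShellAXPY);
  return(0);
}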
218: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
219: {
220: PetscInt i,start,end,j,ncols,m,n;
221: PetscErrorCode ierr;
222: const PetscInt *row;
223: PetscScalar *val;
224: const PetscScalar *vals;
227: MatGetSize(X,&m,&n);
228: MatGetOwnershipRange(X,&start,&end);
229: MatGetRowUpperTriangular(Y);
230: MatGetRowUpperTriangular(X);
231: if (a == 1.0) {
232: for (i = start; i < end; i++) {
233: MatGetRow(Y,i,&ncols,&row,&vals);
234: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
235: MatRestoreRow(Y,i,&ncols,&row,&vals);
237: MatGetRow(X,i,&ncols,&row,&vals);
238: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
239: MatRestoreRow(X,i,&ncols,&row,&vals);
240: }
241: } else {
242: PetscInt vs = 100;
243: /* scratch buffer for the scaled row values; grown on demand since a row may hold more nonzeros than the initial guess */
244: PetscMalloc1(vs,&val);
245: for (i=start; i<end; i++) {
246: MatGetRow(Y,i,&ncols,&row,&vals);
247: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
248: MatRestoreRow(Y,i,&ncols,&row,&vals);
250: MatGetRow(X,i,&ncols,&row,&vals);
251: if (vs < ncols) {
252: vs = PetscMin(2*ncols,n);
253: PetscRealloc(vs*sizeof(*val),&val);
254: }
255: for (j=0; j<ncols; j++) val[j] = a*vals[j];
256: MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
257: MatRestoreRow(X,i,&ncols,&row,&vals);
258: }
259: PetscFree(val);
260: }
261: MatRestoreRowUpperTriangular(Y);
262: MatRestoreRowUpperTriangular(X);
263: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
264: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
265: return(0);
266: }
268: /*@
269: MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.
271: Neighbor-wise Collective on Mat
273: Input Parameters:
274: + Y - the matrix
275: - a - the shift, a PetscScalar
277: Level: intermediate
279: Notes:
280: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
281: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
282: entry). No operation is performed when a is zero.
284: To form Y = Y + diag(V) use MatDiagonalSet()
286: .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
287: @*/
288: PetscErrorCode MatShift(Mat Y,PetscScalar a)
289: {
294: if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
295: if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
296: MatCheckPreallocated(Y,1);
297: if (a == 0.0) return(0);
299: if (Y->ops->shift) {
300: (*Y->ops->shift)(Y,a);
301: } else {
302: MatShift_Basic(Y,a);
303: }
305: PetscObjectStateIncrease((PetscObject)Y);
306: return(0);
307: }
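/*
   Illustrative usage sketch, not part of the original source: add a small diagonal shift to
   an assembled matrix, e.g. to regularize it before factorization. ExampleShiftUsage() is a
   hypothetical name; error checking is omitted for brevity.
*/
static PetscErrorCode ExampleShiftUsage(Mat A)
{
  /* A = A + 1e-8*I; fast only if every diagonal entry already exists in the nonzero pattern of A */
  MatShift(A,1.e-8);
  return(0);
}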
309: PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
310: {
312: PetscInt i,start,end;
313: PetscScalar *v;
316: MatGetOwnershipRange(Y,&start,&end);
317: VecGetArray(D,&v);
318: for (i=start; i<end; i++) {
319: MatSetValues(Y,1,&i,1,&i,v+i-start,is);
320: }
321: VecRestoreArray(D,&v);
322: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
323: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
324: return(0);
325: }
327: /*@
328: MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix that is
329: represented as a vector, when the InsertMode is ADD_VALUES; with INSERT_VALUES
330: it sets Y[i,i] = D[i] instead.
332: Neighbor-wise Collective on Mat
334: Input Parameters:
335: + Y - the input matrix
336: . D - the diagonal matrix, represented as a vector
337: - is - either INSERT_VALUES or ADD_VALUES
339: Notes:
340: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
341: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
342: entry).
344: Level: intermediate
346: .seealso: MatShift(), MatScale(), MatDiagonalScale()
347: @*/
348: PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is)
349: {
351: PetscInt matlocal,veclocal;
356: MatGetLocalSize(Y,&matlocal,NULL);
357: VecGetLocalSize(D,&veclocal);
358: if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of local rows of matrix %D does not match number of local entries of diagonal vector %D",matlocal,veclocal);
359: if (Y->ops->diagonalset) {
360: (*Y->ops->diagonalset)(Y,D,is);
361: } else {
362: MatDiagonalSet_Default(Y,D,is);
363: }
364: PetscObjectStateIncrease((PetscObject)Y);
365: return(0);
366: }
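/*
   Illustrative usage sketch, not part of the original source: D must have the same local row
   distribution as Y. ExampleDiagonalSetUsage() is a hypothetical name; error checking is
   omitted for brevity.
*/
static PetscErrorCode ExampleDiagonalSetUsage(Mat Y,Vec D)
{
  MatDiagonalSet(Y,D,INSERT_VALUES);  /* overwrite the diagonal: Y[i,i] = D[i]  */
  MatDiagonalSet(Y,D,ADD_VALUES);     /* or accumulate:          Y[i,i] += D[i] */
  return(0);
}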
368: /*@
369: MatAYPX - Computes Y = a*Y + X.
371: Logically Collective on Mat
373: Input Parameters:
374: + a - the PetscScalar multiplier
375: . Y - the first matrix
376: . X - the second matrix
377: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
379: Level: intermediate
381: .seealso: MatAXPY()
382: @*/
383: PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
384: {
385: PetscScalar one = 1.0;
387: PetscInt mX,mY,nX,nY;
393: MatGetSize(X,&mX,&nX);
394: MatGetSize(Y,&mY,&nY);
395: if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D x %D first (X), %D x %D second (Y)",mX,nX,mY,nY);
396: MatScale(Y,a);
397: MatAXPY(Y,one,X,str);
398: return(0);
399: }
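/*
   Illustrative usage sketch, not part of the original source: Y = 0.5*Y + X, implemented
   above as MatScale() followed by MatAXPY(). ExampleAYPXUsage() is a hypothetical name;
   error checking is omitted for brevity.
*/
static PetscErrorCode ExampleAYPXUsage(Mat X,Mat Y)
{
  MatAYPX(Y,0.5,X,DIFFERENT_NONZERO_PATTERN);
  return(0);
}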
401: /*@
402: MatComputeOperator - Computes the explicit matrix
404: Collective on Mat
406: Input Parameters:
407: + inmat - the matrix
408: - mattype - the matrix type for the explicit operator
410: Output Parameter:
411: . mat - the explicit operator
413: Notes:
414: This computation is done by applying the operator to columns of the identity matrix.
415: This routine is costly in general, and is recommended for use only with relatively small systems.
416: Currently, this routine uses a dense matrix format if mattype == NULL.
418: Level: advanced
420: @*/
421: PetscErrorCode MatComputeOperator(Mat inmat,MatType mattype,Mat *mat)
422: {
428: MatConvert_Shell(inmat,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
429: return(0);
430: }
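/*
   Illustrative usage sketch, not part of the original source: build the explicit (dense)
   matrix of a matrix-free operator and print it, which is handy when debugging small
   problems. ExampleComputeOperatorUsage() is a hypothetical name; error checking is omitted.
*/
static PetscErrorCode ExampleComputeOperatorUsage(Mat shellA)
{
  Mat expl;

  MatComputeOperator(shellA,MATDENSE,&expl);   /* passing NULL for the type also yields MATDENSE */
  MatView(expl,PETSC_VIEWER_STDOUT_WORLD);
  MatDestroy(&expl);
  return(0);
}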
432: /*@
433: MatComputeOperatorTranspose - Computes the explicit matrix representation of
434: a given matrix that can apply MatMultTranspose()
436: Collective on Mat
438: Input Parameters:
439: + inmat - the matrix
440: - mattype - the matrix type for the explicit operator
441: Output Parameter:
442: . mat - the explicit operator for the transpose
444: Notes:
445: This computation is done by applying the transpose of the operator to columns of the identity matrix.
446: This routine is costly in general, and is recommended for use only with relatively small systems.
447: Currently, this routine uses a dense matrix format if mattype == NULL.
449: Level: advanced
451: @*/
452: PetscErrorCode MatComputeOperatorTranspose(Mat inmat,MatType mattype,Mat *mat)
453: {
454: Mat A;
460: MatCreateTranspose(inmat,&A);
461: MatConvert_Shell(A,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
462: MatDestroy(&A);
463: return(0);
464: }
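/*
   Illustrative usage sketch, not part of the original source: form the explicit matrix whose
   action equals MatMultTranspose(shellA,...). ExampleComputeOperatorTransposeUsage() is a
   hypothetical name; error checking is omitted for brevity.
*/
static PetscErrorCode ExampleComputeOperatorTransposeUsage(Mat shellA)
{
  Mat explT;

  MatComputeOperatorTranspose(shellA,MATAIJ,&explT);
  MatDestroy(&explT);
  return(0);
}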
466: /*@
467: MatChop - Set all values in the matrix whose absolute value is less than the tolerance to zero
469: Input Parameters:
470: + A - The matrix
471: - tol - The zero tolerance
473: Output Parameters:
474: . A - The chopped matrix
476: Level: intermediate
478: .seealso: MatCreate(), MatZeroEntries()
479: @*/
480: PetscErrorCode MatChop(Mat A, PetscReal tol)
481: {
482: PetscScalar *newVals;
483: PetscInt *newCols;
484: PetscInt rStart, rEnd, numRows, maxRows, r, colMax = 0;
488: MatGetOwnershipRange(A, &rStart, &rEnd);
489: MatGetRowUpperTriangular(A);
490: for (r = rStart; r < rEnd; ++r) {
491: PetscInt ncols;
493: MatGetRow(A, r, &ncols, NULL, NULL);
494: colMax = PetscMax(colMax, ncols);
495: MatRestoreRow(A, r, &ncols, NULL, NULL);
496: }
497: numRows = rEnd - rStart;
498: MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
499: PetscMalloc2(colMax,&newCols,colMax,&newVals);
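/* Note, not part of the original source: the loop below runs to rStart+maxRows, the largest
   number of local rows on any process, and calls MatAssemblyBegin/End inside the loop so that
   every process takes part in the same number of these collective calls, even after it has
   exhausted its own rows. */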
500: for (r = rStart; r < rStart+maxRows; ++r) {
501: const PetscScalar *vals;
502: const PetscInt *cols;
503: PetscInt ncols, newcols, c;
505: if (r < rEnd) {
506: MatGetRow(A, r, &ncols, &cols, &vals);
507: for (c = 0; c < ncols; ++c) {
508: newCols[c] = cols[c];
509: newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
510: }
511: newcols = ncols;
512: MatRestoreRow(A, r, &ncols, &cols, &vals);
513: MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
514: }
515: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
516: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
517: }
518: MatRestoreRowUpperTriangular(A);
519: PetscFree2(newCols,newVals);
520: return(0);
521: }
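/*
   Illustrative usage sketch, not part of the original source: zero out tiny entries produced
   by roundoff. Note that MatChop() stores explicit zeros; the entries are not removed from the
   nonzero pattern. ExampleChopUsage() is a hypothetical name; error checking is omitted.
*/
static PetscErrorCode ExampleChopUsage(Mat A)
{
  MatChop(A,1.e-12);
  return(0);
}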