Actual source code: axpy.c
petsc-3.10.5 2019-03-28
2: #include <petsc/private/matimpl.h>
4: /*@
5: MatAXPY - Computes Y = a*X + Y.
7: Logically Collective on Mat
9: Input Parameters:
10: + a - the scalar multiplier
11: . X - the first matrix
12: . Y - the second matrix
13: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
14: or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
16: Level: intermediate
18: .keywords: matrix, add
20: .seealso: MatAYPX()
21: @*/
22: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
23: {
25: PetscInt m1,m2,n1,n2;
26: PetscBool sametype;
32: MatGetSize(X,&m1,&n1);
33: MatGetSize(Y,&m2,&n2);
34: if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2);
36: PetscStrcmp(((PetscObject)X)->type_name,((PetscObject)Y)->type_name,&sametype);
37: PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
38: if (Y->ops->axpy && sametype) {
39: (*Y->ops->axpy)(Y,a,X,str);
40: } else {
41: MatAXPY_Basic(Y,a,X,str);
42: }
43: PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
44: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_VECCUDA)
45: if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
46: Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
47: }
48: #endif
49: return(0);
50: }
52: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
53: {
54: PetscInt i,start,end,j,ncols,m,n;
55: PetscErrorCode ierr;
56: const PetscInt *row;
57: PetscScalar *val;
58: const PetscScalar *vals;
61: MatGetSize(X,&m,&n);
62: MatGetOwnershipRange(X,&start,&end);
63: if (a == 1.0) {
64: for (i = start; i < end; i++) {
65: MatGetRow(X,i,&ncols,&row,&vals);
66: MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
67: MatRestoreRow(X,i,&ncols,&row,&vals);
68: }
69: } else {
70: PetscMalloc1(n+1,&val);
71: for (i=start; i<end; i++) {
72: MatGetRow(X,i,&ncols,&row,&vals);
73: for (j=0; j<ncols; j++) {
74: val[j] = a*vals[j];
75: }
76: MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
77: MatRestoreRow(X,i,&ncols,&row,&vals);
78: }
79: PetscFree(val);
80: }
81: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
82: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
83: return(0);
84: }
86: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
87: {
88: PetscInt i,start,end,j,ncols,m,n;
89: PetscErrorCode ierr;
90: const PetscInt *row;
91: PetscScalar *val;
92: const PetscScalar *vals;
95: MatGetSize(X,&m,&n);
96: MatGetOwnershipRange(X,&start,&end);
97: if (a == 1.0) {
98: for (i = start; i < end; i++) {
99: MatGetRow(Y,i,&ncols,&row,&vals);
100: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
101: MatRestoreRow(Y,i,&ncols,&row,&vals);
103: MatGetRow(X,i,&ncols,&row,&vals);
104: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
105: MatRestoreRow(X,i,&ncols,&row,&vals);
106: }
107: } else {
108: PetscMalloc1(n+1,&val);
109: for (i=start; i<end; i++) {
110: MatGetRow(Y,i,&ncols,&row,&vals);
111: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
112: MatRestoreRow(Y,i,&ncols,&row,&vals);
114: MatGetRow(X,i,&ncols,&row,&vals);
115: for (j=0; j<ncols; j++) {
116: val[j] = a*vals[j];
117: }
118: MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
119: MatRestoreRow(X,i,&ncols,&row,&vals);
120: }
121: PetscFree(val);
122: }
123: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
124: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
125: return(0);
126: }
128: /*@
129: MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.
131: Neighbor-wise Collective on Mat
133: Input Parameters:
134: + Y - the matrices
135: - a - the PetscScalar
137: Level: intermediate
139: Notes:
140: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
141: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
142: entry).
144: To form Y = Y + diag(V) use MatDiagonalSet()
146: Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
147: preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.
149: .keywords: matrix, add, shift
151: .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
152: @*/
153: PetscErrorCode MatShift(Mat Y,PetscScalar a)
154: {
159: if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
160: if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
161: MatCheckPreallocated(Y,1);
163: if (Y->ops->shift) {
164: (*Y->ops->shift)(Y,a);
165: } else {
166: MatShift_Basic(Y,a);
167: }
169: PetscObjectStateIncrease((PetscObject)Y);
170: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_VECCUDA)
171: if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
172: Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
173: }
174: #endif
175: return(0);
176: }
178: PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
179: {
181: PetscInt i,start,end;
182: PetscScalar *v;
185: MatGetOwnershipRange(Y,&start,&end);
186: VecGetArray(D,&v);
187: for (i=start; i<end; i++) {
188: MatSetValues(Y,1,&i,1,&i,v+i-start,is);
189: }
190: VecRestoreArray(D,&v);
191: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
192: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
193: return(0);
194: }
196: /*@
197: MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
198: that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
199: INSERT_VALUES.
201: Input Parameters:
202: + Y - the input matrix
203: . D - the diagonal matrix, represented as a vector
204: - i - INSERT_VALUES or ADD_VALUES
206: Neighbor-wise Collective on Mat and Vec
208: Notes:
209: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
210: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
211: entry).
213: Level: intermediate
215: .keywords: matrix, add, shift, diagonal
217: .seealso: MatShift(), MatScale(), MatDiagonalScale()
218: @*/
219: PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is)
220: {
222: PetscInt matlocal,veclocal;
227: MatGetLocalSize(Y,&matlocal,NULL);
228: VecGetLocalSize(D,&veclocal);
229: if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
230: if (Y->ops->diagonalset) {
231: (*Y->ops->diagonalset)(Y,D,is);
232: } else {
233: MatDiagonalSet_Default(Y,D,is);
234: }
235: PetscObjectStateIncrease((PetscObject)Y);
236: return(0);
237: }
239: /*@
240: MatAYPX - Computes Y = a*Y + X.
242: Logically on Mat
244: Input Parameters:
245: + a - the PetscScalar multiplier
246: . Y - the first matrix
247: . X - the second matrix
248: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
250: Level: intermediate
252: .keywords: matrix, add
254: .seealso: MatAXPY()
255: @*/
256: PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
257: {
258: PetscScalar one = 1.0;
260: PetscInt mX,mY,nX,nY;
266: MatGetSize(X,&mX,&nX);
267: MatGetSize(X,&mY,&nY);
268: if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
270: MatScale(Y,a);
271: MatAXPY(Y,one,X,str);
272: return(0);
273: }
275: /*@
276: MatComputeExplicitOperator - Computes the explicit matrix
278: Collective on Mat
280: Input Parameter:
281: . inmat - the matrix
283: Output Parameter:
284: . mat - the explict operator
286: Notes:
287: This computation is done by applying the operators to columns of the
288: identity matrix.
290: Currently, this routine uses a dense matrix format when 1 processor
291: is used and a sparse format otherwise. This routine is costly in general,
292: and is recommended for use only with relatively small systems.
294: Level: advanced
296: .keywords: Mat, compute, explicit, operator
297: @*/
298: PetscErrorCode MatComputeExplicitOperator(Mat inmat,Mat *mat)
299: {
301: MPI_Comm comm;
302: PetscMPIInt size;
308: PetscObjectGetComm((PetscObject)inmat,&comm);
309: MPI_Comm_size(comm,&size);
310: MatConvert_Shell(inmat,size == 1 ? MATSEQDENSE : MATAIJ,MAT_INITIAL_MATRIX,mat);
311: return(0);
312: }
314: /*@
315: MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of
316: a give matrix that can apply MatMultTranspose()
318: Collective on Mat
320: Input Parameter:
321: . inmat - the matrix
323: Output Parameter:
324: . mat - the explict operator transposed
326: Notes:
327: This computation is done by applying the transpose of the operator to columns of the
328: identity matrix.
330: Currently, this routine uses a dense matrix format when 1 processor
331: is used and a sparse format otherwise. This routine is costly in general,
332: and is recommended for use only with relatively small systems.
334: Level: advanced
336: .keywords: Mat, compute, explicit, operator
337: @*/
338: PetscErrorCode MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat)
339: {
340: Vec in,out;
342: PetscInt i,m,n,M,N,*rows,start,end;
343: MPI_Comm comm;
344: PetscScalar *array,zero = 0.0,one = 1.0;
345: PetscMPIInt size;
351: PetscObjectGetComm((PetscObject)inmat,&comm);
352: MPI_Comm_size(comm,&size);
354: MatGetLocalSize(inmat,&m,&n);
355: MatGetSize(inmat,&M,&N);
356: MatCreateVecs(inmat,&in,&out);
357: VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
358: VecGetOwnershipRange(out,&start,&end);
359: PetscMalloc1(m,&rows);
360: for (i=0; i<m; i++) rows[i] = start + i;
362: MatCreate(comm,mat);
363: MatSetSizes(*mat,m,n,M,N);
364: if (size == 1) {
365: MatSetType(*mat,MATSEQDENSE);
366: MatSeqDenseSetPreallocation(*mat,NULL);
367: } else {
368: MatSetType(*mat,MATMPIAIJ);
369: MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
370: }
372: for (i=0; i<N; i++) {
374: VecSet(in,zero);
375: VecSetValues(in,1,&i,&one,INSERT_VALUES);
376: VecAssemblyBegin(in);
377: VecAssemblyEnd(in);
379: MatMultTranspose(inmat,in,out);
381: VecGetArray(out,&array);
382: MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
383: VecRestoreArray(out,&array);
385: }
386: PetscFree(rows);
387: VecDestroy(&out);
388: VecDestroy(&in);
389: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
390: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
391: return(0);
392: }
394: /*@
395: MatChop - Set all values in the matrix less than the tolerance to zero
397: Input Parameters:
398: + A - The matrix
399: - tol - The zero tolerance
401: Output Parameters:
402: . A - The chopped matrix
404: Level: intermediate
406: .seealso: MatCreate(), MatZeroEntries()
407: @*/
408: PetscErrorCode MatChop(Mat A, PetscReal tol)
409: {
410: PetscScalar *newVals;
411: PetscInt *newCols;
412: PetscInt rStart, rEnd, numRows, maxRows, r, colMax = 0;
416: MatGetOwnershipRange(A, &rStart, &rEnd);
417: for (r = rStart; r < rEnd; ++r) {
418: PetscInt ncols;
420: MatGetRow(A, r, &ncols, NULL, NULL);
421: colMax = PetscMax(colMax, ncols);
422: MatRestoreRow(A, r, &ncols, NULL, NULL);
423: }
424: numRows = rEnd - rStart;
425: MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
426: PetscMalloc2(colMax,&newCols,colMax,&newVals);
427: for (r = rStart; r < rStart+maxRows; ++r) {
428: const PetscScalar *vals;
429: const PetscInt *cols;
430: PetscInt ncols, newcols, c;
432: if (r < rEnd) {
433: MatGetRow(A, r, &ncols, &cols, &vals);
434: for (c = 0; c < ncols; ++c) {
435: newCols[c] = cols[c];
436: newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
437: }
438: newcols = ncols;
439: MatRestoreRow(A, r, &ncols, &cols, &vals);
440: MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
441: }
442: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
443: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
444: }
445: PetscFree2(newCols,newVals);
446: return(0);
447: }