Actual source code: axpy.c
petsc-3.9.4 2018-09-11
2: #include <petsc/private/matimpl.h>
4: /*@
5: MatAXPY - Computes Y = a*X + Y.
7: Logically Collective on Mat
9: Input Parameters:
10: + a - the scalar multiplier
11: . X - the first matrix
12: . Y - the second matrix
13: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
14: or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
16: Level: intermediate
18: .keywords: matrix, add
20: .seealso: MatAYPX()
21: @*/
22: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
23: {
25: PetscInt m1,m2,n1,n2;
26: PetscBool sametype;
32: MatGetSize(X,&m1,&n1);
33: MatGetSize(Y,&m2,&n2);
34: if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2);
36: PetscStrcmp(((PetscObject)X)->type_name,((PetscObject)Y)->type_name,&sametype);
37: PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
38: if (Y->ops->axpy && sametype) {
39: (*Y->ops->axpy)(Y,a,X,str);
40: } else {
41: MatAXPY_Basic(Y,a,X,str);
42: }
43: PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
44: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_VECCUDA)
45: if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
46: Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
47: }
48: #endif
49: return(0);
50: }
52: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
53: {
54: PetscInt i,start,end,j,ncols,m,n;
55: PetscErrorCode ierr;
56: const PetscInt *row;
57: PetscScalar *val;
58: const PetscScalar *vals;
61: MatGetSize(X,&m,&n);
62: MatGetOwnershipRange(X,&start,&end);
63: if (a == 1.0) {
64: for (i = start; i < end; i++) {
65: MatGetRow(X,i,&ncols,&row,&vals);
66: MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
67: MatRestoreRow(X,i,&ncols,&row,&vals);
68: }
69: } else {
70: PetscMalloc1(n+1,&val);
71: for (i=start; i<end; i++) {
72: MatGetRow(X,i,&ncols,&row,&vals);
73: for (j=0; j<ncols; j++) {
74: val[j] = a*vals[j];
75: }
76: MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
77: MatRestoreRow(X,i,&ncols,&row,&vals);
78: }
79: PetscFree(val);
80: }
81: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
82: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
83: return(0);
84: }
86: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
87: {
88: PetscInt i,start,end,j,ncols,m,n;
89: PetscErrorCode ierr;
90: const PetscInt *row;
91: PetscScalar *val;
92: const PetscScalar *vals;
95: MatGetSize(X,&m,&n);
96: MatGetOwnershipRange(X,&start,&end);
97: if (a == 1.0) {
98: for (i = start; i < end; i++) {
99: MatGetRow(Y,i,&ncols,&row,&vals);
100: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
101: MatRestoreRow(Y,i,&ncols,&row,&vals);
103: MatGetRow(X,i,&ncols,&row,&vals);
104: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
105: MatRestoreRow(X,i,&ncols,&row,&vals);
106: }
107: } else {
108: PetscMalloc1(n+1,&val);
109: for (i=start; i<end; i++) {
110: MatGetRow(Y,i,&ncols,&row,&vals);
111: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
112: MatRestoreRow(Y,i,&ncols,&row,&vals);
114: MatGetRow(X,i,&ncols,&row,&vals);
115: for (j=0; j<ncols; j++) {
116: val[j] = a*vals[j];
117: }
118: MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
119: MatRestoreRow(X,i,&ncols,&row,&vals);
120: }
121: PetscFree(val);
122: }
123: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
124: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
125: return(0);
126: }
128: /*@
129: MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.
131: Neighbor-wise Collective on Mat
133: Input Parameters:
134: + Y - the matrices
135: - a - the PetscScalar
137: Level: intermediate
139: Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
140: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
141: entry).
143: To form Y = Y + diag(V) use MatDiagonalSet()
145: Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
146: preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.
148: .keywords: matrix, add, shift
150: .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
151: @*/
152: PetscErrorCode MatShift(Mat Y,PetscScalar a)
153: {
158: if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
159: if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
160: MatCheckPreallocated(Y,1);
162: if (Y->ops->shift) {
163: (*Y->ops->shift)(Y,a);
164: } else {
165: MatShift_Basic(Y,a);
166: }
168: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_VECCUDA)
169: if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
170: Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
171: }
172: #endif
173: return(0);
174: }
176: PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
177: {
179: PetscInt i,start,end;
180: PetscScalar *v;
183: MatGetOwnershipRange(Y,&start,&end);
184: VecGetArray(D,&v);
185: for (i=start; i<end; i++) {
186: MatSetValues(Y,1,&i,1,&i,v+i-start,is);
187: }
188: VecRestoreArray(D,&v);
189: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
190: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
191: return(0);
192: }
194: /*@
195: MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
196: that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
197: INSERT_VALUES.
199: Input Parameters:
200: + Y - the input matrix
201: . D - the diagonal matrix, represented as a vector
202: - i - INSERT_VALUES or ADD_VALUES
204: Neighbor-wise Collective on Mat and Vec
206: Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
207: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
208: entry).
210: Level: intermediate
212: .keywords: matrix, add, shift, diagonal
214: .seealso: MatShift(), MatScale(), MatDiagonalScale()
215: @*/
216: PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is)
217: {
219: PetscInt matlocal,veclocal;
224: MatGetLocalSize(Y,&matlocal,NULL);
225: VecGetLocalSize(D,&veclocal);
226: if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
227: if (Y->ops->diagonalset) {
228: (*Y->ops->diagonalset)(Y,D,is);
229: } else {
230: MatDiagonalSet_Default(Y,D,is);
231: }
232: return(0);
233: }
235: /*@
236: MatAYPX - Computes Y = a*Y + X.
238: Logically on Mat
240: Input Parameters:
241: + a - the PetscScalar multiplier
242: . Y - the first matrix
243: . X - the second matrix
244: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
246: Level: intermediate
248: .keywords: matrix, add
250: .seealso: MatAXPY()
251: @*/
252: PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
253: {
254: PetscScalar one = 1.0;
256: PetscInt mX,mY,nX,nY;
262: MatGetSize(X,&mX,&nX);
263: MatGetSize(X,&mY,&nY);
264: if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
266: MatScale(Y,a);
267: MatAXPY(Y,one,X,str);
268: return(0);
269: }
271: /*@
272: MatComputeExplicitOperator - Computes the explicit matrix
274: Collective on Mat
276: Input Parameter:
277: . inmat - the matrix
279: Output Parameter:
280: . mat - the explict operator
282: Notes:
283: This computation is done by applying the operators to columns of the
284: identity matrix.
286: Currently, this routine uses a dense matrix format when 1 processor
287: is used and a sparse format otherwise. This routine is costly in general,
288: and is recommended for use only with relatively small systems.
290: Level: advanced
292: .keywords: Mat, compute, explicit, operator
293: @*/
294: PetscErrorCode MatComputeExplicitOperator(Mat inmat,Mat *mat)
295: {
296: Vec in,out;
298: PetscInt i,m,n,M,N,*rows,start,end;
299: MPI_Comm comm;
300: PetscScalar *array,zero = 0.0,one = 1.0;
301: PetscMPIInt size;
307: PetscObjectGetComm((PetscObject)inmat,&comm);
308: MPI_Comm_size(comm,&size);
310: MatGetLocalSize(inmat,&m,&n);
311: MatGetSize(inmat,&M,&N);
312: MatCreateVecs(inmat,&in,&out);
313: VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
314: VecGetOwnershipRange(out,&start,&end);
315: PetscMalloc1(m,&rows);
316: for (i=0; i<m; i++) rows[i] = start + i;
318: MatCreate(comm,mat);
319: MatSetSizes(*mat,m,n,M,N);
320: if (size == 1) {
321: MatSetType(*mat,MATSEQDENSE);
322: MatSeqDenseSetPreallocation(*mat,NULL);
323: } else {
324: MatSetType(*mat,MATMPIAIJ);
325: MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
326: }
328: for (i=0; i<N; i++) {
330: VecSet(in,zero);
331: VecSetValues(in,1,&i,&one,INSERT_VALUES);
332: VecAssemblyBegin(in);
333: VecAssemblyEnd(in);
335: MatMult(inmat,in,out);
337: VecGetArray(out,&array);
338: MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
339: VecRestoreArray(out,&array);
341: }
342: PetscFree(rows);
343: VecDestroy(&out);
344: VecDestroy(&in);
345: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
346: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
347: return(0);
348: }
350: /*@
351: MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of
352: a give matrix that can apply MatMultTranspose()
354: Collective on Mat
356: Input Parameter:
357: . inmat - the matrix
359: Output Parameter:
360: . mat - the explict operator transposed
362: Notes:
363: This computation is done by applying the transpose of the operator to columns of the
364: identity matrix.
366: Currently, this routine uses a dense matrix format when 1 processor
367: is used and a sparse format otherwise. This routine is costly in general,
368: and is recommended for use only with relatively small systems.
370: Level: advanced
372: .keywords: Mat, compute, explicit, operator
373: @*/
374: PetscErrorCode MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat)
375: {
376: Vec in,out;
378: PetscInt i,m,n,M,N,*rows,start,end;
379: MPI_Comm comm;
380: PetscScalar *array,zero = 0.0,one = 1.0;
381: PetscMPIInt size;
387: PetscObjectGetComm((PetscObject)inmat,&comm);
388: MPI_Comm_size(comm,&size);
390: MatGetLocalSize(inmat,&m,&n);
391: MatGetSize(inmat,&M,&N);
392: MatCreateVecs(inmat,&in,&out);
393: VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
394: VecGetOwnershipRange(out,&start,&end);
395: PetscMalloc1(m,&rows);
396: for (i=0; i<m; i++) rows[i] = start + i;
398: MatCreate(comm,mat);
399: MatSetSizes(*mat,m,n,M,N);
400: if (size == 1) {
401: MatSetType(*mat,MATSEQDENSE);
402: MatSeqDenseSetPreallocation(*mat,NULL);
403: } else {
404: MatSetType(*mat,MATMPIAIJ);
405: MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
406: }
408: for (i=0; i<N; i++) {
410: VecSet(in,zero);
411: VecSetValues(in,1,&i,&one,INSERT_VALUES);
412: VecAssemblyBegin(in);
413: VecAssemblyEnd(in);
415: MatMultTranspose(inmat,in,out);
417: VecGetArray(out,&array);
418: MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
419: VecRestoreArray(out,&array);
421: }
422: PetscFree(rows);
423: VecDestroy(&out);
424: VecDestroy(&in);
425: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
426: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
427: return(0);
428: }
430: /*@
431: MatChop - Set all values in the matrix less than the tolerance to zero
433: Input Parameters:
434: + A - The matrix
435: - tol - The zero tolerance
437: Output Parameters:
438: . A - The chopped matrix
440: Level: intermediate
442: .seealso: MatCreate(), MatZeroEntries()
443: @*/
444: PetscErrorCode MatChop(Mat A, PetscReal tol)
445: {
446: PetscScalar *newVals;
447: PetscInt *newCols;
448: PetscInt rStart, rEnd, numRows, maxRows, r, colMax = 0;
452: MatGetOwnershipRange(A, &rStart, &rEnd);
453: for (r = rStart; r < rEnd; ++r) {
454: PetscInt ncols;
456: MatGetRow(A, r, &ncols, NULL, NULL);
457: colMax = PetscMax(colMax, ncols);
458: MatRestoreRow(A, r, &ncols, NULL, NULL);
459: }
460: numRows = rEnd - rStart;
461: MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
462: PetscMalloc2(colMax,&newCols,colMax,&newVals);
463: for (r = rStart; r < rStart+maxRows; ++r) {
464: const PetscScalar *vals;
465: const PetscInt *cols;
466: PetscInt ncols, newcols, c;
468: if (r < rEnd) {
469: MatGetRow(A, r, &ncols, &cols, &vals);
470: for (c = 0; c < ncols; ++c) {
471: newCols[c] = cols[c];
472: newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
473: }
474: newcols = ncols;
475: MatRestoreRow(A, r, &ncols, &cols, &vals);
476: MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
477: }
478: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
479: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
480: }
481: PetscFree2(newCols,newVals);
482: return(0);
483: }