/* axpy.c — PETSc 3.8.4 (2018-03-24) */
#include <petsc/private/matimpl.h>
4: /*@
5: MatAXPY - Computes Y = a*X + Y.
7: Logically Collective on Mat
9: Input Parameters:
10: + a - the scalar multiplier
11: . X - the first matrix
12: . Y - the second matrix
13: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
14: or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
16: Level: intermediate
18: .keywords: matrix, add
20: .seealso: MatAYPX()
21: @*/
22: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
23: {
25: PetscInt m1,m2,n1,n2;
26: PetscBool sametype;
32: MatGetSize(X,&m1,&n1);
33: MatGetSize(Y,&m2,&n2);
34: if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2);
36: PetscStrcmp(((PetscObject)X)->type_name,((PetscObject)Y)->type_name,&sametype);
37: PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
38: if (Y->ops->axpy && sametype) {
39: (*Y->ops->axpy)(Y,a,X,str);
40: } else {
41: MatAXPY_Basic(Y,a,X,str);
42: }
43: PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
44: #if defined(PETSC_HAVE_CUSP)
45: if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
46: Y->valid_GPU_matrix = PETSC_CUSP_CPU;
47: }
48: #elif defined(PETSC_HAVE_VIENNACL)
49: if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
50: Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
51: }
52: #elif defined(PETSC_HAVE_VECCUDA)
53: if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
54: Y->valid_GPU_matrix = PETSC_CUDA_CPU;
55: }
56: #endif
57: return(0);
58: }
60: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
61: {
62: PetscInt i,start,end,j,ncols,m,n;
63: PetscErrorCode ierr;
64: const PetscInt *row;
65: PetscScalar *val;
66: const PetscScalar *vals;
69: MatGetSize(X,&m,&n);
70: MatGetOwnershipRange(X,&start,&end);
71: if (a == 1.0) {
72: for (i = start; i < end; i++) {
73: MatGetRow(X,i,&ncols,&row,&vals);
74: MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
75: MatRestoreRow(X,i,&ncols,&row,&vals);
76: }
77: } else {
78: PetscMalloc1(n+1,&val);
79: for (i=start; i<end; i++) {
80: MatGetRow(X,i,&ncols,&row,&vals);
81: for (j=0; j<ncols; j++) {
82: val[j] = a*vals[j];
83: }
84: MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
85: MatRestoreRow(X,i,&ncols,&row,&vals);
86: }
87: PetscFree(val);
88: }
89: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
90: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
91: return(0);
92: }
94: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
95: {
96: PetscInt i,start,end,j,ncols,m,n;
97: PetscErrorCode ierr;
98: const PetscInt *row;
99: PetscScalar *val;
100: const PetscScalar *vals;
103: MatGetSize(X,&m,&n);
104: MatGetOwnershipRange(X,&start,&end);
105: if (a == 1.0) {
106: for (i = start; i < end; i++) {
107: MatGetRow(Y,i,&ncols,&row,&vals);
108: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
109: MatRestoreRow(Y,i,&ncols,&row,&vals);
111: MatGetRow(X,i,&ncols,&row,&vals);
112: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
113: MatRestoreRow(X,i,&ncols,&row,&vals);
114: }
115: } else {
116: PetscMalloc1(n+1,&val);
117: for (i=start; i<end; i++) {
118: MatGetRow(Y,i,&ncols,&row,&vals);
119: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
120: MatRestoreRow(Y,i,&ncols,&row,&vals);
122: MatGetRow(X,i,&ncols,&row,&vals);
123: for (j=0; j<ncols; j++) {
124: val[j] = a*vals[j];
125: }
126: MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
127: MatRestoreRow(X,i,&ncols,&row,&vals);
128: }
129: PetscFree(val);
130: }
131: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
132: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
133: return(0);
134: }
136: /*@
137: MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.
139: Neighbor-wise Collective on Mat
141: Input Parameters:
142: + Y - the matrices
143: - a - the PetscScalar
145: Level: intermediate
147: Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
148: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
149: entry).
151: Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
152: preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.
154: .keywords: matrix, add, shift
156: .seealso: MatDiagonalSet()
157: @*/
158: PetscErrorCode MatShift(Mat Y,PetscScalar a)
159: {
164: if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
165: if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
166: MatCheckPreallocated(Y,1);
168: if (Y->ops->shift) {
169: (*Y->ops->shift)(Y,a);
170: } else {
171: MatShift_Basic(Y,a);
172: }
174: #if defined(PETSC_HAVE_CUSP)
175: if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
176: Y->valid_GPU_matrix = PETSC_CUSP_CPU;
177: }
178: #elif defined(PETSC_HAVE_VIENNACL)
179: if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
180: Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
181: }
182: #elif defined(PETSC_HAVE_VECCUDA)
183: if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
184: Y->valid_GPU_matrix = PETSC_CUDA_CPU;
185: }
186: #endif
187: return(0);
188: }
190: PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
191: {
193: PetscInt i,start,end;
194: PetscScalar *v;
197: MatGetOwnershipRange(Y,&start,&end);
198: VecGetArray(D,&v);
199: for (i=start; i<end; i++) {
200: MatSetValues(Y,1,&i,1,&i,v+i-start,is);
201: }
202: VecRestoreArray(D,&v);
203: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
204: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
205: return(0);
206: }
208: /*@
209: MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
210: that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
211: INSERT_VALUES.
213: Input Parameters:
214: + Y - the input matrix
215: . D - the diagonal matrix, represented as a vector
216: - i - INSERT_VALUES or ADD_VALUES
218: Neighbor-wise Collective on Mat and Vec
220: Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
221: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
222: entry).
224: Level: intermediate
226: .keywords: matrix, add, shift, diagonal
228: .seealso: MatShift()
229: @*/
230: PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is)
231: {
233: PetscInt matlocal,veclocal;
238: MatGetLocalSize(Y,&matlocal,NULL);
239: VecGetLocalSize(D,&veclocal);
240: if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
241: if (Y->ops->diagonalset) {
242: (*Y->ops->diagonalset)(Y,D,is);
243: } else {
244: MatDiagonalSet_Default(Y,D,is);
245: }
246: return(0);
247: }
249: /*@
250: MatAYPX - Computes Y = a*Y + X.
252: Logically on Mat
254: Input Parameters:
255: + a - the PetscScalar multiplier
256: . Y - the first matrix
257: . X - the second matrix
258: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
260: Level: intermediate
262: .keywords: matrix, add
264: .seealso: MatAXPY()
265: @*/
266: PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
267: {
268: PetscScalar one = 1.0;
270: PetscInt mX,mY,nX,nY;
276: MatGetSize(X,&mX,&nX);
277: MatGetSize(X,&mY,&nY);
278: if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
280: MatScale(Y,a);
281: MatAXPY(Y,one,X,str);
282: return(0);
283: }
285: /*@
286: MatComputeExplicitOperator - Computes the explicit matrix
288: Collective on Mat
290: Input Parameter:
291: . inmat - the matrix
293: Output Parameter:
294: . mat - the explict preconditioned operator
296: Notes:
297: This computation is done by applying the operators to columns of the
298: identity matrix.
300: Currently, this routine uses a dense matrix format when 1 processor
301: is used and a sparse format otherwise. This routine is costly in general,
302: and is recommended for use only with relatively small systems.
304: Level: advanced
306: .keywords: Mat, compute, explicit, operator
307: @*/
308: PetscErrorCode MatComputeExplicitOperator(Mat inmat,Mat *mat)
309: {
310: Vec in,out;
312: PetscInt i,m,n,M,N,*rows,start,end;
313: MPI_Comm comm;
314: PetscScalar *array,zero = 0.0,one = 1.0;
315: PetscMPIInt size;
321: PetscObjectGetComm((PetscObject)inmat,&comm);
322: MPI_Comm_size(comm,&size);
324: MatGetLocalSize(inmat,&m,&n);
325: MatGetSize(inmat,&M,&N);
326: MatCreateVecs(inmat,&in,&out);
327: VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
328: VecGetOwnershipRange(out,&start,&end);
329: PetscMalloc1(m,&rows);
330: for (i=0; i<m; i++) rows[i] = start + i;
332: MatCreate(comm,mat);
333: MatSetSizes(*mat,m,n,M,N);
334: if (size == 1) {
335: MatSetType(*mat,MATSEQDENSE);
336: MatSeqDenseSetPreallocation(*mat,NULL);
337: } else {
338: MatSetType(*mat,MATMPIAIJ);
339: MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
340: }
342: for (i=0; i<N; i++) {
344: VecSet(in,zero);
345: VecSetValues(in,1,&i,&one,INSERT_VALUES);
346: VecAssemblyBegin(in);
347: VecAssemblyEnd(in);
349: MatMult(inmat,in,out);
351: VecGetArray(out,&array);
352: MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
353: VecRestoreArray(out,&array);
355: }
356: PetscFree(rows);
357: VecDestroy(&out);
358: VecDestroy(&in);
359: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
360: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
361: return(0);
362: }
364: /*@
365: MatChop - Set all values in the matrix less than the tolerance to zero
367: Input Parameters:
368: + A - The matrix
369: - tol - The zero tolerance
371: Output Parameters:
372: . A - The chopped matrix
374: Level: intermediate
376: .seealso: MatCreate(), MatZeroEntries()
377: @*/
378: PetscErrorCode MatChop(Mat A, PetscReal tol)
379: {
380: PetscScalar *newVals;
381: PetscInt *newCols;
382: PetscInt rStart, rEnd, numRows, maxRows, r, colMax = 0;
386: MatGetOwnershipRange(A, &rStart, &rEnd);
387: for (r = rStart; r < rEnd; ++r) {
388: PetscInt ncols;
390: MatGetRow(A, r, &ncols, NULL, NULL);
391: colMax = PetscMax(colMax, ncols);
392: MatRestoreRow(A, r, &ncols, NULL, NULL);
393: }
394: numRows = rEnd - rStart;
395: MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
396: PetscMalloc2(colMax,&newCols,colMax,&newVals);
397: for (r = rStart; r < rStart+maxRows; ++r) {
398: const PetscScalar *vals;
399: const PetscInt *cols;
400: PetscInt ncols, newcols, c;
402: if (r < rEnd) {
403: MatGetRow(A, r, &ncols, &cols, &vals);
404: for (c = 0; c < ncols; ++c) {
405: newCols[c] = cols[c];
406: newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
407: }
408: newcols = ncols;
409: MatRestoreRow(A, r, &ncols, &cols, &vals);
410: MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
411: }
412: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
413: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
414: }
415: PetscFree2(newCols,newVals);
416: return(0);
417: }