/* Actual source code: axpy.c  (petsc-3.11.4, 2019-09-28) */

#include <petsc/private/matimpl.h>
/*@
   MatAXPY - Computes Y = a*X + Y.

   Logically Collective on Mat

   Input Parameters:
+  a   - the scalar multiplier
.  X   - the matrix that is scaled and added
.  Y   - the matrix that is added to (and overwritten with the result)
-  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
         or SUBSET_NONZERO_PATTERN (the nonzeros of X are a subset of Y's)

   Level: intermediate

.keywords: matrix, add

.seealso: MatAYPX()
 @*/
PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt  M1,M2,N1,N2;
  PetscInt  m1,m2,n1,n2;
  PetscBool sametype;

  MatGetSize(X,&M1,&N1);
  MatGetSize(Y,&M2,&N2);
  MatGetLocalSize(X,&m1,&n1);
  MatGetLocalSize(Y,&m2,&n2);
  if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes %D x %D, %D x %D",M1,N1,M2,N2);
  if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes %D x %D, %D x %D",m1,n1,m2,n2);

  PetscStrcmp(((PetscObject)X)->type_name,((PetscObject)Y)->type_name,&sametype);
  PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
  if (Y->ops->axpy && sametype) {
    /* use the type-specific implementation when both matrices have the same type */
    (*Y->ops->axpy)(Y,a,X,str);
  } else {
    if (str != DIFFERENT_NONZERO_PATTERN) {
      /* Y already has room for every nonzero of X */
      MatAXPY_Basic(Y,a,X,str);
    } else {
      /* build a properly preallocated matrix B for the union pattern, then replace Y by it */
      Mat B;

      MatAXPY_Basic_Preallocate(Y,X,&B);
      MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
      MatHeaderReplace(Y,&B);
    }
  }
  PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
  }
#endif
  return(0);
}
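/*
   Usage sketch (illustrative only; the helper name and the scale factor are hypothetical,
   not part of PETSc): computing Y <- 2*X + Y for two assembled matrices with the same
   parallel layout. DIFFERENT_NONZERO_PATTERN is the safe choice when the nonzero patterns
   of X and Y are not known to match; SAME_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
   enable the faster paths above.
*/
PetscErrorCode ExampleAXPYUsage(Mat X,Mat Y)
{
  MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN); /* Y <- 2*X + Y */
  return(0);
}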
PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
{
  PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;

  /* look for any available faster alternative to the general preallocator */
  PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);
  if (preall) {
    (*preall)(Y,X,B);
  } else { /* use MatPreallocator; assumes the same row-column distribution for X and Y */
    Mat      preallocator;
    PetscInt r,rstart,rend;
    PetscInt m,n,M,N;

    MatGetRowUpperTriangular(Y);
    MatGetRowUpperTriangular(X);
    MatGetSize(Y,&M,&N);
    MatGetLocalSize(Y,&m,&n);
    MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);
    MatSetType(preallocator,MATPREALLOCATOR);
    MatSetSizes(preallocator,m,n,M,N);
    MatSetUp(preallocator);
    MatGetOwnershipRange(preallocator,&rstart,&rend);
    for (r = rstart; r < rend; ++r) {
      PetscInt          ncols;
      const PetscInt    *row;
      const PetscScalar *vals;

      MatGetRow(Y,r,&ncols,&row,&vals);
      MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
      MatRestoreRow(Y,r,&ncols,&row,&vals);
      MatGetRow(X,r,&ncols,&row,&vals);
      MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
      MatRestoreRow(X,r,&ncols,&row,&vals);
    }
    MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);
    MatRestoreRowUpperTriangular(Y);
    MatRestoreRowUpperTriangular(X);

    MatCreate(PetscObjectComm((PetscObject)Y),B);
    MatSetType(*B,((PetscObject)Y)->type_name);
    MatSetSizes(*B,m,n,M,N);
    MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);
    MatDestroy(&preallocator);
  }
  return(0);
}
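/*
   Sketch of the MATPREALLOCATOR idiom used above (illustrative only; the helper name and
   the diagonal-only pattern are made up): the same insertion loop is run twice, first into
   a MATPREALLOCATOR object that only records the nonzero pattern, then into the properly
   preallocated target matrix.
*/
PetscErrorCode ExamplePreallocatorPattern(MPI_Comm comm,PetscInt n,Mat *A)
{
  Mat         preall;
  PetscInt    i,rstart,rend;
  PetscScalar v = 1.0;

  MatCreate(comm,&preall);
  MatSetType(preall,MATPREALLOCATOR);
  MatSetSizes(preall,PETSC_DECIDE,PETSC_DECIDE,n,n);
  MatSetUp(preall);
  MatGetOwnershipRange(preall,&rstart,&rend);
  for (i=rstart; i<rend; i++) {
    MatSetValues(preall,1,&i,1,&i,&v,INSERT_VALUES); /* first pass: record the pattern only */
  }
  MatAssemblyBegin(preall,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(preall,MAT_FINAL_ASSEMBLY);

  MatCreate(comm,A);
  MatSetType(*A,MATAIJ);
  MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,n,n);
  MatPreallocatorPreallocate(preall,PETSC_FALSE,*A); /* size *A from the recorded pattern */
  MatDestroy(&preall);

  /* second pass: insert the actual values into *A, then assemble it */
  return(0);
}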
PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt          i,start,end,j,ncols,m,n;
  PetscErrorCode    ierr;
  const PetscInt    *row;
  PetscScalar       *val;
  const PetscScalar *vals;

  MatGetSize(X,&m,&n);
  MatGetOwnershipRange(X,&start,&end);
  MatGetRowUpperTriangular(X);
  if (a == 1.0) {
    for (i = start; i < end; i++) {
      MatGetRow(X,i,&ncols,&row,&vals);
      MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
  } else {
    PetscInt vs = 100;
    /* realloc if needed, as this function may be used in parallel */
    PetscMalloc1(vs,&val);
    for (i=start; i<end; i++) {
      MatGetRow(X,i,&ncols,&row,&vals);
      if (vs < ncols) {
        vs = PetscMin(2*ncols,n);
        PetscRealloc(vs*sizeof(*val),&val);
      }
      for (j=0; j<ncols; j++) val[j] = a*vals[j];
      MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
    PetscFree(val);
  }
  MatRestoreRowUpperTriangular(X);
  MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
  return(0);
}
PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt          i,start,end,j,ncols,m,n;
  PetscErrorCode    ierr;
  const PetscInt    *row;
  PetscScalar       *val;
  const PetscScalar *vals;

  MatGetSize(X,&m,&n);
  MatGetOwnershipRange(X,&start,&end);
  MatGetRowUpperTriangular(Y);
  MatGetRowUpperTriangular(X);
  if (a == 1.0) {
    for (i = start; i < end; i++) {
      MatGetRow(Y,i,&ncols,&row,&vals);
      MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(Y,i,&ncols,&row,&vals);

      MatGetRow(X,i,&ncols,&row,&vals);
      MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
  } else {
    PetscInt vs = 100;
    /* realloc if needed, as this function may be used in parallel */
    PetscMalloc1(vs,&val);
    for (i=start; i<end; i++) {
      MatGetRow(Y,i,&ncols,&row,&vals);
      MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(Y,i,&ncols,&row,&vals);

      MatGetRow(X,i,&ncols,&row,&vals);
      if (vs < ncols) {
        vs = PetscMin(2*ncols,n);
        PetscRealloc(vs*sizeof(*val),&val);
      }
      for (j=0; j<ncols; j++) val[j] = a*vals[j];
      MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
    PetscFree(val);
  }
  MatRestoreRowUpperTriangular(Y);
  MatRestoreRowUpperTriangular(X);
  MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
  return(0);
}
/*@
   MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.

   Neighbor-wise Collective on Mat

   Input Parameters:
+  Y - the matrix
-  a - the scalar shift

   Level: intermediate

   Notes:
    If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not
    otherwise have an entry).

    To form Y = Y + diag(V) use MatDiagonalSet().

   Developer Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
    preallocated with 1 nonzero per row for the values to be added. This allows for fast shifting of an empty matrix.

.keywords: matrix, add, shift

.seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
 @*/
PetscErrorCode MatShift(Mat Y,PetscScalar a)
{
  if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  MatCheckPreallocated(Y,1);

  if (Y->ops->shift) {
    (*Y->ops->shift)(Y,a);
  } else {
    MatShift_Basic(Y,a);
  }

  PetscObjectStateIncrease((PetscObject)Y);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
  }
#endif
  return(0);
}
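/*
   Usage sketch (illustrative only; the helper name is hypothetical): shifting a square matrix
   that might be missing diagonal entries. As recommended in the Notes above, the diagonal is
   first filled with explicit zeros (this assumes A still accepts new nonzero locations), so
   that the subsequent MatShift() is fast.
*/
PetscErrorCode ExampleShiftUsage(Mat A,PetscScalar alpha)
{
  PetscInt    i,rstart,rend;
  PetscScalar zero = 0.0;

  MatGetOwnershipRange(A,&rstart,&rend);
  for (i=rstart; i<rend; i++) {
    MatSetValues(A,1,&i,1,&i,&zero,ADD_VALUES); /* make sure each diagonal entry exists */
  }
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  MatShift(A,alpha);                            /* A <- A + alpha*I */
  return(0);
}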
PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
{
  PetscInt    i,start,end;
  PetscScalar *v;

  MatGetOwnershipRange(Y,&start,&end);
  VecGetArray(D,&v);
  for (i=start; i<end; i++) {
    MatSetValues(Y,1,&i,1,&i,v+i-start,is);
  }
  VecRestoreArray(D,&v);
  MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
  return(0);
}
/*@
   MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
   that is represented as a vector. If the InsertMode is INSERT_VALUES
   then Y[i,i] = D[i] instead.

   Neighbor-wise Collective on Mat and Vec

   Input Parameters:
+  Y  - the input matrix
.  D  - the diagonal matrix, represented as a vector
-  is - INSERT_VALUES or ADD_VALUES

   Notes:
    If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not
    otherwise have an entry).

   Level: intermediate

.keywords: matrix, add, shift, diagonal

.seealso: MatShift(), MatScale(), MatDiagonalScale()
 @*/
PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is)
{
  PetscInt matlocal,veclocal;

  MatGetLocalSize(Y,&matlocal,NULL);
  VecGetLocalSize(D,&veclocal);
  if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of local rows of matrix (%D) does not match the local size of the vector for the diagonal (%D)",matlocal,veclocal);
  if (Y->ops->diagonalset) {
    (*Y->ops->diagonalset)(Y,D,is);
  } else {
    MatDiagonalSet_Default(Y,D,is);
  }
  PetscObjectStateIncrease((PetscObject)Y);
  return(0);
}
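/*
   Usage sketch (illustrative only; the helper name and the constant are hypothetical):
   adding a vector to the diagonal of Y, i.e. Y <- Y + diag(D). MatCreateVecs() is used so
   that D automatically has the same parallel row layout as Y.
*/
PetscErrorCode ExampleDiagonalSetUsage(Mat Y)
{
  Vec D;

  MatCreateVecs(Y,NULL,&D);       /* left vector: same row layout as Y */
  VecSet(D,3.0);
  MatDiagonalSet(Y,D,ADD_VALUES); /* add 3 to every diagonal entry of Y */
  VecDestroy(&D);
  return(0);
}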
/*@
   MatAYPX - Computes Y = a*Y + X.

   Logically Collective on Mat

   Input Parameters:
+  a   - the scalar multiplier
.  Y   - the matrix that is scaled (and overwritten with the result)
.  X   - the matrix that is added
-  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN

   Level: intermediate

.keywords: matrix, add

.seealso: MatAXPY()
 @*/
PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscScalar one = 1.0;
  PetscInt    mX,mY,nX,nY;

  MatGetSize(X,&mX,&nX);
  MatGetSize(Y,&mY,&nY);
  if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D x %D (X), %D x %D (Y)",mX,nX,mY,nY);

  MatScale(Y,a);
  MatAXPY(Y,one,X,str);
  return(0);
}
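/*
   Usage sketch (illustrative only; the helper name is hypothetical): note the different roles
   of the two matrices. MatAYPX() scales Y before adding X, whereas MatAXPY() scales X.
*/
PetscErrorCode ExampleAYPXUsage(Mat Y,Mat X)
{
  MatAYPX(Y,0.5,X,DIFFERENT_NONZERO_PATTERN); /* Y <- 0.5*Y + X */
  MatAXPY(Y,0.5,X,DIFFERENT_NONZERO_PATTERN); /* Y <- 0.5*X + Y */
  return(0);
}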
/*@
   MatComputeExplicitOperator - Computes the explicit matrix representation of a given operator

   Collective on Mat

   Input Parameter:
.  inmat - the matrix

   Output Parameter:
.  mat - the explicit operator

   Notes:
    This computation is done by applying the operator to the columns of the
    identity matrix.

    Currently, this routine uses a dense matrix format when 1 processor
    is used and a sparse format otherwise. This routine is costly in general,
    and is recommended for use only with relatively small systems.

   Level: advanced

.keywords: Mat, compute, explicit, operator
@*/
PetscErrorCode MatComputeExplicitOperator(Mat inmat,Mat *mat)
{
  MPI_Comm    comm;
  PetscMPIInt size;

  PetscObjectGetComm((PetscObject)inmat,&comm);
  MPI_Comm_size(comm,&size);
  MatConvert_Shell(inmat,size == 1 ? MATSEQDENSE : MATAIJ,MAT_INITIAL_MATRIX,mat);
  return(0);
}
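/*
   Usage sketch (illustrative only; the helper name is hypothetical): forming the explicit
   representation of a matrix-free (shell) operator and printing it for inspection. As noted
   above, this is only practical for relatively small operators.
*/
PetscErrorCode ExampleExplicitOperatorUsage(Mat shell)
{
  Mat E;

  MatComputeExplicitOperator(shell,&E);
  MatView(E,PETSC_VIEWER_STDOUT_WORLD);
  MatDestroy(&E);
  return(0);
}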
/*@
   MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of
   a given matrix that can apply MatMultTranspose()

   Collective on Mat

   Input Parameter:
.  inmat - the matrix

   Output Parameter:
.  mat - the explicit operator, transposed

   Notes:
    This computation is done by applying the transpose of the operator to the columns of the
    identity matrix.

    Currently, this routine uses a dense matrix format when 1 processor
    is used and a sparse format otherwise. This routine is costly in general,
    and is recommended for use only with relatively small systems.

   Level: advanced

.keywords: Mat, compute, explicit, operator
@*/
PetscErrorCode MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat)
{
  Vec         in,out;
  PetscInt    i,m,n,M,N,*rows,start,end;
  MPI_Comm    comm;
  PetscScalar *array,zero = 0.0,one = 1.0;
  PetscMPIInt size;

  PetscObjectGetComm((PetscObject)inmat,&comm);
  MPI_Comm_size(comm,&size);

  MatGetLocalSize(inmat,&m,&n);
  MatGetSize(inmat,&M,&N);
  MatCreateVecs(inmat,&in,&out);
  VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
  VecGetOwnershipRange(out,&start,&end);
  PetscMalloc1(m,&rows);
  for (i=0; i<m; i++) rows[i] = start + i;

  MatCreate(comm,mat);
  MatSetSizes(*mat,m,n,M,N);
  if (size == 1) {
    MatSetType(*mat,MATSEQDENSE);
    MatSeqDenseSetPreallocation(*mat,NULL);
  } else {
    MatSetType(*mat,MATMPIAIJ);
    MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
  }

  for (i=0; i<N; i++) {
    /* apply the transpose of the operator to the i-th column of the identity */
    VecSet(in,zero);
    VecSetValues(in,1,&i,&one,INSERT_VALUES);
    VecAssemblyBegin(in);
    VecAssemblyEnd(in);

    MatMultTranspose(inmat,in,out);

    VecGetArray(out,&array);
    MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
    VecRestoreArray(out,&array);
  }
  PetscFree(rows);
  VecDestroy(&out);
  VecDestroy(&in);
  MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
  return(0);
}
/*@
   MatChop - Set all values in the matrix whose absolute value is less than the tolerance to zero

   Input Parameters:
+  A   - The matrix
-  tol - The zero tolerance

   Output Parameter:
.  A - The chopped matrix

   Level: intermediate

.seealso: MatCreate(), MatZeroEntries()
 @*/
PetscErrorCode MatChop(Mat A, PetscReal tol)
{
  PetscScalar *newVals;
  PetscInt    *newCols;
  PetscInt    rStart, rEnd, numRows, maxRows, r, colMax = 0;

  MatGetOwnershipRange(A, &rStart, &rEnd);
  for (r = rStart; r < rEnd; ++r) {
    PetscInt ncols;

    MatGetRow(A, r, &ncols, NULL, NULL);
    colMax = PetscMax(colMax, ncols);
    MatRestoreRow(A, r, &ncols, NULL, NULL);
  }
  numRows = rEnd - rStart;
  MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
  PetscMalloc2(colMax,&newCols,colMax,&newVals);
  /* every process loops maxRows times so that the collective assembly calls below match up */
  for (r = rStart; r < rStart+maxRows; ++r) {
    const PetscScalar *vals;
    const PetscInt    *cols;
    PetscInt          ncols, newcols, c;

    if (r < rEnd) {
      MatGetRow(A, r, &ncols, &cols, &vals);
      for (c = 0; c < ncols; ++c) {
        newCols[c] = cols[c];
        newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
      }
      newcols = ncols;
      MatRestoreRow(A, r, &ncols, &cols, &vals);
      MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
    }
    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
  }
  PetscFree2(newCols,newVals);
  return(0);
}
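/*
   Usage sketch (illustrative only; the helper name and the tolerance are hypothetical):
   zeroing round-off sized entries of an assembled matrix. Note that MatChop() overwrites
   small entries with 0.0 in place; it does not remove them from the nonzero pattern.
*/
PetscErrorCode ExampleChopUsage(Mat A)
{
  MatChop(A,1.e-12); /* set all entries with |a_ij| < 1e-12 to zero */
  return(0);
}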