Actual source code: axpy.c
petsc-3.5.4 2015-05-23
2: #include <petsc-private/matimpl.h> /*I "petscmat.h" I*/
6: /*@
7: MatAXPY - Computes Y = a*X + Y.
9: Logically Collective on Mat
11: Input Parameters:
12: + a - the scalar multiplier
13: . X - the first matrix
14: . Y - the second matrix
15: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
16: or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
18: Level: intermediate
20: .keywords: matrix, add
22: .seealso: MatAYPX()
23: @*/
24: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
25: {
27: PetscInt m1,m2,n1,n2;
33: MatGetSize(X,&m1,&n1);
34: MatGetSize(Y,&m2,&n2);
35: if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2);
37: PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
38: if (Y->ops->axpy) {
39: (*Y->ops->axpy)(Y,a,X,str);
40: } else {
41: MatAXPY_Basic(Y,a,X,str);
42: }
43: PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
44: #if defined(PETSC_HAVE_CUSP)
45: if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
46: Y->valid_GPU_matrix = PETSC_CUSP_CPU;
47: }
48: #endif
49: #if defined(PETSC_HAVE_VIENNACL)
50: if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
51: Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
52: }
53: #endif
54: return(0);
55: }
59: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
60: {
61: PetscInt i,start,end,j,ncols,m,n;
62: PetscErrorCode ierr;
63: const PetscInt *row;
64: PetscScalar *val;
65: const PetscScalar *vals;
68: MatGetSize(X,&m,&n);
69: MatGetOwnershipRange(X,&start,&end);
70: if (a == 1.0) {
71: for (i = start; i < end; i++) {
72: MatGetRow(X,i,&ncols,&row,&vals);
73: MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
74: MatRestoreRow(X,i,&ncols,&row,&vals);
75: }
76: } else {
77: PetscMalloc1((n+1),&val);
78: for (i=start; i<end; i++) {
79: MatGetRow(X,i,&ncols,&row,&vals);
80: for (j=0; j<ncols; j++) {
81: val[j] = a*vals[j];
82: }
83: MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
84: MatRestoreRow(X,i,&ncols,&row,&vals);
85: }
86: PetscFree(val);
87: }
88: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
89: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
90: return(0);
91: }
95: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
96: {
97: PetscInt i,start,end,j,ncols,m,n;
98: PetscErrorCode ierr;
99: const PetscInt *row;
100: PetscScalar *val;
101: const PetscScalar *vals;
104: MatGetSize(X,&m,&n);
105: MatGetOwnershipRange(X,&start,&end);
106: if (a == 1.0) {
107: for (i = start; i < end; i++) {
108: MatGetRow(Y,i,&ncols,&row,&vals);
109: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
110: MatRestoreRow(Y,i,&ncols,&row,&vals);
112: MatGetRow(X,i,&ncols,&row,&vals);
113: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
114: MatRestoreRow(X,i,&ncols,&row,&vals);
115: }
116: } else {
117: PetscMalloc1((n+1),&val);
118: for (i=start; i<end; i++) {
119: MatGetRow(Y,i,&ncols,&row,&vals);
120: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
121: MatRestoreRow(Y,i,&ncols,&row,&vals);
123: MatGetRow(X,i,&ncols,&row,&vals);
124: for (j=0; j<ncols; j++) {
125: val[j] = a*vals[j];
126: }
127: MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
128: MatRestoreRow(X,i,&ncols,&row,&vals);
129: }
130: PetscFree(val);
131: }
132: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
133: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
134: return(0);
135: }
139: /*@
140: MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.
142: Neighbor-wise Collective on Mat
144: Input Parameters:
145: + Y - the matrices
146: - a - the PetscScalar
148: Level: intermediate
150: .keywords: matrix, add, shift
152: .seealso: MatDiagonalSet()
153: @*/
154: PetscErrorCode MatShift(Mat Y,PetscScalar a)
155: {
157: PetscInt i,start,end;
161: if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
162: if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
163: MatCheckPreallocated(Y,1);
165: if (Y->ops->shift) {
166: (*Y->ops->shift)(Y,a);
167: } else {
168: PetscScalar alpha = a;
169: MatGetOwnershipRange(Y,&start,&end);
170: for (i=start; i<end; i++) {
171: MatSetValues(Y,1,&i,1,&i,&alpha,ADD_VALUES);
172: }
173: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
174: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
175: }
176: #if defined(PETSC_HAVE_CUSP)
177: if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
178: Y->valid_GPU_matrix = PETSC_CUSP_CPU;
179: }
180: #endif
181: #if defined(PETSC_HAVE_VIENNACL)
182: if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
183: Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
184: }
185: #endif
186: return(0);
187: }
191: PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
192: {
194: PetscInt i,start,end,vstart,vend;
195: PetscScalar *v;
198: VecGetOwnershipRange(D,&vstart,&vend);
199: MatGetOwnershipRange(Y,&start,&end);
200: if (vstart != start || vend != end) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector ownership range not compatible with matrix: %D %D vec %D %D mat",vstart,vend,start,end);
201: VecGetArray(D,&v);
202: for (i=start; i<end; i++) {
203: MatSetValues(Y,1,&i,1,&i,v+i-start,is);
204: }
205: VecRestoreArray(D,&v);
206: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
207: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
208: return(0);
209: }
213: /*@
214: MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
215: that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
216: INSERT_VALUES.
218: Input Parameters:
219: + Y - the input matrix
220: . D - the diagonal matrix, represented as a vector
221: - i - INSERT_VALUES or ADD_VALUES
223: Neighbor-wise Collective on Mat and Vec
225: Level: intermediate
227: .keywords: matrix, add, shift, diagonal
229: .seealso: MatShift()
230: @*/
231: PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is)
232: {
238: if (Y->ops->diagonalset) {
239: (*Y->ops->diagonalset)(Y,D,is);
240: } else {
241: MatDiagonalSet_Default(Y,D,is);
242: }
243: return(0);
244: }
248: /*@
249: MatAYPX - Computes Y = a*Y + X.
251: Logically on Mat
253: Input Parameters:
254: + a - the PetscScalar multiplier
255: . Y - the first matrix
256: . X - the second matrix
257: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
259: Level: intermediate
261: .keywords: matrix, add
263: .seealso: MatAXPY()
264: @*/
265: PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
266: {
267: PetscScalar one = 1.0;
269: PetscInt mX,mY,nX,nY;
275: MatGetSize(X,&mX,&nX);
276: MatGetSize(X,&mY,&nY);
277: if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
279: MatScale(Y,a);
280: MatAXPY(Y,one,X,str);
281: return(0);
282: }
286: /*@
287: MatComputeExplicitOperator - Computes the explicit matrix
289: Collective on Mat
291: Input Parameter:
292: . inmat - the matrix
294: Output Parameter:
295: . mat - the explict preconditioned operator
297: Notes:
298: This computation is done by applying the operators to columns of the
299: identity matrix.
301: Currently, this routine uses a dense matrix format when 1 processor
302: is used and a sparse format otherwise. This routine is costly in general,
303: and is recommended for use only with relatively small systems.
305: Level: advanced
307: .keywords: Mat, compute, explicit, operator
308: @*/
309: PetscErrorCode MatComputeExplicitOperator(Mat inmat,Mat *mat)
310: {
311: Vec in,out;
313: PetscInt i,m,n,M,N,*rows,start,end;
314: MPI_Comm comm;
315: PetscScalar *array,zero = 0.0,one = 1.0;
316: PetscMPIInt size;
322: PetscObjectGetComm((PetscObject)inmat,&comm);
323: MPI_Comm_size(comm,&size);
325: MatGetLocalSize(inmat,&m,&n);
326: MatGetSize(inmat,&M,&N);
327: MatGetVecs(inmat,&in,&out);
328: VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
329: VecGetOwnershipRange(out,&start,&end);
330: PetscMalloc1(m,&rows);
331: for (i=0; i<m; i++) rows[i] = start + i;
333: MatCreate(comm,mat);
334: MatSetSizes(*mat,m,n,M,N);
335: if (size == 1) {
336: MatSetType(*mat,MATSEQDENSE);
337: MatSeqDenseSetPreallocation(*mat,NULL);
338: } else {
339: MatSetType(*mat,MATMPIAIJ);
340: MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
341: }
343: for (i=0; i<N; i++) {
345: VecSet(in,zero);
346: VecSetValues(in,1,&i,&one,INSERT_VALUES);
347: VecAssemblyBegin(in);
348: VecAssemblyEnd(in);
350: MatMult(inmat,in,out);
352: VecGetArray(out,&array);
353: MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
354: VecRestoreArray(out,&array);
356: }
357: PetscFree(rows);
358: VecDestroy(&out);
359: VecDestroy(&in);
360: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
361: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
362: return(0);
363: }
365: /* Get the map xtoy which is used by MatAXPY() in the case of SUBSET_NONZERO_PATTERN */
368: PetscErrorCode MatAXPYGetxtoy_Private(PetscInt m,PetscInt *xi,PetscInt *xj,PetscInt *xgarray, PetscInt *yi,PetscInt *yj,PetscInt *ygarray, PetscInt **xtoy)
369: {
371: PetscInt row,i,nz,xcol,ycol,jx,jy,*x2y;
374: PetscMalloc1(xi[m],&x2y);
375: i = 0;
376: for (row=0; row<m; row++) {
377: nz = xi[1] - xi[0];
378: jy = 0;
379: for (jx=0; jx<nz; jx++,jy++) {
380: if (xgarray && ygarray) {
381: xcol = xgarray[xj[*xi + jx]];
382: ycol = ygarray[yj[*yi + jy]];
383: } else {
384: xcol = xj[*xi + jx];
385: ycol = yj[*yi + jy]; /* col index for y */
386: }
387: while (ycol < xcol) {
388: jy++;
389: if (ygarray) {
390: ycol = ygarray[yj[*yi + jy]];
391: } else {
392: ycol = yj[*yi + jy];
393: }
394: }
395: if (xcol != ycol) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"X matrix entry (%D,%D) is not in Y matrix",row,ycol);
396: x2y[i++] = *yi + jy;
397: }
398: xi++; yi++;
399: }
400: *xtoy = x2y;
401: return(0);
402: }
406: /*@
407: MatChop - Set all values in the matrix less than the tolerance to zero
409: Input Parameters:
410: + A - The matrix
411: - tol - The zero tolerance
413: Output Parameters:
414: . A - The chopped matrix
416: Level: intermediate
418: .seealso: MatCreate(), MatZeroEntries()
419: @*/
420: PetscErrorCode MatChop(Mat A, PetscReal tol)
421: {
422: PetscScalar *newVals;
423: PetscInt *newCols;
424: PetscInt rStart, rEnd, numRows, maxRows, r, colMax = 0;
428: MatGetOwnershipRange(A, &rStart, &rEnd);
429: for (r = rStart; r < rEnd; ++r) {
430: PetscInt ncols;
432: MatGetRow(A, r, &ncols, NULL, NULL);
433: colMax = PetscMax(colMax, ncols);
434: MatRestoreRow(A, r, &ncols, NULL, NULL);
435: }
436: numRows = rEnd - rStart;
437: MPI_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
438: PetscMalloc2(colMax,&newCols,colMax,&newVals);
439: for (r = rStart; r < rStart+maxRows; ++r) {
440: const PetscScalar *vals;
441: const PetscInt *cols;
442: PetscInt ncols, newcols, c;
444: if (r < rEnd) {
445: MatGetRow(A, r, &ncols, &cols, &vals);
446: for (c = 0; c < ncols; ++c) {
447: newCols[c] = cols[c];
448: newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
449: }
450: newcols = ncols;
451: MatRestoreRow(A, r, &ncols, &cols, &vals);
452: MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
453: }
454: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
455: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
456: }
457: PetscFree2(newCols,newVals);
458: return(0);
459: }