Actual source code: axpy.c
2: #include <petsc/private/matimpl.h>
4: static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T)
5: {
6: PetscErrorCode ierr,(*f)(Mat,Mat*);
7: Mat A,F;
10: PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f);
11: if (f) {
12: MatTransposeGetMat(T,&A);
13: if (T == X) {
14: PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
15: MatTranspose(A,MAT_INITIAL_MATRIX,&F);
16: A = Y;
17: } else {
18: PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
19: MatTranspose(X,MAT_INITIAL_MATRIX,&F);
20: }
21: } else {
22: MatHermitianTransposeGetMat(T,&A);
23: if (T == X) {
24: PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
25: MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F);
26: A = Y;
27: } else {
28: PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
29: MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F);
30: }
31: }
32: MatAXPY(A,a,F,str);
33: MatDestroy(&F);
34: return(0);
35: }
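/*
   Example usage (a minimal sketch; error checking omitted): MatAXPY() reaches the
   helper above when one operand is a virtual (Hermitian) transpose, for instance one
   created with MatCreateTranspose(). A and Y are assumed assembled and of conforming
   sizes.

     Mat A,At,Y;
     MatCreateTranspose(A,&At);                      // At applies A^T without forming it
     MatAXPY(Y,2.0,At,DIFFERENT_NONZERO_PATTERN);    // Y <- Y + 2*A^T; A^T is formed explicitly internally
     MatDestroy(&At);
*/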
37: /*@
38: MatAXPY - Computes Y = a*X + Y.
40: Logically Collective on Mat
42: Input Parameters:
43: + a - the scalar multiplier
44: . X - the first matrix
45: . Y - the second matrix
46: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (the nonzeros of X are a subset of those of Y)
48: Notes: No operation is performed when a is zero.
50: Level: intermediate
52: .seealso: MatAYPX()
53: @*/
54: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
55: {
57: PetscInt M1,M2,N1,N2;
58: PetscInt m1,m2,n1,n2;
59: MatType t1,t2;
60: PetscBool sametype,transpose;
67: MatGetSize(X,&M1,&N1);
68: MatGetSize(Y,&M2,&N2);
69: MatGetLocalSize(X,&m1,&n1);
70: MatGetLocalSize(Y,&m2,&n2);
71: if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes X %D x %D, Y %D x %D",M1,N1,M2,N2);
72: if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes X %D x %D, Y %D x %D",m1,n1,m2,n2);
73: if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (Y)");
74: if (!X->assembled) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (X)");
75: if (a == (PetscScalar)0.0) return(0);
76: if (Y == X) {
77: MatScale(Y,1.0 + a);
78: return(0);
79: }
80: MatGetType(X,&t1);
81: MatGetType(Y,&t2);
82: PetscStrcmp(t1,t2,&sametype);
83: PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
84: if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
85: (*Y->ops->axpy)(Y,a,X,str);
86: } else {
87: PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose);
88: if (transpose) {
89: MatTransposeAXPY_Private(Y,a,X,str,X);
90: } else {
91: PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose);
92: if (transpose) {
93: MatTransposeAXPY_Private(Y,a,X,str,Y);
94: } else {
95: MatAXPY_Basic(Y,a,X,str);
96: }
97: }
98: }
99: PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
100: return(0);
101: }
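/*
   Example usage (a minimal sketch; error checking omitted): X and Y are assumed
   assembled with identical global and local sizes.

     Mat         X,Y;
     PetscScalar alpha = 0.5;
     MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);      // use when X and Y share a nonzero pattern (fastest)
     MatAXPY(Y,-1.0,X,UNKNOWN_NONZERO_PATTERN);    // use when the pattern relationship is not known; may rebuild Y's preallocation, see MatAXPY_Basic() below
*/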
103: PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
104: {
106: PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;
109: /* look for any available faster alternative to the general preallocator */
110: PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);
111: if (preall) {
112: (*preall)(Y,X,B);
113: } else { /* Use MatPreallocator, assumes the same row and column distribution */
114: Mat preallocator;
115: PetscInt r,rstart,rend;
116: PetscInt m,n,M,N;
118: MatGetRowUpperTriangular(Y);
119: MatGetRowUpperTriangular(X);
120: MatGetSize(Y,&M,&N);
121: MatGetLocalSize(Y,&m,&n);
122: MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);
123: MatSetType(preallocator,MATPREALLOCATOR);
124: MatSetLayouts(preallocator,Y->rmap,Y->cmap);
125: MatSetUp(preallocator);
126: MatGetOwnershipRange(preallocator,&rstart,&rend);
127: for (r = rstart; r < rend; ++r) {
128: PetscInt ncols;
129: const PetscInt *row;
130: const PetscScalar *vals;
132: MatGetRow(Y,r,&ncols,&row,&vals);
133: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
134: MatRestoreRow(Y,r,&ncols,&row,&vals);
135: MatGetRow(X,r,&ncols,&row,&vals);
136: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
137: MatRestoreRow(X,r,&ncols,&row,&vals);
138: }
139: MatSetOption(preallocator,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
140: MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
141: MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);
142: MatRestoreRowUpperTriangular(Y);
143: MatRestoreRowUpperTriangular(X);
145: MatCreate(PetscObjectComm((PetscObject)Y),B);
146: MatSetType(*B,((PetscObject)Y)->type_name);
147: MatSetLayouts(*B,Y->rmap,Y->cmap);
148: MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);
149: MatDestroy(&preallocator);
150: }
151: return(0);
152: }
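/*
   Example usage (a minimal sketch; error checking omitted): the same MATPREALLOCATOR
   two-pass workflow used above, applied to assembling a new matrix whose nonzero pattern
   is only discovered while inserting. insertLoop() is a hypothetical user routine that
   calls MatSetValues(); comm and the sizes m,n,M,N are assumed given.

     Mat prealloc,A;
     MatCreate(comm,&prealloc);
     MatSetSizes(prealloc,m,n,M,N);
     MatSetType(prealloc,MATPREALLOCATOR);
     MatSetUp(prealloc);
     insertLoop(prealloc);                                 // first pass: only the pattern is recorded
     MatAssemblyBegin(prealloc,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(prealloc,MAT_FINAL_ASSEMBLY);
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATAIJ);
     MatPreallocatorPreallocate(prealloc,PETSC_TRUE,A);    // preallocate (and zero-fill) A from the recorded pattern
     MatDestroy(&prealloc);
     insertLoop(A);                                        // second pass: insert the actual values
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/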
154: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
155: {
157: PetscBool isshell,isdense,isnest;
160: MatIsShell(Y,&isshell);
161: if (isshell) { /* MatShell has special support for AXPY */
162: PetscErrorCode (*f)(Mat,PetscScalar,Mat,MatStructure);
164: MatGetOperation(Y,MATOP_AXPY,(void (**)(void))&f);
165: if (f) {
166: (*f)(Y,a,X,str);
167: return(0);
168: }
169: }
170: /* no need to preallocate if Y is dense */
171: PetscObjectBaseTypeCompareAny((PetscObject)Y,&isdense,MATSEQDENSE,MATMPIDENSE,"");
172: if (isdense) {
173: PetscObjectTypeCompare((PetscObject)X,MATNEST,&isnest);
174: if (isnest) {
175: MatAXPY_Dense_Nest(Y,a,X);
176: return(0);
177: }
178: if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
179: }
180: if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
181: PetscInt i,start,end,j,ncols,m,n;
182: const PetscInt *row;
183: PetscScalar *val;
184: const PetscScalar *vals;
186: MatGetSize(X,&m,&n);
187: MatGetOwnershipRange(X,&start,&end);
188: MatGetRowUpperTriangular(X);
189: if (a == 1.0) {
190: for (i = start; i < end; i++) {
191: MatGetRow(X,i,&ncols,&row,&vals);
192: MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
193: MatRestoreRow(X,i,&ncols,&row,&vals);
194: }
195: } else {
196: PetscInt vs = 100;
197: /* realloc if needed, as this function may be used in parallel */
198: PetscMalloc1(vs,&val);
199: for (i=start; i<end; i++) {
200: MatGetRow(X,i,&ncols,&row,&vals);
201: if (vs < ncols) {
202: vs = PetscMin(2*ncols,n);
203: PetscRealloc(vs*sizeof(*val),&val);
204: }
205: for (j=0; j<ncols; j++) val[j] = a*vals[j];
206: MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
207: MatRestoreRow(X,i,&ncols,&row,&vals);
208: }
209: PetscFree(val);
210: }
211: MatRestoreRowUpperTriangular(X);
212: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
213: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
214: } else {
215: Mat B;
217: MatAXPY_Basic_Preallocate(Y,X,&B);
218: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
219: /* TODO mat_tests-ex37_nsize-1_mat_type-baij_mat_block_size-2 fails with MatHeaderMerge */
220: MatHeaderReplace(Y,&B);
221: }
222: return(0);
223: }
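/*
   Example usage (a minimal sketch; error checking omitted): a MATSHELL can provide its
   own AXPY and skip MatAXPY_Basic() entirely, since the routine above queries MATOP_AXPY
   first. MyShellAXPY, comm, the sizes, and the shell context ctx are hypothetical.

     static PetscErrorCode MyShellAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
     {
       // update the shell's private data so that later MatMult(Y,...) applies (Y + a*X)
       return 0;
     }

     MatCreateShell(comm,m,n,M,N,ctx,&Y);
     MatShellSetOperation(Y,MATOP_AXPY,(void (*)(void))MyShellAXPY);
*/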
225: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
226: {
227: PetscInt i,start,end,j,ncols,m,n;
228: PetscErrorCode ierr;
229: const PetscInt *row;
230: PetscScalar *val;
231: const PetscScalar *vals;
234: MatGetSize(X,&m,&n);
235: MatGetOwnershipRange(X,&start,&end);
236: MatGetRowUpperTriangular(Y);
237: MatGetRowUpperTriangular(X);
238: if (a == 1.0) {
239: for (i = start; i < end; i++) {
240: MatGetRow(Y,i,&ncols,&row,&vals);
241: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
242: MatRestoreRow(Y,i,&ncols,&row,&vals);
244: MatGetRow(X,i,&ncols,&row,&vals);
245: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
246: MatRestoreRow(X,i,&ncols,&row,&vals);
247: }
248: } else {
249: PetscInt vs = 100;
250: /* realloc if needed, as this function may be used in parallel */
251: PetscMalloc1(vs,&val);
252: for (i=start; i<end; i++) {
253: MatGetRow(Y,i,&ncols,&row,&vals);
254: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
255: MatRestoreRow(Y,i,&ncols,&row,&vals);
257: MatGetRow(X,i,&ncols,&row,&vals);
258: if (vs < ncols) {
259: vs = PetscMin(2*ncols,n);
260: PetscRealloc(vs*sizeof(*val),&val);
261: }
262: for (j=0; j<ncols; j++) val[j] = a*vals[j];
263: MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
264: MatRestoreRow(X,i,&ncols,&row,&vals);
265: }
266: PetscFree(val);
267: }
268: MatRestoreRowUpperTriangular(Y);
269: MatRestoreRowUpperTriangular(X);
270: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
271: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
272: return(0);
273: }
275: /*@
276: MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.
278: Neighbor-wise Collective on Mat
280: Input Parameters:
281: + Y - the matrix
282: - a - the scalar to add to the diagonal (a PetscScalar)
284: Level: intermediate
286: Notes:
287: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
288: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
289: entry). No operation is performed when a is zero.
291: To form Y = Y + diag(V) use MatDiagonalSet()
293: .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
294: @*/
295: PetscErrorCode MatShift(Mat Y,PetscScalar a)
296: {
301: if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
302: if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
303: MatCheckPreallocated(Y,1);
304: if (a == 0.0) return(0);
306: if (Y->ops->shift) {
307: (*Y->ops->shift)(Y,a);
308: } else {
309: MatShift_Basic(Y,a);
310: }
312: PetscObjectStateIncrease((PetscObject)Y);
313: return(0);
314: }
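/*
   Example usage (a minimal sketch; error checking omitted): A <- A + sigma*I for a
   square matrix. Per the Notes above, MatShift() is only fast when every diagonal entry
   is already present in the nonzero pattern, so explicit zeros are inserted on the
   diagonal during the initial assembly of A (shown with ADD_VALUES; the mode must match
   the one used for the other entries in that assembly).

     PetscInt    i,rstart,rend;
     PetscScalar zero = 0.0;
     MatGetOwnershipRange(A,&rstart,&rend);
     for (i = rstart; i < rend; i++) MatSetValues(A,1,&i,1,&i,&zero,ADD_VALUES);  // reserve the diagonal
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatShift(A,1.0e-3);                                                          // cheap: all diagonal entries exist
*/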
316: PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
317: {
318: PetscErrorCode ierr;
319: PetscInt i,start,end;
320: const PetscScalar *v;
323: MatGetOwnershipRange(Y,&start,&end);
324: VecGetArrayRead(D,&v);
325: for (i=start; i<end; i++) {
326: MatSetValues(Y,1,&i,1,&i,v+i-start,is);
327: }
328: VecRestoreArrayRead(D,&v);
329: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
330: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
331: return(0);
332: }
334: /*@
335: MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
336: that is represented as a vector, or sets Y[i,i] = D[i] if the InsertMode
337: is INSERT_VALUES.
339: Neighbor-wise Collective on Mat
341: Input Parameters:
342: + Y - the input matrix
343: . D - the diagonal matrix, represented as a vector
344: - is - INSERT_VALUES or ADD_VALUES
346: Notes:
347: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
348: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
349: entry).
351: Level: intermediate
353: .seealso: MatShift(), MatScale(), MatDiagonalScale()
354: @*/
355: PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is)
356: {
358: PetscInt matlocal,veclocal;
363: MatGetLocalSize(Y,&matlocal,NULL);
364: VecGetLocalSize(D,&veclocal);
365: if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
366: if (Y->ops->diagonalset) {
367: (*Y->ops->diagonalset)(Y,D,is);
368: } else {
369: MatDiagonalSet_Default(Y,D,is);
370: }
371: PetscObjectStateIncrease((PetscObject)Y);
372: return(0);
373: }
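/*
   Example usage (a minimal sketch; error checking omitted): adding a diagonal stored in
   a vector to an assembled matrix Y. The vector must share Y's row distribution, which
   MatCreateVecs() guarantees.

     Vec D;
     MatCreateVecs(Y,NULL,&D);        // left vector: compatible with the rows of Y
     VecSet(D,4.0);
     MatDiagonalSet(Y,D,ADD_VALUES);  // Y <- Y + 4*I
     VecDestroy(&D);
*/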
375: /*@
376: MatAYPX - Computes Y = a*Y + X.
378: Logically Collective on Mat
380: Input Parameters:
381: + a - the PetscScalar multiplier
382: . Y - the first matrix
383: . X - the second matrix
384: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (the nonzeros of X are a subset of those of Y)
386: Level: intermediate
388: .seealso: MatAXPY()
389: @*/
390: PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
391: {
395: MatScale(Y,a);
396: MatAXPY(Y,1.0,X,str);
397: return(0);
398: }
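/*
   Example usage (a minimal sketch; error checking omitted): as the implementation above
   shows, MatAYPX() is simply MatScale() followed by MatAXPY().

     MatAYPX(Y,0.5,X,SAME_NONZERO_PATTERN);   // Y <- 0.5*Y + X
*/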
400: /*@
401: MatComputeOperator - Computes the explicit matrix representation of a given operator
403: Collective on Mat
405: Input Parameters:
406: + inmat - the matrix
407: - mattype - the matrix type for the explicit operator
409: Output Parameter:
410: . mat - the explicit operator
412: Notes:
413: This computation is done by applying the operator to the columns of the identity matrix.
414: This routine is costly in general, and is recommended for use only with relatively small systems.
415: Currently, this routine uses a dense matrix format if mattype == NULL.
417: Level: advanced
419: @*/
420: PetscErrorCode MatComputeOperator(Mat inmat,MatType mattype,Mat *mat)
421: {
427: MatConvert_Shell(inmat,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
428: return(0);
429: }
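/*
   Example usage (a minimal sketch; error checking omitted): forming the explicit matrix
   of a matrix-free operator, e.g. to inspect a MATSHELL while debugging. S is assumed to
   implement MatMult().

     Mat S,E;
     MatComputeOperator(S,NULL,&E);             // NULL mattype yields a dense matrix
     MatView(E,PETSC_VIEWER_STDOUT_WORLD);
     MatDestroy(&E);
*/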
431: /*@
432: MatComputeOperatorTranspose - Computes the explicit matrix representation of
433: a given matrix that can apply MatMultTranspose()
435: Collective on Mat
437: Input Parameters:
438: + inmat - the matrix
- mattype - the matrix type for the explicit operator
440: Output Parameter:
441: . mat - the explicit operator, transposed
443: Notes:
444: This computation is done by applying the transpose of the operator to columns of the identity matrix.
445: This routine is costly in general, and is recommended for use only with relatively small systems.
446: Currently, this routine uses a dense matrix format if mattype == NULL.
448: Level: advanced
450: @*/
451: PetscErrorCode MatComputeOperatorTranspose(Mat inmat,MatType mattype,Mat *mat)
452: {
453: Mat A;
459: MatCreateTranspose(inmat,&A);
460: MatConvert_Shell(A,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
461: MatDestroy(&A);
462: return(0);
463: }
465: /*@
466: MatChop - Set all values in the matrix whose absolute value is less than the tolerance to zero
468: Input Parameters:
469: + A - The matrix
470: - tol - The zero tolerance
472: Output Parameters:
473: . A - The chopped matrix
475: Level: intermediate
477: .seealso: MatCreate(), MatZeroEntries()
478: @*/
479: PetscErrorCode MatChop(Mat A, PetscReal tol)
480: {
481: Mat a;
482: PetscScalar *newVals;
483: PetscInt *newCols, rStart, rEnd, numRows, maxRows, r, colMax = 0;
484: PetscBool flg;
488: PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, "");
489: if (flg) {
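      /* dense path: note the variable reuse below -- r holds the leading dimension (LDA),
         rStart/rEnd the local row/column counts, and maxRows/colMax serve as the row/column loop indices */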
490: MatDenseGetLocalMatrix(A, &a);
491: MatDenseGetLDA(a, &r);
492: MatGetSize(a, &rStart, &rEnd);
493: MatDenseGetArray(a, &newVals);
494: for (; colMax < rEnd; ++colMax) {
495: for (maxRows = 0; maxRows < rStart; ++maxRows) {
496: newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) < tol ? 0.0 : newVals[maxRows + colMax * r];
497: }
498: }
499: MatDenseRestoreArray(a, &newVals);
500: } else {
501: MatGetOwnershipRange(A, &rStart, &rEnd);
502: MatGetRowUpperTriangular(A);
503: for (r = rStart; r < rEnd; ++r) {
504: PetscInt ncols;
506: MatGetRow(A, r, &ncols, NULL, NULL);
507: colMax = PetscMax(colMax, ncols);
508: MatRestoreRow(A, r, &ncols, NULL, NULL);
509: }
510: numRows = rEnd - rStart;
511: MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
512: PetscMalloc2(colMax, &newCols, colMax, &newVals);
513: MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg); /* cache user-defined value */
514: MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
515: /* short-circuit the communication in MatAssemblyBegin() and MatAssemblyEnd(), */
516: /* which are potentially called many times depending on the distribution of A */
517: for (r = rStart; r < rStart+maxRows; ++r) {
518: const PetscScalar *vals;
519: const PetscInt *cols;
520: PetscInt ncols, newcols, c;
522: if (r < rEnd) {
523: MatGetRow(A, r, &ncols, &cols, &vals);
524: for (c = 0; c < ncols; ++c) {
525: newCols[c] = cols[c];
526: newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
527: }
528: newcols = ncols;
529: MatRestoreRow(A, r, &ncols, &cols, &vals);
530: MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
531: }
532: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
533: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
534: }
535: MatRestoreRowUpperTriangular(A);
536: PetscFree2(newCols, newVals);
537: MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg); /* reset option to its user-defined value */
538: }
539: return(0);
540: }
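/*
   Example usage (a minimal sketch; error checking omitted): dropping round-off sized
   entries, e.g. after an operation that produces many tiny values. The chopped entries
   are set to zero but remain in the nonzero pattern.

     MatChop(A,1.0e-12);   // zero every stored entry with absolute value below 1e-12
*/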