Actual source code: axpy.c

petsc-3.12.5 2020-03-29
Report Typos and Errors

  2:  #include <petsc/private/matimpl.h>

  4: static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T)
  5: {
  6:   PetscErrorCode ierr,(*f)(Mat,Mat*);
  7:   Mat            A,F;

 10:   PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f);
 11:   if (f) {
 12:     MatTransposeGetMat(T,&A);
 13:     if (T == X) {
 14:       PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
 15:       MatTranspose(A,MAT_INITIAL_MATRIX,&F);
 16:       A = Y;
 17:     } else {
 18:       PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
 19:       MatTranspose(X,MAT_INITIAL_MATRIX,&F);
 20:     }
 21:   } else {
 22:     MatHermitianTransposeGetMat(T,&A);
 23:     if (T == X) {
 24:       PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
 25:       MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F);
 26:       A = Y;
 27:     } else {
 28:       PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
 29:       MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F);
 30:     }
 31:   }
 32:   MatAXPY(A,a,F,str);
 33:   MatDestroy(&F);
 34:   return(0);
 35: }

 37: /*@
 38:    MatAXPY - Computes Y = a*X + Y.

 40:    Logically  Collective on Mat

 42:    Input Parameters:
 43: +  a - the scalar multiplier
 44: .  X - the first matrix
 45: .  Y - the second matrix
 46: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
 47:          or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)

 49:    Level: intermediate

 51: .seealso: MatAYPX()
 52:  @*/
 53: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
 54: {
 56:   PetscInt       M1,M2,N1,N2;
 57:   PetscInt       m1,m2,n1,n2;
 58:   MatType        t1,t2;
 59:   PetscBool      sametype,transpose;

 66:   MatGetSize(X,&M1,&N1);
 67:   MatGetSize(Y,&M2,&N2);
 68:   MatGetLocalSize(X,&m1,&n1);
 69:   MatGetLocalSize(Y,&m2,&n2);
 70:   if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes %D x %D, %D x %D",M1,M2,N1,N2);
 71:   if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes %D x %D, %D x %D",m1,m2,n1,n2);
 72:   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (Y)");
 73:   if (!X->assembled) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (X)");

 75:   MatGetType(X,&t1);
 76:   MatGetType(Y,&t2);
 77:   PetscStrcmp(t1,t2,&sametype);
 78:   PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
 79:   if (Y->ops->axpy && sametype) {
 80:     (*Y->ops->axpy)(Y,a,X,str);
 81:   } else {
 82:     PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose);
 83:     if (transpose) {
 84:         MatTransposeAXPY_Private(Y,a,X,str,X);
 85:     } else {
 86:       PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose);
 87:       if (transpose) {
 88:         MatTransposeAXPY_Private(Y,a,X,str,Y);
 89:       } else {
 90:         if (str != DIFFERENT_NONZERO_PATTERN) {
 91:           MatAXPY_Basic(Y,a,X,str);
 92:         } else {
 93:           Mat B;

 95:           MatAXPY_Basic_Preallocate(Y,X,&B);
 96:           MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
 97:           MatHeaderReplace(Y,&B);
 98:         }
 99:       }
100:     }
101:   }
102:   PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
103:   return(0);
104: }

106: PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
107: {
109:   PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;

112:   /* look for any available faster alternative to the general preallocator */
113:   PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);
114:   if (preall) {
115:     (*preall)(Y,X,B);
116:   } else { /* Use MatPrellocator, assumes same row-col distribution */
117:     Mat      preallocator;
118:     PetscInt r,rstart,rend;
119:     PetscInt m,n,M,N;

121:     MatGetRowUpperTriangular(Y);
122:     MatGetRowUpperTriangular(X);
123:     MatGetSize(Y,&M,&N);
124:     MatGetLocalSize(Y,&m,&n);
125:     MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);
126:     MatSetType(preallocator,MATPREALLOCATOR);
127:     MatSetSizes(preallocator,m,n,M,N);
128:     MatSetUp(preallocator);
129:     MatGetOwnershipRange(preallocator,&rstart,&rend);
130:     for (r = rstart; r < rend; ++r) {
131:       PetscInt          ncols;
132:       const PetscInt    *row;
133:       const PetscScalar *vals;

135:       MatGetRow(Y,r,&ncols,&row,&vals);
136:       MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
137:       MatRestoreRow(Y,r,&ncols,&row,&vals);
138:       MatGetRow(X,r,&ncols,&row,&vals);
139:       MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
140:       MatRestoreRow(X,r,&ncols,&row,&vals);
141:     }
142:     MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
143:     MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);
144:     MatRestoreRowUpperTriangular(Y);
145:     MatRestoreRowUpperTriangular(X);

147:     MatCreate(PetscObjectComm((PetscObject)Y),B);
148:     MatSetType(*B,((PetscObject)Y)->type_name);
149:     MatSetSizes(*B,m,n,M,N);
150:     MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);
151:     MatDestroy(&preallocator);
152:   }
153:   return(0);
154: }

156: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
157: {
158:   PetscInt          i,start,end,j,ncols,m,n;
159:   PetscErrorCode    ierr;
160:   const PetscInt    *row;
161:   PetscScalar       *val;
162:   const PetscScalar *vals;

165:   MatGetSize(X,&m,&n);
166:   MatGetOwnershipRange(X,&start,&end);
167:   MatGetRowUpperTriangular(X);
168:   if (a == 1.0) {
169:     for (i = start; i < end; i++) {
170:       MatGetRow(X,i,&ncols,&row,&vals);
171:       MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
172:       MatRestoreRow(X,i,&ncols,&row,&vals);
173:     }
174:   } else {
175:     PetscInt vs = 100;
176:     /* realloc if needed, as this function may be used in parallel */
177:     PetscMalloc1(vs,&val);
178:     for (i=start; i<end; i++) {
179:       MatGetRow(X,i,&ncols,&row,&vals);
180:       if (vs < ncols) {
181:         vs   = PetscMin(2*ncols,n);
182:         PetscRealloc(vs*sizeof(*val),&val);
183:       }
184:       for (j=0; j<ncols; j++) val[j] = a*vals[j];
185:       MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
186:       MatRestoreRow(X,i,&ncols,&row,&vals);
187:     }
188:     PetscFree(val);
189:   }
190:   MatRestoreRowUpperTriangular(X);
191:   MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
192:   MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
193:   return(0);
194: }

196: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
197: {
198:   PetscInt          i,start,end,j,ncols,m,n;
199:   PetscErrorCode    ierr;
200:   const PetscInt    *row;
201:   PetscScalar       *val;
202:   const PetscScalar *vals;

205:   MatGetSize(X,&m,&n);
206:   MatGetOwnershipRange(X,&start,&end);
207:   MatGetRowUpperTriangular(Y);
208:   MatGetRowUpperTriangular(X);
209:   if (a == 1.0) {
210:     for (i = start; i < end; i++) {
211:       MatGetRow(Y,i,&ncols,&row,&vals);
212:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
213:       MatRestoreRow(Y,i,&ncols,&row,&vals);

215:       MatGetRow(X,i,&ncols,&row,&vals);
216:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
217:       MatRestoreRow(X,i,&ncols,&row,&vals);
218:     }
219:   } else {
220:     PetscInt vs = 100;
221:     /* realloc if needed, as this function may be used in parallel */
222:     PetscMalloc1(vs,&val);
223:     for (i=start; i<end; i++) {
224:       MatGetRow(Y,i,&ncols,&row,&vals);
225:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
226:       MatRestoreRow(Y,i,&ncols,&row,&vals);

228:       MatGetRow(X,i,&ncols,&row,&vals);
229:       if (vs < ncols) {
230:         vs   = PetscMin(2*ncols,n);
231:         PetscRealloc(vs*sizeof(*val),&val);
232:       }
233:       for (j=0; j<ncols; j++) val[j] = a*vals[j];
234:       MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
235:       MatRestoreRow(X,i,&ncols,&row,&vals);
236:     }
237:     PetscFree(val);
238:   }
239:   MatRestoreRowUpperTriangular(Y);
240:   MatRestoreRowUpperTriangular(X);
241:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
242:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
243:   return(0);
244: }

246: /*@
247:    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.

249:    Neighbor-wise Collective on Mat

251:    Input Parameters:
252: +  Y - the matrices
253: -  a - the PetscScalar

255:    Level: intermediate

257:    Notes:
258:     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
259:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
260:    entry).

262:    To form Y = Y + diag(V) use MatDiagonalSet()

264:    Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
265:     preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.

267: .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
268:  @*/
269: PetscErrorCode  MatShift(Mat Y,PetscScalar a)
270: {

275:   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
276:   if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
277:   MatCheckPreallocated(Y,1);
278:   if (a == 0.0) return(0);

280:   if (Y->ops->shift) {
281:     (*Y->ops->shift)(Y,a);
282:   } else {
283:     MatShift_Basic(Y,a);
284:   }

286:   PetscObjectStateIncrease((PetscObject)Y);
287:   return(0);
288: }

290: PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
291: {
293:   PetscInt       i,start,end;
294:   PetscScalar    *v;

297:   MatGetOwnershipRange(Y,&start,&end);
298:   VecGetArray(D,&v);
299:   for (i=start; i<end; i++) {
300:     MatSetValues(Y,1,&i,1,&i,v+i-start,is);
301:   }
302:   VecRestoreArray(D,&v);
303:   MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
304:   MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
305:   return(0);
306: }

308: /*@
309:    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
310:    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
311:    INSERT_VALUES.

313:    Input Parameters:
314: +  Y - the input matrix
315: .  D - the diagonal matrix, represented as a vector
316: -  i - INSERT_VALUES or ADD_VALUES

318:    Neighbor-wise Collective on Mat

320:    Notes:
321:     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
322:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
323:    entry).

325:    Level: intermediate

327: .seealso: MatShift(), MatScale(), MatDiagonalScale()
328: @*/
329: PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
330: {
332:   PetscInt       matlocal,veclocal;

337:   MatGetLocalSize(Y,&matlocal,NULL);
338:   VecGetLocalSize(D,&veclocal);
339:   if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
340:   if (Y->ops->diagonalset) {
341:     (*Y->ops->diagonalset)(Y,D,is);
342:   } else {
343:     MatDiagonalSet_Default(Y,D,is);
344:   }
345:   PetscObjectStateIncrease((PetscObject)Y);
346:   return(0);
347: }

349: /*@
350:    MatAYPX - Computes Y = a*Y + X.

352:    Logically on Mat

354:    Input Parameters:
355: +  a - the PetscScalar multiplier
356: .  Y - the first matrix
357: .  X - the second matrix
358: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN

360:    Level: intermediate

362: .seealso: MatAXPY()
363:  @*/
364: PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
365: {
366:   PetscScalar    one = 1.0;
368:   PetscInt       mX,mY,nX,nY;

374:   MatGetSize(X,&mX,&nX);
375:   MatGetSize(X,&mY,&nY);
376:   if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
377:   MatScale(Y,a);
378:   MatAXPY(Y,one,X,str);
379:   return(0);
380: }

382: /*@
383:     MatComputeOperator - Computes the explicit matrix

385:     Collective on Mat

387:     Input Parameter:
388: +   inmat - the matrix
389: -   mattype - the matrix type for the explicit operator

391:     Output Parameter:
392: .   mat - the explict  operator

394:     Notes:
395:     This computation is done by applying the operators to columns of the identity matrix.
396:     This routine is costly in general, and is recommended for use only with relatively small systems.
397:     Currently, this routine uses a dense matrix format if mattype == NULL.

399:     Level: advanced

401: @*/
402: PetscErrorCode  MatComputeOperator(Mat inmat,MatType mattype,Mat *mat)
403: {

409:   MatConvert_Shell(inmat,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
410:   return(0);
411: }

413: /*@
414:     MatComputeOperatorTranspose - Computes the explicit matrix representation of
415:         a give matrix that can apply MatMultTranspose()

417:     Collective on Mat

419:     Input Parameter:
420: .   inmat - the matrix

422:     Output Parameter:
423: .   mat - the explict  operator transposed

425:     Notes:
426:     This computation is done by applying the transpose of the operator to columns of the identity matrix.
427:     This routine is costly in general, and is recommended for use only with relatively small systems.
428:     Currently, this routine uses a dense matrix format if mattype == NULL.

430:     Level: advanced

432: @*/
433: PetscErrorCode  MatComputeOperatorTranspose(Mat inmat,MatType mattype,Mat *mat)
434: {
435:   Mat            A;

441:   MatCreateTranspose(inmat,&A);
442:   MatConvert_Shell(A,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
443:   MatDestroy(&A);
444:   return(0);
445: }

447: /*@
448:   MatChop - Set all values in the matrix less than the tolerance to zero

450:   Input Parameters:
451: + A   - The matrix
452: - tol - The zero tolerance

454:   Output Parameters:
455: . A - The chopped matrix

457:   Level: intermediate

459: .seealso: MatCreate(), MatZeroEntries()
460:  @*/
461: PetscErrorCode MatChop(Mat A, PetscReal tol)
462: {
463:   PetscScalar    *newVals;
464:   PetscInt       *newCols;
465:   PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;

469:   MatGetOwnershipRange(A, &rStart, &rEnd);
470:   for (r = rStart; r < rEnd; ++r) {
471:     PetscInt ncols;

473:     MatGetRow(A, r, &ncols, NULL, NULL);
474:     colMax = PetscMax(colMax, ncols);
475:     MatRestoreRow(A, r, &ncols, NULL, NULL);
476:   }
477:   numRows = rEnd - rStart;
478:   MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
479:   PetscMalloc2(colMax,&newCols,colMax,&newVals);
480:   for (r = rStart; r < rStart+maxRows; ++r) {
481:     const PetscScalar *vals;
482:     const PetscInt    *cols;
483:     PetscInt           ncols, newcols, c;

485:     if (r < rEnd) {
486:       MatGetRow(A, r, &ncols, &cols, &vals);
487:       for (c = 0; c < ncols; ++c) {
488:         newCols[c] = cols[c];
489:         newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
490:       }
491:       newcols = ncols;
492:       MatRestoreRow(A, r, &ncols, &cols, &vals);
493:       MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
494:     }
495:     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
496:     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
497:   }
498:   PetscFree2(newCols,newVals);
499:   return(0);
500: }