Actual source code: axpy.c

petsc-3.11.4 2019-09-28

#include <petsc/private/matimpl.h>

/*@
   MatAXPY - Computes Y = a*X + Y.

   Logically Collective on Mat

   Input Parameters:
+  a - the scalar multiplier
.  X - the first matrix
.  Y - the second matrix
-  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
         or SUBSET_NONZERO_PATTERN (the nonzero pattern of X is a subset of Y's)

   Level: intermediate

.keywords: matrix, add

.seealso: MatAYPX()
 @*/
PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt       M1,M2,N1,N2;
  PetscInt       m1,m2,n1,n2;
  PetscBool      sametype;

  MatGetSize(X,&M1,&N1);
  MatGetSize(Y,&M2,&N2);
  MatGetLocalSize(X,&m1,&n1);
  MatGetLocalSize(Y,&m2,&n2);
  if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes X %D x %D, Y %D x %D",M1,N1,M2,N2);
  if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes X %D x %D, Y %D x %D",m1,n1,m2,n2);

  PetscStrcmp(((PetscObject)X)->type_name,((PetscObject)Y)->type_name,&sametype);
  PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
  if (Y->ops->axpy && sametype) {
    (*Y->ops->axpy)(Y,a,X,str);
  } else {
    if (str != DIFFERENT_NONZERO_PATTERN) {
      MatAXPY_Basic(Y,a,X,str);
    } else {
      Mat B;

      MatAXPY_Basic_Preallocate(Y,X,&B);
      MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
      MatHeaderReplace(Y,&B);
    }
  }
  PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
  }
#endif
  return(0);
}
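
/*
   Example (not part of axpy.c): a minimal sketch of calling MatAXPY() to
   form Y = 2*X + Y for two sequential AIJ matrices that share a nonzero
   pattern, so SAME_NONZERO_PATTERN applies and the fast path is taken.
*/
#include <petscmat.h>

int main(int argc,char **argv)
{
  Mat            X,Y;
  PetscInt       i,n = 4;
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,n,n,1,NULL,&X);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,n,n,1,NULL,&Y);CHKERRQ(ierr);
  for (i=0; i<n; i++) {  /* both matrices are diagonal here */
    ierr = MatSetValue(X,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatSetValue(Y,i,i,(PetscScalar)i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(X,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(X,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);  /* Y = 2*X + Y */
  ierr = MatView(Y,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);

  ierr = MatDestroy(&X);CHKERRQ(ierr);
  ierr = MatDestroy(&Y);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}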

PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
{
  PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;

  /* look for any available faster alternative to the general preallocator */
  PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);
  if (preall) {
    (*preall)(Y,X,B);
  } else { /* use MATPREALLOCATOR; assumes X and Y have the same row/column distribution */
    Mat      preallocator;
    PetscInt r,rstart,rend;
    PetscInt m,n,M,N;

    MatGetRowUpperTriangular(Y);
    MatGetRowUpperTriangular(X);
    MatGetSize(Y,&M,&N);
    MatGetLocalSize(Y,&m,&n);
    MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);
    MatSetType(preallocator,MATPREALLOCATOR);
    MatSetSizes(preallocator,m,n,M,N);
    MatSetUp(preallocator);
    MatGetOwnershipRange(preallocator,&rstart,&rend);
    for (r = rstart; r < rend; ++r) {
      PetscInt          ncols;
      const PetscInt    *row;
      const PetscScalar *vals;

      MatGetRow(Y,r,&ncols,&row,&vals);
      MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
      MatRestoreRow(Y,r,&ncols,&row,&vals);
      MatGetRow(X,r,&ncols,&row,&vals);
      MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
      MatRestoreRow(X,r,&ncols,&row,&vals);
    }
    MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);
    MatRestoreRowUpperTriangular(Y);
    MatRestoreRowUpperTriangular(X);

    MatCreate(PetscObjectComm((PetscObject)Y),B);
    MatSetType(*B,((PetscObject)Y)->type_name);
    MatSetSizes(*B,m,n,M,N);
    MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);
    MatDestroy(&preallocator);
  }
  return(0);
}
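
/*
   Aside (not part of axpy.c): the MATPREALLOCATOR idiom used above, shown
   standalone for clarity, assuming <petscmat.h>. A throwaway preallocator
   matrix records the nonzero pattern in a symbolic first pass, then
   MatPreallocatorPreallocate() sizes the real matrix so the numeric pass
   inserts without costly mallocs. The tridiagonal pattern below is just an
   assumed example.
*/
static PetscErrorCode BuildWithPreallocator(MPI_Comm comm,PetscInt n,Mat *A)
{
  Mat            prealloc;
  PetscInt       i,rstart,rend;
  PetscErrorCode ierr;

  ierr = MatCreate(comm,&prealloc);CHKERRQ(ierr);
  ierr = MatSetType(prealloc,MATPREALLOCATOR);CHKERRQ(ierr);
  ierr = MatSetSizes(prealloc,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
  ierr = MatSetUp(prealloc);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(prealloc,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {  /* symbolic pass: record the pattern, values ignored */
    if (i > 0)   { ierr = MatSetValue(prealloc,i,i-1,0.0,INSERT_VALUES);CHKERRQ(ierr); }
    ierr = MatSetValue(prealloc,i,i,0.0,INSERT_VALUES);CHKERRQ(ierr);
    if (i < n-1) { ierr = MatSetValue(prealloc,i,i+1,0.0,INSERT_VALUES);CHKERRQ(ierr); }
  }
  ierr = MatAssemblyBegin(prealloc,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(prealloc,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
  ierr = MatPreallocatorPreallocate(prealloc,PETSC_TRUE,*A);CHKERRQ(ierr);
  ierr = MatDestroy(&prealloc);CHKERRQ(ierr);
  /* numeric pass: MatSetValues() on *A now lands in preallocated slots */
  return 0;
}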

PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt          i,start,end,j,ncols,m,n;
  PetscErrorCode    ierr;
  const PetscInt    *row;
  PetscScalar       *val;
  const PetscScalar *vals;

  MatGetSize(X,&m,&n);
  MatGetOwnershipRange(X,&start,&end);
  MatGetRowUpperTriangular(X);
  if (a == 1.0) {
    for (i = start; i < end; i++) {
      MatGetRow(X,i,&ncols,&row,&vals);
      MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
  } else {
    PetscInt vs = 100;
    /* scratch buffer for the scaled row; grown if a row has more nonzeros than currently allocated */
    PetscMalloc1(vs,&val);
    for (i=start; i<end; i++) {
      MatGetRow(X,i,&ncols,&row,&vals);
      if (vs < ncols) {
        vs   = PetscMin(2*ncols,n);
        PetscRealloc(vs*sizeof(*val),&val);
      }
      for (j=0; j<ncols; j++) val[j] = a*vals[j];
      MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
    PetscFree(val);
  }
  MatRestoreRowUpperTriangular(X);
  MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
  return(0);
}

PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt          i,start,end,j,ncols,m,n;
  PetscErrorCode    ierr;
  const PetscInt    *row;
  PetscScalar       *val;
  const PetscScalar *vals;

  MatGetSize(X,&m,&n);
  MatGetOwnershipRange(X,&start,&end);
  MatGetRowUpperTriangular(Y);
  MatGetRowUpperTriangular(X);
  if (a == 1.0) {
    for (i = start; i < end; i++) {
      MatGetRow(Y,i,&ncols,&row,&vals);
      MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(Y,i,&ncols,&row,&vals);

      MatGetRow(X,i,&ncols,&row,&vals);
      MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
  } else {
    PetscInt vs = 100;
    /* scratch buffer for the scaled row; grown if a row has more nonzeros than currently allocated */
    PetscMalloc1(vs,&val);
    for (i=start; i<end; i++) {
      MatGetRow(Y,i,&ncols,&row,&vals);
      MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(Y,i,&ncols,&row,&vals);

      MatGetRow(X,i,&ncols,&row,&vals);
      if (vs < ncols) {
        vs   = PetscMin(2*ncols,n);
        PetscRealloc(vs*sizeof(*val),&val);
      }
      for (j=0; j<ncols; j++) val[j] = a*vals[j];
      MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
    PetscFree(val);
  }
  MatRestoreRowUpperTriangular(Y);
  MatRestoreRowUpperTriangular(X);
  MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
  return(0);
}

/*@
   MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.

   Neighbor-wise Collective on Mat

   Input Parameters:
+  Y - the matrix
-  a - the PetscScalar

   Level: intermediate

   Notes:
    If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
   fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
   entry).

   To form Y = Y + diag(V) use MatDiagonalSet()

   Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
    preallocated with 1 nonzero per row for the values to be added. This allows for fast shifting of an empty matrix.

.keywords: matrix, add, shift

.seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
 @*/
PetscErrorCode  MatShift(Mat Y,PetscScalar a)
{

  if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  MatCheckPreallocated(Y,1);

  if (Y->ops->shift) {
    (*Y->ops->shift)(Y,a);
  } else {
    MatShift_Basic(Y,a);
  }

  PetscObjectStateIncrease((PetscObject)Y);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
  }
#endif
  return(0);
}
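
/*
   Example (not part of axpy.c): typical MatShift() use, adding a small
   diagonal regularization to an already assembled matrix. A sketch assuming
   <petscmat.h>; the helper name and the shift value are illustrative only.
*/
static PetscErrorCode RegularizeDiagonal(Mat A,PetscReal eps)
{
  PetscErrorCode ierr;

  ierr = MatShift(A,(PetscScalar)eps);CHKERRQ(ierr);  /* A = A + eps*I */
  return 0;
}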

PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
{
  PetscInt       i,start,end;
  PetscScalar    *v;

  MatGetOwnershipRange(Y,&start,&end);
  VecGetArray(D,&v);
  for (i=start; i<end; i++) {
    MatSetValues(Y,1,&i,1,&i,v+i-start,is);
  }
  VecRestoreArray(D,&v);
  MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
  return(0);
}

/*@
   MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
   that is represented as a vector; or Y[i,i] = D[i] if the InsertMode is
   INSERT_VALUES.

   Neighbor-wise Collective on Mat and Vec

   Input Parameters:
+  Y - the input matrix
.  D - the diagonal matrix, represented as a vector
-  is - INSERT_VALUES or ADD_VALUES

   Notes:
    If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
   fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
   entry).

   Level: intermediate

.keywords: matrix, add, shift, diagonal

.seealso: MatShift(), MatScale(), MatDiagonalScale()
@*/
PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
{
  PetscInt       matlocal,veclocal;

  MatGetLocalSize(Y,&matlocal,NULL);
  VecGetLocalSize(D,&veclocal);
  if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
  if (Y->ops->diagonalset) {
    (*Y->ops->diagonalset)(Y,D,is);
  } else {
    MatDiagonalSet_Default(Y,D,is);
  }
  PetscObjectStateIncrease((PetscObject)Y);
  return(0);
}
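
/*
   Example (not part of axpy.c): a sketch of MatDiagonalSet(), assuming
   <petscmat.h>. MatCreateVecs() gives a vector with the same local row
   distribution as A, which is what the local-size check above requires;
   INSERT_VALUES overwrites the diagonal rather than adding to it.
*/
static PetscErrorCode SetUnitDiagonal(Mat A)
{
  Vec            d;
  PetscErrorCode ierr;

  ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);  /* d matches A's row layout */
  ierr = VecSet(d,1.0);CHKERRQ(ierr);
  ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);  /* A[i,i] = 1 */
  ierr = VecDestroy(&d);CHKERRQ(ierr);
  return 0;
}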

/*@
   MatAYPX - Computes Y = a*Y + X.

   Logically Collective on Mat

   Input Parameters:
+  a - the PetscScalar multiplier
.  Y - the first matrix
.  X - the second matrix
-  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN

   Level: intermediate

.keywords: matrix, add

.seealso: MatAXPY()
 @*/
PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscScalar    one = 1.0;
  PetscInt       mX,mY,nX,nY;

  MatGetSize(X,&mX,&nX);
  MatGetSize(Y,&mY,&nY);
  if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D x %D first, %D x %D second",mX,nX,mY,nY);

  MatScale(Y,a);
  MatAXPY(Y,one,X,str);
  return(0);
}

/*@
    MatComputeExplicitOperator - Computes the explicit matrix

    Collective on Mat

    Input Parameter:
.   inmat - the matrix

    Output Parameter:
.   mat - the explicit operator

    Notes:
    This computation is done by applying the operator to columns of the
    identity matrix.

    Currently, this routine uses a dense matrix format when 1 processor
    is used and a sparse format otherwise.  This routine is costly in general,
    and is recommended for use only with relatively small systems.

    Level: advanced

.keywords: Mat, compute, explicit, operator
@*/
PetscErrorCode  MatComputeExplicitOperator(Mat inmat,Mat *mat)
{
  MPI_Comm       comm;
  PetscMPIInt    size;

  PetscObjectGetComm((PetscObject)inmat,&comm);
  MPI_Comm_size(comm,&size);
  MatConvert_Shell(inmat,size == 1 ? MATSEQDENSE : MATAIJ,MAT_INITIAL_MATRIX,mat);
  return(0);
}
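
/*
   Example (not part of axpy.c): a sketch of materializing a matrix-free
   operator, assuming <petscmat.h>. A MATSHELL that applies y = 2*x is
   wrapped, then MatComputeExplicitOperator() assembles its explicit (dense,
   on one process) representation by applying it to identity columns. The
   helper names are illustrative only.
*/
static PetscErrorCode MyMult(Mat S,Vec x,Vec y)
{
  PetscErrorCode ierr;

  ierr = VecCopy(x,y);CHKERRQ(ierr);
  ierr = VecScale(y,2.0);CHKERRQ(ierr);  /* y = 2*x */
  return 0;
}

static PetscErrorCode MaterializeShell(MPI_Comm comm,PetscInt n,Mat *E)
{
  Mat            S;
  PetscErrorCode ierr;

  ierr = MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,n,n,NULL,&S);CHKERRQ(ierr);
  ierr = MatShellSetOperation(S,MATOP_MULT,(void (*)(void))MyMult);CHKERRQ(ierr);
  ierr = MatComputeExplicitOperator(S,E);CHKERRQ(ierr);  /* E is explicitly 2*I */
  ierr = MatDestroy(&S);CHKERRQ(ierr);
  return 0;
}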

/*@
    MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of
        a given matrix that can apply MatMultTranspose()

    Collective on Mat

    Input Parameter:
.   inmat - the matrix

    Output Parameter:
.   mat - the explicit operator transposed

    Notes:
    This computation is done by applying the transpose of the operator to columns of the
    identity matrix.

    Currently, this routine uses a dense matrix format when 1 processor
    is used and a sparse format otherwise.  This routine is costly in general,
    and is recommended for use only with relatively small systems.

    Level: advanced

.keywords: Mat, compute, explicit, operator
@*/
PetscErrorCode  MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat)
{
  Vec            in,out;
  PetscInt       i,m,n,M,N,*rows,start,end;
  MPI_Comm       comm;
  PetscScalar    *array,zero = 0.0,one = 1.0;
  PetscMPIInt    size;

  PetscObjectGetComm((PetscObject)inmat,&comm);
  MPI_Comm_size(comm,&size);

  MatGetLocalSize(inmat,&m,&n);
  MatGetSize(inmat,&M,&N);
  MatCreateVecs(inmat,&in,&out);
  VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
  VecGetOwnershipRange(out,&start,&end);
  PetscMalloc1(m,&rows);
  for (i=0; i<m; i++) rows[i] = start + i;

  MatCreate(comm,mat);
  MatSetSizes(*mat,m,n,M,N);
  if (size == 1) {
    MatSetType(*mat,MATSEQDENSE);
    MatSeqDenseSetPreallocation(*mat,NULL);
  } else {
    MatSetType(*mat,MATMPIAIJ);
    MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
  }

  for (i=0; i<N; i++) {
    VecSet(in,zero);
    VecSetValues(in,1,&i,&one,INSERT_VALUES);
    VecAssemblyBegin(in);
    VecAssemblyEnd(in);

    MatMultTranspose(inmat,in,out);

    VecGetArray(out,&array);
    MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
    VecRestoreArray(out,&array);
  }
  PetscFree(rows);
  VecDestroy(&out);
  VecDestroy(&in);
  MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
  return(0);
}

/*@
  MatChop - Set all values in the matrix with absolute value less than the tolerance to zero

  Input Parameters:
+ A   - The matrix
- tol - The zero tolerance

  Output Parameter:
. A - The chopped matrix

  Level: intermediate

.seealso: MatCreate(), MatZeroEntries()
 @*/
PetscErrorCode MatChop(Mat A, PetscReal tol)
{
  PetscScalar    *newVals;
  PetscInt       *newCols;
  PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;

  MatGetOwnershipRange(A, &rStart, &rEnd);
  for (r = rStart; r < rEnd; ++r) {
    PetscInt ncols;

    MatGetRow(A, r, &ncols, NULL, NULL);
    colMax = PetscMax(colMax, ncols);
    MatRestoreRow(A, r, &ncols, NULL, NULL);
  }
  numRows = rEnd - rStart;
  MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
  PetscMalloc2(colMax,&newCols,colMax,&newVals);
  /* loop to the global maximum local row count so that every process makes the
     same number of (collective) assembly calls, even after its own rows run out */
  for (r = rStart; r < rStart+maxRows; ++r) {
    const PetscScalar *vals;
    const PetscInt    *cols;
    PetscInt           ncols, newcols, c;

    if (r < rEnd) {
      MatGetRow(A, r, &ncols, &cols, &vals);
      for (c = 0; c < ncols; ++c) {
        newCols[c] = cols[c];
        newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
      }
      newcols = ncols;
      MatRestoreRow(A, r, &ncols, &cols, &vals);
      MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
    }
    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
  }
  PetscFree2(newCols,newVals);
  return(0);
}
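
/*
   Example (not part of axpy.c): a sketch of MatChop(), assuming <petscmat.h>,
   zeroing entries of an assembled matrix whose magnitude falls below a drop
   tolerance. Note the chopped entries become explicit zeros; the stored
   nonzero pattern is unchanged. The tolerance here is an assumed value.
*/
static PetscErrorCode DropSmallEntries(Mat A)
{
  PetscErrorCode ierr;

  ierr = MatChop(A,1.0e-12);CHKERRQ(ierr);  /* |A[i,j]| < 1e-12 becomes 0 */
  return 0;
}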