Actual source code: axpy.c

petsc-3.8.4 2018-03-24

  2:  #include <petsc/private/matimpl.h>

  4: /*@
  5:    MatAXPY - Computes Y = a*X + Y.

  7:    Logically Collective on Mat

  9:    Input Parameters:
 10: +  Y - the matrix that is added to (and overwritten with the result)
 11: .  a - the scalar multiplier
 12: .  X - the matrix that is scaled by a and added to Y
 13: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
 14:          or SUBSET_NONZERO_PATTERN (the nonzero pattern of X is a subset of that of Y)

 16:    Level: intermediate

 18: .keywords: matrix, add

 20: .seealso: MatAYPX()
 21:  @*/
 22: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
 23: {
 25:   PetscInt       m1,m2,n1,n2;
 26:   PetscBool      sametype;

 32:   MatGetSize(X,&m1,&n1);
 33:   MatGetSize(Y,&m2,&n2);
 34:   if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2);

 36:   PetscStrcmp(((PetscObject)X)->type_name,((PetscObject)Y)->type_name,&sametype);
 37:   PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
 38:   if (Y->ops->axpy && sametype) {
 39:     (*Y->ops->axpy)(Y,a,X,str);
 40:   } else {
 41:     MatAXPY_Basic(Y,a,X,str);
 42:   }
 43:   PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
 44: #if defined(PETSC_HAVE_CUSP)
 45:   if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
 46:     Y->valid_GPU_matrix = PETSC_CUSP_CPU;
 47:   }
 48: #elif defined(PETSC_HAVE_VIENNACL)
 49:   if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
 50:     Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
 51:   }
 52: #elif defined(PETSC_HAVE_VECCUDA)
 53:   if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
 54:     Y->valid_GPU_matrix = PETSC_CUDA_CPU;
 55:   }
 56: #endif
 57:   return(0);
 58: }
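
/*
   Illustrative usage (not part of axpy.c): a minimal sketch of calling MatAXPY()
   on two sequential AIJ matrices that share a nonzero pattern.  The helper name
   is hypothetical, and error checking is not shown, matching the cleaned style
   of the listing above.
*/
PetscErrorCode ExampleMatAXPYUsage(void)
{
  Mat      X,Y;
  PetscInt i;

  MatCreateSeqAIJ(PETSC_COMM_SELF,4,4,1,NULL,&X);   /* 4x4, one nonzero per row */
  MatCreateSeqAIJ(PETSC_COMM_SELF,4,4,1,NULL,&Y);
  for (i=0; i<4; i++) {
    MatSetValue(X,i,i,1.0,INSERT_VALUES);
    MatSetValue(Y,i,i,10.0,INSERT_VALUES);
  }
  MatAssemblyBegin(X,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(X,MAT_FINAL_ASSEMBLY);
  MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);

  /* Y <- 2*X + Y; SAME_NONZERO_PATTERN allows the type-specific fast path */
  MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);

  MatDestroy(&X);
  MatDestroy(&Y);
  return(0);
}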

 60: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
 61: {
 62:   PetscInt          i,start,end,j,ncols,m,n;
 63:   PetscErrorCode    ierr;
 64:   const PetscInt    *row;
 65:   PetscScalar       *val;
 66:   const PetscScalar *vals;

 69:   MatGetSize(X,&m,&n);
 70:   MatGetOwnershipRange(X,&start,&end);
 71:   if (a == 1.0) {
 72:     for (i = start; i < end; i++) {
 73:       MatGetRow(X,i,&ncols,&row,&vals);
 74:       MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
 75:       MatRestoreRow(X,i,&ncols,&row,&vals);
 76:     }
 77:   } else {
 78:     PetscMalloc1(n+1,&val);
 79:     for (i=start; i<end; i++) {
 80:       MatGetRow(X,i,&ncols,&row,&vals);
 81:       for (j=0; j<ncols; j++) {
 82:         val[j] = a*vals[j];
 83:       }
 84:       MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
 85:       MatRestoreRow(X,i,&ncols,&row,&vals);
 86:     }
 87:     PetscFree(val);
 88:   }
 89:   MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
 90:   MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
 91:   return(0);
 92: }

 94: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
 95: {
 96:   PetscInt          i,start,end,j,ncols,m,n;
 97:   PetscErrorCode    ierr;
 98:   const PetscInt    *row;
 99:   PetscScalar       *val;
100:   const PetscScalar *vals;

103:   MatGetSize(X,&m,&n);
104:   MatGetOwnershipRange(X,&start,&end);
105:   if (a == 1.0) {
106:     for (i = start; i < end; i++) {
107:       MatGetRow(Y,i,&ncols,&row,&vals);
108:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
109:       MatRestoreRow(Y,i,&ncols,&row,&vals);

111:       MatGetRow(X,i,&ncols,&row,&vals);
112:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
113:       MatRestoreRow(X,i,&ncols,&row,&vals);
114:     }
115:   } else {
116:     PetscMalloc1(n+1,&val);
117:     for (i=start; i<end; i++) {
118:       MatGetRow(Y,i,&ncols,&row,&vals);
119:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
120:       MatRestoreRow(Y,i,&ncols,&row,&vals);

122:       MatGetRow(X,i,&ncols,&row,&vals);
123:       for (j=0; j<ncols; j++) {
124:         val[j] = a*vals[j];
125:       }
126:       MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
127:       MatRestoreRow(X,i,&ncols,&row,&vals);
128:     }
129:     PetscFree(val);
130:   }
131:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
132:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
133:   return(0);
134: }

136: /*@
137:    MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.

139:    Neighbor-wise Collective on Mat

141:    Input Parameters:
142: +  Y - the matrix
143: -  a - the scalar that is added to the diagonal of Y

145:    Level: intermediate

147:    Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
148:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
149:    entry).

151:    Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
152:     preallocated with 1 nonzero per row for the values to be added. This allows for fast shifting of an empty matrix.

154: .keywords: matrix, add, shift

156: .seealso: MatDiagonalSet()
157:  @*/
158: PetscErrorCode  MatShift(Mat Y,PetscScalar a)
159: {

164:   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
165:   if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
166:   MatCheckPreallocated(Y,1);

168:   if (Y->ops->shift) {
169:     (*Y->ops->shift)(Y,a);
170:   } else {
171:     MatShift_Basic(Y,a);
172:   }

174: #if defined(PETSC_HAVE_CUSP)
175:   if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
176:     Y->valid_GPU_matrix = PETSC_CUSP_CPU;
177:   }
178: #elif defined(PETSC_HAVE_VIENNACL)
179:   if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
180:     Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
181:   }
182: #elif defined(PETSC_HAVE_VECCUDA)
183:   if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
184:     Y->valid_GPU_matrix = PETSC_CUDA_CPU;
185:   }
186: #endif
187:   return(0);
188: }
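
/*
   Illustrative usage (not part of axpy.c): a minimal sketch of MatShift().
   Following the note above, every diagonal location is given an explicit zero
   entry before assembly so the shift does not have to insert new nonzeros.
   The helper name and the 100x100 size are hypothetical; error checking is not
   shown, matching the cleaned style of the listing.
*/
PetscErrorCode ExampleMatShiftUsage(void)
{
  Mat      A;
  PetscInt i,rstart,rend;

  MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,1,NULL,0,NULL,&A);
  MatGetOwnershipRange(A,&rstart,&rend);
  /* give every diagonal location an explicit (zero) entry so MatShift() is fast */
  for (i=rstart; i<rend; i++) MatSetValue(A,i,i,0.0,INSERT_VALUES);
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  MatShift(A,3.0);   /* A <- A + 3*I */
  MatDestroy(&A);
  return(0);
}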

190: PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
191: {
193:   PetscInt       i,start,end;
194:   PetscScalar    *v;

197:   MatGetOwnershipRange(Y,&start,&end);
198:   VecGetArray(D,&v);
199:   for (i=start; i<end; i++) {
200:     MatSetValues(Y,1,&i,1,&i,v+i-start,is);
201:   }
202:   VecRestoreArray(D,&v);
203:   MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
204:   MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
205:   return(0);
206: }

208: /*@
209:    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix that
210:    is represented as a vector, when the InsertMode is ADD_VALUES; when the
211:    InsertMode is INSERT_VALUES it sets Y[i,i] = D[i] instead.

213:    Input Parameters:
214: +  Y - the input matrix
215: .  D - the diagonal matrix, represented as a vector
216: -  is - INSERT_VALUES or ADD_VALUES

218:    Neighbor-wise Collective on Mat and Vec

220:    Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
221:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
222:    entry).

224:    Level: intermediate

226: .keywords: matrix, add, shift, diagonal

228: .seealso: MatShift()
229: @*/
230: PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
231: {
233:   PetscInt       matlocal,veclocal;

238:   MatGetLocalSize(Y,&matlocal,NULL);
239:   VecGetLocalSize(D,&veclocal);
240:   if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
241:   if (Y->ops->diagonalset) {
242:     (*Y->ops->diagonalset)(Y,D,is);
243:   } else {
244:     MatDiagonalSet_Default(Y,D,is);
245:   }
246:   return(0);
247: }
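
/*
   Illustrative usage (not part of axpy.c): a minimal sketch of MatDiagonalSet().
   A is assumed to be an assembled square matrix; the left work vector obtained
   from MatCreateVecs() has the same local row layout as A, which is exactly
   what the routine above checks.  The helper name is hypothetical and error
   checking is not shown.
*/
PetscErrorCode ExampleMatDiagonalSetUsage(Mat A)
{
  Vec D;

  MatCreateVecs(A,NULL,&D);            /* left vector: local size = local rows of A */
  VecSet(D,5.0);
  MatDiagonalSet(A,D,INSERT_VALUES);   /* A[i][i] = 5  */
  MatDiagonalSet(A,D,ADD_VALUES);      /* A[i][i] += 5 */
  VecDestroy(&D);
  return(0);
}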

249: /*@
250:    MatAYPX - Computes Y = a*Y + X.

252:    Logically Collective on Mat

254:    Input Parameters:
255: +  Y - the matrix that is scaled and updated (Y = a*Y + X)
256: .  a - the scalar multiplier
257: .  X - the matrix that is added to the scaled Y
258: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN

260:    Level: intermediate

262: .keywords: matrix, add

264: .seealso: MatAXPY()
265:  @*/
266: PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
267: {
268:   PetscScalar    one = 1.0;
270:   PetscInt       mX,mY,nX,nY;

276:   MatGetSize(X,&mX,&nX);
277:   MatGetSize(Y,&mY,&nY);
278:   if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);

280:   MatScale(Y,a);
281:   MatAXPY(Y,one,X,str);
282:   return(0);
283: }
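
/*
   Illustrative usage (not part of axpy.c): a minimal sketch of MatAYPX().  X and
   Y are assumed to be assembled matrices of the same size, with the nonzero
   pattern of X contained in that of Y, so SUBSET_NONZERO_PATTERN is appropriate
   for the MatAXPY() call made internally.  The helper name is hypothetical and
   error checking is not shown.
*/
PetscErrorCode ExampleMatAYPXUsage(Mat Y,Mat X)
{
  MatAYPX(Y,0.5,X,SUBSET_NONZERO_PATTERN);   /* Y <- 0.5*Y + X */
  return(0);
}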

285: /*@
286:     MatComputeExplicitOperator - Computes the explicit matrix that represents the given operator

288:     Collective on Mat

290:     Input Parameter:
291: .   inmat - the matrix

293:     Output Parameter:
294: .   mat - the explicit operator

296:     Notes:
297:     This computation is done by applying the operators to columns of the
298:     identity matrix.

300:     Currently, this routine uses a dense matrix format when 1 processor
301:     is used and a sparse format otherwise.  This routine is costly in general,
302:     and is recommended for use only with relatively small systems.

304:     Level: advanced

306: .keywords: Mat, compute, explicit, operator
307: @*/
308: PetscErrorCode  MatComputeExplicitOperator(Mat inmat,Mat *mat)
309: {
310:   Vec            in,out;
312:   PetscInt       i,m,n,M,N,*rows,start,end;
313:   MPI_Comm       comm;
314:   PetscScalar    *array,zero = 0.0,one = 1.0;
315:   PetscMPIInt    size;


321:   PetscObjectGetComm((PetscObject)inmat,&comm);
322:   MPI_Comm_size(comm,&size);

324:   MatGetLocalSize(inmat,&m,&n);
325:   MatGetSize(inmat,&M,&N);
326:   MatCreateVecs(inmat,&in,&out);
327:   VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
328:   VecGetOwnershipRange(out,&start,&end);
329:   PetscMalloc1(m,&rows);
330:   for (i=0; i<m; i++) rows[i] = start + i;

332:   MatCreate(comm,mat);
333:   MatSetSizes(*mat,m,n,M,N);
334:   if (size == 1) {
335:     MatSetType(*mat,MATSEQDENSE);
336:     MatSeqDenseSetPreallocation(*mat,NULL);
337:   } else {
338:     MatSetType(*mat,MATMPIAIJ);
339:     MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
340:   }

342:   for (i=0; i<N; i++) {

344:     VecSet(in,zero);
345:     VecSetValues(in,1,&i,&one,INSERT_VALUES);
346:     VecAssemblyBegin(in);
347:     VecAssemblyEnd(in);

349:     MatMult(inmat,in,out);

351:     VecGetArray(out,&array);
352:     MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
353:     VecRestoreArray(out,&array);

355:   }
356:   PetscFree(rows);
357:   VecDestroy(&out);
358:   VecDestroy(&in);
359:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
360:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
361:   return(0);
362: }
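
/*
   Illustrative usage (not part of axpy.c): a minimal sketch of
   MatComputeExplicitOperator().  A is assumed to be an operator that may only
   be applied via MatMult() (for example a MATSHELL); the routine assembles its
   explicit form column by column, dense on one process and MPIAIJ otherwise.
   The helper name is hypothetical and error checking is not shown.
*/
PetscErrorCode ExampleMatComputeExplicitOperatorUsage(Mat A)
{
  Mat Aexplicit;

  MatComputeExplicitOperator(A,&Aexplicit);
  MatView(Aexplicit,PETSC_VIEWER_STDOUT_WORLD);
  MatDestroy(&Aexplicit);
  return(0);
}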

364: /*@
365:   MatChop - Set all matrix entries whose absolute value is less than the tolerance to zero

367:   Input Parameters:
368: + A   - The matrix
369: - tol - The zero tolerance

371:   Output Parameters:
372: . A - The chopped matrix

374:   Level: intermediate

376: .seealso: MatCreate(), MatZeroEntries()
377:  @*/
378: PetscErrorCode MatChop(Mat A, PetscReal tol)
379: {
380:   PetscScalar    *newVals;
381:   PetscInt       *newCols;
382:   PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;

386:   MatGetOwnershipRange(A, &rStart, &rEnd);
387:   for (r = rStart; r < rEnd; ++r) {
388:     PetscInt ncols;

390:     MatGetRow(A, r, &ncols, NULL, NULL);
391:     colMax = PetscMax(colMax, ncols);
392:     MatRestoreRow(A, r, &ncols, NULL, NULL);
393:   }
394:   numRows = rEnd - rStart;
395:   MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
396:   PetscMalloc2(colMax,&newCols,colMax,&newVals);
397:   for (r = rStart; r < rStart+maxRows; ++r) {
398:     const PetscScalar *vals;
399:     const PetscInt    *cols;
400:     PetscInt           ncols, newcols, c;

402:     if (r < rEnd) {
403:       MatGetRow(A, r, &ncols, &cols, &vals);
404:       for (c = 0; c < ncols; ++c) {
405:         newCols[c] = cols[c];
406:         newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
407:       }
408:       newcols = ncols;
409:       MatRestoreRow(A, r, &ncols, &cols, &vals);
410:       MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
411:     }
412:     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
413:     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
414:   }
415:   PetscFree2(newCols,newVals);
416:   return(0);
417: }
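
/*
   Illustrative usage (not part of axpy.c): a minimal sketch of MatChop(),
   zeroing every entry of an assembled matrix A whose magnitude is below 1e-10.
   As the code above shows, entries are overwritten in place with explicit
   zeros; the nonzero pattern is not compressed.  The helper name is
   hypothetical and error checking is not shown.
*/
PetscErrorCode ExampleMatChopUsage(Mat A)
{
  MatChop(A,1.e-10);
  MatView(A,PETSC_VIEWER_STDOUT_WORLD);
  return(0);
}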