Actual source code: axpy.c


  2: #include <petsc/private/matimpl.h>

  4: static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T)
  5: {
  6:   PetscErrorCode ierr,(*f)(Mat,Mat*);
  7:   Mat            A,F;

 10:   PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f);
 11:   if (f) {
 12:     MatTransposeGetMat(T,&A);
 13:     if (T == X) {
 14:       PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
 15:       MatTranspose(A,MAT_INITIAL_MATRIX,&F);
 16:       A = Y;
 17:     } else {
 18:       PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
 19:       MatTranspose(X,MAT_INITIAL_MATRIX,&F);
 20:     }
 21:   } else {
 22:     MatHermitianTransposeGetMat(T,&A);
 23:     if (T == X) {
 24:       PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
 25:       MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F);
 26:       A = Y;
 27:     } else {
 28:       PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
 29:       MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F);
 30:     }
 31:   }
 32:   MatAXPY(A,a,F,str);
 33:   MatDestroy(&F);
 34:   return(0);
 35: }

 37: /*@
 38:    MatAXPY - Computes Y = a*X + Y.

 40:    Logically  Collective on Mat

 42:    Input Parameters:
 43: +  a - the scalar multiplier
 44: .  X - the first matrix
 45: .  Y - the second matrix
 46: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)

 48:    Notes: No operation is performed when a is zero.

 50:    Level: intermediate

 52: .seealso: MatAYPX()
 53:  @*/
 54: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
 55: {
 57:   PetscInt       M1,M2,N1,N2;
 58:   PetscInt       m1,m2,n1,n2;
 59:   MatType        t1,t2;
 60:   PetscBool      sametype,transpose;

 67:   MatGetSize(X,&M1,&N1);
 68:   MatGetSize(Y,&M2,&N2);
 69:   MatGetLocalSize(X,&m1,&n1);
 70:   MatGetLocalSize(Y,&m2,&n2);
 71:   if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes X %D x %D, Y %D x %D",M1,N1,M2,N2);
 72:   if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes X %D x %D, Y %D x %D",m1,n1,m2,n2);
 73:   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (Y)");
 74:   if (!X->assembled) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (X)");
 75:   if (a == (PetscScalar)0.0) return(0);
 76:   if (Y == X) {
 77:     MatScale(Y,1.0 + a);
 78:     return(0);
 79:   }
 80:   MatGetType(X,&t1);
 81:   MatGetType(Y,&t2);
 82:   PetscStrcmp(t1,t2,&sametype);
 83:   PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
 84:   if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
 85:     (*Y->ops->axpy)(Y,a,X,str);
 86:   } else {
 87:     PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose);
 88:     if (transpose) {
 89:       MatTransposeAXPY_Private(Y,a,X,str,X);
 90:     } else {
 91:       PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose);
 92:       if (transpose) {
 93:         MatTransposeAXPY_Private(Y,a,X,str,Y);
 94:       } else {
 95:         MatAXPY_Basic(Y,a,X,str);
 96:       }
 97:     }
 98:   }
 99:   PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
100:   return(0);
101: }

103: PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
104: {
106:   PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;

109:   /* look for any available faster alternative to the general preallocator */
110:   PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);
111:   if (preall) {
112:     (*preall)(Y,X,B);
113:   } else { /* Use MatPrellocator, assumes same row-col distribution */
114:     Mat      preallocator;
115:     PetscInt r,rstart,rend;
116:     PetscInt m,n,M,N;

118:     MatGetRowUpperTriangular(Y);
119:     MatGetRowUpperTriangular(X);
120:     MatGetSize(Y,&M,&N);
121:     MatGetLocalSize(Y,&m,&n);
122:     MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);
123:     MatSetType(preallocator,MATPREALLOCATOR);
124:     MatSetLayouts(preallocator,Y->rmap,Y->cmap);
125:     MatSetUp(preallocator);
126:     MatGetOwnershipRange(preallocator,&rstart,&rend);
127:     for (r = rstart; r < rend; ++r) {
128:       PetscInt          ncols;
129:       const PetscInt    *row;
130:       const PetscScalar *vals;

132:       MatGetRow(Y,r,&ncols,&row,&vals);
133:       MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
134:       MatRestoreRow(Y,r,&ncols,&row,&vals);
135:       MatGetRow(X,r,&ncols,&row,&vals);
136:       MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
137:       MatRestoreRow(X,r,&ncols,&row,&vals);
138:     }
139:     MatSetOption(preallocator,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
140:     MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
141:     MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);
142:     MatRestoreRowUpperTriangular(Y);
143:     MatRestoreRowUpperTriangular(X);

145:     MatCreate(PetscObjectComm((PetscObject)Y),B);
146:     MatSetType(*B,((PetscObject)Y)->type_name);
147:     MatSetLayouts(*B,Y->rmap,Y->cmap);
148:     MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);
149:     MatDestroy(&preallocator);
150:   }
151:   return(0);
152: }

154: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
155: {
157:   PetscBool      isshell,isdense,isnest;

160:   MatIsShell(Y,&isshell);
161:   if (isshell) { /* MatShell has special support for AXPY */
162:     PetscErrorCode (*f)(Mat,PetscScalar,Mat,MatStructure);

164:     MatGetOperation(Y,MATOP_AXPY,(void (**)(void))&f);
165:     if (f) {
166:       (*f)(Y,a,X,str);
167:       return(0);
168:     }
169:   }
170:   /* no need to preallocate if Y is dense */
171:   PetscObjectBaseTypeCompareAny((PetscObject)Y,&isdense,MATSEQDENSE,MATMPIDENSE,"");
172:   if (isdense) {
173:     PetscObjectTypeCompare((PetscObject)X,MATNEST,&isnest);
174:     if (isnest) {
175:       MatAXPY_Dense_Nest(Y,a,X);
176:       return(0);
177:     }
178:     if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
179:   }
180:   if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
181:     PetscInt          i,start,end,j,ncols,m,n;
182:     const PetscInt    *row;
183:     PetscScalar       *val;
184:     const PetscScalar *vals;

186:     MatGetSize(X,&m,&n);
187:     MatGetOwnershipRange(X,&start,&end);
188:     MatGetRowUpperTriangular(X);
189:     if (a == 1.0) {
190:       for (i = start; i < end; i++) {
191:         MatGetRow(X,i,&ncols,&row,&vals);
192:         MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
193:         MatRestoreRow(X,i,&ncols,&row,&vals);
194:       }
195:     } else {
196:       PetscInt vs = 100;
197:       /* realloc if needed, as this function may be used in parallel */
198:       PetscMalloc1(vs,&val);
199:       for (i=start; i<end; i++) {
200:         MatGetRow(X,i,&ncols,&row,&vals);
201:         if (vs < ncols) {
202:           vs   = PetscMin(2*ncols,n);
203:           PetscRealloc(vs*sizeof(*val),&val);
204:         }
205:         for (j=0; j<ncols; j++) val[j] = a*vals[j];
206:         MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
207:         MatRestoreRow(X,i,&ncols,&row,&vals);
208:       }
209:       PetscFree(val);
210:     }
211:     MatRestoreRowUpperTriangular(X);
212:     MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
213:     MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
214:   } else {
215:     Mat B;

217:     MatAXPY_Basic_Preallocate(Y,X,&B);
218:     MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
219:     /* TODO mat_tests-ex37_nsize-1_mat_type-baij_mat_block_size-2 fails with MatHeaderMerge */
220:     MatHeaderReplace(Y,&B);
221:   }
222:   return(0);
223: }

225: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
226: {
227:   PetscInt          i,start,end,j,ncols,m,n;
228:   PetscErrorCode    ierr;
229:   const PetscInt    *row;
230:   PetscScalar       *val;
231:   const PetscScalar *vals;

234:   MatGetSize(X,&m,&n);
235:   MatGetOwnershipRange(X,&start,&end);
236:   MatGetRowUpperTriangular(Y);
237:   MatGetRowUpperTriangular(X);
238:   if (a == 1.0) {
239:     for (i = start; i < end; i++) {
240:       MatGetRow(Y,i,&ncols,&row,&vals);
241:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
242:       MatRestoreRow(Y,i,&ncols,&row,&vals);

244:       MatGetRow(X,i,&ncols,&row,&vals);
245:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
246:       MatRestoreRow(X,i,&ncols,&row,&vals);
247:     }
248:   } else {
249:     PetscInt vs = 100;
250:     /* realloc if needed, as this function may be used in parallel */
251:     PetscMalloc1(vs,&val);
252:     for (i=start; i<end; i++) {
253:       MatGetRow(Y,i,&ncols,&row,&vals);
254:       MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
255:       MatRestoreRow(Y,i,&ncols,&row,&vals);

257:       MatGetRow(X,i,&ncols,&row,&vals);
258:       if (vs < ncols) {
259:         vs   = PetscMin(2*ncols,n);
260:         PetscRealloc(vs*sizeof(*val),&val);
261:       }
262:       for (j=0; j<ncols; j++) val[j] = a*vals[j];
263:       MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
264:       MatRestoreRow(X,i,&ncols,&row,&vals);
265:     }
266:     PetscFree(val);
267:   }
268:   MatRestoreRowUpperTriangular(Y);
269:   MatRestoreRowUpperTriangular(X);
270:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
271:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
272:   return(0);
273: }

275: /*@
276:    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.

278:    Neighbor-wise Collective on Mat

280:    Input Parameters:
281: +  Y - the matrices
282: -  a - the PetscScalar

284:    Level: intermediate

286:    Notes:
287:     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
288:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
289:    entry). No operation is performed when a is zero.

291:    To form Y = Y + diag(V) use MatDiagonalSet()

293: .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
294:  @*/
295: PetscErrorCode  MatShift(Mat Y,PetscScalar a)
296: {

301:   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
302:   if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
303:   MatCheckPreallocated(Y,1);
304:   if (a == 0.0) return(0);

306:   if (Y->ops->shift) {
307:     (*Y->ops->shift)(Y,a);
308:   } else {
309:     MatShift_Basic(Y,a);
310:   }

312:   PetscObjectStateIncrease((PetscObject)Y);
313:   return(0);
314: }

316: PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
317: {
318:   PetscErrorCode    ierr;
319:   PetscInt          i,start,end;
320:   const PetscScalar *v;

323:   MatGetOwnershipRange(Y,&start,&end);
324:   VecGetArrayRead(D,&v);
325:   for (i=start; i<end; i++) {
326:     MatSetValues(Y,1,&i,1,&i,v+i-start,is);
327:   }
328:   VecRestoreArrayRead(D,&v);
329:   MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
330:   MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
331:   return(0);
332: }

334: /*@
335:    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
336:    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
337:    INSERT_VALUES.

339:    Neighbor-wise Collective on Mat

341:    Input Parameters:
342: +  Y - the input matrix
343: .  D - the diagonal matrix, represented as a vector
344: -  i - INSERT_VALUES or ADD_VALUES

346:    Notes:
347:     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
348:    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
349:    entry).

351:    Level: intermediate

353: .seealso: MatShift(), MatScale(), MatDiagonalScale()
354: @*/
355: PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
356: {
358:   PetscInt       matlocal,veclocal;

363:   MatGetLocalSize(Y,&matlocal,NULL);
364:   VecGetLocalSize(D,&veclocal);
365:   if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
366:   if (Y->ops->diagonalset) {
367:     (*Y->ops->diagonalset)(Y,D,is);
368:   } else {
369:     MatDiagonalSet_Default(Y,D,is);
370:   }
371:   PetscObjectStateIncrease((PetscObject)Y);
372:   return(0);
373: }

375: /*@
376:    MatAYPX - Computes Y = a*Y + X.

378:    Logically on Mat

380:    Input Parameters:
381: +  a - the PetscScalar multiplier
382: .  Y - the first matrix
383: .  X - the second matrix
384: -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)

386:    Level: intermediate

388: .seealso: MatAXPY()
389:  @*/
390: PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
391: {

395:   MatScale(Y,a);
396:   MatAXPY(Y,1.0,X,str);
397:   return(0);
398: }

400: /*@
401:     MatComputeOperator - Computes the explicit matrix

403:     Collective on Mat

405:     Input Parameter:
406: +   inmat - the matrix
407: -   mattype - the matrix type for the explicit operator

409:     Output Parameter:
410: .   mat - the explict  operator

412:     Notes:
413:     This computation is done by applying the operators to columns of the identity matrix.
414:     This routine is costly in general, and is recommended for use only with relatively small systems.
415:     Currently, this routine uses a dense matrix format if mattype == NULL.

417:     Level: advanced

419: @*/
420: PetscErrorCode  MatComputeOperator(Mat inmat,MatType mattype,Mat *mat)
421: {

427:   MatConvert_Shell(inmat,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
428:   return(0);
429: }

431: /*@
432:     MatComputeOperatorTranspose - Computes the explicit matrix representation of
433:         a give matrix that can apply MatMultTranspose()

435:     Collective on Mat

437:     Input Parameter:
438: .   inmat - the matrix

440:     Output Parameter:
441: .   mat - the explict  operator transposed

443:     Notes:
444:     This computation is done by applying the transpose of the operator to columns of the identity matrix.
445:     This routine is costly in general, and is recommended for use only with relatively small systems.
446:     Currently, this routine uses a dense matrix format if mattype == NULL.

448:     Level: advanced

450: @*/
451: PetscErrorCode  MatComputeOperatorTranspose(Mat inmat,MatType mattype,Mat *mat)
452: {
453:   Mat            A;

459:   MatCreateTranspose(inmat,&A);
460:   MatConvert_Shell(A,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
461:   MatDestroy(&A);
462:   return(0);
463: }

465: /*@
466:   MatChop - Set all values in the matrix less than the tolerance to zero

468:   Input Parameters:
469: + A   - The matrix
470: - tol - The zero tolerance

472:   Output Parameters:
473: . A - The chopped matrix

475:   Level: intermediate

477: .seealso: MatCreate(), MatZeroEntries()
478:  @*/
479: PetscErrorCode MatChop(Mat A, PetscReal tol)
480: {
481:   Mat            a;
482:   PetscScalar    *newVals;
483:   PetscInt       *newCols, rStart, rEnd, numRows, maxRows, r, colMax = 0;
484:   PetscBool      flg;

488:   PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, "");
489:   if (flg) {
490:     MatDenseGetLocalMatrix(A, &a);
491:     MatDenseGetLDA(a, &r);
492:     MatGetSize(a, &rStart, &rEnd);
493:     MatDenseGetArray(a, &newVals);
494:     for (; colMax < rEnd; ++colMax) {
495:       for (maxRows = 0; maxRows < rStart; ++maxRows) {
496:         newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) < tol ? 0.0 : newVals[maxRows + colMax * r];
497:       }
498:     }
499:     MatDenseRestoreArray(a, &newVals);
500:   } else {
501:     MatGetOwnershipRange(A, &rStart, &rEnd);
502:     MatGetRowUpperTriangular(A);
503:     for (r = rStart; r < rEnd; ++r) {
504:       PetscInt ncols;

506:       MatGetRow(A, r, &ncols, NULL, NULL);
507:       colMax = PetscMax(colMax, ncols);
508:       MatRestoreRow(A, r, &ncols, NULL, NULL);
509:     }
510:     numRows = rEnd - rStart;
511:     MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
512:     PetscMalloc2(colMax, &newCols, colMax, &newVals);
513:     MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg); /* cache user-defined value */
514:     MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
515:     /* short-circuit code in MatAssemblyBegin() and MatAssemblyEnd()             */
516:     /* that are potentially called many times depending on the distribution of A */
517:     for (r = rStart; r < rStart+maxRows; ++r) {
518:       const PetscScalar *vals;
519:       const PetscInt    *cols;
520:       PetscInt           ncols, newcols, c;

522:       if (r < rEnd) {
523:         MatGetRow(A, r, &ncols, &cols, &vals);
524:         for (c = 0; c < ncols; ++c) {
525:           newCols[c] = cols[c];
526:           newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
527:         }
528:         newcols = ncols;
529:         MatRestoreRow(A, r, &ncols, &cols, &vals);
530:         MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
531:       }
532:       MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
533:       MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
534:     }
535:     MatRestoreRowUpperTriangular(A);
536:     PetscFree2(newCols, newVals);
537:     MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg); /* reset option to its user-defined value */
538:   }
539:   return(0);
540: }