Actual source code: mpidense.c


  2: /*
  3:    Basic functions for parallel dense matrices.
  4: */

  6: #include <../src/mat/impls/dense/mpi/mpidense.h>
  7: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  8: #include <petscblaslapack.h>

 10: /*@

 12:       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
 13:               matrix that stores the local values of the operator. For a sequential dense matrix it returns the matrix itself.

 15:     Input Parameter:
 16: .      A - the Seq or MPI dense matrix

 18:     Output Parameter:
 19: .      B - the inner matrix

 21:     Level: intermediate

 23: @*/
 24: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
 25: {
 26:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 27:   PetscBool      flg;

 31:   PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
 32:   if (flg) *B = mat->A;
 33:   else {
 34:     PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);
 35:     if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
 36:     *B = A;
 37:   }
 38:   return 0;
 39: }
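
/*
   Usage sketch (illustrative; not part of the original source): operate directly on
   the local dense block of a parallel matrix, for example to apply an operation to
   the locally owned rows. The returned matrix is borrowed, so there is no matching
   restore call.

     Mat Aloc;
     MatDenseGetLocalMatrix(A,&Aloc);
     MatScale(Aloc,2.0);
*/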

 41: PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
 42: {
 43:   Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
 44:   Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;

 46:   MatCopy(Amat->A,Bmat->A,s);
 47:   return 0;
 48: }

 50: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 51: {
 52:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 53:   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

 55:   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
 56:   lrow = row - rstart;
 57:   MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
 58:   return 0;
 59: }

 61: PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 62: {
 63:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 64:   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

 66:   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
 67:   lrow = row - rstart;
 68:   MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
 69:   return 0;
 70: }

 72: PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
 73: {
 74:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
 75:   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
 76:   PetscScalar    *array;
 77:   MPI_Comm       comm;
 78:   PetscBool      flg;
 79:   Mat            B;

 81:   MatHasCongruentLayouts(A,&flg);
 82:   if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported");
 83:   PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
 84:   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

 86:     PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);
 87:     if (flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s",MATSEQDENSECUDA);
 88:     PetscObjectGetComm((PetscObject)(mdn->A),&comm);
 89:     MatCreate(comm,&B);
 90:     MatSetSizes(B,m,m,m,m);
 91:     MatSetType(B,((PetscObject)mdn->A)->type_name);
 92:     MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);
 93:     MatSeqDenseSetPreallocation(B,array+m*rstart);
 94:     MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);
 95:     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
 96:     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
 97:     PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
 98:     *a   = B;
 99:     MatDestroy(&B);
100:   } else *a = B;
101:   return 0;
102: }

104: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
105: {
106:   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
107:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
108:   PetscBool      roworiented = A->roworiented;

110:   for (i=0; i<m; i++) {
111:     if (idxm[i] < 0) continue;
112:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
113:     if (idxm[i] >= rstart && idxm[i] < rend) {
114:       row = idxm[i] - rstart;
115:       if (roworiented) {
116:         MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
117:       } else {
118:         for (j=0; j<n; j++) {
119:           if (idxn[j] < 0) continue;
120:           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
121:           MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
122:         }
123:       }
124:     } else if (!A->donotstash) {
125:       mat->assembled = PETSC_FALSE;
126:       if (roworiented) {
127:         MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
128:       } else {
129:         MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
130:       }
131:     }
132:   }
133:   return 0;
134: }
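
/*
   Usage sketch (illustrative; not part of the original source): entries whose rows
   fall outside this process's [rstart,rend) take the stash path above and only
   reach their owning process during assembly.

     PetscInt    row = 0, col = 0;
     PetscScalar val = 1.0;
     MatSetValues(A,1,&row,1,&col,&val,ADD_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/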

136: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
137: {
138:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
139:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

141:   for (i=0; i<m; i++) {
142:     if (idxm[i] < 0) continue; /* negative row */
143:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
144:     if (idxm[i] >= rstart && idxm[i] < rend) {
145:       row = idxm[i] - rstart;
146:       for (j=0; j<n; j++) {
147:         if (idxn[j] < 0) continue; /* negative column */
148:         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
149:         MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
150:       }
151:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
152:   }
153:   return 0;
154: }

156: static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
157: {
158:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

160:   MatDenseGetLDA(a->A,lda);
161:   return 0;
162: }

164: static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
165: {
166:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
167:   PetscBool      iscuda;

169:   if (!a->A) {
171:     PetscLayoutSetUp(A->rmap);
172:     PetscLayoutSetUp(A->cmap);
173:     MatCreate(PETSC_COMM_SELF,&a->A);
174:     PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);
175:     MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);
176:     PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);
177:     MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);
178:   }
179:   MatDenseSetLDA(a->A,lda);
180:   return 0;
181: }

183: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
184: {
185:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

188:   MatDenseGetArray(a->A,array);
189:   return 0;
190: }

192: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
193: {
194:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

197:   MatDenseGetArrayRead(a->A,array);
198:   return 0;
199: }

201: static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
202: {
203:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

206:   MatDenseGetArrayWrite(a->A,array);
207:   return 0;
208: }

210: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
211: {
212:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

216:   MatDensePlaceArray(a->A,array);
217:   return 0;
218: }

220: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
221: {
222:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

226:   MatDenseResetArray(a->A);
227:   return 0;
228: }

230: static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
231: {
232:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

236:   MatDenseReplaceArray(a->A,array);
237:   return 0;
238: }

240: static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
241: {
242:   Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
243:   PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
244:   const PetscInt    *irow,*icol;
245:   const PetscScalar *v;
246:   PetscScalar       *bv;
247:   Mat               newmat;
248:   IS                iscol_local;
249:   MPI_Comm          comm_is,comm_mat;

251:   PetscObjectGetComm((PetscObject)A,&comm_mat);
252:   PetscObjectGetComm((PetscObject)iscol,&comm_is);

253:   if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"Matrix and index set must use the same communicator");

255:   ISAllGather(iscol,&iscol_local);
256:   ISGetIndices(isrow,&irow);
257:   ISGetIndices(iscol_local,&icol);
258:   ISGetLocalSize(isrow,&nrows);
259:   ISGetLocalSize(iscol,&ncols);
260:   ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */

 262:   /* No parallel redistribution is currently supported! Should really check each index set
 263:      to confirm that it is OK.  ... Currently only submatrices with the same row partitioning as the
 264:      original matrix are supported! */

266:   MatGetLocalSize(A,&nlrows,&nlcols);
267:   MatGetOwnershipRange(A,&rstart,&rend);

269:   /* Check submatrix call */
270:   if (scall == MAT_REUSE_MATRIX) {
271:     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
272:     /* Really need to test rows and column sizes! */
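         /* A minimal sketch of such a check (an assumption, not upstream code):
              PetscInt mr,nr;
              MatGetLocalSize(*B,&mr,&nr);
              if (mr != nrows || nr != ncols) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size");
         */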
273:     newmat = *B;
274:   } else {
275:     /* Create and fill new matrix */
276:     MatCreate(PetscObjectComm((PetscObject)A),&newmat);
277:     MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
278:     MatSetType(newmat,((PetscObject)A)->type_name);
279:     MatMPIDenseSetPreallocation(newmat,NULL);
280:   }

282:   /* Now extract the data pointers and do the copy, column at a time */
283:   newmatd = (Mat_MPIDense*)newmat->data;
284:   MatDenseGetArray(newmatd->A,&bv);
285:   MatDenseGetArrayRead(mat->A,&v);
286:   MatDenseGetLDA(mat->A,&lda);
287:   for (i=0; i<Ncols; i++) {
288:     const PetscScalar *av = v + lda*icol[i];
289:     for (j=0; j<nrows; j++) {
290:       *bv++ = av[irow[j] - rstart];
291:     }
292:   }
293:   MatDenseRestoreArrayRead(mat->A,&v);
294:   MatDenseRestoreArray(newmatd->A,&bv);

296:   /* Assemble the matrices so that the correct flags are set */
297:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
298:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

300:   /* Free work space */
301:   ISRestoreIndices(isrow,&irow);
302:   ISRestoreIndices(iscol_local,&icol);
303:   ISDestroy(&iscol_local);
304:   *B   = newmat;
305:   return 0;
306: }

308: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
309: {
310:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

312:   MatDenseRestoreArray(a->A,array);
313:   return 0;
314: }

316: PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
317: {
318:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

320:   MatDenseRestoreArrayRead(a->A,array);
321:   return 0;
322: }

324: PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
325: {
326:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

328:   MatDenseRestoreArrayWrite(a->A,array);
329:   return 0;
330: }

332: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
333: {
334:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
335:   PetscInt       nstash,reallocs;

337:   if (mdn->donotstash || mat->nooffprocentries) return 0;

339:   MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
340:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
341:   PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);
342:   return 0;
343: }

345: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
346: {
347:   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
348:   PetscInt       i,*row,*col,flg,j,rstart,ncols;
349:   PetscMPIInt    n;
350:   PetscScalar    *val;

352:   if (!mdn->donotstash && !mat->nooffprocentries) {
353:     /*  wait on receives */
354:     while (1) {
355:       MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
356:       if (!flg) break;

358:       for (i=0; i<n;) {
359:         /* Now identify the consecutive vals belonging to the same row */
360:         for (j=i,rstart=row[j]; j<n; j++) {
361:           if (row[j] != rstart) break;
362:         }
363:         if (j < n) ncols = j-i;
364:         else       ncols = n-i;
365:         /* Now assemble all these values with a single function call */
366:         MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
367:         i    = j;
368:       }
369:     }
370:     MatStashScatterEnd_Private(&mat->stash);
371:   }

373:   MatAssemblyBegin(mdn->A,mode);
374:   MatAssemblyEnd(mdn->A,mode);

376:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
377:     MatSetUpMultiply_MPIDense(mat);
378:   }
379:   return 0;
380: }

382: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
383: {
384:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

386:   MatZeroEntries(l->A);
387:   return 0;
388: }

390: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
391: {
392:   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
393:   PetscInt          i,len,*lrows;

395:   /* get locally owned rows */
396:   PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);
397:   /* fix right hand side if needed */
398:   if (x && b) {
399:     const PetscScalar *xx;
400:     PetscScalar       *bb;

402:     VecGetArrayRead(x, &xx);
403:     VecGetArrayWrite(b, &bb);
404:     for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
405:     VecRestoreArrayRead(x, &xx);
406:     VecRestoreArrayWrite(b, &bb);
407:   }
408:   MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);
409:   if (diag != 0.0) {
410:     Vec d;

412:     MatCreateVecs(A,NULL,&d);
413:     VecSet(d,diag);
414:     MatDiagonalSet(A,d,INSERT_VALUES);
415:     VecDestroy(&d);
416:   }
417:   PetscFree(lrows);
418:   return 0;
419: }
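
/*
   Usage sketch (illustrative; not part of the original source): zero rows for
   Dirichlet conditions while keeping the system consistent with a known solution x;
   with diag = 1.0 the loop above sets b[row] = x[row] for every zeroed local row.

     MatZeroRows(A,nbc,bcrows,1.0,x,b);
*/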

421: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
422: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
423: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
424: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

426: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
427: {
428:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
429:   const PetscScalar *ax;
430:   PetscScalar       *ay;
431:   PetscMemType      axmtype,aymtype;

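        /* Gather the needed entries of the distributed x into the local work vector
           lvec through the PetscSF (memtype-aware, so host and device arrays are both
           handled), then apply the local rmap->n by cmap->N dense block. */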
433:   VecGetArrayReadAndMemType(xx,&ax,&axmtype);
434:   VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);
435:   PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);
436:   PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);
437:   VecRestoreArrayAndMemType(mdn->lvec,&ay);
438:   VecRestoreArrayReadAndMemType(xx,&ax);
439:   (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);
440:   return 0;
441: }

443: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
444: {
445:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
446:   const PetscScalar *ax;
447:   PetscScalar       *ay;
448:   PetscMemType      axmtype,aymtype;

450:   VecGetArrayReadAndMemType(xx,&ax,&axmtype);
451:   VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype);
452:   PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE);
453:   PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE);
454:   VecRestoreArrayAndMemType(mdn->lvec,&ay);
455:   VecRestoreArrayReadAndMemType(xx,&ax);
456:   (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);
457:   return 0;
458: }

460: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
461: {
462:   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
463:   const PetscScalar *ax;
464:   PetscScalar       *ay;
465:   PetscMemType      axmtype,aymtype;

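        /* Apply the local block transposed to form this process's contribution, then
           sum all contributions into the distributed result with a PetscSF reduce. */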
467:   VecSet(yy,0.0);
468:   (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
469:   VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);
470:   VecGetArrayAndMemType(yy,&ay,&aymtype);
471:   PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);
472:   PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);
473:   VecRestoreArrayReadAndMemType(a->lvec,&ax);
474:   VecRestoreArrayAndMemType(yy,&ay);
475:   return 0;
476: }

478: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
479: {
480:   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
481:   const PetscScalar *ax;
482:   PetscScalar       *ay;
483:   PetscMemType      axmtype,aymtype;

485:   VecCopy(yy,zz);
486:   (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
487:   VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype);
488:   VecGetArrayAndMemType(zz,&ay,&aymtype);
489:   PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);
490:   PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);
491:   VecRestoreArrayReadAndMemType(a->lvec,&ax);
492:   VecRestoreArrayAndMemType(zz,&ay);
493:   return 0;
494: }

496: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
497: {
498:   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
499:   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
500:   PetscScalar       *x,zero = 0.0;
501:   const PetscScalar *av;

503:   VecSet(v,zero);
504:   VecGetArray(v,&x);
505:   VecGetSize(v,&n);
506:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
507:   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
508:   radd = A->rmap->rstart*m;
509:   MatDenseGetArrayRead(a->A,&av);
510:   MatDenseGetLDA(a->A,&lda);
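        /* Column-major layout: the diagonal entry of global row rstart+i lives in local
           column rstart+i, i.e. at offset (rstart+i)*lda + i; the radd term above
           assumes lda == m for the leading rstart*m offset. */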
511:   for (i=0; i<len; i++) {
512:     x[i] = av[radd + i*lda + i];
513:   }
514:   MatDenseRestoreArrayRead(a->A,&av);
515:   VecRestoreArray(v,&x);
516:   return 0;
517: }

519: PetscErrorCode MatDestroy_MPIDense(Mat mat)
520: {
521:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

523: #if defined(PETSC_USE_LOG)
524:   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
525: #endif
526:   MatStashDestroy_Private(&mat->stash);
529:   MatDestroy(&mdn->A);
530:   VecDestroy(&mdn->lvec);
531:   PetscSFDestroy(&mdn->Mvctx);
532:   VecDestroy(&mdn->cvec);
533:   MatDestroy(&mdn->cmat);

535:   PetscFree(mat->data);
536:   PetscObjectChangeTypeName((PetscObject)mat,NULL);

538:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);
539:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);
540:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);
541:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);
542:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);
543:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);
544:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);
545:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);
546:   PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);
547:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);
548:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);
549:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);
550:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL);
551: #if defined(PETSC_HAVE_ELEMENTAL)
552:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);
553: #endif
554: #if defined(PETSC_HAVE_SCALAPACK)
555:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);
556: #endif
557:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);
558:   PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);
559:   PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);
560: #if defined (PETSC_HAVE_CUDA)
561:   PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);
562:   PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);
563: #endif
564:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);
565:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);
566:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);
567:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);
568:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);
569:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);
570:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);
571:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);
572:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);
573:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);
574:   return 0;
575: }

577: PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

579: #include <petscdraw.h>
580: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
581: {
582:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
583:   PetscMPIInt       rank;
584:   PetscViewerType   vtype;
585:   PetscBool         iascii,isdraw;
586:   PetscViewer       sviewer;
587:   PetscViewerFormat format;

589:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
590:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
591:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
592:   if (iascii) {
593:     PetscViewerGetType(viewer,&vtype);
594:     PetscViewerGetFormat(viewer,&format);
595:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
596:       MatInfo info;
597:       MatGetInfo(mat,MAT_LOCAL,&info);
598:       PetscViewerASCIIPushSynchronized(viewer);
599:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
600:       PetscViewerFlush(viewer);
601:       PetscViewerASCIIPopSynchronized(viewer);
602:       PetscSFView(mdn->Mvctx,viewer);
603:       return 0;
604:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
605:       return 0;
606:     }
607:   } else if (isdraw) {
608:     PetscDraw draw;
609:     PetscBool isnull;

611:     PetscViewerDrawGetDraw(viewer,0,&draw);
612:     PetscDrawIsNull(draw,&isnull);
613:     if (isnull) return 0;
614:   }

616:   {
 617:     /* assemble the entire matrix onto the first processor */
618:     Mat         A;
619:     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
620:     PetscInt    *cols;
621:     PetscScalar *vals;

623:     MatCreate(PetscObjectComm((PetscObject)mat),&A);
624:     if (rank == 0) {
625:       MatSetSizes(A,M,N,M,N);
626:     } else {
627:       MatSetSizes(A,0,0,M,N);
628:     }
629:     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
630:     MatSetType(A,MATMPIDENSE);
631:     MatMPIDenseSetPreallocation(A,NULL);
632:     PetscLogObjectParent((PetscObject)mat,(PetscObject)A);

 634:     /* Copy the matrix ... This isn't the most efficient approach,
 635:        but it's quick for now */
636:     A->insertmode = INSERT_VALUES;

638:     row = mat->rmap->rstart;
639:     m   = mdn->A->rmap->n;
640:     for (i=0; i<m; i++) {
641:       MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
642:       MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
643:       MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
644:       row++;
645:     }

647:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
648:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
649:     PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
650:     if (rank == 0) {
651:       PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);
652:       MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);
653:     }
654:     PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
655:     PetscViewerFlush(viewer);
656:     MatDestroy(&A);
657:   }
658:   return 0;
659: }

661: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
662: {
663:   PetscBool      iascii,isbinary,isdraw,issocket;

665:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
666:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
667:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
668:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);

670:   if (iascii || issocket || isdraw) {
671:     MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
672:   } else if (isbinary) {
673:     MatView_Dense_Binary(mat,viewer);
674:   }
675:   return 0;
676: }

678: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
679: {
680:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
681:   Mat            mdn  = mat->A;
682:   PetscLogDouble isend[5],irecv[5];

684:   info->block_size = 1.0;

686:   MatGetInfo(mdn,MAT_LOCAL,info);

688:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
689:   isend[3] = info->memory;  isend[4] = info->mallocs;
690:   if (flag == MAT_LOCAL) {
691:     info->nz_used      = isend[0];
692:     info->nz_allocated = isend[1];
693:     info->nz_unneeded  = isend[2];
694:     info->memory       = isend[3];
695:     info->mallocs      = isend[4];
696:   } else if (flag == MAT_GLOBAL_MAX) {
697:     MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));

699:     info->nz_used      = irecv[0];
700:     info->nz_allocated = irecv[1];
701:     info->nz_unneeded  = irecv[2];
702:     info->memory       = irecv[3];
703:     info->mallocs      = irecv[4];
704:   } else if (flag == MAT_GLOBAL_SUM) {
705:     MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));

707:     info->nz_used      = irecv[0];
708:     info->nz_allocated = irecv[1];
709:     info->nz_unneeded  = irecv[2];
710:     info->memory       = irecv[3];
711:     info->mallocs      = irecv[4];
712:   }
713:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
714:   info->fill_ratio_needed = 0;
715:   info->factor_mallocs    = 0;
716:   return 0;
717: }

719: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
720: {
721:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

723:   switch (op) {
724:   case MAT_NEW_NONZERO_LOCATIONS:
725:   case MAT_NEW_NONZERO_LOCATION_ERR:
726:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
727:     MatCheckPreallocated(A,1);
728:     MatSetOption(a->A,op,flg);
729:     break;
730:   case MAT_ROW_ORIENTED:
731:     MatCheckPreallocated(A,1);
732:     a->roworiented = flg;
733:     MatSetOption(a->A,op,flg);
734:     break;
735:   case MAT_FORCE_DIAGONAL_ENTRIES:
736:   case MAT_KEEP_NONZERO_PATTERN:
737:   case MAT_USE_HASH_TABLE:
738:   case MAT_SORTED_FULL:
739:     PetscInfo(A,"Option %s ignored\n",MatOptions[op]);
740:     break;
741:   case MAT_IGNORE_OFF_PROC_ENTRIES:
742:     a->donotstash = flg;
743:     break;
744:   case MAT_SYMMETRIC:
745:   case MAT_STRUCTURALLY_SYMMETRIC:
746:   case MAT_HERMITIAN:
747:   case MAT_SYMMETRY_ETERNAL:
748:   case MAT_IGNORE_LOWER_TRIANGULAR:
749:   case MAT_IGNORE_ZERO_ENTRIES:
750:     PetscInfo(A,"Option %s ignored\n",MatOptions[op]);
751:     break;
752:   default:
753:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
754:   }
755:   return 0;
756: }

758: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
759: {
760:   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
761:   const PetscScalar *l;
762:   PetscScalar       x,*v,*vv,*r;
763:   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

765:   MatDenseGetArray(mdn->A,&vv);
766:   MatDenseGetLDA(mdn->A,&lda);
767:   MatGetLocalSize(A,&s2,&s3);
768:   if (ll) {
769:     VecGetLocalSize(ll,&s2a);
770:     if (s2a != s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT,s2a,s2);
771:     VecGetArrayRead(ll,&l);
772:     for (i=0; i<m; i++) {
773:       x = l[i];
774:       v = vv + i;
775:       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
776:     }
777:     VecRestoreArrayRead(ll,&l);
778:     PetscLogFlops(1.0*n*m);
779:   }
780:   if (rr) {
781:     const PetscScalar *ar;

783:     VecGetLocalSize(rr,&s3a);
784:     if (s3a != s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT,s3a,s3);
785:     VecGetArrayRead(rr,&ar);
786:     VecGetArray(mdn->lvec,&r);
787:     PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);
788:     PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE);
789:     VecRestoreArrayRead(rr,&ar);
790:     for (i=0; i<n; i++) {
791:       x = r[i];
792:       v = vv + i*lda;
793:       for (j=0; j<m; j++) (*v++) *= x;
794:     }
795:     VecRestoreArray(mdn->lvec,&r);
796:     PetscLogFlops(1.0*n*m);
797:   }
798:   MatDenseRestoreArray(mdn->A,&vv);
799:   return 0;
800: }

802: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
803: {
804:   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
805:   PetscInt          i,j;
806:   PetscMPIInt       size;
807:   PetscReal         sum = 0.0;
808:   const PetscScalar *av,*v;

810:   MatDenseGetArrayRead(mdn->A,&av);
811:   v    = av;
812:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
813:   if (size == 1) {
814:     MatNorm(mdn->A,type,nrm);
815:   } else {
816:     if (type == NORM_FROBENIUS) {
817:       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
818:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
819:       }
820:       MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
821:       *nrm = PetscSqrtReal(*nrm);
822:       PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
823:     } else if (type == NORM_1) {
824:       PetscReal *tmp,*tmp2;
825:       PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);
826:       *nrm = 0.0;
827:       v    = av;
828:       for (j=0; j<mdn->A->cmap->n; j++) {
829:         for (i=0; i<mdn->A->rmap->n; i++) {
830:           tmp[j] += PetscAbsScalar(*v);  v++;
831:         }
832:       }
833:       MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
834:       for (j=0; j<A->cmap->N; j++) {
835:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
836:       }
837:       PetscFree2(tmp,tmp2);
838:       PetscLogFlops(A->cmap->n*A->rmap->n);
839:     } else if (type == NORM_INFINITY) { /* max row norm */
840:       PetscReal ntemp;
841:       MatNorm(mdn->A,type,&ntemp);
842:       MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
843:     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
844:   }
845:   MatDenseRestoreArrayRead(mdn->A,&av);
846:   return 0;
847: }

849: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
850: {
851:   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
852:   Mat            B;
853:   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
854:   PetscInt       j,i,lda;
855:   PetscScalar    *v;

857:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
858:     MatCreate(PetscObjectComm((PetscObject)A),&B);
859:     MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
860:     MatSetType(B,((PetscObject)A)->type_name);
861:     MatMPIDenseSetPreallocation(B,NULL);
862:   } else B = *matout;

864:   m    = a->A->rmap->n; n = a->A->cmap->n;
865:   MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);
866:   MatDenseGetLDA(a->A,&lda);
867:   PetscMalloc1(m,&rwork);
868:   for (i=0; i<m; i++) rwork[i] = rstart + i;
869:   for (j=0; j<n; j++) {
870:     MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
871:     v   += lda;
872:   }
873:   MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);
874:   PetscFree(rwork);
875:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
876:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
877:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
878:     *matout = B;
879:   } else {
880:     MatHeaderMerge(A,&B);
881:   }
882:   return 0;
883: }

885: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
886: PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

888: PetscErrorCode MatSetUp_MPIDense(Mat A)
889: {
890:   PetscLayoutSetUp(A->rmap);
891:   PetscLayoutSetUp(A->cmap);
892:   if (!A->preallocated) {
893:     MatMPIDenseSetPreallocation(A,NULL);
894:   }
895:   return 0;
896: }

898: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
899: {
900:   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

902:   MatAXPY(A->A,alpha,B->A,str);
903:   return 0;
904: }

906: PetscErrorCode MatConjugate_MPIDense(Mat mat)
907: {
908:   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;

910:   MatConjugate(a->A);
911:   return 0;
912: }

914: PetscErrorCode MatRealPart_MPIDense(Mat A)
915: {
916:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

918:   MatRealPart(a->A);
919:   return 0;
920: }

922: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
923: {
924:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

926:   MatImaginaryPart(a->A);
927:   return 0;
928: }

930: static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
931: {
932:   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;

936:   (*a->A->ops->getcolumnvector)(a->A,v,col);
937:   return 0;
938: }

940: PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);

942: PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
943: {
944:   PetscInt       i,m,n;
945:   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
946:   PetscReal      *work;

948:   MatGetSize(A,&m,&n);
949:   PetscMalloc1(n,&work);
950:   if (type == REDUCTION_MEAN_REALPART) {
951:     MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work);
952:   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
953:     MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work);
954:   } else {
955:     MatGetColumnReductions_SeqDense(a->A,type,work);
956:   }
957:   if (type == NORM_2) {
958:     for (i=0; i<n; i++) work[i] *= work[i];
959:   }
960:   if (type == NORM_INFINITY) {
961:     MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
962:   } else {
963:     MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
964:   }
965:   PetscFree(work);
966:   if (type == NORM_2) {
967:     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
968:   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
969:     for (i=0; i<n; i++) reductions[i] /= m;
970:   }
971:   return 0;
972: }

974: #if defined(PETSC_HAVE_CUDA)
975: static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
976: {
977:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
978:   PetscInt       lda;

982:   if (!a->cvec) {
983:     VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);
984:     PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);
985:   }
986:   a->vecinuse = col + 1;
987:   MatDenseGetLDA(a->A,&lda);
988:   MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);
989:   VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);
990:   *v   = a->cvec;
991:   return 0;
992: }

994: static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
995: {
996:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1000:   a->vecinuse = 0;
1001:   MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);
1002:   VecCUDAResetArray(a->cvec);
1003:   if (v) *v = NULL;
1004:   return 0;
1005: }

1007: static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1008: {
1009:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1010:   PetscInt       lda;

1014:   if (!a->cvec) {
1015:     VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);
1016:     PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);
1017:   }
1018:   a->vecinuse = col + 1;
1019:   MatDenseGetLDA(a->A,&lda);
1020:   MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);
1021:   VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);
1022:   VecLockReadPush(a->cvec);
1023:   *v   = a->cvec;
1024:   return 0;
1025: }

1027: static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1028: {
1029:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1033:   a->vecinuse = 0;
1034:   MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);
1035:   VecLockReadPop(a->cvec);
1036:   VecCUDAResetArray(a->cvec);
1037:   if (v) *v = NULL;
1038:   return 0;
1039: }

1041: static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1042: {
1043:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1044:   PetscInt       lda;

1048:   if (!a->cvec) {
1049:     VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);
1050:     PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);
1051:   }
1052:   a->vecinuse = col + 1;
1053:   MatDenseGetLDA(a->A,&lda);
1054:   MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);
1055:   VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);
1056:   *v   = a->cvec;
1057:   return 0;
1058: }

1060: static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1061: {
1062:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1066:   a->vecinuse = 0;
1067:   MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);
1068:   VecCUDAResetArray(a->cvec);
1069:   if (v) *v = NULL;
1070:   return 0;
1071: }

1073: static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1074: {
1075:   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

1079:   MatDenseCUDAPlaceArray(l->A,a);
1080:   return 0;
1081: }

1083: static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1084: {
1085:   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

1089:   MatDenseCUDAResetArray(l->A);
1090:   return 0;
1091: }

1093: static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1094: {
1095:   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

1099:   MatDenseCUDAReplaceArray(l->A,a);
1100:   return 0;
1101: }

1103: static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1104: {
1105:   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

1107:   MatDenseCUDAGetArrayWrite(l->A,a);
1108:   return 0;
1109: }

1111: static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1112: {
1113:   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

1115:   MatDenseCUDARestoreArrayWrite(l->A,a);
1116:   return 0;
1117: }

1119: static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1120: {
1121:   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

1123:   MatDenseCUDAGetArrayRead(l->A,a);
1124:   return 0;
1125: }

1127: static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1128: {
1129:   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

1131:   MatDenseCUDARestoreArrayRead(l->A,a);
1132:   return 0;
1133: }

1135: static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1136: {
1137:   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

1139:   MatDenseCUDAGetArray(l->A,a);
1140:   return 0;
1141: }

1143: static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1144: {
1145:   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

1147:   MatDenseCUDARestoreArray(l->A,a);
1148:   return 0;
1149: }

1151: static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1152: static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1153: static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1154: static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1155: static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1156: static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1157: static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

1159: static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
1160: {
1161:   Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;

1165:   if (d->A) {
1166:     MatBindToCPU(d->A,bind);
1167:   }
1168:   mat->boundtocpu = bind;
1169:   if (!bind) {
1170:     PetscBool iscuda;

1172:     PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);
1173:     if (!iscuda) {
1174:       VecDestroy(&d->cvec);
1175:     }
1176:     PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);
1177:     if (!iscuda) {
1178:       MatDestroy(&d->cmat);
1179:     }
1180:     PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);
1181:     PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);
1182:     PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);
1183:     PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);
1184:     PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);
1185:     PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);
1186:   } else {
1187:     PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);
1188:     PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);
1189:     PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);
1190:     PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);
1191:     PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);
1192:     PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);
1193:   }
1194:   if (d->cmat) {
1195:     MatBindToCPU(d->cmat,bind);
1196:   }
1197:   return 0;
1198: }

1200: PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1201: {
1202:   Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
1203:   PetscBool      iscuda;

1206:   PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);
1207:   if (!iscuda) return 0;
1208:   PetscLayoutSetUp(A->rmap);
1209:   PetscLayoutSetUp(A->cmap);
1210:   if (!d->A) {
1211:     MatCreate(PETSC_COMM_SELF,&d->A);
1212:     PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);
1213:     MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);
1214:   }
1215:   MatSetType(d->A,MATSEQDENSECUDA);
1216:   MatSeqDenseCUDASetPreallocation(d->A,d_data);
1217:   A->preallocated = PETSC_TRUE;
1218:   return 0;
1219: }
1220: #endif

1222: static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1223: {
1224:   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;

1226:   MatSetRandom(d->A,rctx);
1227:   return 0;
1228: }

1230: static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1231: {
1232:   *missing = PETSC_FALSE;
1233:   return 0;
1234: }

1236: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1237: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1238: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1239: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1240: static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1241: static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

1243: /* -------------------------------------------------------------------*/
1244: static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1245:                                         MatGetRow_MPIDense,
1246:                                         MatRestoreRow_MPIDense,
1247:                                         MatMult_MPIDense,
1248:                                 /*  4*/ MatMultAdd_MPIDense,
1249:                                         MatMultTranspose_MPIDense,
1250:                                         MatMultTransposeAdd_MPIDense,
1251:                                         NULL,
1252:                                         NULL,
1253:                                         NULL,
1254:                                 /* 10*/ NULL,
1255:                                         NULL,
1256:                                         NULL,
1257:                                         NULL,
1258:                                         MatTranspose_MPIDense,
1259:                                 /* 15*/ MatGetInfo_MPIDense,
1260:                                         MatEqual_MPIDense,
1261:                                         MatGetDiagonal_MPIDense,
1262:                                         MatDiagonalScale_MPIDense,
1263:                                         MatNorm_MPIDense,
1264:                                 /* 20*/ MatAssemblyBegin_MPIDense,
1265:                                         MatAssemblyEnd_MPIDense,
1266:                                         MatSetOption_MPIDense,
1267:                                         MatZeroEntries_MPIDense,
1268:                                 /* 24*/ MatZeroRows_MPIDense,
1269:                                         NULL,
1270:                                         NULL,
1271:                                         NULL,
1272:                                         NULL,
1273:                                 /* 29*/ MatSetUp_MPIDense,
1274:                                         NULL,
1275:                                         NULL,
1276:                                         MatGetDiagonalBlock_MPIDense,
1277:                                         NULL,
1278:                                 /* 34*/ MatDuplicate_MPIDense,
1279:                                         NULL,
1280:                                         NULL,
1281:                                         NULL,
1282:                                         NULL,
1283:                                 /* 39*/ MatAXPY_MPIDense,
1284:                                         MatCreateSubMatrices_MPIDense,
1285:                                         NULL,
1286:                                         MatGetValues_MPIDense,
1287:                                         MatCopy_MPIDense,
1288:                                 /* 44*/ NULL,
1289:                                         MatScale_MPIDense,
1290:                                         MatShift_Basic,
1291:                                         NULL,
1292:                                         NULL,
1293:                                 /* 49*/ MatSetRandom_MPIDense,
1294:                                         NULL,
1295:                                         NULL,
1296:                                         NULL,
1297:                                         NULL,
1298:                                 /* 54*/ NULL,
1299:                                         NULL,
1300:                                         NULL,
1301:                                         NULL,
1302:                                         NULL,
1303:                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1304:                                         MatDestroy_MPIDense,
1305:                                         MatView_MPIDense,
1306:                                         NULL,
1307:                                         NULL,
1308:                                 /* 64*/ NULL,
1309:                                         NULL,
1310:                                         NULL,
1311:                                         NULL,
1312:                                         NULL,
1313:                                 /* 69*/ NULL,
1314:                                         NULL,
1315:                                         NULL,
1316:                                         NULL,
1317:                                         NULL,
1318:                                 /* 74*/ NULL,
1319:                                         NULL,
1320:                                         NULL,
1321:                                         NULL,
1322:                                         NULL,
1323:                                 /* 79*/ NULL,
1324:                                         NULL,
1325:                                         NULL,
1326:                                         NULL,
1327:                                 /* 83*/ MatLoad_MPIDense,
1328:                                         NULL,
1329:                                         NULL,
1330:                                         NULL,
1331:                                         NULL,
1332:                                         NULL,
1333:                                 /* 89*/ NULL,
1334:                                         NULL,
1335:                                         NULL,
1336:                                         NULL,
1337:                                         NULL,
1338:                                 /* 94*/ NULL,
1339:                                         NULL,
1340:                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1341:                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1342:                                         NULL,
1343:                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1344:                                         NULL,
1345:                                         NULL,
1346:                                         MatConjugate_MPIDense,
1347:                                         NULL,
1348:                                 /*104*/ NULL,
1349:                                         MatRealPart_MPIDense,
1350:                                         MatImaginaryPart_MPIDense,
1351:                                         NULL,
1352:                                         NULL,
1353:                                 /*109*/ NULL,
1354:                                         NULL,
1355:                                         NULL,
1356:                                         MatGetColumnVector_MPIDense,
1357:                                         MatMissingDiagonal_MPIDense,
1358:                                 /*114*/ NULL,
1359:                                         NULL,
1360:                                         NULL,
1361:                                         NULL,
1362:                                         NULL,
1363:                                 /*119*/ NULL,
1364:                                         NULL,
1365:                                         NULL,
1366:                                         NULL,
1367:                                         NULL,
1368:                                 /*124*/ NULL,
1369:                                         MatGetColumnReductions_MPIDense,
1370:                                         NULL,
1371:                                         NULL,
1372:                                         NULL,
1373:                                 /*129*/ NULL,
1374:                                         NULL,
1375:                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1376:                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1377:                                         NULL,
1378:                                 /*134*/ NULL,
1379:                                         NULL,
1380:                                         NULL,
1381:                                         NULL,
1382:                                         NULL,
1383:                                 /*139*/ NULL,
1384:                                         NULL,
1385:                                         NULL,
1386:                                         NULL,
1387:                                         NULL,
1388:                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1389:                                 /*145*/ NULL,
1390:                                         NULL,
1391:                                         NULL
1392: };

1394: PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1395: {
1396:   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1397:   PetscBool      iscuda;

1400:   PetscLayoutSetUp(mat->rmap);
1401:   PetscLayoutSetUp(mat->cmap);
1402:   if (!a->A) {
1403:     MatCreate(PETSC_COMM_SELF,&a->A);
1404:     PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1405:     MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1406:   }
1407:   PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);
1408:   MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);
1409:   MatSeqDenseSetPreallocation(a->A,data);
1410:   mat->preallocated = PETSC_TRUE;
1411:   return 0;
1412: }
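
/*
   Usage sketch (illustrative; not part of the original source): this routine is
   normally reached through MatCreateDense(); pass NULL for the data pointer to let
   PETSc allocate the storage.

     Mat A;
     MatCreateDense(PETSC_COMM_WORLD,m,n,PETSC_DECIDE,PETSC_DECIDE,NULL,&A);
*/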

1414: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1415: {
1416:   Mat            B,C;

1418:   MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C);
1419:   MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B);
1420:   MatDestroy(&C);
1421:   if (reuse == MAT_REUSE_MATRIX) {
1422:     C = *newmat;
1423:   } else C = NULL;
1424:   MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);
1425:   MatDestroy(&B);
1426:   if (reuse == MAT_INPLACE_MATRIX) {
1427:     MatHeaderReplace(A,&C);
1428:   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1429:   return 0;
1430: }

1432: PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1433: {
1434:   Mat            B,C;

1436:   MatDenseGetLocalMatrix(A,&C);
1437:   MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1438:   if (reuse == MAT_REUSE_MATRIX) {
1439:     C = *newmat;
1440:   } else C = NULL;
1441:   MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C);
1442:   MatDestroy(&B);
1443:   if (reuse == MAT_INPLACE_MATRIX) {
1444:     MatHeaderReplace(A,&C);
1445:   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1446:   return 0;
1447: }

1449: #if defined(PETSC_HAVE_ELEMENTAL)
1450: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1451: {
1452:   Mat            mat_elemental;
1453:   PetscScalar    *v;
1454:   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

1456:   if (reuse == MAT_REUSE_MATRIX) {
1457:     mat_elemental = *newmat;
1458:     MatZeroEntries(*newmat);
1459:   } else {
1460:     MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
1461:     MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);
1462:     MatSetType(mat_elemental,MATELEMENTAL);
1463:     MatSetUp(mat_elemental);
1464:     MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);
1465:   }

1467:   PetscMalloc2(m,&rows,N,&cols);
1468:   for (i=0; i<N; i++) cols[i] = i;
1469:   for (i=0; i<m; i++) rows[i] = rstart + i;

 1471:   /* The PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
1472:   MatDenseGetArray(A,&v);
1473:   MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);
1474:   MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
1475:   MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
1476:   MatDenseRestoreArray(A,&v);
1477:   PetscFree2(rows,cols);

1479:   if (reuse == MAT_INPLACE_MATRIX) {
1480:     MatHeaderReplace(A,&mat_elemental);
1481:   } else {
1482:     *newmat = mat_elemental;
1483:   }
1484:   return 0;
1485: }
1486: #endif

1488: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1489: {
1490:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;

1492:   MatDenseGetColumn(mat->A,col,vals);
1493:   return 0;
1494: }

1496: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1497: {
1498:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;

1500:   MatDenseRestoreColumn(mat->A,vals);
1501:   return 0;
1502: }

1504: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1505: {
1506:   Mat_MPIDense   *mat;
1507:   PetscInt       m,nloc,N;

1509:   MatGetSize(inmat,&m,&N);
1510:   MatGetLocalSize(inmat,NULL,&nloc);
1511:   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1512:     PetscInt sum;

1514:     if (n == PETSC_DECIDE) {
1515:       PetscSplitOwnership(comm,&n,&N);
1516:     }
1517:     /* Check sum(n) = N */
1518:     MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
1519:     if (sum != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

1521:     MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);
1522:     MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
1523:   }

1525:   /* numeric phase */
1526:   mat = (Mat_MPIDense*)(*outmat)->data;
1527:   MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);
1528:   MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
1529:   MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
1530:   return 0;
1531: }
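
/*
   Hedged usage sketch: every rank holds a sequential dense block "local"
   (same global column count on all ranks); the first call runs the symbolic
   phase and creates the distributed matrix, later calls reuse it and only
   execute the numeric phase (the MatCopy above).

     Mat local,global = NULL;
     ...
     MatCreateMPIMatConcatenateSeqMat(comm,local,PETSC_DECIDE,MAT_INITIAL_MATRIX,&global);
     ...
     MatCreateMPIMatConcatenateSeqMat(comm,local,PETSC_DECIDE,MAT_REUSE_MATRIX,&global);
*/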

1533: #if defined(PETSC_HAVE_CUDA)
1534: PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1535: {
1536:   Mat            B;
1537:   Mat_MPIDense   *m;

1539:   if (reuse == MAT_INITIAL_MATRIX) {
1540:     MatDuplicate(M,MAT_COPY_VALUES,newmat);
1541:   } else if (reuse == MAT_REUSE_MATRIX) {
1542:     MatCopy(M,*newmat,SAME_NONZERO_PATTERN);
1543:   }

1545:   B    = *newmat;
1546:   MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);
1547:   PetscFree(B->defaultvectype);
1548:   PetscStrallocpy(VECSTANDARD,&B->defaultvectype);
1549:   PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);
1550:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);
1551:   PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);
1552:   PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);
1553:   PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);
1554:   PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);
1555:   PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);
1556:   PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);
1557:   PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);
1558:   PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);
1559:   PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);
1560:   PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);
1561:   PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);
1562:   PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);
1563:   PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);
1564:   m    = (Mat_MPIDense*)(B)->data;
1565:   if (m->A) {
1566:     MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);
1567:     MatSetUpMultiply_MPIDense(B);
1568:   }
1569:   B->ops->bindtocpu = NULL;
1570:   B->offloadmask    = PETSC_OFFLOAD_CPU;
1571:   return 0;
1572: }

1574: PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1575: {
1576:   Mat            B;
1577:   Mat_MPIDense   *m;

1579:   if (reuse == MAT_INITIAL_MATRIX) {
1580:     MatDuplicate(M,MAT_COPY_VALUES,newmat);
1581:   } else if (reuse == MAT_REUSE_MATRIX) {
1582:     MatCopy(M,*newmat,SAME_NONZERO_PATTERN);
1583:   }

1585:   B    = *newmat;
1586:   PetscFree(B->defaultvectype);
1587:   PetscStrallocpy(VECCUDA,&B->defaultvectype);
1588:   PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);
1589:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);
1590:   PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);
1591:   PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);
1592:   PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);
1593:   PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);
1594:   PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);
1595:   PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);
1596:   PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);
1597:   PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);
1598:   PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);
1599:   PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);
1600:   PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);
1601:   PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);
1602:   PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);
1603:   m    = (Mat_MPIDense*)(B->data);
1604:   if (m->A) {
1605:     MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);
1606:     MatSetUpMultiply_MPIDense(B);
1607:     B->offloadmask = PETSC_OFFLOAD_BOTH;
1608:   } else {
1609:     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1610:   }
1611:   MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);

1613:   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1614:   return 0;
1615: }
1616: #endif

1618: PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1619: {
1620:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1621:   PetscInt       lda;

1625:   if (!a->cvec) {
1626:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);
1627:   }
1628:   a->vecinuse = col + 1;
1629:   MatDenseGetLDA(a->A,&lda);
1630:   MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);
1631:   VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);
1632:   *v   = a->cvec;
1633:   return 0;
1634: }

1636: PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1637: {
1638:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1642:   a->vecinuse = 0;
1643:   MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);
1644:   VecResetArray(a->cvec);
1645:   if (v) *v = NULL;
1646:   return 0;
1647: }
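
/*
   Hedged usage sketch: viewing column col of a MATMPIDENSE matrix as a Vec
   without copying. The matrix retains ownership of the vector (a->cvec), so
   the caller must not destroy it, and a->vecinuse means only one column can
   be checked out at a time.

     Vec       v;
     PetscReal nrm;
     MatDenseGetColumnVec(A,col,&v);
     VecNorm(v,NORM_2,&nrm);
     MatDenseRestoreColumnVec(A,col,&v);
*/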

1649: PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1650: {
1651:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1652:   PetscInt       lda;

1656:   if (!a->cvec) {
1657:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);
1658:   }
1659:   a->vecinuse = col + 1;
1660:   MatDenseGetLDA(a->A,&lda);
1661:   MatDenseGetArrayRead(a->A,&a->ptrinuse);
1662:   VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);
1663:   VecLockReadPush(a->cvec);
1664:   *v   = a->cvec;
1665:   return 0;
1666: }

1668: PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1669: {
1670:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1674:   a->vecinuse = 0;
1675:   MatDenseRestoreArrayRead(a->A,&a->ptrinuse);
1676:   VecLockReadPop(a->cvec);
1677:   VecResetArray(a->cvec);
1678:   if (v) *v = NULL;
1679:   return 0;
1680: }

1682: PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1683: {
1684:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1685:   PetscInt       lda;

1689:   if (!a->cvec) {
1690:     VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);
1691:   }
1692:   a->vecinuse = col + 1;
1693:   MatDenseGetLDA(a->A,&lda);
1694:   MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);
1695:   VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);
1696:   *v   = a->cvec;
1697:   return 0;
1698: }

1700: PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1701: {
1702:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;

1706:   a->vecinuse = 0;
1707:   MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);
1708:   VecResetArray(a->cvec);
1709:   if (v) *v = NULL;
1710:   return 0;
1711: }

1713: PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
1714: {
1715:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1716:   Mat_MPIDense   *c;
1717:   MPI_Comm       comm;
1718:   PetscBool      setup = PETSC_FALSE;

1720:   PetscObjectGetComm((PetscObject)A,&comm);
1723:   if (!a->cmat) {
1724:     setup = PETSC_TRUE;
1725:     MatCreate(comm,&a->cmat);
1726:     PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);
1727:     MatSetType(a->cmat,((PetscObject)A)->type_name);
1728:     PetscLayoutReference(A->rmap,&a->cmat->rmap);
1729:     PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);
1730:     PetscLayoutSetUp(a->cmat->cmap);
1731:   } else if (cend-cbegin != a->cmat->cmap->N) {
1732:     setup = PETSC_TRUE;
1733:     PetscLayoutDestroy(&a->cmat->cmap);
1734:     PetscLayoutCreate(comm,&a->cmat->cmap);
1735:     PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);
1736:     PetscLayoutSetUp(a->cmat->cmap);
1737:   }
1738:   c = (Mat_MPIDense*)a->cmat->data;
1740:   MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);
1741:   if (setup) { /* do we really need this? */
1742:     MatSetUpMultiply_MPIDense(a->cmat);
1743:   }
1744:   a->cmat->preallocated = PETSC_TRUE;
1745:   a->cmat->assembled = PETSC_TRUE;
1746:   a->matinuse = cbegin + 1;
1747:   *v = a->cmat;
1748:   return 0;
1749: }

1751: PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1752: {
1753:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1754:   Mat_MPIDense *c;

1759:   a->matinuse = 0;
1760:   c    = (Mat_MPIDense*)a->cmat->data;
1761:   MatDenseRestoreSubMatrix(a->A,&c->A);
1762:   if (v) *v = NULL;
1763:   return 0;
1764: }
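
/*
   Hedged usage sketch: columns [cbegin,cend) of A exposed as a matrix that
   aliases A's storage (a->cmat). Only one submatrix may be outstanding at a
   time (guarded by a->matinuse), and it must be returned before A is used
   again.

     Mat Asub;
     MatDenseGetSubMatrix(A,cbegin,cend,&Asub);
     MatMult(Asub,x,y);                          x, y sized for the slice
     MatDenseRestoreSubMatrix(A,&Asub);
*/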

1766: /*MC
1767:    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

1769:    Options Database Keys:
1770: . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

1772:   Level: beginner

1774: .seealso: MatCreateDense()

1776: M*/
1777: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1778: {
1779:   Mat_MPIDense   *a;

1781:   PetscNewLog(mat,&a);
1782:   mat->data = (void*)a;
1783:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));

1785:   mat->insertmode = NOT_SET_VALUES;

1787:   /* build cache for off array entries formed */
1788:   a->donotstash = PETSC_FALSE;

1790:   MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);

1792:   /* stuff used for matrix vector multiply */
1793:   a->lvec        = NULL;
1794:   a->Mvctx       = NULL;
1795:   a->roworiented = PETSC_TRUE;

1797:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);
1798:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);
1799:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);
1800:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);
1801:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);
1802:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);
1803:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);
1804:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);
1805:   PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);
1806:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);
1807:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);
1808:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);
1809:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);
1810:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);
1811:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);
1812:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);
1813:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);
1814:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);
1815:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);
1816:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);
1817:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ);
1818: #if defined(PETSC_HAVE_ELEMENTAL)
1819:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);
1820: #endif
1821: #if defined(PETSC_HAVE_SCALAPACK)
1822:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);
1823: #endif
1824: #if defined(PETSC_HAVE_CUDA)
1825:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);
1826: #endif
1827:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);
1828:   PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);
1829:   PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);
1830: #if defined(PETSC_HAVE_CUDA)
1831:   PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);
1832:   PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);
1833: #endif

1835:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);
1836:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);
1837:   PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1838:   return 0;
1839: }

1841: /*MC
1842:    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

1844:    Options Database Keys:
1845: . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

1847:   Level: beginner

1849: .seealso: MATMPIDENSE, MATSEQDENSECUDA, MATDENSECUDA

1851: M*/
1852: #if defined(PETSC_HAVE_CUDA)
1853: #include <petsc/private/deviceimpl.h>
1854: PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
1855: {
1856:   PetscDeviceInitialize(PETSC_DEVICE_CUDA);
1857:   MatCreate_MPIDense(B);
1858:   MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);
1859:   return 0;
1860: }
1861: #endif

1863: /*MC
1864:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1866:    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1867:    and MATMPIDENSE otherwise.

1869:    Options Database Keys:
1870: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

1872:   Level: beginner

1874: .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1875: M*/

1877: /*MC
1878:    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

1880:    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1881:    and MATMPIDENSECUDA otherwise.

1883:    Options Database Keys:
1884: . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

1886:   Level: beginner

1888: .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1889: M*/

1891: /*@C
1892:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1894:    Collective

1896:    Input Parameters:
1897: +  B - the matrix
1898: -  data - optional location of matrix data.  Set data=NULL for PETSc
1899:    to control all matrix memory allocation.

1901:    Notes:
1902:    The dense format is fully compatible with standard Fortran 77
1903:    storage by columns.

1905:    The data input variable is intended primarily for Fortran programmers
1906:    who wish to allocate their own matrix memory space.  Most users should
1907:    set data=NULL.

1909:    Level: intermediate

1911: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1912: @*/
1913: PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1914: {
1916:   PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
1917:   return 0;
1918: }
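
/*
   Hedged usage sketch, assuming local sizes m,n and global sizes M,N have
   been chosen by the caller: data may be NULL (PETSc allocates) or a user
   column-major array of at least m*n entries that outlives the matrix.

     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,m,n,M,N);
     MatSetType(B,MATMPIDENSE);
     MatMPIDenseSetPreallocation(B,NULL);
*/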

1920: /*@
1921:    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1922:    array provided by the user. This is useful to avoid copying an array
1923:    into a matrix

1925:    Not Collective

1927:    Input Parameters:
1928: +  mat - the matrix
1929: -  array - the array in column major order

1931:    Notes:
1932:    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1933:    freed when the matrix is destroyed.

1935:    Level: developer

1937: .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

1939: @*/
1940: PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
1941: {
1943:   PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
1944:   PetscObjectStateIncrease((PetscObject)mat);
1945: #if defined(PETSC_HAVE_CUDA)
1946:   mat->offloadmask = PETSC_OFFLOAD_CPU;
1947: #endif
1948:   return 0;
1949: }
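
/*
   Hedged usage sketch: temporarily backing mat with a user buffer. Here buf
   is a hypothetical column-major array with at least lda*n local entries;
   the caller frees it, and MatDenseResetArray() must be called before the
   original storage is needed again.

     MatDensePlaceArray(mat,buf);
     ...                                  operations now read/write buf
     MatDenseResetArray(mat);
*/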

1951: /*@
1952:    MatDenseResetArray - Resets the matrix array to the array that it previously had before the call to MatDensePlaceArray()

1954:    Not Collective

1956:    Input Parameter:
1957: .  mat - the matrix

1959:    Notes:
1960:    You can only call this after a call to MatDensePlaceArray()

1962:    Level: developer

1964: .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

1966: @*/
1967: PetscErrorCode  MatDenseResetArray(Mat mat)
1968: {
1970:   PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
1971:   PetscObjectStateIncrease((PetscObject)mat);
1972:   return 0;
1973: }

1975: /*@
1976:    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1977:    array provided by the user. This is useful to avoid copying an array
1978:    into a matrix

1980:    Not Collective

1982:    Input Parameters:
1983: +  mat - the matrix
1984: -  array - the array in column major order

1986:    Notes:
1987:    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
1988:    freed by the user. It will be freed when the matrix is destroyed.

1990:    Level: developer

1992: .seealso: MatDenseGetArray(), VecReplaceArray()
1993: @*/
1994: PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
1995: {
1997:   PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
1998:   PetscObjectStateIncrease((PetscObject)mat);
1999: #if defined(PETSC_HAVE_CUDA)
2000:   mat->offloadmask = PETSC_OFFLOAD_CPU;
2001: #endif
2002:   return 0;
2003: }
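
/*
   Hedged usage sketch: unlike MatDensePlaceArray(), ownership transfers to
   the matrix, so the buffer must come from PetscMalloc() and must not be
   freed by the caller (lda and n below are assumed local sizes).

     PetscScalar *buf;
     PetscMalloc1(lda*n,&buf);
     ...                                  fill buf in column-major order
     MatDenseReplaceArray(mat,buf);       mat now owns buf
*/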

2005: #if defined(PETSC_HAVE_CUDA)
2006: /*@C
2007:    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2008:    array provided by the user. This is useful to avoid copying an array
2009:    into a matrix

2011:    Not Collective

2013:    Input Parameters:
2014: +  mat - the matrix
2015: -  array - the array in column major order

2017:    Notes:
2018:    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2019:    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

2021:    Level: developer

2023: .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2024: @*/
2025: PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2026: {
2028:   PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));
2029:   PetscObjectStateIncrease((PetscObject)mat);
2030:   mat->offloadmask = PETSC_OFFLOAD_GPU;
2031:   return 0;
2032: }
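
/*
   Hedged usage sketch (CUDA build assumed): the device buffer is user-owned
   and must come from cudaMalloc(); reset the matrix before freeing it.

     PetscScalar *d_buf;
     cudaMalloc((void**)&d_buf,(size_t)lda*n*sizeof(PetscScalar));
     MatDenseCUDAPlaceArray(mat,d_buf);
     ...
     MatDenseCUDAResetArray(mat);
     cudaFree(d_buf);
*/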

2034: /*@C
2035:    MatDenseCUDAResetArray - Resets the matrix array to the array that it previously had before the call to MatDenseCUDAPlaceArray()

2037:    Not Collective

2039:    Input Parameter:
2040: .  mat - the matrix

2042:    Notes:
2043:    You can only call this after a call to MatDenseCUDAPlaceArray()

2045:    Level: developer

2047: .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

2049: @*/
2050: PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2051: {
2053:   PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));
2054:   PetscObjectStateIncrease((PetscObject)mat);
2055:   return 0;
2056: }

2058: /*@C
2059:    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2060:    array provided by the user. This is useful to avoid copying an array
2061:    into a matrix

2063:    Not Collective

2065:    Input Parameters:
2066: +  mat - the matrix
2067: -  array - the array in column major order

2069:    Notes:
2070:    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2071:    The memory passed in CANNOT be freed by the user. It will be freed
2072:    when the matrix is destroyed. The array should respect the matrix leading dimension.

2074:    Level: developer

2076: .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2077: @*/
2078: PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2079: {
2081:   PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
2082:   PetscObjectStateIncrease((PetscObject)mat);
2083:   mat->offloadmask = PETSC_OFFLOAD_GPU;
2084:   return 0;
2085: }

2087: /*@C
2088:    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

2090:    Not Collective

2092:    Input Parameter:
2093: .  A - the matrix

2095:    Output Parameter:
2096: .  array - the GPU array in column major order

2098:    Notes:
2099:    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

2101:    Level: developer

2103: .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2104: @*/
2105: PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2106: {
2108:   PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));
2109:   PetscObjectStateIncrease((PetscObject)A);
2110:   return 0;
2111: }

2113: /*@C
2114:    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

2116:    Not Collective

2118:    Input Parameters:
2119: +  A - the matrix
2120: -  array - the GPU array in column major order

2124:    Level: developer

2126: .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2127: @*/
2128: PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2129: {
2131:   PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));
2132:   PetscObjectStateIncrease((PetscObject)A);
2133:   A->offloadmask = PETSC_OFFLOAD_GPU;
2134:   return 0;
2135: }

2137: /*@C
2138:    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

2140:    Not Collective

2142:    Input Parameter:
2143: .  A - the matrix

2145:    Output Parameter:
2146: .  array - the GPU array in column major order

2148:    Notes:
2149:    Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use MatDenseCUDAGetArrayWrite().

2151:    Level: developer

2153: .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2154: @*/
2155: PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2156: {
2158:   PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));
2159:   return 0;
2160: }

2162: /*@C
2163:    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().

2165:    Not Collective

2167:    Input Parameters:
2168: +  A - the matrix
2169: -  array - the GPU array in column major order

2171:    Notes:
2172:    Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use MatDenseCUDAGetArrayWrite().

2174:    Level: developer

2176: .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2177: @*/
2178: PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2179: {
2180:   PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));
2181:   return 0;
2182: }

2184: /*@C
2185:    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

2187:    Not Collective

2189:    Input Parameter:
2190: .  A - the matrix

2192:    Output Parameter:
2193: .  array - the GPU array in column major order

2195:    Notes:
2196:    Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

2198:    Level: developer

2200: .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2201: @*/
2202: PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2203: {
2205:   PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));
2206:   PetscObjectStateIncrease((PetscObject)A);
2207:   return 0;
2208: }

2210: /*@C
2211:    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

2213:    Not Collective

2215:    Input Parameters:
2216: +  A - the matrix
2217: -  array - the GPU array in column major order

2221:    Level: developer

2223: .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2224: @*/
2225: PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2226: {
2228:   PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));
2229:   PetscObjectStateIncrease((PetscObject)A);
2230:   A->offloadmask = PETSC_OFFLOAD_GPU;
2231:   return 0;
2232: }
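
/*
   Hedged usage sketch: read-write access to the device values. The Get call
   may trigger a host-to-device copy if the CPU copy is newer; after the
   Restore call the offload mask marks the GPU copy as authoritative.

     PetscScalar *d_a;
     MatDenseCUDAGetArray(A,&d_a);
     ...                                  launch kernels on d_a (column-major)
     MatDenseCUDARestoreArray(A,&d_a);
*/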
2233: #endif

2235: /*@C
2236:    MatCreateDense - Creates a matrix in dense format.

2238:    Collective

2240:    Input Parameters:
2241: +  comm - MPI communicator
2242: .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2243: .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2244: .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
2245: .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2246: -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2247:    to control all matrix memory allocation.

2249:    Output Parameter:
2250: .  A - the matrix

2252:    Notes:
2253:    The dense format is fully compatible with standard Fortran 77
2254:    storage by columns.
2255:    Note that, although local portions of the matrix are stored in column-major
2256:    order, the matrix is partitioned across MPI ranks by row.

2258:    The data input variable is intended primarily for Fortran programmers
2259:    who wish to allocate their own matrix memory space.  Most users should
2260:    set data=NULL (PETSC_NULL_SCALAR for Fortran users).

2262:    The user MUST specify either the local or global matrix dimensions
2263:    (possibly both).

2265:    Level: intermediate

2267: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2268: @*/
2269: PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2270: {
2271:   PetscMPIInt    size;

2273:   MatCreate(comm,A);
2274:   MatSetSizes(*A,m,n,M,N);
2275:   MPI_Comm_size(comm,&size);
2276:   if (size > 1) {
2277:     PetscBool havedata = (PetscBool)!!data;

2279:     MatSetType(*A,MATMPIDENSE);
2280:     MatMPIDenseSetPreallocation(*A,data);
2281:     MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);
2282:     if (havedata) {  /* user provided data array, so no need to assemble */
2283:       MatSetUpMultiply_MPIDense(*A);
2284:       (*A)->assembled = PETSC_TRUE;
2285:     }
2286:   } else {
2287:     MatSetType(*A,MATSEQDENSE);
2288:     MatSeqDenseSetPreallocation(*A,data);
2289:   }
2290:   return 0;
2291: }
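
/*
   Hedged usage sketch: a 100 x 50 dense matrix with PETSc-managed storage
   and a PETSc-chosen row distribution; this yields MATSEQDENSE on one rank
   and MATMPIDENSE otherwise, per the branches above.

     Mat A;
     MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,NULL,&A);
     MatSetValue(A,0,0,1.0,INSERT_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/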

2293: #if defined(PETSC_HAVE_CUDA)
2294: /*@C
2295:    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

2297:    Collective

2299:    Input Parameters:
2300: +  comm - MPI communicator
2301: .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2302: .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2303: .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
2304: .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2305: -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2306:    to control matrix memory allocation.

2308:    Output Parameter:
2309: .  A - the matrix

2313:    Level: intermediate

2315: .seealso: MatCreate(), MatCreateDense()
2316: @*/
2317: PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2318: {
2319:   PetscMPIInt    size;

2321:   MatCreate(comm,A);
2323:   MatSetSizes(*A,m,n,M,N);
2324:   MPI_Comm_size(comm,&size);
2325:   if (size > 1) {
2326:     MatSetType(*A,MATMPIDENSECUDA);
2327:     MatMPIDenseCUDASetPreallocation(*A,data);
2328:     if (data) {  /* user provided data array, so no need to assemble */
2329:       MatSetUpMultiply_MPIDense(*A);
2330:       (*A)->assembled = PETSC_TRUE;
2331:     }
2332:   } else {
2333:     MatSetType(*A,MATSEQDENSECUDA);
2334:     MatSeqDenseCUDASetPreallocation(*A,data);
2335:   }
2336:   return 0;
2337: }
2338: #endif

2340: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2341: {
2342:   Mat            mat;
2343:   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;

2345:   *newmat = NULL;
2346:   MatCreate(PetscObjectComm((PetscObject)A),&mat);
2347:   MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
2348:   MatSetType(mat,((PetscObject)A)->type_name);
2349:   a       = (Mat_MPIDense*)mat->data;

2351:   mat->factortype   = A->factortype;
2352:   mat->assembled    = PETSC_TRUE;
2353:   mat->preallocated = PETSC_TRUE;

2355:   mat->insertmode = NOT_SET_VALUES;
2356:   a->donotstash   = oldmat->donotstash;

2358:   PetscLayoutReference(A->rmap,&mat->rmap);
2359:   PetscLayoutReference(A->cmap,&mat->cmap);

2361:   MatDuplicate(oldmat->A,cpvalues,&a->A);
2362:   PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
2363:   MatSetUpMultiply_MPIDense(mat);

2365:   *newmat = mat;
2366:   return 0;
2367: }

2369: PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2370: {
2371:   PetscBool      isbinary;
2372: #if defined(PETSC_HAVE_HDF5)
2373:   PetscBool      ishdf5;
2374: #endif

2378:   /* force binary viewer to load .info file if it has not yet done so */
2379:   PetscViewerSetUp(viewer);
2380:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
2381: #if defined(PETSC_HAVE_HDF5)
2382:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);
2383: #endif
2384:   if (isbinary) {
2385:     MatLoad_Dense_Binary(newMat,viewer);
2386: #if defined(PETSC_HAVE_HDF5)
2387:   } else if (ishdf5) {
2388:     MatLoad_Dense_HDF5(newMat,viewer);
2389: #endif
2390:   } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2391:   return 0;
2392: }

2394: static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2395: {
2396:   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2397:   Mat            a,b;
2398:   PetscBool      flg;

2400:   a    = matA->A;
2401:   b    = matB->A;
2402:   MatEqual(a,b,&flg);
2403:   MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
2404:   return 0;
2405: }

2407: PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2408: {
2409:   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

2411:   PetscFree2(atb->sendbuf,atb->recvcounts);
2412:   MatDestroy(&atb->atb);
2413:   PetscFree(atb);
2414:   return 0;
2415: }

2417: PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2418: {
2419:   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

2421:   PetscFree2(abt->buf[0],abt->buf[1]);
2422:   PetscFree2(abt->recvcounts,abt->recvdispls);
2423:   PetscFree(abt);
2424:   return 0;
2425: }

2427: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2428: {
2429:   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2430:   Mat_TransMatMultDense *atb;
2431:   MPI_Comm              comm;
2432:   PetscMPIInt           size,*recvcounts;
2433:   PetscScalar           *carray,*sendbuf;
2434:   const PetscScalar     *atbarray;
2435:   PetscInt              i,cN=C->cmap->N,proc,k,j,lda;
2436:   const PetscInt        *ranges;

2438:   MatCheckProduct(C,3);
2440:   atb = (Mat_TransMatMultDense *)C->product->data;
2441:   recvcounts = atb->recvcounts;
2442:   sendbuf = atb->sendbuf;

2444:   PetscObjectGetComm((PetscObject)A,&comm);
2445:   MPI_Comm_size(comm,&size);

2447:   /* compute atbarray = aseq^T * bseq */
2448:   MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);

2450:   MatGetOwnershipRanges(C,&ranges);

2452:   /* arrange atbarray into sendbuf */
2453:   MatDenseGetArrayRead(atb->atb,&atbarray);
2454:   MatDenseGetLDA(atb->atb,&lda);
2455:   for (proc=0, k=0; proc<size; proc++) {
2456:     for (j=0; j<cN; j++) {
2457:       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda];
2458:     }
2459:   }
2460:   MatDenseRestoreArrayRead(atb->atb,&atbarray);

2462:   /* reduce-scatter the packed atbarray contributions into the local rows of C */
2463:   MatDenseGetArrayWrite(c->A,&carray);
2464:   MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);
2465:   MatDenseRestoreArrayWrite(c->A,&carray);
2466:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2467:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2468:   return 0;
2469: }
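
/*
   Worked example of the packing above (illustrative: 2 ranks, ranges =
   {0,2,4}, cN = 2): each rank's local product atbarray is 4 x 2 with leading
   dimension lda; the loop packs the entries destined for rank p (rows
   ranges[p]..ranges[p+1]-1, all cN columns) contiguously. MPI_Reduce_scatter
   then sums the packed buffers elementwise across ranks and delivers
   recvcounts[p] = (ranges[p+1]-ranges[p])*cN entries to rank p, which are
   exactly its local rows of C in column-major order.
*/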

2471: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2472: {
2473:   MPI_Comm              comm;
2474:   PetscMPIInt           size;
2475:   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2476:   Mat_TransMatMultDense *atb;
2477:   PetscBool             cisdense;
2478:   PetscInt              i;
2479:   const PetscInt        *ranges;

2481:   MatCheckProduct(C,3);
2483:   PetscObjectGetComm((PetscObject)A,&comm);
2484:   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2485:     SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2486:   }

2488:   /* create matrix product C */
2489:   MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);
2490:   PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");
2491:   if (!cisdense) {
2492:     MatSetType(C,((PetscObject)A)->type_name);
2493:   }
2494:   MatSetUp(C);

2496:   /* create data structure for reuse C */
2497:   MPI_Comm_size(comm,&size);
2498:   PetscNew(&atb);
2499:   cM   = C->rmap->N;
2500:   PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts);
2501:   MatGetOwnershipRanges(C,&ranges);
2502:   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

2504:   C->product->data    = atb;
2505:   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2506:   return 0;
2507: }

2509: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2510: {
2511:   MPI_Comm              comm;
2512:   PetscMPIInt           i, size;
2513:   PetscInt              maxRows, bufsiz;
2514:   PetscMPIInt           tag;
2515:   PetscInt              alg;
2516:   Mat_MatTransMultDense *abt;
2517:   Mat_Product           *product = C->product;
2518:   PetscBool             flg;

2520:   MatCheckProduct(C,4);
2522:   /* check local size of A and B */
2523:   if (A->cmap->n != B->cmap->n)
2524:     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n);

2525:   PetscStrcmp(product->alg,"allgatherv",&flg);
2526:   alg  = flg ? 0 : 1;

2528:   /* setup matrix product C */
2529:   MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);
2530:   MatSetType(C,MATMPIDENSE);
2531:   MatSetUp(C);
2532:   PetscObjectGetNewTag((PetscObject)C,&tag);

2534:   /* create data structure for reuse C */
2535:   PetscObjectGetComm((PetscObject)C,&comm);
2536:   MPI_Comm_size(comm,&size);
2537:   PetscNew(&abt);
2538:   abt->tag = tag;
2539:   abt->alg = alg;
2540:   switch (alg) {
2541:   case 1: /* alg: "cyclic" */
2542:     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2543:     bufsiz = A->cmap->N * maxRows;
2544:     PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));
2545:     break;
2546:   default: /* alg: "allgatherv" */
2547:     PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));
2548:     PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));
2549:     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2550:     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2551:     break;
2552:   }

2554:   C->product->data    = abt;
2555:   C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2556:   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2557:   return 0;
2558: }

2560: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2561: {
2562:   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2563:   Mat_MatTransMultDense *abt;
2564:   MPI_Comm              comm;
2565:   PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2566:   PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
2567:   PetscInt              i,cK=A->cmap->N,k,j,bn;
2568:   PetscScalar           _DOne=1.0,_DZero=0.0;
2569:   const PetscScalar     *av,*bv;
2570:   PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
2571:   MPI_Request           reqs[2];
2572:   const PetscInt        *ranges;

2574:   MatCheckProduct(C,3);
2576:   abt  = (Mat_MatTransMultDense*)C->product->data;
2577:   PetscObjectGetComm((PetscObject)C,&comm);
2578:   MPI_Comm_rank(comm,&rank);
2579:   MPI_Comm_size(comm,&size);
2580:   MatDenseGetArrayRead(a->A,&av);
2581:   MatDenseGetArrayRead(b->A,&bv);
2582:   MatDenseGetArrayWrite(c->A,&cv);
2583:   MatDenseGetLDA(a->A,&i);
2584:   PetscBLASIntCast(i,&alda);
2585:   MatDenseGetLDA(b->A,&i);
2586:   PetscBLASIntCast(i,&blda);
2587:   MatDenseGetLDA(c->A,&i);
2588:   PetscBLASIntCast(i,&clda);
2589:   MatGetOwnershipRanges(B,&ranges);
2590:   bn   = B->rmap->n;
2591:   if (blda == bn) {
2592:     sendbuf = (PetscScalar*)bv;
2593:   } else {
2594:     sendbuf = abt->buf[0];
2595:     for (k = 0, i = 0; i < cK; i++) {
2596:       for (j = 0; j < bn; j++, k++) {
2597:         sendbuf[k] = bv[i * blda + j];
2598:       }
2599:     }
2600:   }
2601:   if (size > 1) {
2602:     sendto = (rank + size - 1) % size;
2603:     recvfrom = (rank + size + 1) % size;
2604:   } else {
2605:     sendto = recvfrom = 0;
2606:   }
2607:   PetscBLASIntCast(cK,&ck);
2608:   PetscBLASIntCast(c->A->rmap->n,&cm);
2609:   recvisfrom = rank;
2610:   for (i = 0; i < size; i++) {
2611:     /* we have finished receiving and sending, bufs can be read/modified */
2612:     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2613:     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

2615:     if (nextrecvisfrom != rank) {
2616:       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2617:       sendsiz = cK * bn;
2618:       recvsiz = cK * nextbn;
2619:       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2620:       MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);
2621:       MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);
2622:     }

2624:     /* local aseq * sendbuf^T */
2625:     PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);
2626:     if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

2628:     if (nextrecvisfrom != rank) {
2629:       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2630:       MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
2631:     }
2632:     bn = nextbn;
2633:     recvisfrom = nextrecvisfrom;
2634:     sendbuf = recvbuf;
2635:   }
2636:   MatDenseRestoreArrayRead(a->A,&av);
2637:   MatDenseRestoreArrayRead(b->A,&bv);
2638:   MatDenseRestoreArrayWrite(c->A,&cv);
2639:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2640:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2641:   return 0;
2642: }
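
/*
   Shape of the cyclic schedule above, sketched for size = 3: in step i each
   rank multiplies its local A rows against the block of B rows it currently
   holds (filling columns ranges[recvisfrom]..ranges[recvisfrom+1]-1 of C)
   while that block is simultaneously forwarded to rank-1 (mod size) and the
   next block arrives from rank+1. After size steps every rank has seen all
   of B; the two buffers in abt->buf alternate as send/receive targets so
   computation and communication overlap.
*/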

2644: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2645: {
2646:   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2647:   Mat_MatTransMultDense *abt;
2648:   MPI_Comm              comm;
2649:   PetscMPIInt           size;
2650:   PetscScalar           *cv, *sendbuf, *recvbuf;
2651:   const PetscScalar     *av,*bv;
2652:   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2653:   PetscScalar           _DOne=1.0,_DZero=0.0;
2654:   PetscBLASInt          cm, cn, ck, alda, clda;

2656:   MatCheckProduct(C,3);
2658:   abt  = (Mat_MatTransMultDense*)C->product->data;
2659:   PetscObjectGetComm((PetscObject)A,&comm);
2660:   MPI_Comm_size(comm,&size);
2661:   MatDenseGetArrayRead(a->A,&av);
2662:   MatDenseGetArrayRead(b->A,&bv);
2663:   MatDenseGetArrayWrite(c->A,&cv);
2664:   MatDenseGetLDA(a->A,&i);
2665:   PetscBLASIntCast(i,&alda);
2666:   MatDenseGetLDA(b->A,&blda);
2667:   MatDenseGetLDA(c->A,&i);
2668:   PetscBLASIntCast(i,&clda);
2669:   /* copy transpose of B into buf[0] */
2670:   bn      = B->rmap->n;
2671:   sendbuf = abt->buf[0];
2672:   recvbuf = abt->buf[1];
2673:   for (k = 0, j = 0; j < bn; j++) {
2674:     for (i = 0; i < cK; i++, k++) {
2675:       sendbuf[k] = bv[i * blda + j];
2676:     }
2677:   }
2678:   MatDenseRestoreArrayRead(b->A,&bv);
2679:   MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);
2680:   PetscBLASIntCast(cK,&ck);
2681:   PetscBLASIntCast(c->A->rmap->n,&cm);
2682:   PetscBLASIntCast(c->A->cmap->n,&cn);
2683:   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2684:   MatDenseRestoreArrayRead(a->A,&av);
2686:   MatDenseRestoreArrayWrite(c->A,&cv);
2687:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2688:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2689:   return 0;
2690: }

2692: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2693: {
2694:   Mat_MatTransMultDense *abt;

2696:   MatCheckProduct(C,3);
2698:   abt = (Mat_MatTransMultDense*)C->product->data;
2699:   switch (abt->alg) {
2700:   case 1:
2701:     MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);
2702:     break;
2703:   default:
2704:     MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);
2705:     break;
2706:   }
2707:   return 0;
2708: }

2710: PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2711: {
2712:   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;

2714:   MatDestroy(&ab->Ce);
2715:   MatDestroy(&ab->Ae);
2716:   MatDestroy(&ab->Be);
2717:   PetscFree(ab);
2718:   return 0;
2719: }

2721: #if defined(PETSC_HAVE_ELEMENTAL)
2722: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2723: {
2724:   Mat_MatMultDense *ab;

2726:   MatCheckProduct(C,3);
2728:   ab   = (Mat_MatMultDense*)C->product->data;
2729:   MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);
2730:   MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);
2731:   MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);
2732:   MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);
2733:   return 0;
2734: }

2736: static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2737: {
2738:   Mat              Ae,Be,Ce;
2739:   Mat_MatMultDense *ab;

2741:   MatCheckProduct(C,4);
2743:   /* check local size of A and B */
2744:   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2745:     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2746:   }

2748:   /* create elemental matrices Ae and Be */
2749:   MatCreate(PetscObjectComm((PetscObject)A), &Ae);
2750:   MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);
2751:   MatSetType(Ae,MATELEMENTAL);
2752:   MatSetUp(Ae);
2753:   MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);

2755:   MatCreate(PetscObjectComm((PetscObject)B), &Be);
2756:   MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);
2757:   MatSetType(Be,MATELEMENTAL);
2758:   MatSetUp(Be);
2759:   MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);

2761:   /* compute symbolic Ce = Ae*Be */
2762:   MatCreate(PetscObjectComm((PetscObject)C),&Ce);
2763:   MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);

2765:   /* setup C */
2766:   MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
2767:   MatSetType(C,MATDENSE);
2768:   MatSetUp(C);

2770:   /* create data structure for reuse Cdense */
2771:   PetscNew(&ab);
2772:   ab->Ae = Ae;
2773:   ab->Be = Be;
2774:   ab->Ce = Ce;

2776:   C->product->data    = ab;
2777:   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2778:   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2779:   return 0;
2780: }
2781: #endif
2782: /* ----------------------------------------------- */
2783: #if defined(PETSC_HAVE_ELEMENTAL)
2784: static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2785: {
2786:   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2787:   C->ops->productsymbolic = MatProductSymbolic_AB;
2788:   return 0;
2789: }
2790: #endif

2792: static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2793: {
2794:   Mat_Product *product = C->product;
2795:   Mat         A = product->A,B=product->B;

2797:   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2798:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2799:   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2800:   C->ops->productsymbolic = MatProductSymbolic_AtB;
2801:   return 0;
2802: }

2804: static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2805: {
2807:   Mat_Product    *product = C->product;
2808:   const char     *algTypes[2] = {"allgatherv","cyclic"};
2809:   PetscInt       alg,nalg = 2;
2810:   PetscBool      flg = PETSC_FALSE;

2812:   /* Set default algorithm */
2813:   alg = 0; /* default is allgatherv */
2814:   PetscStrcmp(product->alg,"default",&flg);
2815:   if (flg) {
2816:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2817:   }

2819:   /* Get runtime option */
2820:   if (product->api_user) {
2821:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
2822:     PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);
2823:     PetscOptionsEnd();
2824:   } else {
2825:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
2826:     PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);
2827:     PetscOptionsEnd();
2828:   }
2829:   if (flg) {
2830:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2831:   }

2833:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2834:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2835:   return 0;
2836: }
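
/*
   Hedged usage sketch for picking the C = A*B^T algorithm at runtime: either
   pass -matmattransmult_mpidense_mpidense_via cyclic (MatMatTransposeMult()
   interface) or -mat_product_algorithm cyclic, or set it programmatically
   through the MatProduct interface:

     Mat C;
     MatProductCreate(A,B,NULL,&C);
     MatProductSetType(C,MATPRODUCT_ABt);
     MatProductSetAlgorithm(C,"cyclic");       default is "allgatherv"
     MatProductSetFromOptions(C);
     MatProductSymbolic(C);
     MatProductNumeric(C);
*/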

2838: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2839: {
2840:   Mat_Product    *product = C->product;

2842:   switch (product->type) {
2843: #if defined(PETSC_HAVE_ELEMENTAL)
2844:   case MATPRODUCT_AB:
2845:     MatProductSetFromOptions_MPIDense_AB(C);
2846:     break;
2847: #endif
2848:   case MATPRODUCT_AtB:
2849:     MatProductSetFromOptions_MPIDense_AtB(C);
2850:     break;
2851:   case MATPRODUCT_ABt:
2852:     MatProductSetFromOptions_MPIDense_ABt(C);
2853:     break;
2854:   default:
2855:     break;
2856:   }
2857:   return 0;
2858: }