Actual source code: mpidense.c

petsc-3.13.6 2020-09-29

  2: /*
  3:    Basic functions for parallel dense matrices.
  4: */

  6:  #include <../src/mat/impls/dense/mpi/mpidense.h>
  7:  #include <../src/mat/impls/aij/mpi/mpiaij.h>
  8:  #include <petscblaslapack.h>

 10: /*@

 12:      MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential
 13:              matrix that represents the operator on this process. For a MATSEQDENSE matrix, it returns the matrix itself.

 15:     Input Parameter:
 16: .      A - the Seq or MPI dense matrix

 18:     Output Parameter:
 19: .      B - the inner matrix

 21:     Level: intermediate

 23: @*/
 24: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
 25: {
 26:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 28:   PetscBool      flg;

 31:   PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
 32:   if (flg) *B = mat->A;
 33:   else *B = A;
 34:   return(0);
 35: }
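
/* Editorial sketch (not part of the original source): minimal intended use of
   MatDenseGetLocalMatrix(). It assumes A is an already assembled MATMPIDENSE or
   MATSEQDENSE matrix; the inner matrix is borrowed, so the caller must not destroy it.
   The helper name is hypothetical. */
static PetscErrorCode ExampleViewLocalBlock(Mat A)
{
  Mat lA;

  MatDenseGetLocalMatrix(A,&lA);
  MatView(lA,PETSC_VIEWER_STDOUT_SELF);  /* view only this process's rows */
  return(0);
}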

 37: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 38: {
 39:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 41:   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

 44:   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
 45:   lrow = row - rstart;
 46:   MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
 47:   return(0);
 48: }

 50: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 51: {

 55:   if (idx) {PetscFree(*idx);}
 56:   if (v) {PetscFree(*v);}
 57:   return(0);
 58: }
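
/* Editorial sketch (not part of the original source): MatGetRow_MPIDense() above only
   serves locally owned rows, so callers loop over [rstart,rend). MatRestoreRow() frees
   the index/value arrays that MatGetRow() allocated, as seen in the routine above.
   The helper name is hypothetical. */
static PetscErrorCode ExampleSumOwnedRows(Mat A,PetscScalar *sum)
{
  PetscInt          rstart,rend,row,nz,j;
  const PetscInt    *cols;
  const PetscScalar *vals;

  *sum = 0.0;
  MatGetOwnershipRange(A,&rstart,&rend);
  for (row=rstart; row<rend; row++) {
    MatGetRow(A,row,&nz,&cols,&vals);
    for (j=0; j<nz; j++) *sum += vals[j];
    MatRestoreRow(A,row,&nz,&cols,&vals);
  }
  return(0);
}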

 60: PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
 61: {
 62:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
 64:   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
 65:   PetscScalar    *array;
 66:   MPI_Comm       comm;
 67:   Mat            B;

 70:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

 72:   PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
 73:   if (!B) {
 74:     PetscObjectGetComm((PetscObject)(mdn->A),&comm);
 75:     MatCreate(comm,&B);
 76:     MatSetSizes(B,m,m,m,m);
 77:     MatSetType(B,((PetscObject)mdn->A)->type_name);
 78:     MatDenseGetArray(mdn->A,&array);
 79:     MatSeqDenseSetPreallocation(B,array+m*rstart);
 80:     MatDenseRestoreArray(mdn->A,&array);
 81:     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
 82:     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
 83:     PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
 84:     *a   = B;
 85:     MatDestroy(&B);
 86:   } else *a = B;
 87:   return(0);
 88: }
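
/* Editorial sketch (not part of the original source): the block returned above is cached
   on A under "DiagonalBlock" and is only borrowed by the caller. Hypothetical usage of
   the public MatGetDiagonalBlock() entry point. */
static PetscErrorCode ExampleViewDiagonalBlock(Mat A)
{
  Mat D;

  MatGetDiagonalBlock(A,&D);             /* sequential block; do not destroy */
  MatView(D,PETSC_VIEWER_STDOUT_SELF);
  return(0);
}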

 90: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
 91: {
 92:   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
 94:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
 95:   PetscBool      roworiented = A->roworiented;

 98:   for (i=0; i<m; i++) {
 99:     if (idxm[i] < 0) continue;
100:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
101:     if (idxm[i] >= rstart && idxm[i] < rend) {
102:       row = idxm[i] - rstart;
103:       if (roworiented) {
104:         MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
105:       } else {
106:         for (j=0; j<n; j++) {
107:           if (idxn[j] < 0) continue;
108:           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
109:           MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
110:         }
111:       }
112:     } else if (!A->donotstash) {
113:       mat->assembled = PETSC_FALSE;
114:       if (roworiented) {
115:         MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
116:       } else {
117:         MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
118:       }
119:     }
120:   }
121:   return(0);
122: }
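
/* Editorial sketch (not part of the original source): values aimed at rows owned by
   another process are stashed above and exchanged during assembly, so insertion code
   need not know the row distribution. The helper name is hypothetical. */
static PetscErrorCode ExampleAddAnyEntry(Mat A,PetscInt grow,PetscInt gcol,PetscScalar v)
{
  MatSetValues(A,1,&grow,1,&gcol,&v,ADD_VALUES); /* may be off-process; goes to the stash */
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);        /* communicates stashed entries */
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  return(0);
}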

124: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
125: {
126:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
128:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

131:   for (i=0; i<m; i++) {
132:     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
133:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
134:     if (idxm[i] >= rstart && idxm[i] < rend) {
135:       row = idxm[i] - rstart;
136:       for (j=0; j<n; j++) {
137:         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
138:         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
139:         MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
140:       }
141:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
142:   }
143:   return(0);
144: }

146: static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
147: {
148:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

152:   MatDenseGetLDA(a->A,lda);
153:   return(0);
154: }

156: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
157: {
158:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

162:   MatDenseGetArray(a->A,array);
163:   return(0);
164: }

166: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
167: {
168:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

172:   MatDenseGetArrayRead(a->A,array);
173:   return(0);
174: }

176: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
177: {
178:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

182:   MatDensePlaceArray(a->A,array);
183:   return(0);
184: }

186: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
187: {
188:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

192:   MatDenseResetArray(a->A);
193:   return(0);
194: }

196: static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
197: {
198:   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
199:   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
201:   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
202:   const PetscInt *irow,*icol;
203:   PetscScalar    *av,*bv,*v = lmat->v;
204:   Mat            newmat;
205:   IS             iscol_local;
206:   MPI_Comm       comm_is,comm_mat;

209:   PetscObjectGetComm((PetscObject)A,&comm_mat);
210:   PetscObjectGetComm((PetscObject)iscol,&comm_is);
211:   if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

213:   ISAllGather(iscol,&iscol_local);
214:   ISGetIndices(isrow,&irow);
215:   ISGetIndices(iscol_local,&icol);
216:   ISGetLocalSize(isrow,&nrows);
217:   ISGetLocalSize(iscol,&ncols);
218:   ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */

220:   /* No parallel redistribution is currently supported! Should really check each index set
221:      to confirm that it is OK. Currently supports only a submatrix with the same
222:      partitioning as the original matrix! */

224:   MatGetLocalSize(A,&nlrows,&nlcols);
225:   MatGetOwnershipRange(A,&rstart,&rend);

227:   /* Check submatrix call */
228:   if (scall == MAT_REUSE_MATRIX) {
229:     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
230:     /* Really need to test rows and column sizes! */
231:     newmat = *B;
232:   } else {
233:     /* Create and fill new matrix */
234:     MatCreate(PetscObjectComm((PetscObject)A),&newmat);
235:     MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
236:     MatSetType(newmat,((PetscObject)A)->type_name);
237:     MatMPIDenseSetPreallocation(newmat,NULL);
238:   }

240:   /* Now extract the data pointers and do the copy, column at a time */
241:   newmatd = (Mat_MPIDense*)newmat->data;
242:   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

244:   for (i=0; i<Ncols; i++) {
245:     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
246:     for (j=0; j<nrows; j++) {
247:       *bv++ = av[irow[j] - rstart];
248:     }
249:   }

251:   /* Assemble the matrices so that the correct flags are set */
252:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
253:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

255:   /* Free work space */
256:   ISRestoreIndices(isrow,&irow);
257:   ISRestoreIndices(iscol_local,&icol);
258:   ISDestroy(&iscol_local);
259:   *B   = newmat;
260:   return(0);
261: }

263: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
264: {
265:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

269:   MatDenseRestoreArray(a->A,array);
270:   return(0);
271: }

273: PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
274: {
275:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

279:   MatDenseRestoreArrayRead(a->A,array);
280:   return(0);
281: }

283: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
284: {
285:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
287:   PetscInt       nstash,reallocs;

290:   if (mdn->donotstash || mat->nooffprocentries) return(0);

292:   MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
293:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
294:   PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
295:   return(0);
296: }

298: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
299: {
300:   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
302:   PetscInt       i,*row,*col,flg,j,rstart,ncols;
303:   PetscMPIInt    n;
304:   PetscScalar    *val;

307:   if (!mdn->donotstash && !mat->nooffprocentries) {
308:     /*  wait on receives */
309:     while (1) {
310:       MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
311:       if (!flg) break;

313:       for (i=0; i<n;) {
314:         /* Now identify the consecutive vals belonging to the same row */
315:         for (j=i,rstart=row[j]; j<n; j++) {
316:           if (row[j] != rstart) break;
317:         }
318:         if (j < n) ncols = j-i;
319:         else       ncols = n-i;
320:         /* Now assemble all these values with a single function call */
321:         MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
322:         i    = j;
323:       }
324:     }
325:     MatStashScatterEnd_Private(&mat->stash);
326:   }

328:   MatAssemblyBegin(mdn->A,mode);
329:   MatAssemblyEnd(mdn->A,mode);

331:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
332:     MatSetUpMultiply_MPIDense(mat);
333:   }
334:   return(0);
335: }

337: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
338: {
340:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

343:   MatZeroEntries(l->A);
344:   return(0);
345: }

347: /* the code does not handle the diagonal entries correctly unless the
348:    matrix is square and the column and row ownerships are identical.
349:    This is a BUG. The only way to fix it seems to be to access
350:    the local matrix mdn->A directly and not go through the MatZeroRows()
351:    routine.
352: */
353: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
354: {
355:   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
356:   PetscErrorCode    ierr;
357:   PetscInt          i,*owners = A->rmap->range;
358:   PetscInt          *sizes,j,idx,nsends;
359:   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
360:   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
361:   PetscInt          *lens,*lrows,*values;
362:   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
363:   MPI_Comm          comm;
364:   MPI_Request       *send_waits,*recv_waits;
365:   MPI_Status        recv_status,*send_status;
366:   PetscBool         found;
367:   const PetscScalar *xx;
368:   PetscScalar       *bb;

371:   PetscObjectGetComm((PetscObject)A,&comm);
372:   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
373:   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
374:   /*  first count number of contributors to each processor */
375:   PetscCalloc1(2*size,&sizes);
376:   PetscMalloc1(N+1,&owner);  /* see note*/
377:   for (i=0; i<N; i++) {
378:     idx   = rows[i];
379:     found = PETSC_FALSE;
380:     for (j=0; j<size; j++) {
381:       if (idx >= owners[j] && idx < owners[j+1]) {
382:         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
383:       }
384:     }
385:     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
386:   }
387:   nsends = 0;
388:   for (i=0; i<size; i++) nsends += sizes[2*i+1];

390:   /* inform other processors of number of messages and max length*/
391:   PetscMaxSum(comm,sizes,&nmax,&nrecvs);

393:   /* post receives:   */
394:   PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);
395:   PetscMalloc1(nrecvs+1,&recv_waits);
396:   for (i=0; i<nrecvs; i++) {
397:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
398:   }

400:   /* do sends:
401:       1) starts[i] gives the starting index in svalues for stuff going to
402:          the ith processor
403:   */
404:   PetscMalloc1(N+1,&svalues);
405:   PetscMalloc1(nsends+1,&send_waits);
406:   PetscMalloc1(size+1,&starts);

408:   starts[0] = 0;
409:   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
410:   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

412:   starts[0] = 0;
413:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
414:   count = 0;
415:   for (i=0; i<size; i++) {
416:     if (sizes[2*i+1]) {
417:       MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
418:     }
419:   }
420:   PetscFree(starts);

422:   base = owners[rank];

424:   /*  wait on receives */
425:   PetscMalloc2(nrecvs,&lens,nrecvs,&source);
426:   count = nrecvs;
427:   slen  = 0;
428:   while (count) {
429:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
430:     /* unpack receives into our local space */
431:     MPI_Get_count(&recv_status,MPIU_INT,&n);

433:     source[imdex] = recv_status.MPI_SOURCE;
434:     lens[imdex]   = n;
435:     slen += n;
436:     count--;
437:   }
438:   PetscFree(recv_waits);

440:   /* move the data into the send scatter */
441:   PetscMalloc1(slen+1,&lrows);
442:   count = 0;
443:   for (i=0; i<nrecvs; i++) {
444:     values = rvalues + i*nmax;
445:     for (j=0; j<lens[i]; j++) {
446:       lrows[count++] = values[j] - base;
447:     }
448:   }
449:   PetscFree(rvalues);
450:   PetscFree2(lens,source);
451:   PetscFree(owner);
452:   PetscFree(sizes);

454:   /* fix right hand side if needed */
455:   if (x && b) {
456:     VecGetArrayRead(x,&xx);
457:     VecGetArray(b,&bb);
458:     for (i=0; i<slen; i++) {
459:       bb[lrows[i]] = diag*xx[lrows[i]];
460:     }
461:     VecRestoreArrayRead(x,&xx);
462:     VecRestoreArray(b,&bb);
463:   }

465:   /* actually zap the local rows */
466:   MatZeroRows(l->A,slen,lrows,0.0,0,0);
467:   if (diag != 0.0) {
468:     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
469:     PetscInt     m   = ll->lda, i;

471:     for (i=0; i<slen; i++) {
472:       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
473:     }
474:   }
475:   PetscFree(lrows);

477:   /* wait on sends */
478:   if (nsends) {
479:     PetscMalloc1(nsends,&send_status);
480:     MPI_Waitall(nsends,send_waits,send_status);
481:     PetscFree(send_status);
482:   }
483:   PetscFree(send_waits);
484:   PetscFree(svalues);
485:   return(0);
486: }
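
/* Editorial sketch (not part of the original source): a typical Dirichlet boundary
   condition use of MatZeroRows(): the listed global rows are zeroed, diag is placed on
   the diagonal, and b is fixed up from x. Rows may be owned by any process.
   The helper name is hypothetical. */
static PetscErrorCode ExampleApplyDirichlet(Mat A,PetscInt nbc,const PetscInt bcrows[],Vec x,Vec b)
{
  MatZeroRows(A,nbc,bcrows,1.0,x,b);  /* diag = 1.0 keeps the system nonsingular */
  return(0);
}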

488: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
489: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
490: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
491: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

493: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
494: {
495:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

499:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
500:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
501:   MatMult_SeqDense(mdn->A,mdn->lvec,yy);
502:   return(0);
503: }
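
/* Editorial sketch (not part of the original source): each process stores full rows of
   the dense matrix, so MatMult above first gathers the entire input vector into lvec and
   then applies the local block. Compatible vectors come from MatCreateVecs().
   The helper name is hypothetical. */
static PetscErrorCode ExampleRowSums(Mat A,Vec *y)
{
  Vec x;

  MatCreateVecs(A,&x,y);
  VecSet(x,1.0);
  MatMult(A,x,*y);      /* y = A x gives the row sums of A */
  VecDestroy(&x);
  return(0);
}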

505: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
506: {
507:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

511:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
512:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
513:   MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
514:   return(0);
515: }

517: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
518: {
519:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
521:   PetscScalar    zero = 0.0;

524:   VecSet(yy,zero);
525:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
526:   VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
527:   VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
528:   return(0);
529: }

531: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
532: {
533:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

537:   VecCopy(yy,zz);
538:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
539:   VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
540:   VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
541:   return(0);
542: }

544: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
545: {
546:   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
547:   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
549:   PetscInt       len,i,n,m = A->rmap->n,radd;
550:   PetscScalar    *x,zero = 0.0;

553:   VecSet(v,zero);
554:   VecGetArray(v,&x);
555:   VecGetSize(v,&n);
556:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
557:   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
558:   radd = A->rmap->rstart*m;
559:   for (i=0; i<len; i++) {
560:     x[i] = aloc->v[radd + i*m + i];
561:   }
562:   VecRestoreArray(v,&x);
563:   return(0);
564: }

566: PetscErrorCode MatDestroy_MPIDense(Mat mat)
567: {
568:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

572: #if defined(PETSC_USE_LOG)
573:   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
574: #endif
575:   MatStashDestroy_Private(&mat->stash);
576:   MatDestroy(&mdn->A);
577:   VecDestroy(&mdn->lvec);
578:   VecScatterDestroy(&mdn->Mvctx);

580:   PetscFree(mat->data);
581:   PetscObjectChangeTypeName((PetscObject)mat,0);

583:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);
584:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);
585:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);
586:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);
587:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);
588:   PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);
589:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);
590: #if defined(PETSC_HAVE_ELEMENTAL)
591:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);
592: #endif
593:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);
594:   PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);
595:   PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);
596:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);
597:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);
598:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);
599:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);
600:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);
601:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);
602:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);
603:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);
604:   return(0);
605: }

607: PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

609:  #include <petscdraw.h>
610: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
611: {
612:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
613:   PetscErrorCode    ierr;
614:   PetscMPIInt       rank = mdn->rank;
615:   PetscViewerType   vtype;
616:   PetscBool         iascii,isdraw;
617:   PetscViewer       sviewer;
618:   PetscViewerFormat format;

621:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
622:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
623:   if (iascii) {
624:     PetscViewerGetType(viewer,&vtype);
625:     PetscViewerGetFormat(viewer,&format);
626:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
627:       MatInfo info;
628:       MatGetInfo(mat,MAT_LOCAL,&info);
629:       PetscViewerASCIIPushSynchronized(viewer);
630:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
631:       PetscViewerFlush(viewer);
632:       PetscViewerASCIIPopSynchronized(viewer);
633:       VecScatterView(mdn->Mvctx,viewer);
634:       return(0);
635:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
636:       return(0);
637:     }
638:   } else if (isdraw) {
639:     PetscDraw draw;
640:     PetscBool isnull;

642:     PetscViewerDrawGetDraw(viewer,0,&draw);
643:     PetscDrawIsNull(draw,&isnull);
644:     if (isnull) return(0);
645:   }

647:   {
648:     /* assemble the entire matrix onto first processor. */
649:     Mat         A;
650:     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
651:     PetscInt    *cols;
652:     PetscScalar *vals;

654:     MatCreate(PetscObjectComm((PetscObject)mat),&A);
655:     if (!rank) {
656:       MatSetSizes(A,M,N,M,N);
657:     } else {
658:       MatSetSizes(A,0,0,M,N);
659:     }
660:     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
661:     MatSetType(A,MATMPIDENSE);
662:     MatMPIDenseSetPreallocation(A,NULL);
663:     PetscLogObjectParent((PetscObject)mat,(PetscObject)A);

665:     /* Copy the matrix ... This isn't the most efficient means,
666:        but it's quick for now */
667:     A->insertmode = INSERT_VALUES;

669:     row = mat->rmap->rstart;
670:     m   = mdn->A->rmap->n;
671:     for (i=0; i<m; i++) {
672:       MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
673:       MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
674:       MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
675:       row++;
676:     }

678:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
679:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
680:     PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
681:     if (!rank) {
682:       PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);
683:       MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);
684:     }
685:     PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
686:     PetscViewerFlush(viewer);
687:     MatDestroy(&A);
688:   }
689:   return(0);
690: }

692: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
693: {
695:   PetscBool      iascii,isbinary,isdraw,issocket;

698:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
699:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
700:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
701:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);

703:   if (iascii || issocket || isdraw) {
704:     MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
705:   } else if (isbinary) {
706:     MatView_Dense_Binary(mat,viewer);
707:   }
708:   return(0);
709: }

711: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
712: {
713:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
714:   Mat            mdn  = mat->A;
716:   PetscLogDouble isend[5],irecv[5];

719:   info->block_size = 1.0;

721:   MatGetInfo(mdn,MAT_LOCAL,info);

723:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
724:   isend[3] = info->memory;  isend[4] = info->mallocs;
725:   if (flag == MAT_LOCAL) {
726:     info->nz_used      = isend[0];
727:     info->nz_allocated = isend[1];
728:     info->nz_unneeded  = isend[2];
729:     info->memory       = isend[3];
730:     info->mallocs      = isend[4];
731:   } else if (flag == MAT_GLOBAL_MAX) {
732:     MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));

734:     info->nz_used      = irecv[0];
735:     info->nz_allocated = irecv[1];
736:     info->nz_unneeded  = irecv[2];
737:     info->memory       = irecv[3];
738:     info->mallocs      = irecv[4];
739:   } else if (flag == MAT_GLOBAL_SUM) {
740:     MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));

742:     info->nz_used      = irecv[0];
743:     info->nz_allocated = irecv[1];
744:     info->nz_unneeded  = irecv[2];
745:     info->memory       = irecv[3];
746:     info->mallocs      = irecv[4];
747:   }
748:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
749:   info->fill_ratio_needed = 0;
750:   info->factor_mallocs    = 0;
751:   return(0);
752: }

754: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
755: {
756:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

760:   switch (op) {
761:   case MAT_NEW_NONZERO_LOCATIONS:
762:   case MAT_NEW_NONZERO_LOCATION_ERR:
763:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
764:     MatCheckPreallocated(A,1);
765:     MatSetOption(a->A,op,flg);
766:     break;
767:   case MAT_ROW_ORIENTED:
768:     MatCheckPreallocated(A,1);
769:     a->roworiented = flg;
770:     MatSetOption(a->A,op,flg);
771:     break;
772:   case MAT_NEW_DIAGONALS:
773:   case MAT_KEEP_NONZERO_PATTERN:
774:   case MAT_USE_HASH_TABLE:
775:   case MAT_SORTED_FULL:
776:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
777:     break;
778:   case MAT_IGNORE_OFF_PROC_ENTRIES:
779:     a->donotstash = flg;
780:     break;
781:   case MAT_SYMMETRIC:
782:   case MAT_STRUCTURALLY_SYMMETRIC:
783:   case MAT_HERMITIAN:
784:   case MAT_SYMMETRY_ETERNAL:
785:   case MAT_IGNORE_LOWER_TRIANGULAR:
786:   case MAT_IGNORE_ZERO_ENTRIES:
787:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
788:     break;
789:   default:
790:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
791:   }
792:   return(0);
793: }


796: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
797: {
798:   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
799:   Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
800:   const PetscScalar *l,*r;
801:   PetscScalar       x,*v;
802:   PetscErrorCode    ierr;
803:   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

806:   MatGetLocalSize(A,&s2,&s3);
807:   if (ll) {
808:     VecGetLocalSize(ll,&s2a);
809:     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
810:     VecGetArrayRead(ll,&l);
811:     for (i=0; i<m; i++) {
812:       x = l[i];
813:       v = mat->v + i;
814:       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
815:     }
816:     VecRestoreArrayRead(ll,&l);
817:     PetscLogFlops(n*m);
818:   }
819:   if (rr) {
820:     VecGetLocalSize(rr,&s3a);
821:     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
822:     VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
823:     VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
824:     VecGetArrayRead(mdn->lvec,&r);
825:     for (i=0; i<n; i++) {
826:       x = r[i];
827:       v = mat->v + i*m;
828:       for (j=0; j<m; j++) (*v++) *= x;
829:     }
830:     VecRestoreArrayRead(mdn->lvec,&r);
831:     PetscLogFlops(n*m);
832:   }
833:   return(0);
834: }

836: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
837: {
838:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
839:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
841:   PetscInt       i,j;
842:   PetscReal      sum = 0.0;
843:   PetscScalar    *v  = mat->v;

846:   if (mdn->size == 1) {
847:      MatNorm(mdn->A,type,nrm);
848:   } else {
849:     if (type == NORM_FROBENIUS) {
850:       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
851:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
852:       }
853:       MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
854:       *nrm = PetscSqrtReal(*nrm);
855:       PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
856:     } else if (type == NORM_1) {
857:       PetscReal *tmp,*tmp2;
858:       PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);
859:       *nrm = 0.0;
860:       v    = mat->v;
861:       for (j=0; j<mdn->A->cmap->n; j++) {
862:         for (i=0; i<mdn->A->rmap->n; i++) {
863:           tmp[j] += PetscAbsScalar(*v);  v++;
864:         }
865:       }
866:       MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
867:       for (j=0; j<A->cmap->N; j++) {
868:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
869:       }
870:       PetscFree2(tmp,tmp2);
871:       PetscLogFlops(A->cmap->n*A->rmap->n);
872:     } else if (type == NORM_INFINITY) { /* max row norm */
873:       PetscReal ntemp;
874:       MatNorm(mdn->A,type,&ntemp);
875:       MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
876:     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
877:   }
878:   return(0);
879: }

881: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
882: {
883:   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
884:   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
885:   Mat            B;
886:   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
888:   PetscInt       j,i;
889:   PetscScalar    *v;

892:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
893:     MatCreate(PetscObjectComm((PetscObject)A),&B);
894:     MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
895:     MatSetType(B,((PetscObject)A)->type_name);
896:     MatMPIDenseSetPreallocation(B,NULL);
897:   } else {
898:     B = *matout;
899:   }

901:   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
902:   PetscMalloc1(m,&rwork);
903:   for (i=0; i<m; i++) rwork[i] = rstart + i;
904:   for (j=0; j<n; j++) {
905:     MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
906:     v   += m;
907:   }
908:   PetscFree(rwork);
909:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
910:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
911:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
912:     *matout = B;
913:   } else {
914:     MatHeaderMerge(A,&B);
915:   }
916:   return(0);
917: }

919: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
920: PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

922: PetscErrorCode MatSetUp_MPIDense(Mat A)
923: {

927:   PetscLayoutSetUp(A->rmap);
928:   PetscLayoutSetUp(A->cmap);
929:   if (!A->preallocated) {
930:     MatMPIDenseSetPreallocation(A,0);
931:   }
932:   return(0);
933: }

935: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
936: {
938:   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

941:   MatAXPY(A->A,alpha,B->A,str);
942:   PetscObjectStateIncrease((PetscObject)Y);
943:   return(0);
944: }

946: PetscErrorCode  MatConjugate_MPIDense(Mat mat)
947: {
948:   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;

952:   MatConjugate(a->A);
953:   return(0);
954: }

956: PetscErrorCode MatRealPart_MPIDense(Mat A)
957: {
958:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

962:   MatRealPart(a->A);
963:   return(0);
964: }

966: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
967: {
968:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

972:   MatImaginaryPart(a->A);
973:   return(0);
974: }

976: static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
977: {
978:   PetscErrorCode    ierr;
979:   PetscScalar       *x;
980:   const PetscScalar *a;
981:   PetscInt          lda;

984:   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
985:   MatDenseGetLDA(A,&lda);
986:   MatDenseGetArrayRead(A,&a);
987:   VecGetArray(v,&x);
988:   PetscArraycpy(x,a+col*lda,A->rmap->n);
989:   VecRestoreArray(v,&x);
990:     MatDenseRestoreArrayRead(A,&a);
991:   return(0);
992: }

994: PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);

996: PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
997: {
999:   PetscInt       i,n;
1000:   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1001:   PetscReal      *work;

1004:   MatGetSize(A,NULL,&n);
1005:   PetscMalloc1(n,&work);
1006:   MatGetColumnNorms_SeqDense(a->A,type,work);
1007:   if (type == NORM_2) {
1008:     for (i=0; i<n; i++) work[i] *= work[i];
1009:   }
1010:   if (type == NORM_INFINITY) {
1011:     MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
1012:   } else {
1013:     MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
1014:   }
1015:   PetscFree(work);
1016:   if (type == NORM_2) {
1017:     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1018:   }
1019:   return(0);
1020: }
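
/* Editorial sketch (not part of the original source): for NORM_2 the routine above sums
   squared local column norms across processes and takes square roots afterwards, since
   2-norms themselves are not additive. Caller-side usage; the helper name is hypothetical. */
static PetscErrorCode ExampleColumnNorms(Mat A)
{
  PetscInt  n;
  PetscReal *norms;

  MatGetSize(A,NULL,&n);
  PetscMalloc1(n,&norms);
  MatGetColumnNorms(A,NORM_2,norms);  /* one norm per global column */
  PetscFree(norms);
  return(0);
}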

1022: static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1023: {
1024:   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1026:   PetscScalar    *a;
1027:   PetscInt       m,n,i;

1030:   MatGetSize(d->A,&m,&n);
1031:   MatDenseGetArray(d->A,&a);
1032:   for (i=0; i<m*n; i++) {
1033:     PetscRandomGetValue(rctx,a+i);
1034:   }
1035:   MatDenseRestoreArray(d->A,&a);
1036:   return(0);
1037: }

1039: PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

1041: static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1042: {
1044:   *missing = PETSC_FALSE;
1045:   return(0);
1046: }

1048: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1049: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

1051: /* -------------------------------------------------------------------*/
1052: static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1053:                                         MatGetRow_MPIDense,
1054:                                         MatRestoreRow_MPIDense,
1055:                                         MatMult_MPIDense,
1056:                                 /*  4*/ MatMultAdd_MPIDense,
1057:                                         MatMultTranspose_MPIDense,
1058:                                         MatMultTransposeAdd_MPIDense,
1059:                                         0,
1060:                                         0,
1061:                                         0,
1062:                                 /* 10*/ 0,
1063:                                         0,
1064:                                         0,
1065:                                         0,
1066:                                         MatTranspose_MPIDense,
1067:                                 /* 15*/ MatGetInfo_MPIDense,
1068:                                         MatEqual_MPIDense,
1069:                                         MatGetDiagonal_MPIDense,
1070:                                         MatDiagonalScale_MPIDense,
1071:                                         MatNorm_MPIDense,
1072:                                 /* 20*/ MatAssemblyBegin_MPIDense,
1073:                                         MatAssemblyEnd_MPIDense,
1074:                                         MatSetOption_MPIDense,
1075:                                         MatZeroEntries_MPIDense,
1076:                                 /* 24*/ MatZeroRows_MPIDense,
1077:                                         0,
1078:                                         0,
1079:                                         0,
1080:                                         0,
1081:                                 /* 29*/ MatSetUp_MPIDense,
1082:                                         0,
1083:                                         0,
1084:                                         MatGetDiagonalBlock_MPIDense,
1085:                                         0,
1086:                                 /* 34*/ MatDuplicate_MPIDense,
1087:                                         0,
1088:                                         0,
1089:                                         0,
1090:                                         0,
1091:                                 /* 39*/ MatAXPY_MPIDense,
1092:                                         MatCreateSubMatrices_MPIDense,
1093:                                         0,
1094:                                         MatGetValues_MPIDense,
1095:                                         0,
1096:                                 /* 44*/ 0,
1097:                                         MatScale_MPIDense,
1098:                                         MatShift_Basic,
1099:                                         0,
1100:                                         0,
1101:                                 /* 49*/ MatSetRandom_MPIDense,
1102:                                         0,
1103:                                         0,
1104:                                         0,
1105:                                         0,
1106:                                 /* 54*/ 0,
1107:                                         0,
1108:                                         0,
1109:                                         0,
1110:                                         0,
1111:                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1112:                                         MatDestroy_MPIDense,
1113:                                         MatView_MPIDense,
1114:                                         0,
1115:                                         0,
1116:                                 /* 64*/ 0,
1117:                                         0,
1118:                                         0,
1119:                                         0,
1120:                                         0,
1121:                                 /* 69*/ 0,
1122:                                         0,
1123:                                         0,
1124:                                         0,
1125:                                         0,
1126:                                 /* 74*/ 0,
1127:                                         0,
1128:                                         0,
1129:                                         0,
1130:                                         0,
1131:                                 /* 79*/ 0,
1132:                                         0,
1133:                                         0,
1134:                                         0,
1135:                                 /* 83*/ MatLoad_MPIDense,
1136:                                         0,
1137:                                         0,
1138:                                         0,
1139:                                         0,
1140:                                         0,
1141: #if defined(PETSC_HAVE_ELEMENTAL)
1142:                                 /* 89*/ 0,
1143:                                         0,
1144: #else
1145:                                 /* 89*/ 0,
1146:                                         0,
1147: #endif
1148:                                         MatMatMultNumeric_MPIDense,
1149:                                         0,
1150:                                         0,
1151:                                 /* 94*/ 0,
1152:                                         0,
1153:                                         0,
1154:                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1155:                                         0,
1156:                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1157:                                         0,
1158:                                         0,
1159:                                         MatConjugate_MPIDense,
1160:                                         0,
1161:                                 /*104*/ 0,
1162:                                         MatRealPart_MPIDense,
1163:                                         MatImaginaryPart_MPIDense,
1164:                                         0,
1165:                                         0,
1166:                                 /*109*/ 0,
1167:                                         0,
1168:                                         0,
1169:                                         MatGetColumnVector_MPIDense,
1170:                                         MatMissingDiagonal_MPIDense,
1171:                                 /*114*/ 0,
1172:                                         0,
1173:                                         0,
1174:                                         0,
1175:                                         0,
1176:                                 /*119*/ 0,
1177:                                         0,
1178:                                         0,
1179:                                         0,
1180:                                         0,
1181:                                 /*124*/ 0,
1182:                                         MatGetColumnNorms_MPIDense,
1183:                                         0,
1184:                                         0,
1185:                                         0,
1186:                                 /*129*/ 0,
1187:                                         0,
1188:                                         0,
1189:                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1190:                                         0,
1191:                                 /*134*/ 0,
1192:                                         0,
1193:                                         0,
1194:                                         0,
1195:                                         0,
1196:                                 /*139*/ 0,
1197:                                         0,
1198:                                         0,
1199:                                         0,
1200:                                         0,
1201:                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1202:                                 /*145*/ 0,
1203:                                         0,
1204:                                         0
1205: };

1207: PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1208: {
1209:   Mat_MPIDense   *a;

1213:   if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
1214:   mat->preallocated = PETSC_TRUE;
1215:   /* Note:  For now, when data is specified above, this assumes the user correctly
1216:    allocates the local dense storage space.  We should add error checking. */

1218:   a       = (Mat_MPIDense*)mat->data;
1219:   PetscLayoutSetUp(mat->rmap);
1220:   PetscLayoutSetUp(mat->cmap);
1221:   a->nvec = mat->cmap->n;

1223:   MatCreate(PETSC_COMM_SELF,&a->A);
1224:   MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1225:   MatSetType(a->A,MATSEQDENSE);
1226:   MatSeqDenseSetPreallocation(a->A,data);
1227:   PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1228:   return(0);
1229: }

1231: #if defined(PETSC_HAVE_ELEMENTAL)
1232: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1233: {
1234:   Mat            mat_elemental;
1236:   PetscScalar    *v;
1237:   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

1240:   if (reuse == MAT_REUSE_MATRIX) {
1241:     mat_elemental = *newmat;
1242:     MatZeroEntries(*newmat);
1243:   } else {
1244:     MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
1245:     MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);
1246:     MatSetType(mat_elemental,MATELEMENTAL);
1247:     MatSetUp(mat_elemental);
1248:     MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);
1249:   }

1251:   PetscMalloc2(m,&rows,N,&cols);
1252:   for (i=0; i<N; i++) cols[i] = i;
1253:   for (i=0; i<m; i++) rows[i] = rstart + i;

1255:   /* PETSc-Elemental interface uses axpy for setting off-processor entries; only ADD_VALUES is allowed */
1256:   MatDenseGetArray(A,&v);
1257:   MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);
1258:   MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
1259:   MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
1260:   MatDenseRestoreArray(A,&v);
1261:   PetscFree2(rows,cols);

1263:   if (reuse == MAT_INPLACE_MATRIX) {
1264:     MatHeaderReplace(A,&mat_elemental);
1265:   } else {
1266:     *newmat = mat_elemental;
1267:   }
1268:   return(0);
1269: }
1270: #endif

1272: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1273: {
1274:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;

1278:   MatDenseGetColumn(mat->A,col,vals);
1279:   return(0);
1280: }

1282: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1283: {
1284:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;

1288:   MatDenseRestoreColumn(mat->A,vals);
1289:   return(0);
1290: }

1292: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1293: {
1295:   Mat_MPIDense   *mat;
1296:   PetscInt       m,nloc,N;

1299:   MatGetSize(inmat,&m,&N);
1300:   MatGetLocalSize(inmat,NULL,&nloc);
1301:   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1302:     PetscInt sum;

1304:     if (n == PETSC_DECIDE) {
1305:       PetscSplitOwnership(comm,&n,&N);
1306:     }
1307:     /* Check sum(n) = N */
1308:     MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
1309:     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

1311:     MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);
1312:   }

1314:   /* numeric phase */
1315:   mat = (Mat_MPIDense*)(*outmat)->data;
1316:   MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);
1317:   MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
1318:   MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
1319:   return(0);
1320: }

1322: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1323: {
1324:   Mat_MPIDense   *a;

1328:   PetscNewLog(mat,&a);
1329:   mat->data = (void*)a;
1330:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));

1332:   mat->insertmode = NOT_SET_VALUES;
1333:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);
1334:   MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);

1336:   /* build cache for off array entries formed */
1337:   a->donotstash = PETSC_FALSE;

1339:   MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);

1341:   /* stuff used for matrix vector multiply */
1342:   a->lvec        = 0;
1343:   a->Mvctx       = 0;
1344:   a->roworiented = PETSC_TRUE;

1346:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);
1347:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);
1348:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);
1349:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);
1350:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);
1351:   PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);
1352:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);
1353: #if defined(PETSC_HAVE_ELEMENTAL)
1354:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);
1355: #endif
1356:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);
1357:   PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);
1358:   PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);
1359:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);
1360:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);
1361:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);
1362:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);

1364:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);
1365:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);
1366:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);
1367:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);
1368:   PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1369:   return(0);
1370: }

1372: /*MC
1373:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1375:    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1376:    and MATMPIDENSE otherwise.

1378:    Options Database Keys:
1379: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

1381:   Level: beginner


1384: .seealso: MATSEQDENSE,MATMPIDENSE
1385: M*/

1387: /*@C
1388:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1390:    Collective

1392:    Input Parameters:
1393: .  B - the matrix
1394: -  data - optional location of matrix data.  Set data=NULL for PETSc
1395:    to control all matrix memory allocation.

1397:    Notes:
1398:    The dense format is fully compatible with standard Fortran 77
1399:    storage by columns.

1401:    The data input variable is intended primarily for Fortran programmers
1402:    who wish to allocate their own matrix memory space.  Most users should
1403:    set data=NULL.

1405:    Level: intermediate

1407: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1408: @*/
1409: PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1410: {

1414:   PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
1415:   return(0);
1416: }
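
/* Editorial sketch (not part of the original source): the usual creation sequence for a
   MATMPIDENSE matrix with PETSc-managed storage (data = NULL), per the documentation
   above. The helper name is hypothetical. */
static PetscErrorCode ExampleCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
  MatSetType(*A,MATMPIDENSE);
  MatMPIDenseSetPreallocation(*A,NULL);  /* PETSc allocates the local column-major array */
  return(0);
}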

1418: /*@
1419:    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1420:    array provided by the user. This is useful to avoid copying an array
1421:    into a matrix.

1423:    Not Collective

1425:    Input Parameters:
1426: +  mat - the matrix
1427: -  array - the array in column major order

1429:    Notes:
1430:    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1431:    freed when the matrix is destroyed.

1433:    Level: developer

1435: .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

1437: @*/
1438: PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1439: {
1442:   PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
1443:   PetscObjectStateIncrease((PetscObject)mat);
1444:   return(0);
1445: }

1447: /*@
1448:    MatDenseResetArray - Resets the matrix array to the array it had before the call to MatDensePlaceArray()

1450:    Not Collective

1452:    Input Parameters:
1453: .  mat - the matrix

1455:    Notes:
1456:    You can only call this after a call to MatDensePlaceArray()

1458:    Level: developer

1460: .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

1462: @*/
1463: PetscErrorCode  MatDenseResetArray(Mat mat)
1464: {
1467:   PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
1468:   PetscObjectStateIncrease((PetscObject)mat);
1469:   return(0);
1470: }
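
/* Editorial sketch (not part of the original source): pairing MatDensePlaceArray() with
   MatDenseResetArray() to operate on a user-owned buffer without copying. The buffer must
   match the local column-major layout and outlive its use by the matrix.
   The helper name is hypothetical. */
static PetscErrorCode ExampleBorrowArray(Mat A,PetscScalar *user,Vec x,Vec y)
{
  MatDensePlaceArray(A,user);  /* A now wraps the caller's storage */
  MatMult(A,x,y);
  MatDenseResetArray(A);       /* restore the original array; user is not freed */
  return(0);
}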

1472: /*@C
1473:    MatCreateDense - Creates a parallel matrix in dense format.

1475:    Collective

1477:    Input Parameters:
1478: +  comm - MPI communicator
1479: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1480: .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1481: .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1482: .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1483: -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1484:    to control all matrix memory allocation.

1486:    Output Parameter:
1487: .  A - the matrix

1489:    Notes:
1490:    The dense format is fully compatible with standard Fortran 77
1491:    storage by columns.

1493:    The data input variable is intended primarily for Fortran programmers
1494:    who wish to allocate their own matrix memory space.  Most users should
1495:    set data=NULL (PETSC_NULL_SCALAR for Fortran users).

1497:    The user MUST specify either the local or global matrix dimensions
1498:    (possibly both).

1500:    Level: intermediate

1502: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1503: @*/
1504: PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1505: {
1507:   PetscMPIInt    size;

1510:   MatCreate(comm,A);
1511:   MatSetSizes(*A,m,n,M,N);
1512:   MPI_Comm_size(comm,&size);
1513:   if (size > 1) {
1514:     PetscBool havedata = (PetscBool)!!data;

1516:     MatSetType(*A,MATMPIDENSE);
1517:     MatMPIDenseSetPreallocation(*A,data);
1518:     MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);
1519:     if (havedata) {  /* user provided data array, so no need to assemble */
1520:       MatSetUpMultiply_MPIDense(*A);
1521:       (*A)->assembled = PETSC_TRUE;
1522:     }
1523:   } else {
1524:     MatSetType(*A,MATSEQDENSE);
1525:     MatSeqDenseSetPreallocation(*A,data);
1526:   }
1527:   return(0);
1528: }
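
/* Editorial sketch (not part of the original source): one-call creation of a distributed
   dense matrix, letting PETSc choose the layout and manage memory. With data = NULL the
   matrix is unassembled, so values must still be set and assembled; sizes here are
   hypothetical. */
static PetscErrorCode ExampleCreateDense(MPI_Comm comm,Mat *A)
{
  MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,100,50,NULL,A); /* 100 x 50 global */
  MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);  /* here: assembles an all-zero matrix */
  MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);
  return(0);
}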

1530: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1531: {
1532:   Mat            mat;
1533:   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;

1537:   *newmat = 0;
1538:   MatCreate(PetscObjectComm((PetscObject)A),&mat);
1539:   MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1540:   MatSetType(mat,((PetscObject)A)->type_name);
1541:   a       = (Mat_MPIDense*)mat->data;

1543:   mat->factortype   = A->factortype;
1544:   mat->assembled    = PETSC_TRUE;
1545:   mat->preallocated = PETSC_TRUE;

1547:   a->size         = oldmat->size;
1548:   a->rank         = oldmat->rank;
1549:   mat->insertmode = NOT_SET_VALUES;
1550:   a->nvec         = oldmat->nvec;
1551:   a->donotstash   = oldmat->donotstash;

1553:   PetscLayoutReference(A->rmap,&mat->rmap);
1554:   PetscLayoutReference(A->cmap,&mat->cmap);

1556:   MatSetUpMultiply_MPIDense(mat);
1557:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1558:   PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);

1560:   *newmat = mat;
1561:   return(0);
1562: }

1564: PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1565: {
1567:   PetscBool      isbinary;
1568: #if defined(PETSC_HAVE_HDF5)
1569:   PetscBool      ishdf5;
1570: #endif

1575:   /* force binary viewer to load .info file if it has not yet done so */
1576:   PetscViewerSetUp(viewer);
1577:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1578: #if defined(PETSC_HAVE_HDF5)
1579:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);
1580: #endif
1581:   if (isbinary) {
1582:     MatLoad_Dense_Binary(newMat,viewer);
1583: #if defined(PETSC_HAVE_HDF5)
1584:   } else if (ishdf5) {
1585:     MatLoad_Dense_HDF5(newMat,viewer);
1586: #endif
1587:   } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
1588:   return(0);
1589: }
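
/*
   Example of loading an MPIDENSE matrix from a binary viewer (a minimal sketch;
   error checking is omitted and the file name is a placeholder):

      Mat         A;
      PetscViewer viewer;
      PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetType(A,MATDENSE);
      MatLoad(A,viewer);
      PetscViewerDestroy(&viewer);
*/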

1591: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1592: {
1593:   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1594:   Mat            a,b;
1595:   PetscBool      flg;

1599:   a    = matA->A;
1600:   b    = matB->A;
1601:   MatEqual(a,b,&flg);
1602:   MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1603:   return(0);
1604: }

1606: PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1607: {
1608:   PetscErrorCode        ierr;
1609:   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1610:   Mat_TransMatMultDense *atb = a->atbdense;

1613:   PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);
1614:   (atb->destroy)(A);
1615:   PetscFree(atb);
1616:   return(0);
1617: }

1619: PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1620: {
1621:   PetscErrorCode        ierr;
1622:   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1623:   Mat_MatTransMultDense *abt = a->abtdense;

1626:   PetscFree2(abt->buf[0],abt->buf[1]);
1627:   PetscFree2(abt->recvcounts,abt->recvdispls);
1628:   (abt->destroy)(A);
1629:   PetscFree(abt);
1630:   return(0);
1631: }

1633: PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1634: {
1635:   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1636:   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1637:   Mat_TransMatMultDense *atb = c->atbdense;
1639:   MPI_Comm       comm;
1640:   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1641:   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1642:   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1643:   PetscScalar    _DOne=1.0,_DZero=0.0;
1644:   PetscBLASInt   am,an,bn,aN;
1645:   const PetscInt *ranges;

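  /* Overview of the numeric phase below: every rank forms the full cM x cN partial product
     (A_loc)^T * B_loc in atbarray, packs it by destination rank into sendbuf, and a single
     MPI_Reduce_scatter() sums the partial products while delivering to each rank exactly
     the rows of C it owns. */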
1648:   PetscObjectGetComm((PetscObject)A,&comm);
1649:   MPI_Comm_rank(comm,&rank);
1650:   MPI_Comm_size(comm,&size);

1652:   /* compute atbarray = aseq^T * bseq */
1653:   PetscBLASIntCast(a->A->cmap->n,&an);
1654:   PetscBLASIntCast(b->A->cmap->n,&bn);
1655:   PetscBLASIntCast(a->A->rmap->n,&am);
1656:   PetscBLASIntCast(A->cmap->N,&aN);
1657:   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

1659:   MatGetOwnershipRanges(C,&ranges);
1660:   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

1662:   /* pack atbarray into sendbuf, ordered by the destination rank's rows of C */
1663:   k = 0;
1664:   for (proc=0; proc<size; proc++) {
1665:     for (j=0; j<cN; j++) {
1666:       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1667:     }
1668:   }
1669:   /* sum the partial products from all ranks into the locally owned rows of C */
1670:   MatDenseGetArray(c->A,&carray);
1671:   MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);
1672:   MatDenseRestoreArray(c->A,&carray);
1673:   return(0);
1674: }

1676: PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
1677: {
1678:   PetscErrorCode        ierr;
1679:   MPI_Comm              comm;
1680:   PetscMPIInt           size;
1681:   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1682:   Mat_MPIDense          *c;
1683:   Mat_TransMatMultDense *atb;

1686:   PetscObjectGetComm((PetscObject)A,&comm);
1687:   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1688:     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1689:   }

1691:   /* create matrix product C */
1692:   MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
1693:   MatSetType(C,MATMPIDENSE);
1694:   MatSetUp(C);
1695:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1696:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

1698:   /* create data structure for reuse of C */
1699:   MPI_Comm_size(comm,&size);
1700:   PetscNew(&atb);
1701:   cM = C->rmap->N;
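1702:   /* note: sendbuf and atbarray each hold the full cM x cN product, so this allocates global-size work space on every rank */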
1702:   PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);

1704:   c                    = (Mat_MPIDense*)C->data;
1705:   c->atbdense          = atb;
1706:   atb->destroy         = C->ops->destroy;
1707:   C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1708:   return(0);
1709: }
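
/*
   Example of triggering the AtB product above (a sketch; A and B stand for existing
   MPIDENSE matrices with matching row layouts):

      Mat C;
      MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);
*/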

1711: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
1712: {
1713:   PetscErrorCode        ierr;
1714:   MPI_Comm              comm;
1715:   PetscMPIInt           i, size;
1716:   PetscInt              maxRows, bufsiz;
1717:   Mat_MPIDense          *c;
1718:   PetscMPIInt           tag;
1719:   PetscInt              alg;
1720:   Mat_MatTransMultDense *abt;
1721:   Mat_Product           *product = C->product;
1722:   PetscBool             flg;

1725:   /* check local sizes of A and B */
1726:   if (A->cmap->n != B->cmap->n) {
1727:     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);
1728:   }

1730:   PetscStrcmp(product->alg,"allgatherv",&flg);
1731:   if (flg) {
1732:     alg = 0;
1733:   } else alg = 1;

1735:   /* set up matrix product C */
1736:   MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);
1737:   MatSetType(C,MATMPIDENSE);
1738:   MatSetUp(C);
1739:   PetscObjectGetNewTag((PetscObject)C, &tag);
1740:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1741:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

1743:   /* create data structure for reuse of C */
1744:   PetscObjectGetComm((PetscObject)C,&comm);
1745:   MPI_Comm_size(comm,&size);
1746:   PetscNew(&abt);
1747:   abt->tag = tag;
1748:   abt->alg = alg;
1749:   switch (alg) {
1750:   case 1: /* alg: "cyclic" */
1751:     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
1752:     bufsiz = A->cmap->N * maxRows;
1753:     PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));
1754:     break;
1755:   default: /* alg: "allgatherv" */
1756:     PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));
1757:     PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));
1758:     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
1759:     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
1760:     break;
1761:   }

1763:   c                               = (Mat_MPIDense*)C->data;
1764:   c->abtdense                     = abt;
1765:   abt->destroy                    = C->ops->destroy;
1766:   C->ops->destroy                 = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
1767:   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
1768:   return(0);
1769: }

1771: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
1772: {
1773:   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1774:   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1775:   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
1776:   Mat_MatTransMultDense *abt = c->abtdense;
1778:   MPI_Comm       comm;
1779:   PetscMPIInt    rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
1780:   PetscScalar    *sendbuf, *recvbuf=0, *carray;
1781:   PetscInt       i,cK=A->cmap->N,k,j,bn;
1782:   PetscScalar    _DOne=1.0,_DZero=0.0;
1783:   PetscBLASInt   cm, cn, ck;
1784:   MPI_Request    reqs[2];
1785:   const PetscInt *ranges;

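  /* Overview of the ring algorithm below: the packed local rows of B circulate through the
     ranks in "size" steps; at each step the block currently held is multiplied against the
     local rows of A to fill the matching column block of C, while the next block is already
     in flight (MPI_Isend/MPI_Irecv overlap the BLASgemm). */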
1788:   PetscObjectGetComm((PetscObject)A,&comm);
1789:   MPI_Comm_rank(comm,&rank);
1790:   MPI_Comm_size(comm,&size);

1792:   MatGetOwnershipRanges(B,&ranges);
1793:   bn = B->rmap->n;
1794:   if (bseq->lda == bn) {
1795:     sendbuf = bseq->v;
1796:   } else {
1797:     sendbuf = abt->buf[0];
1798:     for (k = 0, i = 0; i < cK; i++) {
1799:       for (j = 0; j < bn; j++, k++) {
1800:         sendbuf[k] = bseq->v[i * bseq->lda + j];
1801:       }
1802:     }
1803:   }
1804:   if (size > 1) {
1805:     sendto = (rank + size - 1) % size;
1806:     recvfrom = (rank + size + 1) % size;
1807:   } else {
1808:     sendto = recvfrom = 0;
1809:   }
1810:   PetscBLASIntCast(cK,&ck);
1811:   PetscBLASIntCast(c->A->rmap->n,&cm);
1812:   recvisfrom = rank;
1813:   for (i = 0; i < size; i++) {
1814:     /* the previous iteration's sends and receives have completed, so both buffers can be read/modified */
1815:     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* the process from which the next recvbuf will originate */
1816:     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

1818:     if (nextrecvisfrom != rank) {
1819:       /* start the cyclic transfer: send from sendbuf, receive into recvbuf (which becomes the next sendbuf) */
1820:       sendsiz = cK * bn;
1821:       recvsiz = cK * nextbn;
1822:       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
1823:       MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);
1824:       MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);
1825:     }

1827:     /* local product aseq * sendbuf^T fills the columns of C matching the rows of B held in sendbuf (from process recvisfrom) */
1828:     PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);
1829:     carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
1830:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));

1832:     if (nextrecvisfrom != rank) {
1833:       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
1834:       MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
1835:     }
1836:     bn = nextbn;
1837:     recvisfrom = nextrecvisfrom;
1838:     sendbuf = recvbuf;
1839:   }
1840:   return(0);
1841: }

1843: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
1844: {
1845:   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1846:   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1847:   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
1848:   Mat_MatTransMultDense *abt = c->abtdense;
1850:   MPI_Comm       comm;
1851:   PetscMPIInt    rank,size;
1852:   PetscScalar    *sendbuf, *recvbuf;
1853:   PetscInt       i,cK=A->cmap->N,k,j,bn;
1854:   PetscScalar    _DOne=1.0,_DZero=0.0;
1855:   PetscBLASInt   cm, cn, ck;

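  /* Overview of the allgatherv algorithm below: each rank packs the transpose of its local
     rows of B, MPI_Allgatherv() assembles the complete B^T on every rank, and one local
     BLASgemm then produces the locally owned rows of C = A * B^T. */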
1858:   PetscObjectGetComm((PetscObject)A,&comm);
1859:   MPI_Comm_rank(comm,&rank);
1860:   MPI_Comm_size(comm,&size);

1862:   /* copy the transpose of the local rows of B into buf[0] */
1863:   bn      = B->rmap->n;
1864:   sendbuf = abt->buf[0];
1865:   recvbuf = abt->buf[1];
1866:   for (k = 0, j = 0; j < bn; j++) {
1867:     for (i = 0; i < cK; i++, k++) {
1868:       sendbuf[k] = bseq->v[i * bseq->lda + j];
1869:     }
1870:   }
1871:   MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);
1872:   PetscBLASIntCast(cK,&ck);
1873:   PetscBLASIntCast(c->A->rmap->n,&cm);
1874:   PetscBLASIntCast(c->A->cmap->n,&cn);
1875:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
1876:   return(0);
1877: }

1879: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
1880: {
1881:   Mat_MPIDense          *c=(Mat_MPIDense*)C->data;
1882:   Mat_MatTransMultDense *abt = c->abtdense;
1883:   PetscErrorCode        ierr;

1886:   switch (abt->alg) {
1887:   case 1:
1888:     MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);
1889:     break;
1890:   default:
1891:     MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);
1892:     break;
1893:   }
1894:   return(0);
1895: }

1897: PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1898: {
1899:   PetscErrorCode   ierr;
1900:   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1901:   Mat_MatMultDense *ab = a->abdense;

1904:   MatDestroy(&ab->Ce);
1905:   MatDestroy(&ab->Ae);
1906:   MatDestroy(&ab->Be);

1908:   (ab->destroy)(A);
1909:   PetscFree(ab);
1910:   return(0);
1911: }

1913: #if defined(PETSC_HAVE_ELEMENTAL)
1914: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1915: {
1916:   PetscErrorCode   ierr;
1917:   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1918:   Mat_MatMultDense *ab=c->abdense;

1921:   MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);
1922:   MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);
1923:   MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);
1924:   MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);
1925:   return(0);
1926: }

1928: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
1929: {
1930:   PetscErrorCode   ierr;
1931:   Mat              Ae,Be,Ce;
1932:   Mat_MPIDense     *c;
1933:   Mat_MatMultDense *ab;

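  /* Strategy: convert A and B to Elemental format, perform the symbolic product there, and
     let the numeric phase compute in Elemental and convert the result back to MPIDENSE. */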
1936:   /* check local sizes of A and B; the column layout of A must match the row layout of B */
1937:   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1938:     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
1939:   }

1941:   /* create elemental matrices Ae and Be */
1942:   MatCreate(PetscObjectComm((PetscObject)A), &Ae);
1943:   MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);
1944:   MatSetType(Ae,MATELEMENTAL);
1945:   MatSetUp(Ae);
1946:   MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);

1948:   MatCreate(PetscObjectComm((PetscObject)B), &Be);
1949:   MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);
1950:   MatSetType(Be,MATELEMENTAL);
1951:   MatSetUp(Be);
1952:   MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);

1954:   /* compute symbolic Ce = Ae*Be */
1955:   MatCreate(PetscObjectComm((PetscObject)C),&Ce);
1956:   MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);

1958:   /* setup C */
1959:   MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
1960:   MatSetType(C,MATDENSE);
1961:   MatSetUp(C);

1963:   /* create data structure for reuse of C */
1964:   PetscNew(&ab);
1965:   c                  = (Mat_MPIDense*)C->data;
1966:   c->abdense         = ab;

1968:   ab->Ae             = Ae;
1969:   ab->Be             = Be;
1970:   ab->Ce             = Ce;
1971:   ab->destroy        = C->ops->destroy;
1972:   C->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1973:   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1974:   C->ops->productnumeric = MatProductNumeric_AB;
1975:   return(0);
1976: }
1977: #endif
1978: /* ----------------------------------------------- */
1979: #if defined(PETSC_HAVE_ELEMENTAL)
1980: static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
1981: {
1983:   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
1984:   C->ops->productsymbolic = MatProductSymbolic_AB;
1985:   C->ops->productnumeric  = MatProductNumeric_AB;
1986:   return(0);
1987: }
1988: #endif

1990: static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C)
1991: {
1993:   Mat_Product    *product = C->product;

1996:   MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);
1997:   C->ops->productnumeric          = MatProductNumeric_AtB;
1998:   C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense;
1999:   return(0);
2000: }

2002: static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2003: {
2004:   Mat_Product *product = C->product;
2005:   Mat         A = product->A,B=product->B;

2008:   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2009:     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);

2011:   C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense;
2012:   return(0);
2013: }

2015: static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2016: {
2018:   Mat_Product    *product = C->product;
2019:   const char     *algTypes[2] = {"allgatherv","cyclic"};
2020:   PetscInt       alg,nalg = 2;
2021:   PetscBool      flg = PETSC_FALSE;

2024:   /* Set default algorithm */
2025:   alg = 0; /* default is allgatherv */
2026:   PetscStrcmp(product->alg,"default",&flg);
2027:   if (flg) {
2028:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2029:   }

2031:   /* Get runtime option */
2032:   if (product->api_user) {
2033:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
2034:     PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);
2035:     PetscOptionsEnd();
2036:   } else {
2037:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
2038:     PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);
2039:     PetscOptionsEnd();
2040:   }
2041:   if (flg) {
2042:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2043:   }

2045:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2046:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2047:   C->ops->productnumeric           = MatProductNumeric_ABt;
2048:   return(0);
2049: }
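
/*
   Example of selecting the ABt algorithm at runtime (a sketch; "./app" stands for any
   program that triggers this product):

      ./app -matmattransmult_mpidense_mpidense_via cyclic      (MatMatTransposeMult() API)
      ./app -matproduct_abt_mpidense_mpidense_via allgatherv   (MatProduct API)
*/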

2051: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2052: {
2054:   Mat_Product    *product = C->product;

2057:   switch (product->type) {
2058: #if defined(PETSC_HAVE_ELEMENTAL)
2059:   case MATPRODUCT_AB:
2060:     MatProductSetFromOptions_MPIDense_AB(C);
2061:     break;
2062: #endif
2063:   case MATPRODUCT_AtB:
2064:     MatProductSetFromOptions_MPIDense_AtB(C);
2065:     break;
2066:   case MATPRODUCT_ABt:
2067:     MatProductSetFromOptions_MPIDense_ABt(C);
2068:     break;
2069:   default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]);
2070:   }
2071:   return(0);
2072: }