Actual source code: mpidense.c

petsc-3.7.7 2017-09-25
Report Typos and Errors
  2: /*
  3:    Basic functions for basic parallel dense matrices.
  4: */


  7: #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
  8: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  9: #include <petscblaslapack.h>

 13: /*@

 15:       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
 16:               matrix that represents the operator. For sequential matrices it returns itself.

 18:     Input Parameter:
 19: .      A - the Seq or MPI dense matrix

 21:     Output Parameter:
 22: .      B - the inner matrix

 24:     Level: intermediate

 26: @*/
 27: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
 28: {
 29:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 31:   PetscBool      flg;

 34:   PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
 35:   if (flg) *B = mat->A;
 36:   else *B = A;
 37:   return(0);
 38: }

 42: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 43: {
 44:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 46:   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

 49:   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
 50:   lrow = row - rstart;
 51:   MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
 52:   return(0);
 53: }

 57: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 58: {

 62:   if (idx) {PetscFree(*idx);}
 63:   if (v) {PetscFree(*v);}
 64:   return(0);
 65: }

 69: PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
 70: {
 71:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
 73:   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
 74:   PetscScalar    *array;
 75:   MPI_Comm       comm;
 76:   Mat            B;

 79:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

 81:   PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
 82:   if (!B) {
 83:     PetscObjectGetComm((PetscObject)(mdn->A),&comm);
 84:     MatCreate(comm,&B);
 85:     MatSetSizes(B,m,m,m,m);
 86:     MatSetType(B,((PetscObject)mdn->A)->type_name);
 87:     MatDenseGetArray(mdn->A,&array);
 88:     MatSeqDenseSetPreallocation(B,array+m*rstart);
 89:     MatDenseRestoreArray(mdn->A,&array);
 90:     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
 91:     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
 92:     PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
 93:     *a   = B;
 94:     MatDestroy(&B);
 95:   } else *a = B;
 96:   return(0);
 97: }

101: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
102: {
103:   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
105:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
106:   PetscBool      roworiented = A->roworiented;

109:   for (i=0; i<m; i++) {
110:     if (idxm[i] < 0) continue;
111:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
112:     if (idxm[i] >= rstart && idxm[i] < rend) {
113:       row = idxm[i] - rstart;
114:       if (roworiented) {
115:         MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
116:       } else {
117:         for (j=0; j<n; j++) {
118:           if (idxn[j] < 0) continue;
119:           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
120:           MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
121:         }
122:       }
123:     } else if (!A->donotstash) {
124:       mat->assembled = PETSC_FALSE;
125:       if (roworiented) {
126:         MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
127:       } else {
128:         MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
129:       }
130:     }
131:   }
132:   return(0);
133: }

137: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
138: {
139:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
141:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

144:   for (i=0; i<m; i++) {
145:     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
146:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
147:     if (idxm[i] >= rstart && idxm[i] < rend) {
148:       row = idxm[i] - rstart;
149:       for (j=0; j<n; j++) {
150:         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
151:         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
152:         MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
153:       }
154:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
155:   }
156:   return(0);
157: }

161: PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
162: {
163:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

167:   MatDenseGetArray(a->A,array);
168:   return(0);
169: }

173: static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
174: {
175:   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
176:   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
178:   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
179:   const PetscInt *irow,*icol;
180:   PetscScalar    *av,*bv,*v = lmat->v;
181:   Mat            newmat;
182:   IS             iscol_local;

185:   ISAllGather(iscol,&iscol_local);
186:   ISGetIndices(isrow,&irow);
187:   ISGetIndices(iscol_local,&icol);
188:   ISGetLocalSize(isrow,&nrows);
189:   ISGetLocalSize(iscol,&ncols);
190:   ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */

192:   /* No parallel redistribution currently supported! Should really check each index set
193:      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
194:      original matrix! */

196:   MatGetLocalSize(A,&nlrows,&nlcols);
197:   MatGetOwnershipRange(A,&rstart,&rend);

199:   /* Check submatrix call */
200:   if (scall == MAT_REUSE_MATRIX) {
201:     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
202:     /* Really need to test rows and column sizes! */
203:     newmat = *B;
204:   } else {
205:     /* Create and fill new matrix */
206:     MatCreate(PetscObjectComm((PetscObject)A),&newmat);
207:     MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
208:     MatSetType(newmat,((PetscObject)A)->type_name);
209:     MatMPIDenseSetPreallocation(newmat,NULL);
210:   }

212:   /* Now extract the data pointers and do the copy, column at a time */
213:   newmatd = (Mat_MPIDense*)newmat->data;
214:   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

216:   for (i=0; i<Ncols; i++) {
217:     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
218:     for (j=0; j<nrows; j++) {
219:       *bv++ = av[irow[j] - rstart];
220:     }
221:   }

223:   /* Assemble the matrices so that the correct flags are set */
224:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
225:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

227:   /* Free work space */
228:   ISRestoreIndices(isrow,&irow);
229:   ISRestoreIndices(iscol_local,&icol);
230:   ISDestroy(&iscol_local);
231:   *B   = newmat;
232:   return(0);
233: }

237: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
238: {
239:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

243:   MatDenseRestoreArray(a->A,array);
244:   return(0);
245: }

249: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
250: {
251:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
252:   MPI_Comm       comm;
254:   PetscInt       nstash,reallocs;
255:   InsertMode     addv;

258:   PetscObjectGetComm((PetscObject)mat,&comm);
259:   /* make sure all processors are either in INSERTMODE or ADDMODE */
260:   MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);
261:   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
262:   mat->insertmode = addv; /* in case this processor had no cache */

264:   MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
265:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
266:   PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
267:   return(0);
268: }

272: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
273: {
274:   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
276:   PetscInt       i,*row,*col,flg,j,rstart,ncols;
277:   PetscMPIInt    n;
278:   PetscScalar    *val;

281:   /*  wait on receives */
282:   while (1) {
283:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
284:     if (!flg) break;

286:     for (i=0; i<n;) {
287:       /* Now identify the consecutive vals belonging to the same row */
288:       for (j=i,rstart=row[j]; j<n; j++) {
289:         if (row[j] != rstart) break;
290:       }
291:       if (j < n) ncols = j-i;
292:       else       ncols = n-i;
293:       /* Now assemble all these values with a single function call */
294:       MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
295:       i    = j;
296:     }
297:   }
298:   MatStashScatterEnd_Private(&mat->stash);

300:   MatAssemblyBegin(mdn->A,mode);
301:   MatAssemblyEnd(mdn->A,mode);

303:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
304:     MatSetUpMultiply_MPIDense(mat);
305:   }
306:   return(0);
307: }

311: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
312: {
314:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

317:   MatZeroEntries(l->A);
318:   return(0);
319: }

321: /* the code does not do the diagonal entries correctly unless the
322:    matrix is square and the column and row owerships are identical.
323:    This is a BUG. The only way to fix it seems to be to access
324:    mdn->A and mdn->B directly and not through the MatZeroRows()
325:    routine.
326: */
329: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
330: {
331:   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
332:   PetscErrorCode    ierr;
333:   PetscInt          i,*owners = A->rmap->range;
334:   PetscInt          *sizes,j,idx,nsends;
335:   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
336:   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
337:   PetscInt          *lens,*lrows,*values;
338:   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
339:   MPI_Comm          comm;
340:   MPI_Request       *send_waits,*recv_waits;
341:   MPI_Status        recv_status,*send_status;
342:   PetscBool         found;
343:   const PetscScalar *xx;
344:   PetscScalar       *bb;

347:   PetscObjectGetComm((PetscObject)A,&comm);
348:   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
349:   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
350:   /*  first count number of contributors to each processor */
351:   PetscCalloc1(2*size,&sizes);
352:   PetscMalloc1(N+1,&owner);  /* see note*/
353:   for (i=0; i<N; i++) {
354:     idx   = rows[i];
355:     found = PETSC_FALSE;
356:     for (j=0; j<size; j++) {
357:       if (idx >= owners[j] && idx < owners[j+1]) {
358:         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
359:       }
360:     }
361:     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
362:   }
363:   nsends = 0;
364:   for (i=0; i<size; i++) nsends += sizes[2*i+1];

366:   /* inform other processors of number of messages and max length*/
367:   PetscMaxSum(comm,sizes,&nmax,&nrecvs);

369:   /* post receives:   */
370:   PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);
371:   PetscMalloc1(nrecvs+1,&recv_waits);
372:   for (i=0; i<nrecvs; i++) {
373:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
374:   }

376:   /* do sends:
377:       1) starts[i] gives the starting index in svalues for stuff going to
378:          the ith processor
379:   */
380:   PetscMalloc1(N+1,&svalues);
381:   PetscMalloc1(nsends+1,&send_waits);
382:   PetscMalloc1(size+1,&starts);

384:   starts[0] = 0;
385:   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
386:   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

388:   starts[0] = 0;
389:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
390:   count = 0;
391:   for (i=0; i<size; i++) {
392:     if (sizes[2*i+1]) {
393:       MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
394:     }
395:   }
396:   PetscFree(starts);

398:   base = owners[rank];

400:   /*  wait on receives */
401:   PetscMalloc2(nrecvs,&lens,nrecvs,&source);
402:   count = nrecvs;
403:   slen  = 0;
404:   while (count) {
405:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
406:     /* unpack receives into our local space */
407:     MPI_Get_count(&recv_status,MPIU_INT,&n);

409:     source[imdex] = recv_status.MPI_SOURCE;
410:     lens[imdex]   = n;
411:     slen += n;
412:     count--;
413:   }
414:   PetscFree(recv_waits);

416:   /* move the data into the send scatter */
417:   PetscMalloc1(slen+1,&lrows);
418:   count = 0;
419:   for (i=0; i<nrecvs; i++) {
420:     values = rvalues + i*nmax;
421:     for (j=0; j<lens[i]; j++) {
422:       lrows[count++] = values[j] - base;
423:     }
424:   }
425:   PetscFree(rvalues);
426:   PetscFree2(lens,source);
427:   PetscFree(owner);
428:   PetscFree(sizes);

430:   /* fix right hand side if needed */
431:   if (x && b) {
432:     VecGetArrayRead(x,&xx);
433:     VecGetArray(b,&bb);
434:     for (i=0; i<slen; i++) {
435:       bb[lrows[i]] = diag*xx[lrows[i]];
436:     }
437:     VecRestoreArrayRead(x,&xx);
438:     VecRestoreArray(b,&bb);
439:   }

441:   /* actually zap the local rows */
442:   MatZeroRows(l->A,slen,lrows,0.0,0,0);
443:   if (diag != 0.0) {
444:     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
445:     PetscInt     m   = ll->lda, i;

447:     for (i=0; i<slen; i++) {
448:       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
449:     }
450:   }
451:   PetscFree(lrows);

453:   /* wait on sends */
454:   if (nsends) {
455:     PetscMalloc1(nsends,&send_status);
456:     MPI_Waitall(nsends,send_waits,send_status);
457:     PetscFree(send_status);
458:   }
459:   PetscFree(send_waits);
460:   PetscFree(svalues);
461:   return(0);
462: }

464: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
465: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
466: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
467: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

471: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
472: {
473:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

477:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
478:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
479:   MatMult_SeqDense(mdn->A,mdn->lvec,yy);
480:   return(0);
481: }

485: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
486: {
487:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

491:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
492:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
493:   MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
494:   return(0);
495: }

499: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
500: {
501:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
503:   PetscScalar    zero = 0.0;

506:   VecSet(yy,zero);
507:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
508:   VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
509:   VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
510:   return(0);
511: }

515: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
516: {
517:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

521:   VecCopy(yy,zz);
522:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
523:   VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
524:   VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
525:   return(0);
526: }

530: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
531: {
532:   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
533:   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
535:   PetscInt       len,i,n,m = A->rmap->n,radd;
536:   PetscScalar    *x,zero = 0.0;

539:   VecSet(v,zero);
540:   VecGetArray(v,&x);
541:   VecGetSize(v,&n);
542:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
543:   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
544:   radd = A->rmap->rstart*m;
545:   for (i=0; i<len; i++) {
546:     x[i] = aloc->v[radd + i*m + i];
547:   }
548:   VecRestoreArray(v,&x);
549:   return(0);
550: }

554: PetscErrorCode MatDestroy_MPIDense(Mat mat)
555: {
556:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

560: #if defined(PETSC_USE_LOG)
561:   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
562: #endif
563:   MatStashDestroy_Private(&mat->stash);
564:   MatDestroy(&mdn->A);
565:   VecDestroy(&mdn->lvec);
566:   VecScatterDestroy(&mdn->Mvctx);

568:   PetscFree(mat->data);
569:   PetscObjectChangeTypeName((PetscObject)mat,0);

571:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);
572:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);
573: #if defined(PETSC_HAVE_ELEMENTAL)
574:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);
575: #endif
576:   PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);
577:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);
578:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);
579:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);
580:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);
581:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);
582:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);
583:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);
584:   return(0);
585: }

589: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
590: {
591:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
592:   PetscErrorCode    ierr;
593:   PetscViewerFormat format;
594:   int               fd;
595:   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
596:   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
597:   PetscScalar       *work,*v,*vv;
598:   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

601:   if (mdn->size == 1) {
602:     MatView(mdn->A,viewer);
603:   } else {
604:     PetscViewerBinaryGetDescriptor(viewer,&fd);
605:     MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
606:     MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);

608:     PetscViewerGetFormat(viewer,&format);
609:     if (format == PETSC_VIEWER_NATIVE) {

611:       if (!rank) {
612:         /* store the matrix as a dense matrix */
613:         header[0] = MAT_FILE_CLASSID;
614:         header[1] = mat->rmap->N;
615:         header[2] = N;
616:         header[3] = MATRIX_BINARY_FORMAT_DENSE;
617:         PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);

619:         /* get largest work array needed for transposing array */
620:         mmax = mat->rmap->n;
621:         for (i=1; i<size; i++) {
622:           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
623:         }
624:         PetscMalloc1(mmax*N,&work);

626:         /* write out local array, by rows */
627:         m = mat->rmap->n;
628:         v = a->v;
629:         for (j=0; j<N; j++) {
630:           for (i=0; i<m; i++) {
631:             work[j + i*N] = *v++;
632:           }
633:         }
634:         PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
635:         /* get largest work array to receive messages from other processes, excludes process zero */
636:         mmax = 0;
637:         for (i=1; i<size; i++) {
638:           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
639:         }
640:         PetscMalloc1(mmax*N,&vv);
641:         for (k = 1; k < size; k++) {
642:           v    = vv;
643:           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
644:           MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));

646:           for (j = 0; j < N; j++) {
647:             for (i = 0; i < m; i++) {
648:               work[j + i*N] = *v++;
649:             }
650:           }
651:           PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
652:         }
653:         PetscFree(work);
654:         PetscFree(vv);
655:       } else {
656:         MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
657:       }
658:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
659:   }
660:   return(0);
661: }

663: extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
664: #include <petscdraw.h>
667: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
668: {
669:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
670:   PetscErrorCode    ierr;
671:   PetscMPIInt       rank = mdn->rank;
672:   PetscViewerType   vtype;
673:   PetscBool         iascii,isdraw;
674:   PetscViewer       sviewer;
675:   PetscViewerFormat format;

678:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
679:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
680:   if (iascii) {
681:     PetscViewerGetType(viewer,&vtype);
682:     PetscViewerGetFormat(viewer,&format);
683:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
684:       MatInfo info;
685:       MatGetInfo(mat,MAT_LOCAL,&info);
686:       PetscViewerASCIIPushSynchronized(viewer);
687:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
688:       PetscViewerFlush(viewer);
689:       PetscViewerASCIIPopSynchronized(viewer);
690:       VecScatterView(mdn->Mvctx,viewer);
691:       return(0);
692:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
693:       return(0);
694:     }
695:   } else if (isdraw) {
696:     PetscDraw draw;
697:     PetscBool isnull;

699:     PetscViewerDrawGetDraw(viewer,0,&draw);
700:     PetscDrawIsNull(draw,&isnull);
701:     if (isnull) return(0);
702:   }

704:   {
705:     /* assemble the entire matrix onto first processor. */
706:     Mat         A;
707:     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
708:     PetscInt    *cols;
709:     PetscScalar *vals;

711:     MatCreate(PetscObjectComm((PetscObject)mat),&A);
712:     if (!rank) {
713:       MatSetSizes(A,M,N,M,N);
714:     } else {
715:       MatSetSizes(A,0,0,M,N);
716:     }
717:     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
718:     MatSetType(A,MATMPIDENSE);
719:     MatMPIDenseSetPreallocation(A,NULL);
720:     PetscLogObjectParent((PetscObject)mat,(PetscObject)A);

722:     /* Copy the matrix ... This isn't the most efficient means,
723:        but it's quick for now */
724:     A->insertmode = INSERT_VALUES;

726:     row = mat->rmap->rstart;
727:     m   = mdn->A->rmap->n;
728:     for (i=0; i<m; i++) {
729:       MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
730:       MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
731:       MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
732:       row++;
733:     }

735:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
736:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
737:     PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
738:     if (!rank) {
739:       PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);
740:       MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);
741:     }
742:     PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
743:     PetscViewerFlush(viewer);
744:     MatDestroy(&A);
745:   }
746:   return(0);
747: }

751: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
752: {
754:   PetscBool      iascii,isbinary,isdraw,issocket;

757:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
758:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
759:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
760:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);

762:   if (iascii || issocket || isdraw) {
763:     MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
764:   } else if (isbinary) {
765:     MatView_MPIDense_Binary(mat,viewer);
766:   }
767:   return(0);
768: }

772: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
773: {
774:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
775:   Mat            mdn  = mat->A;
777:   PetscReal      isend[5],irecv[5];

780:   info->block_size = 1.0;

782:   MatGetInfo(mdn,MAT_LOCAL,info);

784:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
785:   isend[3] = info->memory;  isend[4] = info->mallocs;
786:   if (flag == MAT_LOCAL) {
787:     info->nz_used      = isend[0];
788:     info->nz_allocated = isend[1];
789:     info->nz_unneeded  = isend[2];
790:     info->memory       = isend[3];
791:     info->mallocs      = isend[4];
792:   } else if (flag == MAT_GLOBAL_MAX) {
793:     MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));

795:     info->nz_used      = irecv[0];
796:     info->nz_allocated = irecv[1];
797:     info->nz_unneeded  = irecv[2];
798:     info->memory       = irecv[3];
799:     info->mallocs      = irecv[4];
800:   } else if (flag == MAT_GLOBAL_SUM) {
801:     MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));

803:     info->nz_used      = irecv[0];
804:     info->nz_allocated = irecv[1];
805:     info->nz_unneeded  = irecv[2];
806:     info->memory       = irecv[3];
807:     info->mallocs      = irecv[4];
808:   }
809:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
810:   info->fill_ratio_needed = 0;
811:   info->factor_mallocs    = 0;
812:   return(0);
813: }

817: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
818: {
819:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

823:   switch (op) {
824:   case MAT_NEW_NONZERO_LOCATIONS:
825:   case MAT_NEW_NONZERO_LOCATION_ERR:
826:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
827:     MatCheckPreallocated(A,1);
828:     MatSetOption(a->A,op,flg);
829:     break;
830:   case MAT_ROW_ORIENTED:
831:     MatCheckPreallocated(A,1);
832:     a->roworiented = flg;
833:     MatSetOption(a->A,op,flg);
834:     break;
835:   case MAT_NEW_DIAGONALS:
836:   case MAT_KEEP_NONZERO_PATTERN:
837:   case MAT_USE_HASH_TABLE:
838:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
839:     break;
840:   case MAT_IGNORE_OFF_PROC_ENTRIES:
841:     a->donotstash = flg;
842:     break;
843:   case MAT_SYMMETRIC:
844:   case MAT_STRUCTURALLY_SYMMETRIC:
845:   case MAT_HERMITIAN:
846:   case MAT_SYMMETRY_ETERNAL:
847:   case MAT_IGNORE_LOWER_TRIANGULAR:
848:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
849:     break;
850:   default:
851:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
852:   }
853:   return(0);
854: }


859: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
860: {
861:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
862:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
863:   PetscScalar    *l,*r,x,*v;
865:   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

868:   MatGetLocalSize(A,&s2,&s3);
869:   if (ll) {
870:     VecGetLocalSize(ll,&s2a);
871:     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
872:     VecGetArray(ll,&l);
873:     for (i=0; i<m; i++) {
874:       x = l[i];
875:       v = mat->v + i;
876:       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
877:     }
878:     VecRestoreArray(ll,&l);
879:     PetscLogFlops(n*m);
880:   }
881:   if (rr) {
882:     VecGetLocalSize(rr,&s3a);
883:     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
884:     VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
885:     VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
886:     VecGetArray(mdn->lvec,&r);
887:     for (i=0; i<n; i++) {
888:       x = r[i];
889:       v = mat->v + i*m;
890:       for (j=0; j<m; j++) (*v++) *= x;
891:     }
892:     VecRestoreArray(mdn->lvec,&r);
893:     PetscLogFlops(n*m);
894:   }
895:   return(0);
896: }

900: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
901: {
902:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
903:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
905:   PetscInt       i,j;
906:   PetscReal      sum = 0.0;
907:   PetscScalar    *v  = mat->v;

910:   if (mdn->size == 1) {
911:      MatNorm(mdn->A,type,nrm);
912:   } else {
913:     if (type == NORM_FROBENIUS) {
914:       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
915:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
916:       }
917:       MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
918:       *nrm = PetscSqrtReal(*nrm);
919:       PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
920:     } else if (type == NORM_1) {
921:       PetscReal *tmp,*tmp2;
922:       PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);
923:       PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));
924:       PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));
925:       *nrm = 0.0;
926:       v    = mat->v;
927:       for (j=0; j<mdn->A->cmap->n; j++) {
928:         for (i=0; i<mdn->A->rmap->n; i++) {
929:           tmp[j] += PetscAbsScalar(*v);  v++;
930:         }
931:       }
932:       MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
933:       for (j=0; j<A->cmap->N; j++) {
934:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
935:       }
936:       PetscFree2(tmp,tmp2);
937:       PetscLogFlops(A->cmap->n*A->rmap->n);
938:     } else if (type == NORM_INFINITY) { /* max row norm */
939:       PetscReal ntemp;
940:       MatNorm(mdn->A,type,&ntemp);
941:       MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
942:     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
943:   }
944:   return(0);
945: }

949: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
950: {
951:   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
952:   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
953:   Mat            B;
954:   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
956:   PetscInt       j,i;
957:   PetscScalar    *v;

960:   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
961:   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
962:     MatCreate(PetscObjectComm((PetscObject)A),&B);
963:     MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
964:     MatSetType(B,((PetscObject)A)->type_name);
965:     MatMPIDenseSetPreallocation(B,NULL);
966:   } else {
967:     B = *matout;
968:   }

970:   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
971:   PetscMalloc1(m,&rwork);
972:   for (i=0; i<m; i++) rwork[i] = rstart + i;
973:   for (j=0; j<n; j++) {
974:     MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
975:     v   += m;
976:   }
977:   PetscFree(rwork);
978:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
979:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
980:   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
981:     *matout = B;
982:   } else {
983:     MatHeaderMerge(A,&B);
984:   }
985:   return(0);
986: }


989: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
990: extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

994: PetscErrorCode MatSetUp_MPIDense(Mat A)
995: {

999:    MatMPIDenseSetPreallocation(A,0);
1000:   return(0);
1001: }

1005: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1006: {
1008:   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

1011:   MatAXPY(A->A,alpha,B->A,str);
1012:   PetscObjectStateIncrease((PetscObject)Y);
1013:   return(0);
1014: }

1018: PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1019: {
1020:   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;

1024:   MatConjugate(a->A);
1025:   return(0);
1026: }

1030: PetscErrorCode MatRealPart_MPIDense(Mat A)
1031: {
1032:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1036:   MatRealPart(a->A);
1037:   return(0);
1038: }

1042: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1043: {
1044:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1048:   MatImaginaryPart(a->A);
1049:   return(0);
1050: }

1052: extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1055: PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1056: {
1058:   PetscInt       i,n;
1059:   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1060:   PetscReal      *work;

1063:   MatGetSize(A,NULL,&n);
1064:   PetscMalloc1(n,&work);
1065:   MatGetColumnNorms_SeqDense(a->A,type,work);
1066:   if (type == NORM_2) {
1067:     for (i=0; i<n; i++) work[i] *= work[i];
1068:   }
1069:   if (type == NORM_INFINITY) {
1070:     MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
1071:   } else {
1072:     MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
1073:   }
1074:   PetscFree(work);
1075:   if (type == NORM_2) {
1076:     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1077:   }
1078:   return(0);
1079: }

1083: static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1084: {
1085:   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1087:   PetscScalar    *a;
1088:   PetscInt       m,n,i;

1091:   MatGetSize(d->A,&m,&n);
1092:   MatDenseGetArray(d->A,&a);
1093:   for (i=0; i<m*n; i++) {
1094:     PetscRandomGetValue(rctx,a+i);
1095:   }
1096:   MatDenseRestoreArray(d->A,&a);
1097:   return(0);
1098: }

1100: extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

1104: static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1105: {
1107:   *missing = PETSC_FALSE;
1108:   return(0);
1109: }

1111: /* -------------------------------------------------------------------*/
1112: static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1113:                                         MatGetRow_MPIDense,
1114:                                         MatRestoreRow_MPIDense,
1115:                                         MatMult_MPIDense,
1116:                                 /*  4*/ MatMultAdd_MPIDense,
1117:                                         MatMultTranspose_MPIDense,
1118:                                         MatMultTransposeAdd_MPIDense,
1119:                                         0,
1120:                                         0,
1121:                                         0,
1122:                                 /* 10*/ 0,
1123:                                         0,
1124:                                         0,
1125:                                         0,
1126:                                         MatTranspose_MPIDense,
1127:                                 /* 15*/ MatGetInfo_MPIDense,
1128:                                         MatEqual_MPIDense,
1129:                                         MatGetDiagonal_MPIDense,
1130:                                         MatDiagonalScale_MPIDense,
1131:                                         MatNorm_MPIDense,
1132:                                 /* 20*/ MatAssemblyBegin_MPIDense,
1133:                                         MatAssemblyEnd_MPIDense,
1134:                                         MatSetOption_MPIDense,
1135:                                         MatZeroEntries_MPIDense,
1136:                                 /* 24*/ MatZeroRows_MPIDense,
1137:                                         0,
1138:                                         0,
1139:                                         0,
1140:                                         0,
1141:                                 /* 29*/ MatSetUp_MPIDense,
1142:                                         0,
1143:                                         0,
1144:                                         0,
1145:                                         0,
1146:                                 /* 34*/ MatDuplicate_MPIDense,
1147:                                         0,
1148:                                         0,
1149:                                         0,
1150:                                         0,
1151:                                 /* 39*/ MatAXPY_MPIDense,
1152:                                         MatGetSubMatrices_MPIDense,
1153:                                         0,
1154:                                         MatGetValues_MPIDense,
1155:                                         0,
1156:                                 /* 44*/ 0,
1157:                                         MatScale_MPIDense,
1158:                                         MatShift_Basic,
1159:                                         0,
1160:                                         0,
1161:                                 /* 49*/ MatSetRandom_MPIDense,
1162:                                         0,
1163:                                         0,
1164:                                         0,
1165:                                         0,
1166:                                 /* 54*/ 0,
1167:                                         0,
1168:                                         0,
1169:                                         0,
1170:                                         0,
1171:                                 /* 59*/ MatGetSubMatrix_MPIDense,
1172:                                         MatDestroy_MPIDense,
1173:                                         MatView_MPIDense,
1174:                                         0,
1175:                                         0,
1176:                                 /* 64*/ 0,
1177:                                         0,
1178:                                         0,
1179:                                         0,
1180:                                         0,
1181:                                 /* 69*/ 0,
1182:                                         0,
1183:                                         0,
1184:                                         0,
1185:                                         0,
1186:                                 /* 74*/ 0,
1187:                                         0,
1188:                                         0,
1189:                                         0,
1190:                                         0,
1191:                                 /* 79*/ 0,
1192:                                         0,
1193:                                         0,
1194:                                         0,
1195:                                 /* 83*/ MatLoad_MPIDense,
1196:                                         0,
1197:                                         0,
1198:                                         0,
1199:                                         0,
1200:                                         0,
1201: #if defined(PETSC_HAVE_ELEMENTAL)
1202:                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1203:                                         MatMatMultSymbolic_MPIDense_MPIDense,
1204: #else
1205:                                 /* 89*/ 0,
1206:                                         0,
1207: #endif
1208:                                         MatMatMultNumeric_MPIDense,
1209:                                         0,
1210:                                         0,
1211:                                 /* 94*/ 0,
1212:                                         0,
1213:                                         0,
1214:                                         0,
1215:                                         0,
1216:                                 /* 99*/ 0,
1217:                                         0,
1218:                                         0,
1219:                                         MatConjugate_MPIDense,
1220:                                         0,
1221:                                 /*104*/ 0,
1222:                                         MatRealPart_MPIDense,
1223:                                         MatImaginaryPart_MPIDense,
1224:                                         0,
1225:                                         0,
1226:                                 /*109*/ 0,
1227:                                         0,
1228:                                         0,
1229:                                         0,
1230:                                         MatMissingDiagonal_MPIDense,
1231:                                 /*114*/ 0,
1232:                                         0,
1233:                                         0,
1234:                                         0,
1235:                                         0,
1236:                                 /*119*/ 0,
1237:                                         0,
1238:                                         0,
1239:                                         0,
1240:                                         0,
1241:                                 /*124*/ 0,
1242:                                         MatGetColumnNorms_MPIDense,
1243:                                         0,
1244:                                         0,
1245:                                         0,
1246:                                 /*129*/ 0,
1247:                                         MatTransposeMatMult_MPIDense_MPIDense,
1248:                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1249:                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1250:                                         0,
1251:                                 /*134*/ 0,
1252:                                         0,
1253:                                         0,
1254:                                         0,
1255:                                         0,
1256:                                 /*139*/ 0,
1257:                                         0,
1258:                                         0
1259: };

1263: PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1264: {
1265:   Mat_MPIDense   *a;

1269:   mat->preallocated = PETSC_TRUE;
1270:   /* Note:  For now, when data is specified above, this assumes the user correctly
1271:    allocates the local dense storage space.  We should add error checking. */

1273:   a       = (Mat_MPIDense*)mat->data;
1274:   PetscLayoutSetUp(mat->rmap);
1275:   PetscLayoutSetUp(mat->cmap);
1276:   a->nvec = mat->cmap->n;

1278:   MatCreate(PETSC_COMM_SELF,&a->A);
1279:   MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1280:   MatSetType(a->A,MATSEQDENSE);
1281:   MatSeqDenseSetPreallocation(a->A,data);
1282:   PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1283:   return(0);
1284: }

1286: #if defined(PETSC_HAVE_ELEMENTAL)
1289: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1290: {
1291:   Mat            mat_elemental;
1293:   PetscScalar    *v;
1294:   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1295: 
1297:   if (reuse == MAT_REUSE_MATRIX) {
1298:     mat_elemental = *newmat;
1299:     MatZeroEntries(*newmat);
1300:   } else {
1301:     MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
1302:     MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);
1303:     MatSetType(mat_elemental,MATELEMENTAL);
1304:     MatSetUp(mat_elemental);
1305:     MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);
1306:   }

1308:   PetscMalloc2(m,&rows,N,&cols);
1309:   for (i=0; i<N; i++) cols[i] = i;
1310:   for (i=0; i<m; i++) rows[i] = rstart + i;
1311: 
1312:   /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1313:   MatDenseGetArray(A,&v);
1314:   MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);
1315:   MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
1316:   MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
1317:   MatDenseRestoreArray(A,&v);
1318:   PetscFree2(rows,cols);

1320:   if (reuse == MAT_INPLACE_MATRIX) {
1321:     MatHeaderReplace(A,&mat_elemental);
1322:   } else {
1323:     *newmat = mat_elemental;
1324:   }
1325:   return(0);
1326: }
1327: #endif

1331: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1332: {
1333:   Mat_MPIDense   *a;

1337:   PetscNewLog(mat,&a);
1338:   mat->data = (void*)a;
1339:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));

1341:   mat->insertmode = NOT_SET_VALUES;
1342:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);
1343:   MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);

1345:   /* build cache for off array entries formed */
1346:   a->donotstash = PETSC_FALSE;

1348:   MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);

1350:   /* stuff used for matrix vector multiply */
1351:   a->lvec        = 0;
1352:   a->Mvctx       = 0;
1353:   a->roworiented = PETSC_TRUE;

1355:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);
1356:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);
1357: #if defined(PETSC_HAVE_ELEMENTAL)
1358:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);
1359: #endif
1360:   PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);
1361:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);
1362:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);
1363:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);
1364:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);

1366:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);
1367:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);
1368:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);
1369:   PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1370:   return(0);
1371: }

1373: /*MC
1374:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1376:    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1377:    and MATMPIDENSE otherwise.

1379:    Options Database Keys:
1380: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

1382:   Level: beginner


1385: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1386: M*/

1390: /*@C
1391:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1393:    Not collective

1395:    Input Parameters:
1396: .  B - the matrix
1397: -  data - optional location of matrix data.  Set data=NULL for PETSc
1398:    to control all matrix memory allocation.

1400:    Notes:
1401:    The dense format is fully compatible with standard Fortran 77
1402:    storage by columns.

1404:    The data input variable is intended primarily for Fortran programmers
1405:    who wish to allocate their own matrix memory space.  Most users should
1406:    set data=NULL.

1408:    Level: intermediate

1410: .keywords: matrix,dense, parallel

1412: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1413: @*/
1414: PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1415: {

1419:   PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
1420:   return(0);
1421: }

1425: /*@C
1426:    MatCreateDense - Creates a parallel matrix in dense format.

1428:    Collective on MPI_Comm

1430:    Input Parameters:
1431: +  comm - MPI communicator
1432: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1433: .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1434: .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1435: .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1436: -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1437:    to control all matrix memory allocation.

1439:    Output Parameter:
1440: .  A - the matrix

1442:    Notes:
1443:    The dense format is fully compatible with standard Fortran 77
1444:    storage by columns.

1446:    The data input variable is intended primarily for Fortran programmers
1447:    who wish to allocate their own matrix memory space.  Most users should
1448:    set data=NULL (PETSC_NULL_SCALAR for Fortran users).

1450:    The user MUST specify either the local or global matrix dimensions
1451:    (possibly both).

1453:    Level: intermediate

1455: .keywords: matrix,dense, parallel

1457: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1458: @*/
1459: PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1460: {
1462:   PetscMPIInt    size;

1465:   MatCreate(comm,A);
1466:   MatSetSizes(*A,m,n,M,N);
1467:   MPI_Comm_size(comm,&size);
1468:   if (size > 1) {
1469:     MatSetType(*A,MATMPIDENSE);
1470:     MatMPIDenseSetPreallocation(*A,data);
1471:     if (data) {  /* user provided data array, so no need to assemble */
1472:       MatSetUpMultiply_MPIDense(*A);
1473:       (*A)->assembled = PETSC_TRUE;
1474:     }
1475:   } else {
1476:     MatSetType(*A,MATSEQDENSE);
1477:     MatSeqDenseSetPreallocation(*A,data);
1478:   }
1479:   return(0);
1480: }

1484: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1485: {
1486:   Mat            mat;
1487:   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;

1491:   *newmat = 0;
1492:   MatCreate(PetscObjectComm((PetscObject)A),&mat);
1493:   MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1494:   MatSetType(mat,((PetscObject)A)->type_name);
1495:   a       = (Mat_MPIDense*)mat->data;
1496:   PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));

1498:   mat->factortype   = A->factortype;
1499:   mat->assembled    = PETSC_TRUE;
1500:   mat->preallocated = PETSC_TRUE;

1502:   a->size         = oldmat->size;
1503:   a->rank         = oldmat->rank;
1504:   mat->insertmode = NOT_SET_VALUES;
1505:   a->nvec         = oldmat->nvec;
1506:   a->donotstash   = oldmat->donotstash;

1508:   PetscLayoutReference(A->rmap,&mat->rmap);
1509:   PetscLayoutReference(A->cmap,&mat->cmap);

1511:   MatSetUpMultiply_MPIDense(mat);
1512:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1513:   PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);

1515:   *newmat = mat;
1516:   return(0);
1517: }

1521: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1522: {
1524:   PetscMPIInt    rank,size;
1525:   const PetscInt *rowners;
1526:   PetscInt       i,m,n,nz,j,mMax;
1527:   PetscScalar    *array,*vals,*vals_ptr;
1528:   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

1531:   MPI_Comm_rank(comm,&rank);
1532:   MPI_Comm_size(comm,&size);

1534:   /* determine ownership of rows and columns */
1535:   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1536:   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

1538:   MatSetSizes(newmat,m,n,M,N);
1539:   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1540:     MatMPIDenseSetPreallocation(newmat,NULL);
1541:   }
1542:   MatDenseGetArray(newmat,&array);
1543:   MatGetLocalSize(newmat,&m,NULL);
1544:   MatGetOwnershipRanges(newmat,&rowners);
1545:   MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);
1546:   if (!rank) {
1547:     PetscMalloc1(mMax*N,&vals);

1549:     /* read in my part of the matrix numerical values  */
1550:     PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);

1552:     /* insert into matrix-by row (this is why cannot directly read into array */
1553:     vals_ptr = vals;
1554:     for (i=0; i<m; i++) {
1555:       for (j=0; j<N; j++) {
1556:         array[i + j*m] = *vals_ptr++;
1557:       }
1558:     }

1560:     /* read in other processors and ship out */
1561:     for (i=1; i<size; i++) {
1562:       nz   = (rowners[i+1] - rowners[i])*N;
1563:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1564:       MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);
1565:     }
1566:   } else {
1567:     /* receive numeric values */
1568:     PetscMalloc1(m*N,&vals);

1570:     /* receive message of values*/
1571:     MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);

1573:     /* insert into matrix-by row (this is why cannot directly read into array */
1574:     vals_ptr = vals;
1575:     for (i=0; i<m; i++) {
1576:       for (j=0; j<N; j++) {
1577:         array[i + j*m] = *vals_ptr++;
1578:       }
1579:     }
1580:   }
1581:   MatDenseRestoreArray(newmat,&array);
1582:   PetscFree(vals);
1583:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1584:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1585:   return(0);
1586: }

1590: PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1591: {
1592:   Mat_MPIDense   *a;
1593:   PetscScalar    *vals,*svals;
1594:   MPI_Comm       comm;
1595:   MPI_Status     status;
1596:   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1597:   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1598:   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1599:   PetscInt       i,nz,j,rstart,rend;
1600:   int            fd;

1604:   /* force binary viewer to load .info file if it has not yet done so */
1605:   PetscViewerSetUp(viewer);
1606:   PetscObjectGetComm((PetscObject)viewer,&comm);
1607:   MPI_Comm_size(comm,&size);
1608:   MPI_Comm_rank(comm,&rank);
1609:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1610:   if (!rank) {
1611:     PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
1612:     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1613:   }
1614:   MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1615:   M    = header[1]; N = header[2]; nz = header[3];

1617:   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1618:   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1619:   if (newmat->cmap->N < 0) newmat->cmap->N = N;

1621:   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1622:   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

1624:   /*
1625:        Handle case where matrix is stored on disk as a dense matrix
1626:   */
1627:   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1628:     MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1629:     return(0);
1630:   }

1632:   /* determine ownership of all rows */
1633:   if (newmat->rmap->n < 0) {
1634:     PetscMPIIntCast(M/size + ((M % size) > rank),&m);
1635:   } else {
1636:     PetscMPIIntCast(newmat->rmap->n,&m);
1637:   }
1638:   if (newmat->cmap->n < 0) {
1639:     n = PETSC_DECIDE;
1640:   } else {
1641:     PetscMPIIntCast(newmat->cmap->n,&n);
1642:   }

1644:   PetscMalloc1(size+2,&rowners);
1645:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1646:   rowners[0] = 0;
1647:   for (i=2; i<=size; i++) {
1648:     rowners[i] += rowners[i-1];
1649:   }
1650:   rstart = rowners[rank];
1651:   rend   = rowners[rank+1];

1653:   /* distribute row lengths to all processors */
1654:   PetscMalloc1(rend-rstart,&ourlens);
1655:   if (!rank) {
1656:     PetscMalloc1(M,&rowlengths);
1657:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1658:     PetscMalloc1(size,&sndcounts);
1659:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1660:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1661:     PetscFree(sndcounts);
1662:   } else {
1663:     MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1664:   }

1666:   if (!rank) {
1667:     /* calculate the number of nonzeros on each processor */
1668:     PetscMalloc1(size,&procsnz);
1669:     PetscMemzero(procsnz,size*sizeof(PetscInt));
1670:     for (i=0; i<size; i++) {
1671:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1672:         procsnz[i] += rowlengths[j];
1673:       }
1674:     }
1675:     PetscFree(rowlengths);

1677:     /* determine max buffer needed and allocate it */
1678:     maxnz = 0;
1679:     for (i=0; i<size; i++) {
1680:       maxnz = PetscMax(maxnz,procsnz[i]);
1681:     }
1682:     PetscMalloc1(maxnz,&cols);

1684:     /* read in my part of the matrix column indices  */
1685:     nz   = procsnz[0];
1686:     PetscMalloc1(nz,&mycols);
1687:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

1689:     /* read in every one elses and ship off */
1690:     for (i=1; i<size; i++) {
1691:       nz   = procsnz[i];
1692:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1693:       MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1694:     }
1695:     PetscFree(cols);
1696:   } else {
1697:     /* determine buffer space needed for message */
1698:     nz = 0;
1699:     for (i=0; i<m; i++) {
1700:       nz += ourlens[i];
1701:     }
1702:     PetscMalloc1(nz+1,&mycols);

1704:     /* receive message of column indices*/
1705:     MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1706:     MPI_Get_count(&status,MPIU_INT,&maxnz);
1707:     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1708:   }

1710:   MatSetSizes(newmat,m,n,M,N);
1711:   a = (Mat_MPIDense*)newmat->data;
1712:   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1713:     MatMPIDenseSetPreallocation(newmat,NULL);
1714:   }

1716:   if (!rank) {
1717:     PetscMalloc1(maxnz,&vals);

1719:     /* read in my part of the matrix numerical values  */
1720:     nz   = procsnz[0];
1721:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);

1723:     /* insert into matrix */
1724:     jj      = rstart;
1725:     smycols = mycols;
1726:     svals   = vals;
1727:     for (i=0; i<m; i++) {
1728:       MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1729:       smycols += ourlens[i];
1730:       svals   += ourlens[i];
1731:       jj++;
1732:     }

1734:     /* read in other processors and ship out */
1735:     for (i=1; i<size; i++) {
1736:       nz   = procsnz[i];
1737:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1738:       MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
1739:     }
1740:     PetscFree(procsnz);
1741:   } else {
1742:     /* receive numeric values */
1743:     PetscMalloc1(nz+1,&vals);

1745:     /* receive message of values*/
1746:     MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
1747:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1748:     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1750:     /* insert into matrix */
1751:     jj      = rstart;
1752:     smycols = mycols;
1753:     svals   = vals;
1754:     for (i=0; i<m; i++) {
1755:       MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1756:       smycols += ourlens[i];
1757:       svals   += ourlens[i];
1758:       jj++;
1759:     }
1760:   }
1761:   PetscFree(ourlens);
1762:   PetscFree(vals);
1763:   PetscFree(mycols);
1764:   PetscFree(rowners);

1766:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1767:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1768:   return(0);
1769: }

1773: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1774: {
1775:   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1776:   Mat            a,b;
1777:   PetscBool      flg;

1781:   a    = matA->A;
1782:   b    = matB->A;
1783:   MatEqual(a,b,&flg);
1784:   MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1785:   return(0);
1786: }

1790: PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1791: {
1792:   PetscErrorCode        ierr;
1793:   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1794:   Mat_TransMatMultDense *atb = a->atbdense;

1797:   PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);
1798:   (atb->destroy)(A);
1799:   PetscFree(atb);
1800:   return(0);
1801: }

1805: PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1806: {
1807:   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1808:   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1809:   Mat_TransMatMultDense *atb = c->atbdense;
1811:   MPI_Comm       comm;
1812:   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1813:   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1814:   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1815:   PetscScalar    _DOne=1.0,_DZero=0.0;
1816:   PetscBLASInt   am,an,bn,aN;
1817:   const PetscInt *ranges;

1820:   PetscObjectGetComm((PetscObject)A,&comm);
1821:   MPI_Comm_rank(comm,&rank);
1822:   MPI_Comm_size(comm,&size);

1824:   /* compute atbarray = aseq^T * bseq */
1825:   PetscBLASIntCast(a->A->cmap->n,&an);
1826:   PetscBLASIntCast(b->A->cmap->n,&bn);
1827:   PetscBLASIntCast(a->A->rmap->n,&am);
1828:   PetscBLASIntCast(A->cmap->N,&aN);
1829:   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1830: 
1831:   MatGetOwnershipRanges(C,&ranges);
1832:   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1833: 
1834:   /* arrange atbarray into sendbuf */
1835:   k = 0;
1836:   for (proc=0; proc<size; proc++) {
1837:     for (j=0; j<cN; j++) {
1838:       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1839:     }
1840:   }
1841:   /* sum all atbarray to local values of C */
1842:   MatDenseGetArray(c->A,&carray);
1843:   MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);
1844:   MatDenseRestoreArray(c->A,&carray);
1845:   return(0);
1846: }

1850: PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1851: {
1852:   PetscErrorCode        ierr;
1853:   Mat                   Cdense;
1854:   MPI_Comm              comm;
1855:   PetscMPIInt           size;
1856:   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1857:   Mat_MPIDense          *c;
1858:   Mat_TransMatMultDense *atb;

1861:   PetscObjectGetComm((PetscObject)A,&comm);
1862:   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1863:     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1864:   }

1866:   /* create matrix product Cdense */
1867:   MatCreate(comm,&Cdense);
1868:   MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
1869:   MatSetType(Cdense,MATMPIDENSE);
1870:   MatMPIDenseSetPreallocation(Cdense,NULL);
1871:   MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);
1872:   MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);
1873:   *C   = Cdense;

1875:   /* create data structure for reuse Cdense */
1876:   MPI_Comm_size(comm,&size);
1877:   PetscNew(&atb);
1878:   cM = Cdense->rmap->N;
1879:   PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);
1880: 
1881:   c                    = (Mat_MPIDense*)Cdense->data;
1882:   c->atbdense          = atb;
1883:   atb->destroy         = Cdense->ops->destroy;
1884:   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1885:   return(0);
1886: }

1890: PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1891: {

1895:   if (scall == MAT_INITIAL_MATRIX) {
1896:     MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
1897:   }
1898:   MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);
1899:   return(0);
1900: }

1904: PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1905: {
1906:   PetscErrorCode   ierr;
1907:   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1908:   Mat_MatMultDense *ab = a->abdense;

1911:   MatDestroy(&ab->Ce);
1912:   MatDestroy(&ab->Ae);
1913:   MatDestroy(&ab->Be);

1915:   (ab->destroy)(A);
1916:   PetscFree(ab);
1917:   return(0);
1918: }

1920: #if defined(PETSC_HAVE_ELEMENTAL)
1923: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1924: {
1925:   PetscErrorCode   ierr;
1926:   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1927:   Mat_MatMultDense *ab=c->abdense;

1930:   MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);
1931:   MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);
1932:   MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);
1933:   MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);
1934:   return(0);
1935: }

1939: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1940: {
1941:   PetscErrorCode   ierr;
1942:   Mat              Ae,Be,Ce;
1943:   Mat_MPIDense     *c;
1944:   Mat_MatMultDense *ab;

1947:   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1948:     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1949:   }

1951:   /* convert A and B to Elemental matrices Ae and Be */
1952:   MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);
1953:   MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);

1955:   /* Ce = Ae*Be */
1956:   MatMatMultSymbolic(Ae,Be,fill,&Ce);
1957:   MatMatMultNumeric(Ae,Be,Ce);
1958: 
1959:   /* convert Ce to C */
1960:   MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);

1962:   /* create data structure for reuse Cdense */
1963:   PetscNew(&ab);
1964:   c                  = (Mat_MPIDense*)(*C)->data;
1965:   c->abdense         = ab;

1967:   ab->Ae             = Ae;
1968:   ab->Be             = Be;
1969:   ab->Ce             = Ce;
1970:   ab->destroy        = (*C)->ops->destroy;
1971:   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1972:   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1973:   return(0);
1974: }

1978: PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1979: {

1983:   if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */
1984:     PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);
1985:     MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
1986:     PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);
1987:   } else {
1988:     PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);
1989:     MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);
1990:     PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);
1991:   }
1992:   return(0);
1993: }
1994: #endif