Actual source code: mpidense.c

petsc-3.10.5 2019-03-28
Report Typos and Errors

  2: /*
  3:    Basic functions for basic parallel dense matrices.
  4: */


  7:  #include <../src/mat/impls/dense/mpi/mpidense.h>
  8:  #include <../src/mat/impls/aij/mpi/mpiaij.h>
  9:  #include <petscblaslapack.h>

 11: /*@

 13:       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
 14:               matrix that represents the operator. For sequential matrices it returns itself.

 16:     Input Parameter:
 17: .      A - the Seq or MPI dense matrix

 19:     Output Parameter:
 20: .      B - the inner matrix

 22:     Level: intermediate

 24: @*/
 25: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
 26: {
 27:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 29:   PetscBool      flg;

 32:   PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
 33:   if (flg) *B = mat->A;
 34:   else *B = A;
 35:   return(0);
 36: }

 38: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 39: {
 40:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 42:   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

 45:   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
 46:   lrow = row - rstart;
 47:   MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
 48:   return(0);
 49: }

 51: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 52: {

 56:   if (idx) {PetscFree(*idx);}
 57:   if (v) {PetscFree(*v);}
 58:   return(0);
 59: }

 61: PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
 62: {
 63:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
 65:   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
 66:   PetscScalar    *array;
 67:   MPI_Comm       comm;
 68:   Mat            B;

 71:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

 73:   PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
 74:   if (!B) {
 75:     PetscObjectGetComm((PetscObject)(mdn->A),&comm);
 76:     MatCreate(comm,&B);
 77:     MatSetSizes(B,m,m,m,m);
 78:     MatSetType(B,((PetscObject)mdn->A)->type_name);
 79:     MatDenseGetArray(mdn->A,&array);
 80:     MatSeqDenseSetPreallocation(B,array+m*rstart);
 81:     MatDenseRestoreArray(mdn->A,&array);
 82:     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
 83:     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
 84:     PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
 85:     *a   = B;
 86:     MatDestroy(&B);
 87:   } else *a = B;
 88:   return(0);
 89: }

 91: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
 92: {
 93:   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
 95:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
 96:   PetscBool      roworiented = A->roworiented;

 99:   for (i=0; i<m; i++) {
100:     if (idxm[i] < 0) continue;
101:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
102:     if (idxm[i] >= rstart && idxm[i] < rend) {
103:       row = idxm[i] - rstart;
104:       if (roworiented) {
105:         MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
106:       } else {
107:         for (j=0; j<n; j++) {
108:           if (idxn[j] < 0) continue;
109:           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
110:           MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
111:         }
112:       }
113:     } else if (!A->donotstash) {
114:       mat->assembled = PETSC_FALSE;
115:       if (roworiented) {
116:         MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
117:       } else {
118:         MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
119:       }
120:     }
121:   }
122:   return(0);
123: }

125: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126: {
127:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
129:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

132:   for (i=0; i<m; i++) {
133:     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135:     if (idxm[i] >= rstart && idxm[i] < rend) {
136:       row = idxm[i] - rstart;
137:       for (j=0; j<n; j++) {
138:         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139:         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140:         MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
141:       }
142:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
143:   }
144:   return(0);
145: }

147: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
148: {
149:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

153:   MatDenseGetArray(a->A,array);
154:   return(0);
155: }

157: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
158: {
159:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

163:   MatDenseGetArrayRead(a->A,array);
164:   return(0);
165: }

167: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
168: {
169:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

173:   MatDensePlaceArray(a->A,array);
174:   return(0);
175: }

177: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
178: {
179:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

183:   MatDenseResetArray(a->A);
184:   return(0);
185: }

187: static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
188: {
189:   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
190:   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
192:   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
193:   const PetscInt *irow,*icol;
194:   PetscScalar    *av,*bv,*v = lmat->v;
195:   Mat            newmat;
196:   IS             iscol_local;
197:   MPI_Comm       comm_is,comm_mat;

200:   PetscObjectGetComm((PetscObject)A,&comm_mat);
201:   PetscObjectGetComm((PetscObject)iscol,&comm_is);
202:   if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

204:   ISAllGather(iscol,&iscol_local);
205:   ISGetIndices(isrow,&irow);
206:   ISGetIndices(iscol_local,&icol);
207:   ISGetLocalSize(isrow,&nrows);
208:   ISGetLocalSize(iscol,&ncols);
209:   ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */

211:   /* No parallel redistribution currently supported! Should really check each index set
212:      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
213:      original matrix! */

215:   MatGetLocalSize(A,&nlrows,&nlcols);
216:   MatGetOwnershipRange(A,&rstart,&rend);

218:   /* Check submatrix call */
219:   if (scall == MAT_REUSE_MATRIX) {
220:     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
221:     /* Really need to test rows and column sizes! */
222:     newmat = *B;
223:   } else {
224:     /* Create and fill new matrix */
225:     MatCreate(PetscObjectComm((PetscObject)A),&newmat);
226:     MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
227:     MatSetType(newmat,((PetscObject)A)->type_name);
228:     MatMPIDenseSetPreallocation(newmat,NULL);
229:   }

231:   /* Now extract the data pointers and do the copy, column at a time */
232:   newmatd = (Mat_MPIDense*)newmat->data;
233:   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

235:   for (i=0; i<Ncols; i++) {
236:     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
237:     for (j=0; j<nrows; j++) {
238:       *bv++ = av[irow[j] - rstart];
239:     }
240:   }

242:   /* Assemble the matrices so that the correct flags are set */
243:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
244:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

246:   /* Free work space */
247:   ISRestoreIndices(isrow,&irow);
248:   ISRestoreIndices(iscol_local,&icol);
249:   ISDestroy(&iscol_local);
250:   *B   = newmat;
251:   return(0);
252: }

254: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
255: {
256:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

260:   MatDenseRestoreArray(a->A,array);
261:   return(0);
262: }

264: PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
265: {
266:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

270:   MatDenseRestoreArrayRead(a->A,array);
271:   return(0);
272: }

274: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
275: {
276:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
277:   MPI_Comm       comm;
279:   PetscInt       nstash,reallocs;
280:   InsertMode     addv;

283:   PetscObjectGetComm((PetscObject)mat,&comm);
284:   /* make sure all processors are either in INSERTMODE or ADDMODE */
285:   MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);
286:   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
287:   mat->insertmode = addv; /* in case this processor had no cache */

289:   MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
290:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
291:   PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
292:   return(0);
293: }

295: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
296: {
297:   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
299:   PetscInt       i,*row,*col,flg,j,rstart,ncols;
300:   PetscMPIInt    n;
301:   PetscScalar    *val;

304:   /*  wait on receives */
305:   while (1) {
306:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
307:     if (!flg) break;

309:     for (i=0; i<n;) {
310:       /* Now identify the consecutive vals belonging to the same row */
311:       for (j=i,rstart=row[j]; j<n; j++) {
312:         if (row[j] != rstart) break;
313:       }
314:       if (j < n) ncols = j-i;
315:       else       ncols = n-i;
316:       /* Now assemble all these values with a single function call */
317:       MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
318:       i    = j;
319:     }
320:   }
321:   MatStashScatterEnd_Private(&mat->stash);

323:   MatAssemblyBegin(mdn->A,mode);
324:   MatAssemblyEnd(mdn->A,mode);

326:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
327:     MatSetUpMultiply_MPIDense(mat);
328:   }
329:   return(0);
330: }

332: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
333: {
335:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

338:   MatZeroEntries(l->A);
339:   return(0);
340: }

342: /* the code does not do the diagonal entries correctly unless the
343:    matrix is square and the column and row owerships are identical.
344:    This is a BUG. The only way to fix it seems to be to access
345:    mdn->A and mdn->B directly and not through the MatZeroRows()
346:    routine.
347: */
348: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
349: {
350:   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
351:   PetscErrorCode    ierr;
352:   PetscInt          i,*owners = A->rmap->range;
353:   PetscInt          *sizes,j,idx,nsends;
354:   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
355:   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
356:   PetscInt          *lens,*lrows,*values;
357:   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
358:   MPI_Comm          comm;
359:   MPI_Request       *send_waits,*recv_waits;
360:   MPI_Status        recv_status,*send_status;
361:   PetscBool         found;
362:   const PetscScalar *xx;
363:   PetscScalar       *bb;

366:   PetscObjectGetComm((PetscObject)A,&comm);
367:   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
368:   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
369:   /*  first count number of contributors to each processor */
370:   PetscCalloc1(2*size,&sizes);
371:   PetscMalloc1(N+1,&owner);  /* see note*/
372:   for (i=0; i<N; i++) {
373:     idx   = rows[i];
374:     found = PETSC_FALSE;
375:     for (j=0; j<size; j++) {
376:       if (idx >= owners[j] && idx < owners[j+1]) {
377:         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
378:       }
379:     }
380:     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
381:   }
382:   nsends = 0;
383:   for (i=0; i<size; i++) nsends += sizes[2*i+1];

385:   /* inform other processors of number of messages and max length*/
386:   PetscMaxSum(comm,sizes,&nmax,&nrecvs);

388:   /* post receives:   */
389:   PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);
390:   PetscMalloc1(nrecvs+1,&recv_waits);
391:   for (i=0; i<nrecvs; i++) {
392:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
393:   }

395:   /* do sends:
396:       1) starts[i] gives the starting index in svalues for stuff going to
397:          the ith processor
398:   */
399:   PetscMalloc1(N+1,&svalues);
400:   PetscMalloc1(nsends+1,&send_waits);
401:   PetscMalloc1(size+1,&starts);

403:   starts[0] = 0;
404:   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
405:   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

407:   starts[0] = 0;
408:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
409:   count = 0;
410:   for (i=0; i<size; i++) {
411:     if (sizes[2*i+1]) {
412:       MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
413:     }
414:   }
415:   PetscFree(starts);

417:   base = owners[rank];

419:   /*  wait on receives */
420:   PetscMalloc2(nrecvs,&lens,nrecvs,&source);
421:   count = nrecvs;
422:   slen  = 0;
423:   while (count) {
424:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
425:     /* unpack receives into our local space */
426:     MPI_Get_count(&recv_status,MPIU_INT,&n);

428:     source[imdex] = recv_status.MPI_SOURCE;
429:     lens[imdex]   = n;
430:     slen += n;
431:     count--;
432:   }
433:   PetscFree(recv_waits);

435:   /* move the data into the send scatter */
436:   PetscMalloc1(slen+1,&lrows);
437:   count = 0;
438:   for (i=0; i<nrecvs; i++) {
439:     values = rvalues + i*nmax;
440:     for (j=0; j<lens[i]; j++) {
441:       lrows[count++] = values[j] - base;
442:     }
443:   }
444:   PetscFree(rvalues);
445:   PetscFree2(lens,source);
446:   PetscFree(owner);
447:   PetscFree(sizes);

449:   /* fix right hand side if needed */
450:   if (x && b) {
451:     VecGetArrayRead(x,&xx);
452:     VecGetArray(b,&bb);
453:     for (i=0; i<slen; i++) {
454:       bb[lrows[i]] = diag*xx[lrows[i]];
455:     }
456:     VecRestoreArrayRead(x,&xx);
457:     VecRestoreArray(b,&bb);
458:   }

460:   /* actually zap the local rows */
461:   MatZeroRows(l->A,slen,lrows,0.0,0,0);
462:   if (diag != 0.0) {
463:     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
464:     PetscInt     m   = ll->lda, i;

466:     for (i=0; i<slen; i++) {
467:       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
468:     }
469:   }
470:   PetscFree(lrows);

472:   /* wait on sends */
473:   if (nsends) {
474:     PetscMalloc1(nsends,&send_status);
475:     MPI_Waitall(nsends,send_waits,send_status);
476:     PetscFree(send_status);
477:   }
478:   PetscFree(send_waits);
479:   PetscFree(svalues);
480:   return(0);
481: }

483: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
484: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
485: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
486: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

488: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
489: {
490:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

494:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
495:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
496:   MatMult_SeqDense(mdn->A,mdn->lvec,yy);
497:   return(0);
498: }

500: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
501: {
502:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

506:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
507:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
508:   MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
509:   return(0);
510: }

512: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
513: {
514:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
516:   PetscScalar    zero = 0.0;

519:   VecSet(yy,zero);
520:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
521:   VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
522:   VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
523:   return(0);
524: }

526: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
527: {
528:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

532:   VecCopy(yy,zz);
533:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
534:   VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
535:   VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
536:   return(0);
537: }

539: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
540: {
541:   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
542:   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
544:   PetscInt       len,i,n,m = A->rmap->n,radd;
545:   PetscScalar    *x,zero = 0.0;

548:   VecSet(v,zero);
549:   VecGetArray(v,&x);
550:   VecGetSize(v,&n);
551:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
552:   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
553:   radd = A->rmap->rstart*m;
554:   for (i=0; i<len; i++) {
555:     x[i] = aloc->v[radd + i*m + i];
556:   }
557:   VecRestoreArray(v,&x);
558:   return(0);
559: }

561: PetscErrorCode MatDestroy_MPIDense(Mat mat)
562: {
563:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

567: #if defined(PETSC_USE_LOG)
568:   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
569: #endif
570:   MatStashDestroy_Private(&mat->stash);
571:   MatDestroy(&mdn->A);
572:   VecDestroy(&mdn->lvec);
573:   VecScatterDestroy(&mdn->Mvctx);

575:   PetscFree(mat->data);
576:   PetscObjectChangeTypeName((PetscObject)mat,0);

578:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);
579:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);
580:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);
581:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);
582:   PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);
583:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);
584: #if defined(PETSC_HAVE_ELEMENTAL)
585:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);
586: #endif
587:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);
588:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);
589:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);
590:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);
591:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);
592:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);
593:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);
594:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);
595:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);
596:   return(0);
597: }

599: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
600: {
601:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
602:   PetscErrorCode    ierr;
603:   PetscViewerFormat format;
604:   int               fd;
605:   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
606:   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
607:   PetscScalar       *work,*v,*vv;
608:   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

611:   if (mdn->size == 1) {
612:     MatView(mdn->A,viewer);
613:   } else {
614:     PetscViewerBinaryGetDescriptor(viewer,&fd);
615:     MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
616:     MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);

618:     PetscViewerGetFormat(viewer,&format);
619:     if (format == PETSC_VIEWER_NATIVE) {

621:       if (!rank) {
622:         /* store the matrix as a dense matrix */
623:         header[0] = MAT_FILE_CLASSID;
624:         header[1] = mat->rmap->N;
625:         header[2] = N;
626:         header[3] = MATRIX_BINARY_FORMAT_DENSE;
627:         PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);

629:         /* get largest work array needed for transposing array */
630:         mmax = mat->rmap->n;
631:         for (i=1; i<size; i++) {
632:           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
633:         }
634:         PetscMalloc1(mmax*N,&work);

636:         /* write out local array, by rows */
637:         m = mat->rmap->n;
638:         v = a->v;
639:         for (j=0; j<N; j++) {
640:           for (i=0; i<m; i++) {
641:             work[j + i*N] = *v++;
642:           }
643:         }
644:         PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
645:         /* get largest work array to receive messages from other processes, excludes process zero */
646:         mmax = 0;
647:         for (i=1; i<size; i++) {
648:           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
649:         }
650:         PetscMalloc1(mmax*N,&vv);
651:         for (k = 1; k < size; k++) {
652:           v    = vv;
653:           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
654:           MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));

656:           for (j = 0; j < N; j++) {
657:             for (i = 0; i < m; i++) {
658:               work[j + i*N] = *v++;
659:             }
660:           }
661:           PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
662:         }
663:         PetscFree(work);
664:         PetscFree(vv);
665:       } else {
666:         MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
667:       }
668:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
669:   }
670:   return(0);
671: }

673: extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
674:  #include <petscdraw.h>
675: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
676: {
677:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
678:   PetscErrorCode    ierr;
679:   PetscMPIInt       rank = mdn->rank;
680:   PetscViewerType   vtype;
681:   PetscBool         iascii,isdraw;
682:   PetscViewer       sviewer;
683:   PetscViewerFormat format;

686:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
687:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
688:   if (iascii) {
689:     PetscViewerGetType(viewer,&vtype);
690:     PetscViewerGetFormat(viewer,&format);
691:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
692:       MatInfo info;
693:       MatGetInfo(mat,MAT_LOCAL,&info);
694:       PetscViewerASCIIPushSynchronized(viewer);
695:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
696:       PetscViewerFlush(viewer);
697:       PetscViewerASCIIPopSynchronized(viewer);
698:       VecScatterView(mdn->Mvctx,viewer);
699:       return(0);
700:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
701:       return(0);
702:     }
703:   } else if (isdraw) {
704:     PetscDraw draw;
705:     PetscBool isnull;

707:     PetscViewerDrawGetDraw(viewer,0,&draw);
708:     PetscDrawIsNull(draw,&isnull);
709:     if (isnull) return(0);
710:   }

712:   {
713:     /* assemble the entire matrix onto first processor. */
714:     Mat         A;
715:     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
716:     PetscInt    *cols;
717:     PetscScalar *vals;

719:     MatCreate(PetscObjectComm((PetscObject)mat),&A);
720:     if (!rank) {
721:       MatSetSizes(A,M,N,M,N);
722:     } else {
723:       MatSetSizes(A,0,0,M,N);
724:     }
725:     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
726:     MatSetType(A,MATMPIDENSE);
727:     MatMPIDenseSetPreallocation(A,NULL);
728:     PetscLogObjectParent((PetscObject)mat,(PetscObject)A);

730:     /* Copy the matrix ... This isn't the most efficient means,
731:        but it's quick for now */
732:     A->insertmode = INSERT_VALUES;

734:     row = mat->rmap->rstart;
735:     m   = mdn->A->rmap->n;
736:     for (i=0; i<m; i++) {
737:       MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
738:       MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
739:       MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
740:       row++;
741:     }

743:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
744:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
745:     PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
746:     if (!rank) {
747:       PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);
748:       MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);
749:     }
750:     PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
751:     PetscViewerFlush(viewer);
752:     MatDestroy(&A);
753:   }
754:   return(0);
755: }

757: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
758: {
760:   PetscBool      iascii,isbinary,isdraw,issocket;

763:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
764:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
765:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
766:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);

768:   if (iascii || issocket || isdraw) {
769:     MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
770:   } else if (isbinary) {
771:     MatView_MPIDense_Binary(mat,viewer);
772:   }
773:   return(0);
774: }

776: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
777: {
778:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
779:   Mat            mdn  = mat->A;
781:   PetscReal      isend[5],irecv[5];

784:   info->block_size = 1.0;

786:   MatGetInfo(mdn,MAT_LOCAL,info);

788:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
789:   isend[3] = info->memory;  isend[4] = info->mallocs;
790:   if (flag == MAT_LOCAL) {
791:     info->nz_used      = isend[0];
792:     info->nz_allocated = isend[1];
793:     info->nz_unneeded  = isend[2];
794:     info->memory       = isend[3];
795:     info->mallocs      = isend[4];
796:   } else if (flag == MAT_GLOBAL_MAX) {
797:     MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));

799:     info->nz_used      = irecv[0];
800:     info->nz_allocated = irecv[1];
801:     info->nz_unneeded  = irecv[2];
802:     info->memory       = irecv[3];
803:     info->mallocs      = irecv[4];
804:   } else if (flag == MAT_GLOBAL_SUM) {
805:     MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));

807:     info->nz_used      = irecv[0];
808:     info->nz_allocated = irecv[1];
809:     info->nz_unneeded  = irecv[2];
810:     info->memory       = irecv[3];
811:     info->mallocs      = irecv[4];
812:   }
813:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
814:   info->fill_ratio_needed = 0;
815:   info->factor_mallocs    = 0;
816:   return(0);
817: }

819: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
820: {
821:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

825:   switch (op) {
826:   case MAT_NEW_NONZERO_LOCATIONS:
827:   case MAT_NEW_NONZERO_LOCATION_ERR:
828:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
829:     MatCheckPreallocated(A,1);
830:     MatSetOption(a->A,op,flg);
831:     break;
832:   case MAT_ROW_ORIENTED:
833:     MatCheckPreallocated(A,1);
834:     a->roworiented = flg;
835:     MatSetOption(a->A,op,flg);
836:     break;
837:   case MAT_NEW_DIAGONALS:
838:   case MAT_KEEP_NONZERO_PATTERN:
839:   case MAT_USE_HASH_TABLE:
840:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
841:     break;
842:   case MAT_IGNORE_OFF_PROC_ENTRIES:
843:     a->donotstash = flg;
844:     break;
845:   case MAT_SYMMETRIC:
846:   case MAT_STRUCTURALLY_SYMMETRIC:
847:   case MAT_HERMITIAN:
848:   case MAT_SYMMETRY_ETERNAL:
849:   case MAT_IGNORE_LOWER_TRIANGULAR:
850:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
851:     break;
852:   default:
853:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
854:   }
855:   return(0);
856: }


859: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
860: {
861:   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
862:   Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
863:   const PetscScalar *l,*r;
864:   PetscScalar       x,*v;
865:   PetscErrorCode    ierr;
866:   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

869:   MatGetLocalSize(A,&s2,&s3);
870:   if (ll) {
871:     VecGetLocalSize(ll,&s2a);
872:     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
873:     VecGetArrayRead(ll,&l);
874:     for (i=0; i<m; i++) {
875:       x = l[i];
876:       v = mat->v + i;
877:       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
878:     }
879:     VecRestoreArrayRead(ll,&l);
880:     PetscLogFlops(n*m);
881:   }
882:   if (rr) {
883:     VecGetLocalSize(rr,&s3a);
884:     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
885:     VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
886:     VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
887:     VecGetArrayRead(mdn->lvec,&r);
888:     for (i=0; i<n; i++) {
889:       x = r[i];
890:       v = mat->v + i*m;
891:       for (j=0; j<m; j++) (*v++) *= x;
892:     }
893:     VecRestoreArrayRead(mdn->lvec,&r);
894:     PetscLogFlops(n*m);
895:   }
896:   return(0);
897: }

899: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
900: {
901:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
902:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
904:   PetscInt       i,j;
905:   PetscReal      sum = 0.0;
906:   PetscScalar    *v  = mat->v;

909:   if (mdn->size == 1) {
910:      MatNorm(mdn->A,type,nrm);
911:   } else {
912:     if (type == NORM_FROBENIUS) {
913:       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
914:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
915:       }
916:       MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
917:       *nrm = PetscSqrtReal(*nrm);
918:       PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
919:     } else if (type == NORM_1) {
920:       PetscReal *tmp,*tmp2;
921:       PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);
922:       PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));
923:       PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));
924:       *nrm = 0.0;
925:       v    = mat->v;
926:       for (j=0; j<mdn->A->cmap->n; j++) {
927:         for (i=0; i<mdn->A->rmap->n; i++) {
928:           tmp[j] += PetscAbsScalar(*v);  v++;
929:         }
930:       }
931:       MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
932:       for (j=0; j<A->cmap->N; j++) {
933:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
934:       }
935:       PetscFree2(tmp,tmp2);
936:       PetscLogFlops(A->cmap->n*A->rmap->n);
937:     } else if (type == NORM_INFINITY) { /* max row norm */
938:       PetscReal ntemp;
939:       MatNorm(mdn->A,type,&ntemp);
940:       MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
941:     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
942:   }
943:   return(0);
944: }

946: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
947: {
948:   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
949:   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
950:   Mat            B;
951:   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
953:   PetscInt       j,i;
954:   PetscScalar    *v;

957:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
958:     MatCreate(PetscObjectComm((PetscObject)A),&B);
959:     MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
960:     MatSetType(B,((PetscObject)A)->type_name);
961:     MatMPIDenseSetPreallocation(B,NULL);
962:   } else {
963:     B = *matout;
964:   }

966:   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
967:   PetscMalloc1(m,&rwork);
968:   for (i=0; i<m; i++) rwork[i] = rstart + i;
969:   for (j=0; j<n; j++) {
970:     MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
971:     v   += m;
972:   }
973:   PetscFree(rwork);
974:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
975:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
976:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
977:     *matout = B;
978:   } else {
979:     MatHeaderMerge(A,&B);
980:   }
981:   return(0);
982: }


985: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
986: extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

988: PetscErrorCode MatSetUp_MPIDense(Mat A)
989: {

993:    MatMPIDenseSetPreallocation(A,0);
994:   return(0);
995: }

997: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
998: {
1000:   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

1003:   MatAXPY(A->A,alpha,B->A,str);
1004:   PetscObjectStateIncrease((PetscObject)Y);
1005:   return(0);
1006: }

1008: PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1009: {
1010:   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;

1014:   MatConjugate(a->A);
1015:   return(0);
1016: }

1018: PetscErrorCode MatRealPart_MPIDense(Mat A)
1019: {
1020:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1024:   MatRealPart(a->A);
1025:   return(0);
1026: }

1028: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1029: {
1030:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1034:   MatImaginaryPart(a->A);
1035:   return(0);
1036: }

1038: extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1039: PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1040: {
1042:   PetscInt       i,n;
1043:   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1044:   PetscReal      *work;

1047:   MatGetSize(A,NULL,&n);
1048:   PetscMalloc1(n,&work);
1049:   MatGetColumnNorms_SeqDense(a->A,type,work);
1050:   if (type == NORM_2) {
1051:     for (i=0; i<n; i++) work[i] *= work[i];
1052:   }
1053:   if (type == NORM_INFINITY) {
1054:     MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
1055:   } else {
1056:     MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
1057:   }
1058:   PetscFree(work);
1059:   if (type == NORM_2) {
1060:     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1061:   }
1062:   return(0);
1063: }

1065: static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1066: {
1067:   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1069:   PetscScalar    *a;
1070:   PetscInt       m,n,i;

1073:   MatGetSize(d->A,&m,&n);
1074:   MatDenseGetArray(d->A,&a);
1075:   for (i=0; i<m*n; i++) {
1076:     PetscRandomGetValue(rctx,a+i);
1077:   }
1078:   MatDenseRestoreArray(d->A,&a);
1079:   return(0);
1080: }

1082: extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

1084: static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1085: {
1087:   *missing = PETSC_FALSE;
1088:   return(0);
1089: }

1091: /* -------------------------------------------------------------------*/
1092: static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1093:                                         MatGetRow_MPIDense,
1094:                                         MatRestoreRow_MPIDense,
1095:                                         MatMult_MPIDense,
1096:                                 /*  4*/ MatMultAdd_MPIDense,
1097:                                         MatMultTranspose_MPIDense,
1098:                                         MatMultTransposeAdd_MPIDense,
1099:                                         0,
1100:                                         0,
1101:                                         0,
1102:                                 /* 10*/ 0,
1103:                                         0,
1104:                                         0,
1105:                                         0,
1106:                                         MatTranspose_MPIDense,
1107:                                 /* 15*/ MatGetInfo_MPIDense,
1108:                                         MatEqual_MPIDense,
1109:                                         MatGetDiagonal_MPIDense,
1110:                                         MatDiagonalScale_MPIDense,
1111:                                         MatNorm_MPIDense,
1112:                                 /* 20*/ MatAssemblyBegin_MPIDense,
1113:                                         MatAssemblyEnd_MPIDense,
1114:                                         MatSetOption_MPIDense,
1115:                                         MatZeroEntries_MPIDense,
1116:                                 /* 24*/ MatZeroRows_MPIDense,
1117:                                         0,
1118:                                         0,
1119:                                         0,
1120:                                         0,
1121:                                 /* 29*/ MatSetUp_MPIDense,
1122:                                         0,
1123:                                         0,
1124:                                         MatGetDiagonalBlock_MPIDense,
1125:                                         0,
1126:                                 /* 34*/ MatDuplicate_MPIDense,
1127:                                         0,
1128:                                         0,
1129:                                         0,
1130:                                         0,
1131:                                 /* 39*/ MatAXPY_MPIDense,
1132:                                         MatCreateSubMatrices_MPIDense,
1133:                                         0,
1134:                                         MatGetValues_MPIDense,
1135:                                         0,
1136:                                 /* 44*/ 0,
1137:                                         MatScale_MPIDense,
1138:                                         MatShift_Basic,
1139:                                         0,
1140:                                         0,
1141:                                 /* 49*/ MatSetRandom_MPIDense,
1142:                                         0,
1143:                                         0,
1144:                                         0,
1145:                                         0,
1146:                                 /* 54*/ 0,
1147:                                         0,
1148:                                         0,
1149:                                         0,
1150:                                         0,
1151:                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1152:                                         MatDestroy_MPIDense,
1153:                                         MatView_MPIDense,
1154:                                         0,
1155:                                         0,
1156:                                 /* 64*/ 0,
1157:                                         0,
1158:                                         0,
1159:                                         0,
1160:                                         0,
1161:                                 /* 69*/ 0,
1162:                                         0,
1163:                                         0,
1164:                                         0,
1165:                                         0,
1166:                                 /* 74*/ 0,
1167:                                         0,
1168:                                         0,
1169:                                         0,
1170:                                         0,
1171:                                 /* 79*/ 0,
1172:                                         0,
1173:                                         0,
1174:                                         0,
1175:                                 /* 83*/ MatLoad_MPIDense,
1176:                                         0,
1177:                                         0,
1178:                                         0,
1179:                                         0,
1180:                                         0,
1181: #if defined(PETSC_HAVE_ELEMENTAL)
1182:                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1183:                                         MatMatMultSymbolic_MPIDense_MPIDense,
1184: #else
1185:                                 /* 89*/ 0,
1186:                                         0,
1187: #endif
1188:                                         MatMatMultNumeric_MPIDense,
1189:                                         0,
1190:                                         0,
1191:                                 /* 94*/ 0,
1192:                                         0,
1193:                                         0,
1194:                                         0,
1195:                                         0,
1196:                                 /* 99*/ 0,
1197:                                         0,
1198:                                         0,
1199:                                         MatConjugate_MPIDense,
1200:                                         0,
1201:                                 /*104*/ 0,
1202:                                         MatRealPart_MPIDense,
1203:                                         MatImaginaryPart_MPIDense,
1204:                                         0,
1205:                                         0,
1206:                                 /*109*/ 0,
1207:                                         0,
1208:                                         0,
1209:                                         0,
1210:                                         MatMissingDiagonal_MPIDense,
1211:                                 /*114*/ 0,
1212:                                         0,
1213:                                         0,
1214:                                         0,
1215:                                         0,
1216:                                 /*119*/ 0,
1217:                                         0,
1218:                                         0,
1219:                                         0,
1220:                                         0,
1221:                                 /*124*/ 0,
1222:                                         MatGetColumnNorms_MPIDense,
1223:                                         0,
1224:                                         0,
1225:                                         0,
1226:                                 /*129*/ 0,
1227:                                         MatTransposeMatMult_MPIDense_MPIDense,
1228:                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1229:                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1230:                                         0,
1231:                                 /*134*/ 0,
1232:                                         0,
1233:                                         0,
1234:                                         0,
1235:                                         0,
1236:                                 /*139*/ 0,
1237:                                         0,
1238:                                         0,
1239:                                         0,
1240:                                         0,
1241:                                 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
1242: };

1244: PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1245: {
1246:   Mat_MPIDense   *a;

1250:   mat->preallocated = PETSC_TRUE;
1251:   /* Note:  For now, when data is specified above, this assumes the user correctly
1252:    allocates the local dense storage space.  We should add error checking. */

1254:   a       = (Mat_MPIDense*)mat->data;
1255:   PetscLayoutSetUp(mat->rmap);
1256:   PetscLayoutSetUp(mat->cmap);
1257:   a->nvec = mat->cmap->n;

1259:   MatCreate(PETSC_COMM_SELF,&a->A);
1260:   MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1261:   MatSetType(a->A,MATSEQDENSE);
1262:   MatSeqDenseSetPreallocation(a->A,data);
1263:   PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1264:   return(0);
1265: }

1267: #if defined(PETSC_HAVE_ELEMENTAL)
1268: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1269: {
1270:   Mat            mat_elemental;
1272:   PetscScalar    *v;
1273:   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1274: 
1276:   if (reuse == MAT_REUSE_MATRIX) {
1277:     mat_elemental = *newmat;
1278:     MatZeroEntries(*newmat);
1279:   } else {
1280:     MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
1281:     MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);
1282:     MatSetType(mat_elemental,MATELEMENTAL);
1283:     MatSetUp(mat_elemental);
1284:     MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);
1285:   }

1287:   PetscMalloc2(m,&rows,N,&cols);
1288:   for (i=0; i<N; i++) cols[i] = i;
1289:   for (i=0; i<m; i++) rows[i] = rstart + i;
1290: 
1291:   /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1292:   MatDenseGetArray(A,&v);
1293:   MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);
1294:   MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
1295:   MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
1296:   MatDenseRestoreArray(A,&v);
1297:   PetscFree2(rows,cols);

1299:   if (reuse == MAT_INPLACE_MATRIX) {
1300:     MatHeaderReplace(A,&mat_elemental);
1301:   } else {
1302:     *newmat = mat_elemental;
1303:   }
1304:   return(0);
1305: }
1306: #endif

1308: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1309: {
1310:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;

1314:   MatDenseGetColumn(mat->A,col,vals);
1315:   return(0);
1316: }

1318: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1319: {
1320:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;

1324:   MatDenseRestoreColumn(mat->A,vals);
1325:   return(0);
1326: }

1328: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1329: {
1331:   Mat_MPIDense   *mat;
1332:   PetscInt       m,nloc,N;

1335:   MatGetSize(inmat,&m,&N);
1336:   MatGetLocalSize(inmat,NULL,&nloc);
1337:   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1338:     PetscInt sum;

1340:     if (n == PETSC_DECIDE) {
1341:       PetscSplitOwnership(comm,&n,&N);
1342:     }
1343:     /* Check sum(n) = N */
1344:     MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
1345:     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

1347:     MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);
1348:   }

1350:   /* numeric phase */
1351:   mat = (Mat_MPIDense*)(*outmat)->data;
1352:   MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);
1353:   MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
1354:   MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
1355:   return(0);
1356: }

1358: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1359: {
1360:   Mat_MPIDense   *a;

1364:   PetscNewLog(mat,&a);
1365:   mat->data = (void*)a;
1366:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));

1368:   mat->insertmode = NOT_SET_VALUES;
1369:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);
1370:   MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);

1372:   /* build cache for off array entries formed */
1373:   a->donotstash = PETSC_FALSE;

1375:   MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);

1377:   /* stuff used for matrix vector multiply */
1378:   a->lvec        = 0;
1379:   a->Mvctx       = 0;
1380:   a->roworiented = PETSC_TRUE;

1382:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);
1383:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);
1384:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);
1385:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);
1386:   PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);
1387:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);
1388: #if defined(PETSC_HAVE_ELEMENTAL)
1389:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);
1390: #endif
1391:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);
1392:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);
1393:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);
1394:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);

1396:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);
1397:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);
1398:   PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);
1399:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);
1400:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);
1401:   PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1402:   return(0);
1403: }

1405: /*MC
1406:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1408:    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1409:    and MATMPIDENSE otherwise.

1411:    Options Database Keys:
1412: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

1414:   Level: beginner


1417: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1418: M*/

1420: /*@C
1421:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1423:    Not collective

1425:    Input Parameters:
1426: .  B - the matrix
1427: -  data - optional location of matrix data.  Set data=NULL for PETSc
1428:    to control all matrix memory allocation.

1430:    Notes:
1431:    The dense format is fully compatible with standard Fortran 77
1432:    storage by columns.

1434:    The data input variable is intended primarily for Fortran programmers
1435:    who wish to allocate their own matrix memory space.  Most users should
1436:    set data=NULL.

1438:    Level: intermediate

1440: .keywords: matrix,dense, parallel

1442: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1443: @*/
1444: PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1445: {

1449:   PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
1450:   return(0);
1451: }

1453: /*@
1454:    MatDensePlaceArray - Allows one to replace the array in a dense array with an
1455:    array provided by the user. This is useful to avoid copying an array
1456:    into a matrix

1458:    Not Collective

1460:    Input Parameters:
1461: +  mat - the matrix
1462: -  array - the array in column major order

1464:    Notes:
1465:    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1466:    freed when the matrix is destroyed.

1468:    Level: developer

1470: .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

1472: @*/
1473: PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1474: {
1477:   PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
1478:   PetscObjectStateIncrease((PetscObject)mat);
1479:   return(0);
1480: }

1482: /*@
1483:    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

1485:    Not Collective

1487:    Input Parameters:
1488: .  mat - the matrix

1490:    Notes:
1491:    You can only call this after a call to MatDensePlaceArray()

1493:    Level: developer

1495: .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

1497: @*/
1498: PetscErrorCode  MatDenseResetArray(Mat mat)
1499: {
1502:   PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
1503:   PetscObjectStateIncrease((PetscObject)mat);
1504:   return(0);
1505: }

1507: /*@C
1508:    MatCreateDense - Creates a parallel matrix in dense format.

1510:    Collective on MPI_Comm

1512:    Input Parameters:
1513: +  comm - MPI communicator
1514: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1515: .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1516: .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1517: .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1518: -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1519:    to control all matrix memory allocation.

1521:    Output Parameter:
1522: .  A - the matrix

1524:    Notes:
1525:    The dense format is fully compatible with standard Fortran 77
1526:    storage by columns.

1528:    The data input variable is intended primarily for Fortran programmers
1529:    who wish to allocate their own matrix memory space.  Most users should
1530:    set data=NULL (PETSC_NULL_SCALAR for Fortran users).

1532:    The user MUST specify either the local or global matrix dimensions
1533:    (possibly both).

1535:    Level: intermediate

1537: .keywords: matrix,dense, parallel

1539: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1540: @*/
1541: PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1542: {
1544:   PetscMPIInt    size;

1547:   MatCreate(comm,A);
1548:   MatSetSizes(*A,m,n,M,N);
1549:   MPI_Comm_size(comm,&size);
1550:   if (size > 1) {
1551:     MatSetType(*A,MATMPIDENSE);
1552:     MatMPIDenseSetPreallocation(*A,data);
1553:     if (data) {  /* user provided data array, so no need to assemble */
1554:       MatSetUpMultiply_MPIDense(*A);
1555:       (*A)->assembled = PETSC_TRUE;
1556:     }
1557:   } else {
1558:     MatSetType(*A,MATSEQDENSE);
1559:     MatSeqDenseSetPreallocation(*A,data);
1560:   }
1561:   return(0);
1562: }

1564: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1565: {
1566:   Mat            mat;
1567:   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;

1571:   *newmat = 0;
1572:   MatCreate(PetscObjectComm((PetscObject)A),&mat);
1573:   MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1574:   MatSetType(mat,((PetscObject)A)->type_name);
1575:   a       = (Mat_MPIDense*)mat->data;
1576:   PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));

1578:   mat->factortype   = A->factortype;
1579:   mat->assembled    = PETSC_TRUE;
1580:   mat->preallocated = PETSC_TRUE;

1582:   a->size         = oldmat->size;
1583:   a->rank         = oldmat->rank;
1584:   mat->insertmode = NOT_SET_VALUES;
1585:   a->nvec         = oldmat->nvec;
1586:   a->donotstash   = oldmat->donotstash;

1588:   PetscLayoutReference(A->rmap,&mat->rmap);
1589:   PetscLayoutReference(A->cmap,&mat->cmap);

1591:   MatSetUpMultiply_MPIDense(mat);
1592:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1593:   PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);

1595:   *newmat = mat;
1596:   return(0);
1597: }

1599: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1600: {
1602:   PetscMPIInt    rank,size;
1603:   const PetscInt *rowners;
1604:   PetscInt       i,m,n,nz,j,mMax;
1605:   PetscScalar    *array,*vals,*vals_ptr;
1606:   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

1609:   MPI_Comm_rank(comm,&rank);
1610:   MPI_Comm_size(comm,&size);

1612:   /* determine ownership of rows and columns */
1613:   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1614:   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

1616:   MatSetSizes(newmat,m,n,M,N);
1617:   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1618:     MatMPIDenseSetPreallocation(newmat,NULL);
1619:   }
1620:   MatDenseGetArray(newmat,&array);
1621:   MatGetLocalSize(newmat,&m,NULL);
1622:   MatGetOwnershipRanges(newmat,&rowners);
1623:   MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);
1624:   if (!rank) {
1625:     PetscMalloc1(mMax*N,&vals);

1627:     /* read in my part of the matrix numerical values  */
1628:     PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);

1630:     /* insert into matrix-by row (this is why cannot directly read into array */
1631:     vals_ptr = vals;
1632:     for (i=0; i<m; i++) {
1633:       for (j=0; j<N; j++) {
1634:         array[i + j*m] = *vals_ptr++;
1635:       }
1636:     }

1638:     /* read in other processors and ship out */
1639:     for (i=1; i<size; i++) {
1640:       nz   = (rowners[i+1] - rowners[i])*N;
1641:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1642:       MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);
1643:     }
1644:   } else {
1645:     /* receive numeric values */
1646:     PetscMalloc1(m*N,&vals);

1648:     /* receive message of values*/
1649:     MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);

1651:     /* insert into matrix-by row (this is why cannot directly read into array */
1652:     vals_ptr = vals;
1653:     for (i=0; i<m; i++) {
1654:       for (j=0; j<N; j++) {
1655:         array[i + j*m] = *vals_ptr++;
1656:       }
1657:     }
1658:   }
1659:   MatDenseRestoreArray(newmat,&array);
1660:   PetscFree(vals);
1661:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1662:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1663:   return(0);
1664: }

1666: PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1667: {
1668:   Mat_MPIDense   *a;
1669:   PetscScalar    *vals,*svals;
1670:   MPI_Comm       comm;
1671:   MPI_Status     status;
1672:   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1673:   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1674:   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1675:   PetscInt       i,nz,j,rstart,rend;
1676:   int            fd;

1680:   /* force binary viewer to load .info file if it has not yet done so */
1681:   PetscViewerSetUp(viewer);
1682:   PetscObjectGetComm((PetscObject)viewer,&comm);
1683:   MPI_Comm_size(comm,&size);
1684:   MPI_Comm_rank(comm,&rank);
1685:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1686:   if (!rank) {
1687:     PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
1688:     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1689:   }
1690:   MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1691:   M    = header[1]; N = header[2]; nz = header[3];

1693:   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1694:   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1695:   if (newmat->cmap->N < 0) newmat->cmap->N = N;

1697:   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1698:   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

1700:   /*
1701:        Handle case where matrix is stored on disk as a dense matrix
1702:   */
1703:   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1704:     MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1705:     return(0);
1706:   }

1708:   /* determine ownership of all rows */
1709:   if (newmat->rmap->n < 0) {
1710:     PetscMPIIntCast(M/size + ((M % size) > rank),&m);
1711:   } else {
1712:     PetscMPIIntCast(newmat->rmap->n,&m);
1713:   }
1714:   if (newmat->cmap->n < 0) {
1715:     n = PETSC_DECIDE;
1716:   } else {
1717:     PetscMPIIntCast(newmat->cmap->n,&n);
1718:   }

1720:   PetscMalloc1(size+2,&rowners);
1721:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1722:   rowners[0] = 0;
1723:   for (i=2; i<=size; i++) {
1724:     rowners[i] += rowners[i-1];
1725:   }
1726:   rstart = rowners[rank];
1727:   rend   = rowners[rank+1];

1729:   /* distribute row lengths to all processors */
1730:   PetscMalloc1(rend-rstart,&ourlens);
1731:   if (!rank) {
1732:     PetscMalloc1(M,&rowlengths);
1733:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1734:     PetscMalloc1(size,&sndcounts);
1735:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1736:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1737:     PetscFree(sndcounts);
1738:   } else {
1739:     MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1740:   }

1742:   if (!rank) {
1743:     /* calculate the number of nonzeros on each processor */
1744:     PetscMalloc1(size,&procsnz);
1745:     PetscMemzero(procsnz,size*sizeof(PetscInt));
1746:     for (i=0; i<size; i++) {
1747:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1748:         procsnz[i] += rowlengths[j];
1749:       }
1750:     }
1751:     PetscFree(rowlengths);

1753:     /* determine max buffer needed and allocate it */
1754:     maxnz = 0;
1755:     for (i=0; i<size; i++) {
1756:       maxnz = PetscMax(maxnz,procsnz[i]);
1757:     }
1758:     PetscMalloc1(maxnz,&cols);

1760:     /* read in my part of the matrix column indices  */
1761:     nz   = procsnz[0];
1762:     PetscMalloc1(nz,&mycols);
1763:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

1765:     /* read in every one elses and ship off */
1766:     for (i=1; i<size; i++) {
1767:       nz   = procsnz[i];
1768:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1769:       MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1770:     }
1771:     PetscFree(cols);
1772:   } else {
1773:     /* determine buffer space needed for message */
1774:     nz = 0;
1775:     for (i=0; i<m; i++) {
1776:       nz += ourlens[i];
1777:     }
1778:     PetscMalloc1(nz+1,&mycols);

1780:     /* receive message of column indices*/
1781:     MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1782:     MPI_Get_count(&status,MPIU_INT,&maxnz);
1783:     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1784:   }

1786:   MatSetSizes(newmat,m,n,M,N);
1787:   a = (Mat_MPIDense*)newmat->data;
1788:   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1789:     MatMPIDenseSetPreallocation(newmat,NULL);
1790:   }

1792:   if (!rank) {
1793:     PetscMalloc1(maxnz,&vals);

1795:     /* read in my part of the matrix numerical values  */
1796:     nz   = procsnz[0];
1797:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);

1799:     /* insert into matrix */
1800:     jj      = rstart;
1801:     smycols = mycols;
1802:     svals   = vals;
1803:     for (i=0; i<m; i++) {
1804:       MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1805:       smycols += ourlens[i];
1806:       svals   += ourlens[i];
1807:       jj++;
1808:     }

1810:     /* read in other processors and ship out */
1811:     for (i=1; i<size; i++) {
1812:       nz   = procsnz[i];
1813:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1814:       MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
1815:     }
1816:     PetscFree(procsnz);
1817:   } else {
1818:     /* receive numeric values */
1819:     PetscMalloc1(nz+1,&vals);

1821:     /* receive message of values*/
1822:     MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
1823:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1824:     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1826:     /* insert into matrix */
1827:     jj      = rstart;
1828:     smycols = mycols;
1829:     svals   = vals;
1830:     for (i=0; i<m; i++) {
1831:       MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1832:       smycols += ourlens[i];
1833:       svals   += ourlens[i];
1834:       jj++;
1835:     }
1836:   }
1837:   PetscFree(ourlens);
1838:   PetscFree(vals);
1839:   PetscFree(mycols);
1840:   PetscFree(rowners);

1842:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1843:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1844:   return(0);
1845: }

1847: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1848: {
1849:   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1850:   Mat            a,b;
1851:   PetscBool      flg;

1855:   a    = matA->A;
1856:   b    = matB->A;
1857:   MatEqual(a,b,&flg);
1858:   MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1859:   return(0);
1860: }

1862: PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1863: {
1864:   PetscErrorCode        ierr;
1865:   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1866:   Mat_TransMatMultDense *atb = a->atbdense;

1869:   PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);
1870:   (atb->destroy)(A);
1871:   PetscFree(atb);
1872:   return(0);
1873: }

1875: PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1876: {
1877:   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1878:   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1879:   Mat_TransMatMultDense *atb = c->atbdense;
1881:   MPI_Comm       comm;
1882:   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1883:   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1884:   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1885:   PetscScalar    _DOne=1.0,_DZero=0.0;
1886:   PetscBLASInt   am,an,bn,aN;
1887:   const PetscInt *ranges;

1890:   PetscObjectGetComm((PetscObject)A,&comm);
1891:   MPI_Comm_rank(comm,&rank);
1892:   MPI_Comm_size(comm,&size);

1894:   /* compute atbarray = aseq^T * bseq */
1895:   PetscBLASIntCast(a->A->cmap->n,&an);
1896:   PetscBLASIntCast(b->A->cmap->n,&bn);
1897:   PetscBLASIntCast(a->A->rmap->n,&am);
1898:   PetscBLASIntCast(A->cmap->N,&aN);
1899:   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1900: 
1901:   MatGetOwnershipRanges(C,&ranges);
1902:   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1903: 
1904:   /* arrange atbarray into sendbuf */
1905:   k = 0;
1906:   for (proc=0; proc<size; proc++) {
1907:     for (j=0; j<cN; j++) {
1908:       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1909:     }
1910:   }
1911:   /* sum all atbarray to local values of C */
1912:   MatDenseGetArray(c->A,&carray);
1913:   MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);
1914:   MatDenseRestoreArray(c->A,&carray);
1915:   return(0);
1916: }

1918: PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1919: {
1920:   PetscErrorCode        ierr;
1921:   Mat                   Cdense;
1922:   MPI_Comm              comm;
1923:   PetscMPIInt           size;
1924:   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1925:   Mat_MPIDense          *c;
1926:   Mat_TransMatMultDense *atb;

1929:   PetscObjectGetComm((PetscObject)A,&comm);
1930:   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1931:     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1932:   }

1934:   /* create matrix product Cdense */
1935:   MatCreate(comm,&Cdense);
1936:   MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
1937:   MatSetType(Cdense,MATMPIDENSE);
1938:   MatMPIDenseSetPreallocation(Cdense,NULL);
1939:   MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);
1940:   MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);
1941:   *C   = Cdense;

1943:   /* create data structure for reuse Cdense */
1944:   MPI_Comm_size(comm,&size);
1945:   PetscNew(&atb);
1946:   cM = Cdense->rmap->N;
1947:   PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);
1948: 
1949:   c                    = (Mat_MPIDense*)Cdense->data;
1950:   c->atbdense          = atb;
1951:   atb->destroy         = Cdense->ops->destroy;
1952:   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1953:   return(0);
1954: }

1956: PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1957: {

1961:   if (scall == MAT_INITIAL_MATRIX) {
1962:     MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
1963:   }
1964:   MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);
1965:   return(0);
1966: }

1968: PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1969: {
1970:   PetscErrorCode   ierr;
1971:   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1972:   Mat_MatMultDense *ab = a->abdense;

1975:   MatDestroy(&ab->Ce);
1976:   MatDestroy(&ab->Ae);
1977:   MatDestroy(&ab->Be);

1979:   (ab->destroy)(A);
1980:   PetscFree(ab);
1981:   return(0);
1982: }

1984: #if defined(PETSC_HAVE_ELEMENTAL)
1985: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1986: {
1987:   PetscErrorCode   ierr;
1988:   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1989:   Mat_MatMultDense *ab=c->abdense;

1992:   MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);
1993:   MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);
1994:   MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);
1995:   MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);
1996:   return(0);
1997: }

1999: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2000: {
2001:   PetscErrorCode   ierr;
2002:   Mat              Ae,Be,Ce;
2003:   Mat_MPIDense     *c;
2004:   Mat_MatMultDense *ab;

2007:   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2008:     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2009:   }

2011:   /* convert A and B to Elemental matrices Ae and Be */
2012:   MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);
2013:   MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);

2015:   /* Ce = Ae*Be */
2016:   MatMatMultSymbolic(Ae,Be,fill,&Ce);
2017:   MatMatMultNumeric(Ae,Be,Ce);
2018: 
2019:   /* convert Ce to C */
2020:   MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);

2022:   /* create data structure for reuse Cdense */
2023:   PetscNew(&ab);
2024:   c                  = (Mat_MPIDense*)(*C)->data;
2025:   c->abdense         = ab;

2027:   ab->Ae             = Ae;
2028:   ab->Be             = Be;
2029:   ab->Ce             = Ce;
2030:   ab->destroy        = (*C)->ops->destroy;
2031:   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2032:   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2033:   return(0);
2034: }

2036: PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2037: {

2041:   if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */
2042:     PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);
2043:     MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
2044:     PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);
2045:   } else {
2046:     PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);
2047:     MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);
2048:     PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);
2049:   }
2050:   return(0);
2051: }
2052: #endif