Actual source code: mpidense.c

petsc-3.4.5 2014-06-29
  2: /*
  3:    Basic functions for parallel dense matrices.
  4: */


  7: #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/

 11: /*@

 13:       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential (MATSEQDENSE)
 14:               matrix that stores the locally owned rows. For a MATSEQDENSE matrix it returns the matrix itself.

 16:     Input Parameter:
 17: .      A - the Seq or MPI dense matrix

 19:     Output Parameter:
 20: .      B - the inner matrix

 22:     Level: intermediate

 24: @*/
 25: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
 26: {
 27:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 29:   PetscBool      flg;

 32:   PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
 33:   if (flg) *B = mat->A;
 34:   else *B = A;
 35:   return(0);
 36: }
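
A minimal usage sketch (illustrative only, not part of mpidense.c): given an assembled dense matrix A, the inner sequential matrix holding the locally owned rows can be obtained and inspected as follows; error checking is omitted for brevity.

      Mat      A;          /* assumed: an assembled MATMPIDENSE or MATSEQDENSE matrix */
      Mat      Aloc;
      PetscInt mloc,Ncols;

      MatDenseGetLocalMatrix(A,&Aloc);   /* Aloc is mat->A for MATMPIDENSE, or A itself for MATSEQDENSE */
      MatGetSize(Aloc,&mloc,&Ncols);     /* for the MPI case: local rows of A by global columns of A */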

 40: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 41: {
 42:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 44:   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

 47:   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
 48:   lrow = row - rstart;
 49:   MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
 50:   return(0);
 51: }

 55: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 56: {

 60:   if (idx) {PetscFree(*idx);}
 61:   if (v) {PetscFree(*v);}
 62:   return(0);
 63: }

 67: PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
 68: {
 69:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
 71:   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
 72:   PetscScalar    *array;
 73:   MPI_Comm       comm;
 74:   Mat            B;

 77:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

 79:   PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
 80:   if (!B) {
 81:     PetscObjectGetComm((PetscObject)(mdn->A),&comm);
 82:     MatCreate(comm,&B);
 83:     MatSetSizes(B,m,m,m,m);
 84:     MatSetType(B,((PetscObject)mdn->A)->type_name);
 85:     MatDenseGetArray(mdn->A,&array);
 86:     MatSeqDenseSetPreallocation(B,array+m*rstart);
 87:     MatDenseRestoreArray(mdn->A,&array);
 88:     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
 89:     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
 90:     PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
 91:     *a   = B;
 92:     MatDestroy(&B);
 93:   } else *a = B;
 94:   return(0);
 95: }

 99: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
100: {
101:   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
103:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
104:   PetscBool      roworiented = A->roworiented;

108:   for (i=0; i<m; i++) {
109:     if (idxm[i] < 0) continue;
110:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
111:     if (idxm[i] >= rstart && idxm[i] < rend) {
112:       row = idxm[i] - rstart;
113:       if (roworiented) {
114:         MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
115:       } else {
116:         for (j=0; j<n; j++) {
117:           if (idxn[j] < 0) continue;
118:           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
119:           MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
120:         }
121:       }
122:     } else if (!A->donotstash) {
123:       mat->assembled = PETSC_FALSE;
124:       if (roworiented) {
125:         MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
126:       } else {
127:         MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
128:       }
129:     }
130:   }
131:   return(0);
132: }

136: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
137: {
138:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
140:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

143:   for (i=0; i<m; i++) {
144:     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
145:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
146:     if (idxm[i] >= rstart && idxm[i] < rend) {
147:       row = idxm[i] - rstart;
148:       for (j=0; j<n; j++) {
149:         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
150:         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
151:         MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
152:       }
153:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
154:   }
155:   return(0);
156: }

160: PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
161: {
162:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

166:   MatDenseGetArray(a->A,array);
167:   return(0);
168: }

172: static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
173: {
174:   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
175:   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
177:   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
178:   const PetscInt *irow,*icol;
179:   PetscScalar    *av,*bv,*v = lmat->v;
180:   Mat            newmat;
181:   IS             iscol_local;

184:   ISAllGather(iscol,&iscol_local);
185:   ISGetIndices(isrow,&irow);
186:   ISGetIndices(iscol_local,&icol);
187:   ISGetLocalSize(isrow,&nrows);
188:   ISGetLocalSize(iscol,&ncols);
189:   ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */

191:   /* No parallel redistribution currently supported! Should really check each index set
192:      to confirm that it is OK.  ... Currently supports only submatrices with the same
193:      partitioning as the original matrix! */

195:   MatGetLocalSize(A,&nlrows,&nlcols);
196:   MatGetOwnershipRange(A,&rstart,&rend);

198:   /* Check submatrix call */
199:   if (scall == MAT_REUSE_MATRIX) {
200:     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
201:     /* Really need to test row and column sizes! */
202:     newmat = *B;
203:   } else {
204:     /* Create and fill new matrix */
205:     MatCreate(PetscObjectComm((PetscObject)A),&newmat);
206:     MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
207:     MatSetType(newmat,((PetscObject)A)->type_name);
208:     MatMPIDenseSetPreallocation(newmat,NULL);
209:   }

211:   /* Now extract the data pointers and do the copy, column at a time */
212:   newmatd = (Mat_MPIDense*)newmat->data;
213:   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

215:   for (i=0; i<Ncols; i++) {
216:     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
217:     for (j=0; j<nrows; j++) {
218:       *bv++ = av[irow[j] - rstart];
219:     }
220:   }

222:   /* Assemble the matrices so that the correct flags are set */
223:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
224:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

226:   /* Free work space */
227:   ISRestoreIndices(isrow,&irow);
228:   ISRestoreIndices(iscol_local,&icol);
229:   ISDestroy(&iscol_local);
230:   *B   = newmat;
231:   return(0);
232: }

236: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
237: {
238:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

242:   MatDenseRestoreArray(a->A,array);
243:   return(0);
244: }

248: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
249: {
250:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
251:   MPI_Comm       comm;
253:   PetscInt       nstash,reallocs;
254:   InsertMode     addv;

257:   PetscObjectGetComm((PetscObject)mat,&comm);
258:   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
259:   MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);
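       /* this works because InsertMode values are distinct bits (INSERT_VALUES=1, ADD_VALUES=2):
          the bitwise OR over all ranks equals (ADD_VALUES|INSERT_VALUES) exactly when some ranks
          inserted while others added */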
260:   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
261:   mat->insertmode = addv; /* in case this processor had no cache */

263:   MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
264:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
265:   PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
266:   return(0);
267: }

271: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
272: {
273:   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
275:   PetscInt       i,*row,*col,flg,j,rstart,ncols;
276:   PetscMPIInt    n;
277:   PetscScalar    *val;
278:   InsertMode     addv=mat->insertmode;

281:   /*  wait on receives */
282:   while (1) {
283:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
284:     if (!flg) break;

286:     for (i=0; i<n;) {
287:       /* Now identify the consecutive vals belonging to the same row */
288:       for (j=i,rstart=row[j]; j<n; j++) {
289:         if (row[j] != rstart) break;
290:       }
291:       if (j < n) ncols = j-i;
292:       else       ncols = n-i;
293:       /* Now assemble all these values with a single function call */
294:       MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
295:       i    = j;
296:     }
297:   }
298:   MatStashScatterEnd_Private(&mat->stash);

300:   MatAssemblyBegin(mdn->A,mode);
301:   MatAssemblyEnd(mdn->A,mode);

303:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
304:     MatSetUpMultiply_MPIDense(mat);
305:   }
306:   return(0);
307: }

311: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
312: {
314:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

317:   MatZeroEntries(l->A);
318:   return(0);
319: }

321: /* the code does not do the diagonal entries correctly unless the
 322:    matrix is square and the column and row ownerships are identical.
323:    This is a BUG. The only way to fix it seems to be to access
324:    mdn->A and mdn->B directly and not through the MatZeroRows()
325:    routine.
326: */
329: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
330: {
331:   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
332:   PetscErrorCode    ierr;
333:   PetscInt          i,*owners = A->rmap->range;
334:   PetscInt          *nprocs,j,idx,nsends;
335:   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
336:   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
337:   PetscInt          *lens,*lrows,*values;
338:   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
339:   MPI_Comm          comm;
340:   MPI_Request       *send_waits,*recv_waits;
341:   MPI_Status        recv_status,*send_status;
342:   PetscBool         found;
343:   const PetscScalar *xx;
344:   PetscScalar       *bb;

347:   PetscObjectGetComm((PetscObject)A,&comm);
348:   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
349:   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
350:   /*  first count number of contributors to each processor */
351:   PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
352:   PetscMemzero(nprocs,2*size*sizeof(PetscInt));
353:   PetscMalloc((N+1)*sizeof(PetscInt),&owner);  /* see note*/
354:   for (i=0; i<N; i++) {
355:     idx   = rows[i];
356:     found = PETSC_FALSE;
357:     for (j=0; j<size; j++) {
358:       if (idx >= owners[j] && idx < owners[j+1]) {
359:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
360:       }
361:     }
362:     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
363:   }
364:   nsends = 0;
365:   for (i=0; i<size; i++) nsends += nprocs[2*i+1];

367:   /* inform other processors of number of messages and max length*/
368:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

370:   /* post receives:   */
371:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
372:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
373:   for (i=0; i<nrecvs; i++) {
374:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
375:   }

377:   /* do sends:
378:       1) starts[i] gives the starting index in svalues for stuff going to
379:          the ith processor
380:   */
381:   PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
382:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
383:   PetscMalloc((size+1)*sizeof(PetscInt),&starts);

385:   starts[0] = 0;
386:   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
387:   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

389:   starts[0] = 0;
390:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
391:   count = 0;
392:   for (i=0; i<size; i++) {
393:     if (nprocs[2*i+1]) {
394:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
395:     }
396:   }
397:   PetscFree(starts);

399:   base = owners[rank];

401:   /*  wait on receives */
402:   PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
403:   count = nrecvs;
404:   slen  = 0;
405:   while (count) {
406:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
407:     /* unpack receives into our local space */
408:     MPI_Get_count(&recv_status,MPIU_INT,&n);

410:     source[imdex] = recv_status.MPI_SOURCE;
411:     lens[imdex]   = n;
412:     slen += n;
413:     count--;
414:   }
415:   PetscFree(recv_waits);

417:   /* move the data into the send scatter */
418:   PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
419:   count = 0;
420:   for (i=0; i<nrecvs; i++) {
421:     values = rvalues + i*nmax;
422:     for (j=0; j<lens[i]; j++) {
423:       lrows[count++] = values[j] - base;
424:     }
425:   }
426:   PetscFree(rvalues);
427:   PetscFree2(lens,source);
428:   PetscFree(owner);
429:   PetscFree(nprocs);

431:   /* fix right hand side if needed */
432:   if (x && b) {
433:     VecGetArrayRead(x,&xx);
434:     VecGetArray(b,&bb);
435:     for (i=0; i<slen; i++) {
436:       bb[lrows[i]] = diag*xx[lrows[i]];
437:     }
438:     VecRestoreArrayRead(x,&xx);
439:     VecRestoreArray(b,&bb);
440:   }

442:   /* actually zap the local rows */
443:   MatZeroRows(l->A,slen,lrows,0.0,0,0);
444:   if (diag != 0.0) {
445:     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
446:     PetscInt     m   = ll->lda, i;

448:     for (i=0; i<slen; i++) {
449:       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
450:     }
451:   }
452:   PetscFree(lrows);

454:   /* wait on sends */
455:   if (nsends) {
456:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
457:     MPI_Waitall(nsends,send_waits,send_status);
458:     PetscFree(send_status);
459:   }
460:   PetscFree(send_waits);
461:   PetscFree(svalues);
462:   return(0);
463: }

467: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
468: {
469:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

473:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
474:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
475:   MatMult_SeqDense(mdn->A,mdn->lvec,yy);
476:   return(0);
477: }

481: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
482: {
483:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

487:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
488:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
489:   MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
490:   return(0);
491: }

495: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
496: {
497:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
499:   PetscScalar    zero = 0.0;

502:   VecSet(yy,zero);
503:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
504:   VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
505:   VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
506:   return(0);
507: }

511: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
512: {
513:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

517:   VecCopy(yy,zz);
518:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
519:   VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
520:   VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
521:   return(0);
522: }

526: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
527: {
528:   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
529:   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
531:   PetscInt       len,i,n,m = A->rmap->n,radd;
532:   PetscScalar    *x,zero = 0.0;

535:   VecSet(v,zero);
536:   VecGetArray(v,&x);
537:   VecGetSize(v,&n);
538:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
539:   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
540:   radd = A->rmap->rstart*m;
541:   for (i=0; i<len; i++) {
542:     x[i] = aloc->v[radd + i*m + i];
543:   }
544:   VecRestoreArray(v,&x);
545:   return(0);
546: }

550: PetscErrorCode MatDestroy_MPIDense(Mat mat)
551: {
552:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

556: #if defined(PETSC_USE_LOG)
557:   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
558: #endif
559:   MatStashDestroy_Private(&mat->stash);
560:   MatDestroy(&mdn->A);
561:   VecDestroy(&mdn->lvec);
562:   VecScatterDestroy(&mdn->Mvctx);

564:   PetscFree(mat->data);
565:   PetscObjectChangeTypeName((PetscObject)mat,0);
566:   PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);
567:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);
568:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);
569:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);
570:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);
571:   return(0);
572: }

576: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
577: {
578:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
579:   PetscErrorCode    ierr;
580:   PetscViewerFormat format;
581:   int               fd;
582:   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
583:   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
584:   PetscScalar       *work,*v,*vv;
585:   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

588:   if (mdn->size == 1) {
589:     MatView(mdn->A,viewer);
590:   } else {
591:     PetscViewerBinaryGetDescriptor(viewer,&fd);
592:     MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
593:     MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);

595:     PetscViewerGetFormat(viewer,&format);
596:     if (format == PETSC_VIEWER_NATIVE) {

598:       if (!rank) {
599:         /* store the matrix as a dense matrix */
600:         header[0] = MAT_FILE_CLASSID;
601:         header[1] = mat->rmap->N;
602:         header[2] = N;
603:         header[3] = MATRIX_BINARY_FORMAT_DENSE;
604:         PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);

606:         /* get largest work array needed for transposing array */
607:         mmax = mat->rmap->n;
608:         for (i=1; i<size; i++) {
609:           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
610:         }
611:         PetscMalloc(mmax*N*sizeof(PetscScalar),&work);

613:         /* write out local array, by rows */
614:         m = mat->rmap->n;
615:         v = a->v;
616:         for (j=0; j<N; j++) {
617:           for (i=0; i<m; i++) {
618:             work[j + i*N] = *v++;
619:           }
620:         }
621:         PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
622:         /* get largest work array to receive messages from other processes, excludes process zero */
623:         mmax = 0;
624:         for (i=1; i<size; i++) {
625:           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
626:         }
627:         PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);
628:         for (k = 1; k < size; k++) {
629:           v    = vv;
630:           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
631:           MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));

633:           for (j = 0; j < N; j++) {
634:             for (i = 0; i < m; i++) {
635:               work[j + i*N] = *v++;
636:             }
637:           }
638:           PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
639:         }
640:         PetscFree(work);
641:         PetscFree(vv);
642:       } else {
643:         MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
644:       }
645:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
646:   }
647:   return(0);
648: }

650: #include <petscdraw.h>
653: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
654: {
655:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
656:   PetscErrorCode    ierr;
657:   PetscMPIInt       size = mdn->size,rank = mdn->rank;
658:   PetscViewerType   vtype;
659:   PetscBool         iascii,isdraw;
660:   PetscViewer       sviewer;
661:   PetscViewerFormat format;

664:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
665:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
666:   if (iascii) {
667:     PetscViewerGetType(viewer,&vtype);
668:     PetscViewerGetFormat(viewer,&format);
669:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
670:       MatInfo info;
671:       MatGetInfo(mat,MAT_LOCAL,&info);
672:       PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
673:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
674:       PetscViewerFlush(viewer);
675:       PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
676:       VecScatterView(mdn->Mvctx,viewer);
677:       return(0);
678:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
679:       return(0);
680:     }
681:   } else if (isdraw) {
682:     PetscDraw draw;
683:     PetscBool isnull;

685:     PetscViewerDrawGetDraw(viewer,0,&draw);
686:     PetscDrawIsNull(draw,&isnull);
687:     if (isnull) return(0);
688:   }

690:   if (size == 1) {
691:     MatView(mdn->A,viewer);
692:   } else {
693:     /* assemble the entire matrix onto first processor. */
694:     Mat         A;
695:     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
696:     PetscInt    *cols;
697:     PetscScalar *vals;

699:     MatCreate(PetscObjectComm((PetscObject)mat),&A);
700:     if (!rank) {
701:       MatSetSizes(A,M,N,M,N);
702:     } else {
703:       MatSetSizes(A,0,0,M,N);
704:     }
705:     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
706:     MatSetType(A,MATMPIDENSE);
707:     MatMPIDenseSetPreallocation(A,NULL);
708:     PetscLogObjectParent(mat,A);

710:     /* Copy the matrix ... This isn't the most efficient means,
711:        but it's quick for now */
712:     A->insertmode = INSERT_VALUES;

714:     row = mat->rmap->rstart;
715:     m   = mdn->A->rmap->n;
716:     for (i=0; i<m; i++) {
717:       MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
718:       MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
719:       MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
720:       row++;
721:     }

723:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
724:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
725:     PetscViewerGetSingleton(viewer,&sviewer);
726:     if (!rank) {
727:       PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);
728:       /* Set the type name to MATMPIDENSE so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII() */
729:       PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);
730:       MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
731:     }
732:     PetscViewerRestoreSingleton(viewer,&sviewer);
733:     PetscViewerFlush(viewer);
734:     MatDestroy(&A);
735:   }
736:   return(0);
737: }

741: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
742: {
744:   PetscBool      iascii,isbinary,isdraw,issocket;

747:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
748:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
749:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
750:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);

752:   if (iascii || issocket || isdraw) {
753:     MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
754:   } else if (isbinary) {
755:     MatView_MPIDense_Binary(mat,viewer);
756:   }
757:   return(0);
758: }

762: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
763: {
764:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
765:   Mat            mdn  = mat->A;
767:   PetscReal      isend[5],irecv[5];

770:   info->block_size = 1.0;

772:   MatGetInfo(mdn,MAT_LOCAL,info);

774:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
775:   isend[3] = info->memory;  isend[4] = info->mallocs;
776:   if (flag == MAT_LOCAL) {
777:     info->nz_used      = isend[0];
778:     info->nz_allocated = isend[1];
779:     info->nz_unneeded  = isend[2];
780:     info->memory       = isend[3];
781:     info->mallocs      = isend[4];
782:   } else if (flag == MAT_GLOBAL_MAX) {
783:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));

785:     info->nz_used      = irecv[0];
786:     info->nz_allocated = irecv[1];
787:     info->nz_unneeded  = irecv[2];
788:     info->memory       = irecv[3];
789:     info->mallocs      = irecv[4];
790:   } else if (flag == MAT_GLOBAL_SUM) {
791:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));

793:     info->nz_used      = irecv[0];
794:     info->nz_allocated = irecv[1];
795:     info->nz_unneeded  = irecv[2];
796:     info->memory       = irecv[3];
797:     info->mallocs      = irecv[4];
798:   }
799:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
800:   info->fill_ratio_needed = 0;
801:   info->factor_mallocs    = 0;
802:   return(0);
803: }

807: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
808: {
809:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

813:   switch (op) {
814:   case MAT_NEW_NONZERO_LOCATIONS:
815:   case MAT_NEW_NONZERO_LOCATION_ERR:
816:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
817:     MatSetOption(a->A,op,flg);
818:     break;
819:   case MAT_ROW_ORIENTED:
820:     a->roworiented = flg;

822:     MatSetOption(a->A,op,flg);
823:     break;
824:   case MAT_NEW_DIAGONALS:
825:   case MAT_KEEP_NONZERO_PATTERN:
826:   case MAT_USE_HASH_TABLE:
827:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
828:     break;
829:   case MAT_IGNORE_OFF_PROC_ENTRIES:
830:     a->donotstash = flg;
831:     break;
832:   case MAT_SYMMETRIC:
833:   case MAT_STRUCTURALLY_SYMMETRIC:
834:   case MAT_HERMITIAN:
835:   case MAT_SYMMETRY_ETERNAL:
836:   case MAT_IGNORE_LOWER_TRIANGULAR:
837:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
838:     break;
839:   default:
840:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
841:   }
842:   return(0);
843: }


848: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
849: {
850:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
851:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
852:   PetscScalar    *l,*r,x,*v;
854:   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

857:   MatGetLocalSize(A,&s2,&s3);
858:   if (ll) {
859:     VecGetLocalSize(ll,&s2a);
860:     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
861:     VecGetArray(ll,&l);
862:     for (i=0; i<m; i++) {
863:       x = l[i];
864:       v = mat->v + i;
865:       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
866:     }
867:     VecRestoreArray(ll,&l);
868:     PetscLogFlops(n*m);
869:   }
870:   if (rr) {
871:     VecGetLocalSize(rr,&s3a);
872:     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
873:     VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
874:     VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
875:     VecGetArray(mdn->lvec,&r);
876:     for (i=0; i<n; i++) {
877:       x = r[i];
878:       v = mat->v + i*m;
879:       for (j=0; j<m; j++) (*v++) *= x;
880:     }
881:     VecRestoreArray(mdn->lvec,&r);
882:     PetscLogFlops(n*m);
883:   }
884:   return(0);
885: }

889: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
890: {
891:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
892:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
894:   PetscInt       i,j;
895:   PetscReal      sum = 0.0;
896:   PetscScalar    *v  = mat->v;

899:   if (mdn->size == 1) {
900:      MatNorm(mdn->A,type,nrm);
901:   } else {
902:     if (type == NORM_FROBENIUS) {
903:       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
904:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
905:       }
906:       MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
907:       *nrm = PetscSqrtReal(*nrm);
908:       PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
909:     } else if (type == NORM_1) {
910:       PetscReal *tmp,*tmp2;
911:       PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);
912:       PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));
913:       PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));
914:       *nrm = 0.0;
915:       v    = mat->v;
916:       for (j=0; j<mdn->A->cmap->n; j++) {
917:         for (i=0; i<mdn->A->rmap->n; i++) {
918:           tmp[j] += PetscAbsScalar(*v);  v++;
919:         }
920:       }
921:       MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
922:       for (j=0; j<A->cmap->N; j++) {
923:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
924:       }
925:       PetscFree2(tmp,tmp2);
926:       PetscLogFlops(A->cmap->n*A->rmap->n);
927:     } else if (type == NORM_INFINITY) { /* max row norm */
928:       PetscReal ntemp;
929:       MatNorm(mdn->A,type,&ntemp);
930:       MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
931:     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
932:   }
933:   return(0);
934: }

938: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
939: {
940:   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
941:   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
942:   Mat            B;
943:   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
945:   PetscInt       j,i;
946:   PetscScalar    *v;

949:   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
950:   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
951:     MatCreate(PetscObjectComm((PetscObject)A),&B);
952:     MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
953:     MatSetType(B,((PetscObject)A)->type_name);
954:     MatMPIDenseSetPreallocation(B,NULL);
955:   } else {
956:     B = *matout;
957:   }

959:   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
960:   PetscMalloc(m*sizeof(PetscInt),&rwork);
961:   for (i=0; i<m; i++) rwork[i] = rstart + i;
962:   for (j=0; j<n; j++) {
963:     MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
964:     v   += m;
965:   }
966:   PetscFree(rwork);
967:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
968:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
969:   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
970:     *matout = B;
971:   } else {
972:     MatHeaderMerge(A,B);
973:   }
974:   return(0);
975: }


978: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
979: extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

983: PetscErrorCode MatSetUp_MPIDense(Mat A)
984: {

988:    MatMPIDenseSetPreallocation(A,0);
989:   return(0);
990: }

994: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
995: {
997:   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

1000:   MatAXPY(A->A,alpha,B->A,str);
1001:   return(0);
1002: }

1006: PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1007: {
1008:   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;

1012:   MatConjugate(a->A);
1013:   return(0);
1014: }

1018: PetscErrorCode MatRealPart_MPIDense(Mat A)
1019: {
1020:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1024:   MatRealPart(a->A);
1025:   return(0);
1026: }

1030: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1031: {
1032:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1036:   MatImaginaryPart(a->A);
1037:   return(0);
1038: }

1040: extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1043: PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1044: {
1046:   PetscInt       i,n;
1047:   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1048:   PetscReal      *work;

1051:   MatGetSize(A,NULL,&n);
1052:   PetscMalloc(n*sizeof(PetscReal),&work);
1053:   MatGetColumnNorms_SeqDense(a->A,type,work);
1054:   if (type == NORM_2) {
1055:     for (i=0; i<n; i++) work[i] *= work[i];
1056:   }
1057:   if (type == NORM_INFINITY) {
1058:     MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
1059:   } else {
1060:     MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
1061:   }
1062:   PetscFree(work);
1063:   if (type == NORM_2) {
1064:     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1065:   }
1066:   return(0);
1067: }

1071: static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1072: {
1073:   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1075:   PetscScalar    *a;
1076:   PetscInt       m,n,i;

1079:   MatGetSize(d->A,&m,&n);
1080:   MatDenseGetArray(d->A,&a);
1081:   for (i=0; i<m*n; i++) {
1082:     PetscRandomGetValue(rctx,a+i);
1083:   }
1084:   MatDenseRestoreArray(d->A,&a);
1085:   return(0);
1086: }

1088: /* -------------------------------------------------------------------*/
1089: static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1090:                                         MatGetRow_MPIDense,
1091:                                         MatRestoreRow_MPIDense,
1092:                                         MatMult_MPIDense,
1093:                                 /*  4*/ MatMultAdd_MPIDense,
1094:                                         MatMultTranspose_MPIDense,
1095:                                         MatMultTransposeAdd_MPIDense,
1096:                                         0,
1097:                                         0,
1098:                                         0,
1099:                                 /* 10*/ 0,
1100:                                         0,
1101:                                         0,
1102:                                         0,
1103:                                         MatTranspose_MPIDense,
1104:                                 /* 15*/ MatGetInfo_MPIDense,
1105:                                         MatEqual_MPIDense,
1106:                                         MatGetDiagonal_MPIDense,
1107:                                         MatDiagonalScale_MPIDense,
1108:                                         MatNorm_MPIDense,
1109:                                 /* 20*/ MatAssemblyBegin_MPIDense,
1110:                                         MatAssemblyEnd_MPIDense,
1111:                                         MatSetOption_MPIDense,
1112:                                         MatZeroEntries_MPIDense,
1113:                                 /* 24*/ MatZeroRows_MPIDense,
1114:                                         0,
1115:                                         0,
1116:                                         0,
1117:                                         0,
1118:                                 /* 29*/ MatSetUp_MPIDense,
1119:                                         0,
1120:                                         0,
1121:                                         0,
1122:                                         0,
1123:                                 /* 34*/ MatDuplicate_MPIDense,
1124:                                         0,
1125:                                         0,
1126:                                         0,
1127:                                         0,
1128:                                 /* 39*/ MatAXPY_MPIDense,
1129:                                         MatGetSubMatrices_MPIDense,
1130:                                         0,
1131:                                         MatGetValues_MPIDense,
1132:                                         0,
1133:                                 /* 44*/ 0,
1134:                                         MatScale_MPIDense,
1135:                                         0,
1136:                                         0,
1137:                                         0,
1138:                                 /* 49*/ MatSetRandom_MPIDense,
1139:                                         0,
1140:                                         0,
1141:                                         0,
1142:                                         0,
1143:                                 /* 54*/ 0,
1144:                                         0,
1145:                                         0,
1146:                                         0,
1147:                                         0,
1148:                                 /* 59*/ MatGetSubMatrix_MPIDense,
1149:                                         MatDestroy_MPIDense,
1150:                                         MatView_MPIDense,
1151:                                         0,
1152:                                         0,
1153:                                 /* 64*/ 0,
1154:                                         0,
1155:                                         0,
1156:                                         0,
1157:                                         0,
1158:                                 /* 69*/ 0,
1159:                                         0,
1160:                                         0,
1161:                                         0,
1162:                                         0,
1163:                                 /* 74*/ 0,
1164:                                         0,
1165:                                         0,
1166:                                         0,
1167:                                         0,
1168:                                 /* 79*/ 0,
1169:                                         0,
1170:                                         0,
1171:                                         0,
1172:                                 /* 83*/ MatLoad_MPIDense,
1173:                                         0,
1174:                                         0,
1175:                                         0,
1176:                                         0,
1177:                                         0,
1178:                                 /* 89*/
1179:                                         0,
1180:                                         0,
1181:                                         0,
1182:                                         0,
1183:                                         0,
1184:                                 /* 94*/ 0,
1185:                                         0,
1186:                                         0,
1187:                                         0,
1188:                                         0,
1189:                                 /* 99*/ 0,
1190:                                         0,
1191:                                         0,
1192:                                         MatConjugate_MPIDense,
1193:                                         0,
1194:                                 /*104*/ 0,
1195:                                         MatRealPart_MPIDense,
1196:                                         MatImaginaryPart_MPIDense,
1197:                                         0,
1198:                                         0,
1199:                                 /*109*/ 0,
1200:                                         0,
1201:                                         0,
1202:                                         0,
1203:                                         0,
1204:                                 /*114*/ 0,
1205:                                         0,
1206:                                         0,
1207:                                         0,
1208:                                         0,
1209:                                 /*119*/ 0,
1210:                                         0,
1211:                                         0,
1212:                                         0,
1213:                                         0,
1214:                                 /*124*/ 0,
1215:                                         MatGetColumnNorms_MPIDense,
1216:                                         0,
1217:                                         0,
1218:                                         0,
1219:                                 /*129*/ 0,
1220:                                         0,
1221:                                         0,
1222:                                         0,
1223:                                         0,
1224:                                 /*134*/ 0,
1225:                                         0,
1226:                                         0,
1227:                                         0,
1228:                                         0,
1229:                                 /*139*/ 0,
1230:                                         0
1231: };

1235: PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1236: {
1237:   Mat_MPIDense   *a;

1241:   mat->preallocated = PETSC_TRUE;
1242:   /* Note:  For now, when data is specified, this assumes the user has correctly
1243:      allocated the local dense storage space.  We should add error checking. */

1245:   a       = (Mat_MPIDense*)mat->data;
1246:   PetscLayoutSetUp(mat->rmap);
1247:   PetscLayoutSetUp(mat->cmap);
1248:   a->nvec = mat->cmap->n;

1250:   MatCreate(PETSC_COMM_SELF,&a->A);
1251:   MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1252:   MatSetType(a->A,MATSEQDENSE);
1253:   MatSeqDenseSetPreallocation(a->A,data);
1254:   PetscLogObjectParent(mat,a->A);
1255:   return(0);
1256: }

1260: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1261: {
1262:   Mat_MPIDense   *a;

1266:   PetscNewLog(mat,Mat_MPIDense,&a);
1267:   mat->data = (void*)a;
1268:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));

1270:   mat->insertmode = NOT_SET_VALUES;
1271:   MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);
1272:   MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);

1274:   /* build cache for off-processor entries formed during MatSetValues() */
1275:   a->donotstash = PETSC_FALSE;

1277:   MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);

1279:   /* stuff used for matrix vector multiply */
1280:   a->lvec        = 0;
1281:   a->Mvctx       = 0;
1282:   a->roworiented = PETSC_TRUE;

1284:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);
1285:   PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);

1287:   PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);
1288:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);
1289:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);
1290:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);
1291:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);
1292:   PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1293:   return(0);
1294: }

1296: /*MC
1297:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1299:    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1300:    and MATMPIDENSE otherwise.

1302:    Options Database Keys:
1303: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

1305:   Level: beginner


1308: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1309: M*/
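
A usage sketch (illustrative, not part of the source): the "dense" type is typically selected through the options database; M and N below are placeholder global dimensions and error checking is omitted.

      Mat      A;
      PetscInt M = 100,N = 100;          /* placeholder global sizes */

      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      MatSetFromOptions(A);              /* -mat_type dense selects MATSEQDENSE or MATMPIDENSE */
      MatSetUp(A);                       /* for the MPI case this ends up in MatSetUp_MPIDense() above */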

1313: /*@C
1314:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1316:    Not collective

1318:    Input Parameters:
1319: +  A - the matrix
1320: -  data - optional location of matrix data.  Set data=NULL for PETSc
1321:    to control all matrix memory allocation.

1323:    Notes:
1324:    The dense format is fully compatible with standard Fortran 77
1325:    storage by columns.

1327:    The data input variable is intended primarily for Fortran programmers
1328:    who wish to allocate their own matrix memory space.  Most users should
1329:    set data=NULL.

1331:    Level: intermediate

1333: .keywords: matrix,dense, parallel

1335: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1336: @*/
1337: PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1338: {

1342:   PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));
1343:   return(0);
1344: }
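
A sketch of the usual calling sequence (illustrative, not from the source); the local sizes m and n are placeholders chosen by the caller and error checking is omitted.

      Mat      A;
      PetscInt m = 5,n = 5;                    /* placeholder local sizes */

      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,m,n,PETSC_DECIDE,PETSC_DECIDE);
      MatSetType(A,MATMPIDENSE);
      MatMPIDenseSetPreallocation(A,NULL);     /* NULL: PETSc allocates the column-major local array */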

1348: /*@C
1349:    MatCreateDense - Creates a parallel matrix in dense format.

1351:    Collective on MPI_Comm

1353:    Input Parameters:
1354: +  comm - MPI communicator
1355: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1356: .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1357: .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1358: .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1359: -  data - optional location of matrix data.  Set data=NULL (NULL_SCALAR for Fortran users) for PETSc
1360:    to control all matrix memory allocation.

1362:    Output Parameter:
1363: .  A - the matrix

1365:    Notes:
1366:    The dense format is fully compatible with standard Fortran 77
1367:    storage by columns.

1369:    The data input variable is intended primarily for Fortran programmers
1370:    who wish to allocate their own matrix memory space.  Most users should
1371:    set data=NULL (NULL_SCALAR for Fortran users).

1373:    The user MUST specify either the local or global matrix dimensions
1374:    (possibly both).

1376:    Level: intermediate

1378: .keywords: matrix,dense, parallel

1380: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1381: @*/
1382: PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1383: {
1385:   PetscMPIInt    size;

1388:   MatCreate(comm,A);
1389:   MatSetSizes(*A,m,n,M,N);
1390:   MPI_Comm_size(comm,&size);
1391:   if (size > 1) {
1392:     MatSetType(*A,MATMPIDENSE);
1393:     MatMPIDenseSetPreallocation(*A,data);
1394:   } else {
1395:     MatSetType(*A,MATSEQDENSE);
1396:     MatSeqDenseSetPreallocation(*A,data);
1397:   }
1398:   return(0);
1399: }
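
A short usage sketch (illustrative, not part of the source): create a distributed dense matrix, fill the locally owned rows, and assemble; the sizes and entry values are placeholders and error checking is omitted.

      Mat         A;
      PetscInt    i,j,rstart,rend,M = 10,N = 10;   /* placeholder global sizes */
      PetscScalar v;

      MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);
      MatGetOwnershipRange(A,&rstart,&rend);
      for (i=rstart; i<rend; i++) {
        for (j=0; j<N; j++) {
          v    = (PetscScalar)(i + j);             /* placeholder entry value */
          MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);
        }
      }
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      MatDestroy(&A);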

1403: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1404: {
1405:   Mat            mat;
1406:   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;

1410:   *newmat = 0;
1411:   MatCreate(PetscObjectComm((PetscObject)A),&mat);
1412:   MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1413:   MatSetType(mat,((PetscObject)A)->type_name);
1414:   a       = (Mat_MPIDense*)mat->data;
1415:   PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));

1417:   mat->factortype   = A->factortype;
1418:   mat->assembled    = PETSC_TRUE;
1419:   mat->preallocated = PETSC_TRUE;

1421:   a->size         = oldmat->size;
1422:   a->rank         = oldmat->rank;
1423:   mat->insertmode = NOT_SET_VALUES;
1424:   a->nvec         = oldmat->nvec;
1425:   a->donotstash   = oldmat->donotstash;

1427:   PetscLayoutReference(A->rmap,&mat->rmap);
1428:   PetscLayoutReference(A->cmap,&mat->cmap);

1430:   MatSetUpMultiply_MPIDense(mat);
1431:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1432:   PetscLogObjectParent(mat,a->A);

1434:   *newmat = mat;
1435:   return(0);
1436: }

1440: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1441: {
1443:   PetscMPIInt    rank,size;
1444:   PetscInt       *rowners,i,m,nz,j;
1445:   PetscScalar    *array,*vals,*vals_ptr;

1448:   MPI_Comm_rank(comm,&rank);
1449:   MPI_Comm_size(comm,&size);

1451:   /* determine ownership of all rows */
1452:   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1453:   else m = newmat->rmap->n;
1454:   PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1455:   MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
1456:   rowners[0] = 0;
1457:   for (i=2; i<=size; i++) {
1458:     rowners[i] += rowners[i-1];
1459:   }

1461:   if (!sizesset) {
1462:     MatSetSizes(newmat,m,PETSC_DECIDE,M,N);
1463:   }
1464:   MatMPIDenseSetPreallocation(newmat,NULL);
1465:   MatDenseGetArray(newmat,&array);

1467:   if (!rank) {
1468:     PetscMalloc(m*N*sizeof(PetscScalar),&vals);

1470:     /* read in my part of the matrix numerical values  */
1471:     PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);

1473:     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1474:     vals_ptr = vals;
1475:     for (i=0; i<m; i++) {
1476:       for (j=0; j<N; j++) {
1477:         array[i + j*m] = *vals_ptr++;
1478:       }
1479:     }

1481:     /* read in the other processors' parts and ship them out */
1482:     for (i=1; i<size; i++) {
1483:       nz   = (rowners[i+1] - rowners[i])*N;
1484:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1485:       MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);
1486:     }
1487:   } else {
1488:     /* receive numeric values */
1489:     PetscMalloc(m*N*sizeof(PetscScalar),&vals);

1491:     /* receive message of values*/
1492:     MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);

1494:     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1495:     vals_ptr = vals;
1496:     for (i=0; i<m; i++) {
1497:       for (j=0; j<N; j++) {
1498:         array[i + j*m] = *vals_ptr++;
1499:       }
1500:     }
1501:   }
1502:   MatDenseRestoreArray(newmat,&array);
1503:   PetscFree(rowners);
1504:   PetscFree(vals);
1505:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1506:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1507:   return(0);
1508: }

1512: PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1513: {
1514:   PetscScalar    *vals,*svals;
1515:   MPI_Comm       comm;
1516:   MPI_Status     status;
1517:   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1518:   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1519:   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1520:   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1521:   int            fd;

1525:   PetscObjectGetComm((PetscObject)viewer,&comm);
1526:   MPI_Comm_size(comm,&size);
1527:   MPI_Comm_rank(comm,&rank);
1528:   if (!rank) {
1529:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1530:     PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
1531:     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1532:   }
1533:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

1535:   MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1536:   M    = header[1]; N = header[2]; nz = header[3];

1538:   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1539:   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1540:   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

1542:   /* If global sizes are set, check if they are consistent with that given in the file */
1543:   if (sizesset) {
1544:     MatGetSize(newmat,&grows,&gcols);
1545:   }
1546:   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
1547:   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

1549:   /*
1550:        Handle case where matrix is stored on disk as a dense matrix
1551:   */
1552:   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1553:     MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);
1554:     return(0);
1555:   }

1557:   /* determine ownership of all rows */
1558:   if (newmat->rmap->n < 0) {
1559:     PetscMPIIntCast(M/size + ((M % size) > rank),&m);
1560:   } else {
1561:     PetscMPIIntCast(newmat->rmap->n,&m);
1562:   }
1563:   PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);
1564:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1565:   rowners[0] = 0;
1566:   for (i=2; i<=size; i++) {
1567:     rowners[i] += rowners[i-1];
1568:   }
1569:   rstart = rowners[rank];
1570:   rend   = rowners[rank+1];

1572:   /* distribute row lengths to all processors */
1573:   PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);
1574:   if (!rank) {
1575:     PetscMalloc(M*sizeof(PetscInt),&rowlengths);
1576:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1577:     PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);
1578:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1579:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1580:     PetscFree(sndcounts);
1581:   } else {
1582:     MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1583:   }

1585:   if (!rank) {
1586:     /* calculate the number of nonzeros on each processor */
1587:     PetscMalloc(size*sizeof(PetscInt),&procsnz);
1588:     PetscMemzero(procsnz,size*sizeof(PetscInt));
1589:     for (i=0; i<size; i++) {
1590:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1591:         procsnz[i] += rowlengths[j];
1592:       }
1593:     }
1594:     PetscFree(rowlengths);

1596:     /* determine max buffer needed and allocate it */
1597:     maxnz = 0;
1598:     for (i=0; i<size; i++) {
1599:       maxnz = PetscMax(maxnz,procsnz[i]);
1600:     }
1601:     PetscMalloc(maxnz*sizeof(PetscInt),&cols);

1603:     /* read in my part of the matrix column indices  */
1604:     nz   = procsnz[0];
1605:     PetscMalloc(nz*sizeof(PetscInt),&mycols);
1606:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

1608:     /* read in everyone else's part and ship it off */
1609:     for (i=1; i<size; i++) {
1610:       nz   = procsnz[i];
1611:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1612:       MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1613:     }
1614:     PetscFree(cols);
1615:   } else {
1616:     /* determine buffer space needed for message */
1617:     nz = 0;
1618:     for (i=0; i<m; i++) {
1619:       nz += ourlens[i];
1620:     }
1621:     PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);

1623:     /* receive message of column indices*/
1624:     MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1625:     MPI_Get_count(&status,MPIU_INT,&maxnz);
1626:     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1627:   }

1629:   /* loop over local rows, determining number of off diagonal entries */
1630:   PetscMemzero(offlens,m*sizeof(PetscInt));
1631:   jj   = 0;
1632:   for (i=0; i<m; i++) {
1633:     for (j=0; j<ourlens[i]; j++) {
1634:       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1635:       jj++;
1636:     }
1637:   }

1639:   /* create our matrix */
1640:   for (i=0; i<m; i++) ourlens[i] -= offlens[i];

1642:   if (!sizesset) {
1643:     MatSetSizes(newmat,m,PETSC_DECIDE,M,N);
1644:   }
1645:   MatMPIDenseSetPreallocation(newmat,NULL);
1646:   for (i=0; i<m; i++) ourlens[i] += offlens[i];

1648:   if (!rank) {
1649:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1651:     /* read in my part of the matrix numerical values  */
1652:     nz   = procsnz[0];
1653:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);

1655:     /* insert into matrix */
1656:     jj      = rstart;
1657:     smycols = mycols;
1658:     svals   = vals;
1659:     for (i=0; i<m; i++) {
1660:       MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1661:       smycols += ourlens[i];
1662:       svals   += ourlens[i];
1663:       jj++;
1664:     }

1666:     /* read in the other processors' parts and ship them out */
1667:     for (i=1; i<size; i++) {
1668:       nz   = procsnz[i];
1669:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1670:       MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
1671:     }
1672:     PetscFree(procsnz);
1673:   } else {
1674:     /* receive numeric values */
1675:     PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);

1677:     /* receive message of values*/
1678:     MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
1679:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1680:     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1682:     /* insert into matrix */
1683:     jj      = rstart;
1684:     smycols = mycols;
1685:     svals   = vals;
1686:     for (i=0; i<m; i++) {
1687:       MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1688:       smycols += ourlens[i];
1689:       svals   += ourlens[i];
1690:       jj++;
1691:     }
1692:   }
1693:   PetscFree2(ourlens,offlens);
1694:   PetscFree(vals);
1695:   PetscFree(mycols);
1696:   PetscFree(rowners);

1698:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1699:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1700:   return(0);
1701: }

1705: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1706: {
1707:   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1708:   Mat            a,b;
1709:   PetscBool      flg;

1713:   a    = matA->A;
1714:   b    = matB->A;
1715:   MatEqual(a,b,&flg);
1716:   MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1717:   return(0);
1718: }