Actual source code: mpidense.c
petsc-3.7.7 2017-09-25
2: /*
3: Basic functions for parallel dense matrices.
4: */
7: #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
8: #include <../src/mat/impls/aij/mpi/mpiaij.h>
9: #include <petscblaslapack.h>
13: /*@
15: MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix that stores
16: the local portion of the operator; for a MATSEQDENSE matrix it returns the matrix itself.
18: Input Parameter:
19: . A - the Seq or MPI dense matrix
21: Output Parameter:
22: . B - the inner matrix
24: Level: intermediate
26: @*/
27: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
28: {
29: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
31: PetscBool flg;
34: PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
35: if (flg) *B = mat->A;
36: else *B = A;
37: return(0);
38: }
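/*
   Illustrative sketch (not part of the original source): how a caller might use
   MatDenseGetLocalMatrix() to reach the per-process storage of a parallel dense
   matrix. The returned local matrix is owned by A and must not be destroyed;
   error checking (ierr/CHKERRQ) is omitted for brevity.

     Mat         A,Aloc;
     PetscScalar *array;

     MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,NULL,&A);
     MatDenseGetLocalMatrix(A,&Aloc);      (Aloc is the MATSEQDENSE block owned by this rank)
     MatDenseGetArray(Aloc,&array);        (column-major local storage)
     MatDenseRestoreArray(Aloc,&array);
     MatDestroy(&A);
*/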
42: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
43: {
44: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
46: PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
49: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
50: lrow = row - rstart;
51: MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
52: return(0);
53: }
57: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
58: {
62: if (idx) {PetscFree(*idx);}
63: if (v) {PetscFree(*v);}
64: return(0);
65: }
69: PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
70: {
71: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
73: PetscInt m = A->rmap->n,rstart = A->rmap->rstart;
74: PetscScalar *array;
75: MPI_Comm comm;
76: Mat B;
79: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
81: PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
82: if (!B) {
83: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
84: MatCreate(comm,&B);
85: MatSetSizes(B,m,m,m,m);
86: MatSetType(B,((PetscObject)mdn->A)->type_name);
87: MatDenseGetArray(mdn->A,&array);
88: MatSeqDenseSetPreallocation(B,array+m*rstart);
89: MatDenseRestoreArray(mdn->A,&array);
90: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
91: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
92: PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
93: *a = B;
94: MatDestroy(&B);
95: } else *a = B;
96: return(0);
97: }
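/*
   Illustrative sketch (not part of the original source): the routine above caches the
   diagonal block on A under the key "DiagonalBlock", so repeated queries are cheap.
   A caller obtains it through the public interface; the block is owned by A and is
   not destroyed by the caller. Error checking omitted.

     Mat A,Adiag;
     (A created as a square MATMPIDENSE matrix and assembled)
     MatGetDiagonalBlock(A,&Adiag);
     MatView(Adiag,PETSC_VIEWER_STDOUT_SELF);
*/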
101: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
102: {
103: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
105: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
106: PetscBool roworiented = A->roworiented;
109: for (i=0; i<m; i++) {
110: if (idxm[i] < 0) continue;
111: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
112: if (idxm[i] >= rstart && idxm[i] < rend) {
113: row = idxm[i] - rstart;
114: if (roworiented) {
115: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
116: } else {
117: for (j=0; j<n; j++) {
118: if (idxn[j] < 0) continue;
119: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
120: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
121: }
122: }
123: } else if (!A->donotstash) {
124: mat->assembled = PETSC_FALSE;
125: if (roworiented) {
126: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
127: } else {
128: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
129: }
130: }
131: }
132: return(0);
133: }
137: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
138: {
139: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
141: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
144: for (i=0; i<m; i++) {
145: if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
146: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
147: if (idxm[i] >= rstart && idxm[i] < rend) {
148: row = idxm[i] - rstart;
149: for (j=0; j<n; j++) {
150: if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
151: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
152: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
153: }
154: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
155: }
156: return(0);
157: }
161: PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
162: {
163: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
167: MatDenseGetArray(a->A,array);
168: return(0);
169: }
173: static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
174: {
175: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
176: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
178: PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
179: const PetscInt *irow,*icol;
180: PetscScalar *av,*bv,*v = lmat->v;
181: Mat newmat;
182: IS iscol_local;
185: ISAllGather(iscol,&iscol_local);
186: ISGetIndices(isrow,&irow);
187: ISGetIndices(iscol_local,&icol);
188: ISGetLocalSize(isrow,&nrows);
189: ISGetLocalSize(iscol,&ncols);
190: ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */
192: /* No parallel redistribution currently supported! Should really check each index set
193: to confirm that it is OK. ... Currently supports only submatrices with the same row
194: partitioning as the original matrix! */
196: MatGetLocalSize(A,&nlrows,&nlcols);
197: MatGetOwnershipRange(A,&rstart,&rend);
199: /* Check submatrix call */
200: if (scall == MAT_REUSE_MATRIX) {
201: /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
202: /* Really need to test rows and column sizes! */
203: newmat = *B;
204: } else {
205: /* Create and fill new matrix */
206: MatCreate(PetscObjectComm((PetscObject)A),&newmat);
207: MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
208: MatSetType(newmat,((PetscObject)A)->type_name);
209: MatMPIDenseSetPreallocation(newmat,NULL);
210: }
212: /* Now extract the data pointers and do the copy, column at a time */
213: newmatd = (Mat_MPIDense*)newmat->data;
214: bv = ((Mat_SeqDense*)newmatd->A->data)->v;
216: for (i=0; i<Ncols; i++) {
217: av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
218: for (j=0; j<nrows; j++) {
219: *bv++ = av[irow[j] - rstart];
220: }
221: }
223: /* Assemble the matrices so that the correct flags are set */
224: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
225: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
227: /* Free work space */
228: ISRestoreIndices(isrow,&irow);
229: ISRestoreIndices(iscol_local,&icol);
230: ISDestroy(&iscol_local);
231: *B = newmat;
232: return(0);
233: }
237: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
238: {
239: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
243: MatDenseRestoreArray(a->A,array);
244: return(0);
245: }
249: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
250: {
251: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
252: MPI_Comm comm;
254: PetscInt nstash,reallocs;
255: InsertMode addv;
258: PetscObjectGetComm((PetscObject)mat,&comm);
259: /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
260: MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);
261: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
262: mat->insertmode = addv; /* in case this processor had no cache */
264: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
265: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
266: PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
267: return(0);
268: }
272: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
273: {
274: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
276: PetscInt i,*row,*col,flg,j,rstart,ncols;
277: PetscMPIInt n;
278: PetscScalar *val;
281: /* wait on receives */
282: while (1) {
283: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
284: if (!flg) break;
286: for (i=0; i<n;) {
287: /* Now identify the consecutive vals belonging to the same row */
288: for (j=i,rstart=row[j]; j<n; j++) {
289: if (row[j] != rstart) break;
290: }
291: if (j < n) ncols = j-i;
292: else ncols = n-i;
293: /* Now assemble all these values with a single function call */
294: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
295: i = j;
296: }
297: }
298: MatStashScatterEnd_Private(&mat->stash);
300: MatAssemblyBegin(mdn->A,mode);
301: MatAssemblyEnd(mdn->A,mode);
303: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
304: MatSetUpMultiply_MPIDense(mat);
305: }
306: return(0);
307: }
311: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
312: {
314: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
317: MatZeroEntries(l->A);
318: return(0);
319: }
321: /* the code does not do the diagonal entries correctly unless the
322: matrix is square and the column and row ownerships are identical.
323: This is a BUG. The only way to fix it seems to be to access
324: mdn->A and mdn->B directly and not through the MatZeroRows()
325: routine.
326: */
329: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
330: {
331: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
332: PetscErrorCode ierr;
333: PetscInt i,*owners = A->rmap->range;
334: PetscInt *sizes,j,idx,nsends;
335: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
336: PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
337: PetscInt *lens,*lrows,*values;
338: PetscMPIInt n,imdex,rank = l->rank,size = l->size;
339: MPI_Comm comm;
340: MPI_Request *send_waits,*recv_waits;
341: MPI_Status recv_status,*send_status;
342: PetscBool found;
343: const PetscScalar *xx;
344: PetscScalar *bb;
347: PetscObjectGetComm((PetscObject)A,&comm);
348: if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
349: if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
350: /* first count number of contributors to each processor */
351: PetscCalloc1(2*size,&sizes);
352: PetscMalloc1(N+1,&owner); /* see note*/
353: for (i=0; i<N; i++) {
354: idx = rows[i];
355: found = PETSC_FALSE;
356: for (j=0; j<size; j++) {
357: if (idx >= owners[j] && idx < owners[j+1]) {
358: sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
359: }
360: }
361: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
362: }
363: nsends = 0;
364: for (i=0; i<size; i++) nsends += sizes[2*i+1];
366: /* inform other processors of number of messages and max length*/
367: PetscMaxSum(comm,sizes,&nmax,&nrecvs);
369: /* post receives: */
370: PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);
371: PetscMalloc1(nrecvs+1,&recv_waits);
372: for (i=0; i<nrecvs; i++) {
373: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
374: }
376: /* do sends:
377: 1) starts[i] gives the starting index in svalues for stuff going to
378: the ith processor
379: */
380: PetscMalloc1(N+1,&svalues);
381: PetscMalloc1(nsends+1,&send_waits);
382: PetscMalloc1(size+1,&starts);
384: starts[0] = 0;
385: for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
386: for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
388: starts[0] = 0;
389: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
390: count = 0;
391: for (i=0; i<size; i++) {
392: if (sizes[2*i+1]) {
393: MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
394: }
395: }
396: PetscFree(starts);
398: base = owners[rank];
400: /* wait on receives */
401: PetscMalloc2(nrecvs,&lens,nrecvs,&source);
402: count = nrecvs;
403: slen = 0;
404: while (count) {
405: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
406: /* unpack receives into our local space */
407: MPI_Get_count(&recv_status,MPIU_INT,&n);
409: source[imdex] = recv_status.MPI_SOURCE;
410: lens[imdex] = n;
411: slen += n;
412: count--;
413: }
414: PetscFree(recv_waits);
416: /* move the data into the send scatter */
417: PetscMalloc1(slen+1,&lrows);
418: count = 0;
419: for (i=0; i<nrecvs; i++) {
420: values = rvalues + i*nmax;
421: for (j=0; j<lens[i]; j++) {
422: lrows[count++] = values[j] - base;
423: }
424: }
425: PetscFree(rvalues);
426: PetscFree2(lens,source);
427: PetscFree(owner);
428: PetscFree(sizes);
430: /* fix right hand side if needed */
431: if (x && b) {
432: VecGetArrayRead(x,&xx);
433: VecGetArray(b,&bb);
434: for (i=0; i<slen; i++) {
435: bb[lrows[i]] = diag*xx[lrows[i]];
436: }
437: VecRestoreArrayRead(x,&xx);
438: VecRestoreArray(b,&bb);
439: }
441: /* actually zap the local rows */
442: MatZeroRows(l->A,slen,lrows,0.0,0,0);
443: if (diag != 0.0) {
444: Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
445: PetscInt m = ll->lda, i;
447: for (i=0; i<slen; i++) {
448: ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
449: }
450: }
451: PetscFree(lrows);
453: /* wait on sends */
454: if (nsends) {
455: PetscMalloc1(nsends,&send_status);
456: MPI_Waitall(nsends,send_waits,send_status);
457: PetscFree(send_status);
458: }
459: PetscFree(send_waits);
460: PetscFree(svalues);
461: return(0);
462: }
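/*
   Illustrative sketch (not part of the original source): typical use of MatZeroRows() on a
   parallel dense matrix; each process may pass any global rows, and the routine above ships
   them to their owners. A, x, b, and rstart are assumed to exist; error checking omitted.

     PetscInt rows[2] = {rstart,rstart+1};
     MatZeroRows(A,2,rows,2.0,x,b);    (zero two rows, put 2.0 on their diagonal, fix b = 2.0*x there)
*/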
464: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
465: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
466: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
467: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
471: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
472: {
473: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
477: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
478: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
479: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
480: return(0);
481: }
485: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
486: {
487: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
491: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
492: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
493: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
494: return(0);
495: }
499: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
500: {
501: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
503: PetscScalar zero = 0.0;
506: VecSet(yy,zero);
507: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
508: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
509: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
510: return(0);
511: }
515: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
516: {
517: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
521: VecCopy(yy,zz);
522: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
523: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
524: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
525: return(0);
526: }
530: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
531: {
532: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
533: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
535: PetscInt len,i,n,m = A->rmap->n,radd;
536: PetscScalar *x,zero = 0.0;
539: VecSet(v,zero);
540: VecGetArray(v,&x);
541: VecGetSize(v,&n);
542: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
543: len = PetscMin(a->A->rmap->n,a->A->cmap->n);
544: radd = A->rmap->rstart*m;
545: for (i=0; i<len; i++) {
546: x[i] = aloc->v[radd + i*m + i];
547: }
548: VecRestoreArray(v,&x);
549: return(0);
550: }
554: PetscErrorCode MatDestroy_MPIDense(Mat mat)
555: {
556: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
560: #if defined(PETSC_USE_LOG)
561: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
562: #endif
563: MatStashDestroy_Private(&mat->stash);
564: MatDestroy(&mdn->A);
565: VecDestroy(&mdn->lvec);
566: VecScatterDestroy(&mdn->Mvctx);
568: PetscFree(mat->data);
569: PetscObjectChangeTypeName((PetscObject)mat,0);
571: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);
572: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);
573: #if defined(PETSC_HAVE_ELEMENTAL)
574: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);
575: #endif
576: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);
577: PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);
578: PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);
579: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);
580: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);
581: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);
582: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);
583: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);
584: return(0);
585: }
589: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
590: {
591: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
592: PetscErrorCode ierr;
593: PetscViewerFormat format;
594: int fd;
595: PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k;
596: PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size;
597: PetscScalar *work,*v,*vv;
598: Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data;
601: if (mdn->size == 1) {
602: MatView(mdn->A,viewer);
603: } else {
604: PetscViewerBinaryGetDescriptor(viewer,&fd);
605: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
606: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
608: PetscViewerGetFormat(viewer,&format);
609: if (format == PETSC_VIEWER_NATIVE) {
611: if (!rank) {
612: /* store the matrix as a dense matrix */
613: header[0] = MAT_FILE_CLASSID;
614: header[1] = mat->rmap->N;
615: header[2] = N;
616: header[3] = MATRIX_BINARY_FORMAT_DENSE;
617: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
619: /* get largest work array needed for transposing array */
620: mmax = mat->rmap->n;
621: for (i=1; i<size; i++) {
622: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
623: }
624: PetscMalloc1(mmax*N,&work);
626: /* write out local array, by rows */
627: m = mat->rmap->n;
628: v = a->v;
629: for (j=0; j<N; j++) {
630: for (i=0; i<m; i++) {
631: work[j + i*N] = *v++;
632: }
633: }
634: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
635: /* get largest work array to receive messages from other processes, excludes process zero */
636: mmax = 0;
637: for (i=1; i<size; i++) {
638: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
639: }
640: PetscMalloc1(mmax*N,&vv);
641: for (k = 1; k < size; k++) {
642: v = vv;
643: m = mat->rmap->range[k+1] - mat->rmap->range[k];
644: MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));
646: for (j = 0; j < N; j++) {
647: for (i = 0; i < m; i++) {
648: work[j + i*N] = *v++;
649: }
650: }
651: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
652: }
653: PetscFree(work);
654: PetscFree(vv);
655: } else {
656: MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
657: }
658: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
659: }
660: return(0);
661: }
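/*
   Illustrative sketch (not part of the original source): writing a parallel dense matrix to a
   binary file. As the error message above indicates, the viewer must be pushed into native
   format first. "matrix.dat" is a placeholder filename; error checking omitted.

     PetscViewer viewer;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);
     PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE);
     MatView(A,viewer);
     PetscViewerPopFormat(viewer);
     PetscViewerDestroy(&viewer);
*/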
663: extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
664: #include <petscdraw.h>
667: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
668: {
669: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
670: PetscErrorCode ierr;
671: PetscMPIInt rank = mdn->rank;
672: PetscViewerType vtype;
673: PetscBool iascii,isdraw;
674: PetscViewer sviewer;
675: PetscViewerFormat format;
678: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
679: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
680: if (iascii) {
681: PetscViewerGetType(viewer,&vtype);
682: PetscViewerGetFormat(viewer,&format);
683: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
684: MatInfo info;
685: MatGetInfo(mat,MAT_LOCAL,&info);
686: PetscViewerASCIIPushSynchronized(viewer);
687: PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
688: PetscViewerFlush(viewer);
689: PetscViewerASCIIPopSynchronized(viewer);
690: VecScatterView(mdn->Mvctx,viewer);
691: return(0);
692: } else if (format == PETSC_VIEWER_ASCII_INFO) {
693: return(0);
694: }
695: } else if (isdraw) {
696: PetscDraw draw;
697: PetscBool isnull;
699: PetscViewerDrawGetDraw(viewer,0,&draw);
700: PetscDrawIsNull(draw,&isnull);
701: if (isnull) return(0);
702: }
704: {
705: /* assemble the entire matrix onto first processor. */
706: Mat A;
707: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
708: PetscInt *cols;
709: PetscScalar *vals;
711: MatCreate(PetscObjectComm((PetscObject)mat),&A);
712: if (!rank) {
713: MatSetSizes(A,M,N,M,N);
714: } else {
715: MatSetSizes(A,0,0,M,N);
716: }
717: /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
718: MatSetType(A,MATMPIDENSE);
719: MatMPIDenseSetPreallocation(A,NULL);
720: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
722: /* Copy the matrix ... This isn't the most efficient means,
723: but it's quick for now */
724: A->insertmode = INSERT_VALUES;
726: row = mat->rmap->rstart;
727: m = mdn->A->rmap->n;
728: for (i=0; i<m; i++) {
729: MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
730: MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
731: MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
732: row++;
733: }
735: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
736: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
737: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
738: if (!rank) {
739: PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);
740: MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);
741: }
742: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
743: PetscViewerFlush(viewer);
744: MatDestroy(&A);
745: }
746: return(0);
747: }
751: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
752: {
754: PetscBool iascii,isbinary,isdraw,issocket;
757: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
758: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
759: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
760: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
762: if (iascii || issocket || isdraw) {
763: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
764: } else if (isbinary) {
765: MatView_MPIDense_Binary(mat,viewer);
766: }
767: return(0);
768: }
772: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
773: {
774: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
775: Mat mdn = mat->A;
777: PetscReal isend[5],irecv[5];
780: info->block_size = 1.0;
782: MatGetInfo(mdn,MAT_LOCAL,info);
784: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
785: isend[3] = info->memory; isend[4] = info->mallocs;
786: if (flag == MAT_LOCAL) {
787: info->nz_used = isend[0];
788: info->nz_allocated = isend[1];
789: info->nz_unneeded = isend[2];
790: info->memory = isend[3];
791: info->mallocs = isend[4];
792: } else if (flag == MAT_GLOBAL_MAX) {
793: MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
795: info->nz_used = irecv[0];
796: info->nz_allocated = irecv[1];
797: info->nz_unneeded = irecv[2];
798: info->memory = irecv[3];
799: info->mallocs = irecv[4];
800: } else if (flag == MAT_GLOBAL_SUM) {
801: MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
803: info->nz_used = irecv[0];
804: info->nz_allocated = irecv[1];
805: info->nz_unneeded = irecv[2];
806: info->memory = irecv[3];
807: info->mallocs = irecv[4];
808: }
809: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
810: info->fill_ratio_needed = 0;
811: info->factor_mallocs = 0;
812: return(0);
813: }
817: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
818: {
819: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
823: switch (op) {
824: case MAT_NEW_NONZERO_LOCATIONS:
825: case MAT_NEW_NONZERO_LOCATION_ERR:
826: case MAT_NEW_NONZERO_ALLOCATION_ERR:
827: MatCheckPreallocated(A,1);
828: MatSetOption(a->A,op,flg);
829: break;
830: case MAT_ROW_ORIENTED:
831: MatCheckPreallocated(A,1);
832: a->roworiented = flg;
833: MatSetOption(a->A,op,flg);
834: break;
835: case MAT_NEW_DIAGONALS:
836: case MAT_KEEP_NONZERO_PATTERN:
837: case MAT_USE_HASH_TABLE:
838: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
839: break;
840: case MAT_IGNORE_OFF_PROC_ENTRIES:
841: a->donotstash = flg;
842: break;
843: case MAT_SYMMETRIC:
844: case MAT_STRUCTURALLY_SYMMETRIC:
845: case MAT_HERMITIAN:
846: case MAT_SYMMETRY_ETERNAL:
847: case MAT_IGNORE_LOWER_TRIANGULAR:
848: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
849: break;
850: default:
851: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
852: }
853: return(0);
854: }
859: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
860: {
861: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
862: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
863: PetscScalar *l,*r,x,*v;
865: PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
868: MatGetLocalSize(A,&s2,&s3);
869: if (ll) {
870: VecGetLocalSize(ll,&s2a);
871: if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
872: VecGetArray(ll,&l);
873: for (i=0; i<m; i++) {
874: x = l[i];
875: v = mat->v + i;
876: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
877: }
878: VecRestoreArray(ll,&l);
879: PetscLogFlops(n*m);
880: }
881: if (rr) {
882: VecGetLocalSize(rr,&s3a);
883: if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
884: VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
885: VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
886: VecGetArray(mdn->lvec,&r);
887: for (i=0; i<n; i++) {
888: x = r[i];
889: v = mat->v + i*m;
890: for (j=0; j<m; j++) (*v++) *= x;
891: }
892: VecRestoreArray(mdn->lvec,&r);
893: PetscLogFlops(n*m);
894: }
895: return(0);
896: }
900: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
901: {
902: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
903: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
905: PetscInt i,j;
906: PetscReal sum = 0.0;
907: PetscScalar *v = mat->v;
910: if (mdn->size == 1) {
911: MatNorm(mdn->A,type,nrm);
912: } else {
913: if (type == NORM_FROBENIUS) {
914: for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
915: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
916: }
917: MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
918: *nrm = PetscSqrtReal(*nrm);
919: PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
920: } else if (type == NORM_1) {
921: PetscReal *tmp,*tmp2;
922: PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);
923: PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));
924: PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));
925: *nrm = 0.0;
926: v = mat->v;
927: for (j=0; j<mdn->A->cmap->n; j++) {
928: for (i=0; i<mdn->A->rmap->n; i++) {
929: tmp[j] += PetscAbsScalar(*v); v++;
930: }
931: }
932: MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
933: for (j=0; j<A->cmap->N; j++) {
934: if (tmp2[j] > *nrm) *nrm = tmp2[j];
935: }
936: PetscFree2(tmp,tmp2);
937: PetscLogFlops(A->cmap->n*A->rmap->n);
938: } else if (type == NORM_INFINITY) { /* max row norm */
939: PetscReal ntemp;
940: MatNorm(mdn->A,type,&ntemp);
941: MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
942: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
943: }
944: return(0);
945: }
949: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
950: {
951: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
952: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
953: Mat B;
954: PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
956: PetscInt j,i;
957: PetscScalar *v;
960: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
961: if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
962: MatCreate(PetscObjectComm((PetscObject)A),&B);
963: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
964: MatSetType(B,((PetscObject)A)->type_name);
965: MatMPIDenseSetPreallocation(B,NULL);
966: } else {
967: B = *matout;
968: }
970: m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
971: PetscMalloc1(m,&rwork);
972: for (i=0; i<m; i++) rwork[i] = rstart + i;
973: for (j=0; j<n; j++) {
974: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
975: v += m;
976: }
977: PetscFree(rwork);
978: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
979: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
980: if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
981: *matout = B;
982: } else {
983: MatHeaderMerge(A,&B);
984: }
985: return(0);
986: }
989: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
990: extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
994: PetscErrorCode MatSetUp_MPIDense(Mat A)
995: {
999: MatMPIDenseSetPreallocation(A,0);
1000: return(0);
1001: }
1005: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1006: {
1008: Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1011: MatAXPY(A->A,alpha,B->A,str);
1012: PetscObjectStateIncrease((PetscObject)Y);
1013: return(0);
1014: }
1018: PetscErrorCode MatConjugate_MPIDense(Mat mat)
1019: {
1020: Mat_MPIDense *a = (Mat_MPIDense*)mat->data;
1024: MatConjugate(a->A);
1025: return(0);
1026: }
1030: PetscErrorCode MatRealPart_MPIDense(Mat A)
1031: {
1032: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1036: MatRealPart(a->A);
1037: return(0);
1038: }
1042: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1043: {
1044: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1048: MatImaginaryPart(a->A);
1049: return(0);
1050: }
1052: extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1055: PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1056: {
1058: PetscInt i,n;
1059: Mat_MPIDense *a = (Mat_MPIDense*) A->data;
1060: PetscReal *work;
1063: MatGetSize(A,NULL,&n);
1064: PetscMalloc1(n,&work);
1065: MatGetColumnNorms_SeqDense(a->A,type,work);
1066: if (type == NORM_2) {
1067: for (i=0; i<n; i++) work[i] *= work[i];
1068: }
1069: if (type == NORM_INFINITY) {
1070: MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
1071: } else {
1072: MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
1073: }
1074: PetscFree(work);
1075: if (type == NORM_2) {
1076: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1077: }
1078: return(0);
1079: }
1083: static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1084: {
1085: Mat_MPIDense *d = (Mat_MPIDense*)x->data;
1087: PetscScalar *a;
1088: PetscInt m,n,i;
1091: MatGetSize(d->A,&m,&n);
1092: MatDenseGetArray(d->A,&a);
1093: for (i=0; i<m*n; i++) {
1094: PetscRandomGetValue(rctx,a+i);
1095: }
1096: MatDenseRestoreArray(d->A,&a);
1097: return(0);
1098: }
1100: extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1104: static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
1105: {
1107: *missing = PETSC_FALSE;
1108: return(0);
1109: }
1111: /* -------------------------------------------------------------------*/
1112: static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1113: MatGetRow_MPIDense,
1114: MatRestoreRow_MPIDense,
1115: MatMult_MPIDense,
1116: /* 4*/ MatMultAdd_MPIDense,
1117: MatMultTranspose_MPIDense,
1118: MatMultTransposeAdd_MPIDense,
1119: 0,
1120: 0,
1121: 0,
1122: /* 10*/ 0,
1123: 0,
1124: 0,
1125: 0,
1126: MatTranspose_MPIDense,
1127: /* 15*/ MatGetInfo_MPIDense,
1128: MatEqual_MPIDense,
1129: MatGetDiagonal_MPIDense,
1130: MatDiagonalScale_MPIDense,
1131: MatNorm_MPIDense,
1132: /* 20*/ MatAssemblyBegin_MPIDense,
1133: MatAssemblyEnd_MPIDense,
1134: MatSetOption_MPIDense,
1135: MatZeroEntries_MPIDense,
1136: /* 24*/ MatZeroRows_MPIDense,
1137: 0,
1138: 0,
1139: 0,
1140: 0,
1141: /* 29*/ MatSetUp_MPIDense,
1142: 0,
1143: 0,
1144: 0,
1145: 0,
1146: /* 34*/ MatDuplicate_MPIDense,
1147: 0,
1148: 0,
1149: 0,
1150: 0,
1151: /* 39*/ MatAXPY_MPIDense,
1152: MatGetSubMatrices_MPIDense,
1153: 0,
1154: MatGetValues_MPIDense,
1155: 0,
1156: /* 44*/ 0,
1157: MatScale_MPIDense,
1158: MatShift_Basic,
1159: 0,
1160: 0,
1161: /* 49*/ MatSetRandom_MPIDense,
1162: 0,
1163: 0,
1164: 0,
1165: 0,
1166: /* 54*/ 0,
1167: 0,
1168: 0,
1169: 0,
1170: 0,
1171: /* 59*/ MatGetSubMatrix_MPIDense,
1172: MatDestroy_MPIDense,
1173: MatView_MPIDense,
1174: 0,
1175: 0,
1176: /* 64*/ 0,
1177: 0,
1178: 0,
1179: 0,
1180: 0,
1181: /* 69*/ 0,
1182: 0,
1183: 0,
1184: 0,
1185: 0,
1186: /* 74*/ 0,
1187: 0,
1188: 0,
1189: 0,
1190: 0,
1191: /* 79*/ 0,
1192: 0,
1193: 0,
1194: 0,
1195: /* 83*/ MatLoad_MPIDense,
1196: 0,
1197: 0,
1198: 0,
1199: 0,
1200: 0,
1201: #if defined(PETSC_HAVE_ELEMENTAL)
1202: /* 89*/ MatMatMult_MPIDense_MPIDense,
1203: MatMatMultSymbolic_MPIDense_MPIDense,
1204: #else
1205: /* 89*/ 0,
1206: 0,
1207: #endif
1208: MatMatMultNumeric_MPIDense,
1209: 0,
1210: 0,
1211: /* 94*/ 0,
1212: 0,
1213: 0,
1214: 0,
1215: 0,
1216: /* 99*/ 0,
1217: 0,
1218: 0,
1219: MatConjugate_MPIDense,
1220: 0,
1221: /*104*/ 0,
1222: MatRealPart_MPIDense,
1223: MatImaginaryPart_MPIDense,
1224: 0,
1225: 0,
1226: /*109*/ 0,
1227: 0,
1228: 0,
1229: 0,
1230: MatMissingDiagonal_MPIDense,
1231: /*114*/ 0,
1232: 0,
1233: 0,
1234: 0,
1235: 0,
1236: /*119*/ 0,
1237: 0,
1238: 0,
1239: 0,
1240: 0,
1241: /*124*/ 0,
1242: MatGetColumnNorms_MPIDense,
1243: 0,
1244: 0,
1245: 0,
1246: /*129*/ 0,
1247: MatTransposeMatMult_MPIDense_MPIDense,
1248: MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1249: MatTransposeMatMultNumeric_MPIDense_MPIDense,
1250: 0,
1251: /*134*/ 0,
1252: 0,
1253: 0,
1254: 0,
1255: 0,
1256: /*139*/ 0,
1257: 0,
1258: 0
1259: };
1263: PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1264: {
1265: Mat_MPIDense *a;
1269: mat->preallocated = PETSC_TRUE;
1270: /* Note: For now, when data is specified above, this assumes the user correctly
1271: allocates the local dense storage space. We should add error checking. */
1273: a = (Mat_MPIDense*)mat->data;
1274: PetscLayoutSetUp(mat->rmap);
1275: PetscLayoutSetUp(mat->cmap);
1276: a->nvec = mat->cmap->n;
1278: MatCreate(PETSC_COMM_SELF,&a->A);
1279: MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1280: MatSetType(a->A,MATSEQDENSE);
1281: MatSeqDenseSetPreallocation(a->A,data);
1282: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1283: return(0);
1284: }
1286: #if defined(PETSC_HAVE_ELEMENTAL)
1289: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1290: {
1291: Mat mat_elemental;
1293: PetscScalar *v;
1294: PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1295:
1297: if (reuse == MAT_REUSE_MATRIX) {
1298: mat_elemental = *newmat;
1299: MatZeroEntries(*newmat);
1300: } else {
1301: MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
1302: MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);
1303: MatSetType(mat_elemental,MATELEMENTAL);
1304: MatSetUp(mat_elemental);
1305: MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);
1306: }
1308: PetscMalloc2(m,&rows,N,&cols);
1309: for (i=0; i<N; i++) cols[i] = i;
1310: for (i=0; i<m; i++) rows[i] = rstart + i;
1311:
1312: /* PETSc-Elemental interface uses axpy for setting off-processor entries; only ADD_VALUES is allowed */
1313: MatDenseGetArray(A,&v);
1314: MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);
1315: MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
1316: MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
1317: MatDenseRestoreArray(A,&v);
1318: PetscFree2(rows,cols);
1320: if (reuse == MAT_INPLACE_MATRIX) {
1321: MatHeaderReplace(A,&mat_elemental);
1322: } else {
1323: *newmat = mat_elemental;
1324: }
1325: return(0);
1326: }
1327: #endif
1331: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1332: {
1333: Mat_MPIDense *a;
1337: PetscNewLog(mat,&a);
1338: mat->data = (void*)a;
1339: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1341: mat->insertmode = NOT_SET_VALUES;
1342: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);
1343: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);
1345: /* build cache for off array entries formed */
1346: a->donotstash = PETSC_FALSE;
1348: MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);
1350: /* stuff used for matrix vector multiply */
1351: a->lvec = 0;
1352: a->Mvctx = 0;
1353: a->roworiented = PETSC_TRUE;
1355: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);
1356: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);
1357: #if defined(PETSC_HAVE_ELEMENTAL)
1358: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);
1359: #endif
1360: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);
1361: PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);
1362: PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);
1363: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);
1364: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);
1366: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);
1367: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);
1368: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);
1369: PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1370: return(0);
1371: }
1373: /*MC
1374: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1376: This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1377: and MATMPIDENSE otherwise.
1379: Options Database Keys:
1380: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1382: Level: beginner
1385: .seealso: MatCreateDense(),MATSEQDENSE,MATMPIDENSE
1386: M*/
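/*
   Illustrative sketch (not part of the original source): selecting MATDENSE either in code or
   from the options database. On one process this produces a MATSEQDENSE matrix, otherwise a
   MATMPIDENSE matrix. Error checking omitted.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(A,MATDENSE);          (or run with -mat_type dense and call MatSetFromOptions(A))
     MatSetUp(A);
*/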
1390: /*@C
1391: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1393: Not collective
1395: Input Parameters:
1396: + B - the matrix
1397: - data - optional location of matrix data. Set data=NULL for PETSc
1398: to control all matrix memory allocation.
1400: Notes:
1401: The dense format is fully compatible with standard Fortran 77
1402: storage by columns.
1404: The data input variable is intended primarily for Fortran programmers
1405: who wish to allocate their own matrix memory space. Most users should
1406: set data=NULL.
1408: Level: intermediate
1410: .keywords: matrix,dense, parallel
1412: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1413: @*/
1414: PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1415: {
1419: PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
1420: return(0);
1421: }
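/*
   Illustrative sketch (not part of the original source): the two preallocation modes described
   above. The local sizes m and n and the user array mydata (of length m*N, stored by columns)
   are assumptions of this example; error checking omitted.

     Mat B;
     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,m,n,PETSC_DECIDE,PETSC_DECIDE);
     MatSetType(B,MATMPIDENSE);
     MatMPIDenseSetPreallocation(B,NULL);      (PETSc allocates the local dense array)

   or, to wrap storage the caller manages itself,

     MatMPIDenseSetPreallocation(B,mydata);
*/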
1425: /*@C
1426: MatCreateDense - Creates a parallel matrix in dense format.
1428: Collective on MPI_Comm
1430: Input Parameters:
1431: + comm - MPI communicator
1432: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1433: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1434: . M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1435: . N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1436: - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1437: to control all matrix memory allocation.
1439: Output Parameter:
1440: . A - the matrix
1442: Notes:
1443: The dense format is fully compatible with standard Fortran 77
1444: storage by columns.
1446: The data input variable is intended primarily for Fortran programmers
1447: who wish to allocate their own matrix memory space. Most users should
1448: set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1450: The user MUST specify either the local or global matrix dimensions
1451: (possibly both).
1453: Level: intermediate
1455: .keywords: matrix,dense, parallel
1457: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1458: @*/
1459: PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1460: {
1462: PetscMPIInt size;
1465: MatCreate(comm,A);
1466: MatSetSizes(*A,m,n,M,N);
1467: MPI_Comm_size(comm,&size);
1468: if (size > 1) {
1469: MatSetType(*A,MATMPIDENSE);
1470: MatMPIDenseSetPreallocation(*A,data);
1471: if (data) { /* user provided data array, so no need to assemble */
1472: MatSetUpMultiply_MPIDense(*A);
1473: (*A)->assembled = PETSC_TRUE;
1474: }
1475: } else {
1476: MatSetType(*A,MATSEQDENSE);
1477: MatSeqDenseSetPreallocation(*A,data);
1478: }
1479: return(0);
1480: }
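/*
   Illustrative sketch (not part of the original source): creating and filling a parallel dense
   matrix with MatCreateDense(). Any process may set any entry before assembly; here each
   process sets only the rows it owns. Error checking omitted.

     Mat      A;
     PetscInt i,j,rstart,rend;

     MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,50,50,NULL,&A);
     MatGetOwnershipRange(A,&rstart,&rend);
     for (i=rstart; i<rend; i++) {
       for (j=0; j<50; j++) {
         MatSetValue(A,i,j,1.0/(i+j+1),INSERT_VALUES);
       }
     }
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDestroy(&A);
*/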
1484: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1485: {
1486: Mat mat;
1487: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1491: *newmat = 0;
1492: MatCreate(PetscObjectComm((PetscObject)A),&mat);
1493: MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1494: MatSetType(mat,((PetscObject)A)->type_name);
1495: a = (Mat_MPIDense*)mat->data;
1496: PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));
1498: mat->factortype = A->factortype;
1499: mat->assembled = PETSC_TRUE;
1500: mat->preallocated = PETSC_TRUE;
1502: a->size = oldmat->size;
1503: a->rank = oldmat->rank;
1504: mat->insertmode = NOT_SET_VALUES;
1505: a->nvec = oldmat->nvec;
1506: a->donotstash = oldmat->donotstash;
1508: PetscLayoutReference(A->rmap,&mat->rmap);
1509: PetscLayoutReference(A->cmap,&mat->cmap);
1511: MatSetUpMultiply_MPIDense(mat);
1512: MatDuplicate(oldmat->A,cpvalues,&a->A);
1513: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1515: *newmat = mat;
1516: return(0);
1517: }
1521: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1522: {
1524: PetscMPIInt rank,size;
1525: const PetscInt *rowners;
1526: PetscInt i,m,n,nz,j,mMax;
1527: PetscScalar *array,*vals,*vals_ptr;
1528: Mat_MPIDense *a = (Mat_MPIDense*)newmat->data;
1531: MPI_Comm_rank(comm,&rank);
1532: MPI_Comm_size(comm,&size);
1534: /* determine ownership of rows and columns */
1535: m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1536: n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1538: MatSetSizes(newmat,m,n,M,N);
1539: if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1540: MatMPIDenseSetPreallocation(newmat,NULL);
1541: }
1542: MatDenseGetArray(newmat,&array);
1543: MatGetLocalSize(newmat,&m,NULL);
1544: MatGetOwnershipRanges(newmat,&rowners);
1545: MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);
1546: if (!rank) {
1547: PetscMalloc1(mMax*N,&vals);
1549: /* read in my part of the matrix numerical values */
1550: PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1552: /* insert into matrix by row (this is why we cannot read directly into the array) */
1553: vals_ptr = vals;
1554: for (i=0; i<m; i++) {
1555: for (j=0; j<N; j++) {
1556: array[i + j*m] = *vals_ptr++;
1557: }
1558: }
1560: /* read in the other processors' parts and ship them out */
1561: for (i=1; i<size; i++) {
1562: nz = (rowners[i+1] - rowners[i])*N;
1563: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1564: MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);
1565: }
1566: } else {
1567: /* receive numeric values */
1568: PetscMalloc1(m*N,&vals);
1570: /* receive message of values*/
1571: MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);
1573: /* insert into matrix by row (this is why we cannot read directly into the array) */
1574: vals_ptr = vals;
1575: for (i=0; i<m; i++) {
1576: for (j=0; j<N; j++) {
1577: array[i + j*m] = *vals_ptr++;
1578: }
1579: }
1580: }
1581: MatDenseRestoreArray(newmat,&array);
1582: PetscFree(vals);
1583: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1584: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1585: return(0);
1586: }
1590: PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1591: {
1592: Mat_MPIDense *a;
1593: PetscScalar *vals,*svals;
1594: MPI_Comm comm;
1595: MPI_Status status;
1596: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1597: PetscInt header[4],*rowlengths = 0,M,N,*cols;
1598: PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1599: PetscInt i,nz,j,rstart,rend;
1600: int fd;
1604: /* force binary viewer to load .info file if it has not yet done so */
1605: PetscViewerSetUp(viewer);
1606: PetscObjectGetComm((PetscObject)viewer,&comm);
1607: MPI_Comm_size(comm,&size);
1608: MPI_Comm_rank(comm,&rank);
1609: PetscViewerBinaryGetDescriptor(viewer,&fd);
1610: if (!rank) {
1611: PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
1612: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1613: }
1614: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1615: M = header[1]; N = header[2]; nz = header[3];
1617: /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1618: if (newmat->rmap->N < 0) newmat->rmap->N = M;
1619: if (newmat->cmap->N < 0) newmat->cmap->N = N;
1621: if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1622: if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1624: /*
1625: Handle case where matrix is stored on disk as a dense matrix
1626: */
1627: if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1628: MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1629: return(0);
1630: }
1632: /* determine ownership of all rows */
1633: if (newmat->rmap->n < 0) {
1634: PetscMPIIntCast(M/size + ((M % size) > rank),&m);
1635: } else {
1636: PetscMPIIntCast(newmat->rmap->n,&m);
1637: }
1638: if (newmat->cmap->n < 0) {
1639: n = PETSC_DECIDE;
1640: } else {
1641: PetscMPIIntCast(newmat->cmap->n,&n);
1642: }
1644: PetscMalloc1(size+2,&rowners);
1645: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1646: rowners[0] = 0;
1647: for (i=2; i<=size; i++) {
1648: rowners[i] += rowners[i-1];
1649: }
1650: rstart = rowners[rank];
1651: rend = rowners[rank+1];
1653: /* distribute row lengths to all processors */
1654: PetscMalloc1(rend-rstart,&ourlens);
1655: if (!rank) {
1656: PetscMalloc1(M,&rowlengths);
1657: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1658: PetscMalloc1(size,&sndcounts);
1659: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1660: MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1661: PetscFree(sndcounts);
1662: } else {
1663: MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1664: }
1666: if (!rank) {
1667: /* calculate the number of nonzeros on each processor */
1668: PetscMalloc1(size,&procsnz);
1669: PetscMemzero(procsnz,size*sizeof(PetscInt));
1670: for (i=0; i<size; i++) {
1671: for (j=rowners[i]; j< rowners[i+1]; j++) {
1672: procsnz[i] += rowlengths[j];
1673: }
1674: }
1675: PetscFree(rowlengths);
1677: /* determine max buffer needed and allocate it */
1678: maxnz = 0;
1679: for (i=0; i<size; i++) {
1680: maxnz = PetscMax(maxnz,procsnz[i]);
1681: }
1682: PetscMalloc1(maxnz,&cols);
1684: /* read in my part of the matrix column indices */
1685: nz = procsnz[0];
1686: PetscMalloc1(nz,&mycols);
1687: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1689: /* read in everyone else's part and ship it off */
1690: for (i=1; i<size; i++) {
1691: nz = procsnz[i];
1692: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1693: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1694: }
1695: PetscFree(cols);
1696: } else {
1697: /* determine buffer space needed for message */
1698: nz = 0;
1699: for (i=0; i<m; i++) {
1700: nz += ourlens[i];
1701: }
1702: PetscMalloc1(nz+1,&mycols);
1704: /* receive message of column indices*/
1705: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1706: MPI_Get_count(&status,MPIU_INT,&maxnz);
1707: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1708: }
1710: MatSetSizes(newmat,m,n,M,N);
1711: a = (Mat_MPIDense*)newmat->data;
1712: if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1713: MatMPIDenseSetPreallocation(newmat,NULL);
1714: }
1716: if (!rank) {
1717: PetscMalloc1(maxnz,&vals);
1719: /* read in my part of the matrix numerical values */
1720: nz = procsnz[0];
1721: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1723: /* insert into matrix */
1724: jj = rstart;
1725: smycols = mycols;
1726: svals = vals;
1727: for (i=0; i<m; i++) {
1728: MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1729: smycols += ourlens[i];
1730: svals += ourlens[i];
1731: jj++;
1732: }
1734: /* read in the other processors' parts and ship them out */
1735: for (i=1; i<size; i++) {
1736: nz = procsnz[i];
1737: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1738: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
1739: }
1740: PetscFree(procsnz);
1741: } else {
1742: /* receive numeric values */
1743: PetscMalloc1(nz+1,&vals);
1745: /* receive message of values*/
1746: MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
1747: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1748: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1750: /* insert into matrix */
1751: jj = rstart;
1752: smycols = mycols;
1753: svals = vals;
1754: for (i=0; i<m; i++) {
1755: MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1756: smycols += ourlens[i];
1757: svals += ourlens[i];
1758: jj++;
1759: }
1760: }
1761: PetscFree(ourlens);
1762: PetscFree(vals);
1763: PetscFree(mycols);
1764: PetscFree(rowners);
1766: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1767: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1768: return(0);
1769: }
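/*
   Illustrative sketch (not part of the original source): loading a parallel dense matrix that
   was previously written with MatView() in PETSC_VIEWER_NATIVE format. "matrix.dat" is a
   placeholder filename; error checking omitted.

     Mat         A;
     PetscViewer viewer;

     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATMPIDENSE);
     MatLoad(A,viewer);
     PetscViewerDestroy(&viewer);
*/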
1773: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
1774: {
1775: Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1776: Mat a,b;
1777: PetscBool flg;
1781: a = matA->A;
1782: b = matB->A;
1783: MatEqual(a,b,&flg);
1784: MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1785: return(0);
1786: }
1790: PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1791: {
1792: PetscErrorCode ierr;
1793: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1794: Mat_TransMatMultDense *atb = a->atbdense;
1797: PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);
1798: (atb->destroy)(A);
1799: PetscFree(atb);
1800: return(0);
1801: }
1805: PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1806: {
1807: Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1808: Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1809: Mat_TransMatMultDense *atb = c->atbdense;
1811: MPI_Comm comm;
1812: PetscMPIInt rank,size,*recvcounts=atb->recvcounts;
1813: PetscScalar *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1814: PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1815: PetscScalar _DOne=1.0,_DZero=0.0;
1816: PetscBLASInt am,an,bn,aN;
1817: const PetscInt *ranges;
1820: PetscObjectGetComm((PetscObject)A,&comm);
1821: MPI_Comm_rank(comm,&rank);
1822: MPI_Comm_size(comm,&size);
1824: /* compute atbarray = aseq^T * bseq */
1825: PetscBLASIntCast(a->A->cmap->n,&an);
1826: PetscBLASIntCast(b->A->cmap->n,&bn);
1827: PetscBLASIntCast(a->A->rmap->n,&am);
1828: PetscBLASIntCast(A->cmap->N,&aN);
1829: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1830:
1831: MatGetOwnershipRanges(C,&ranges);
1832: for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1833:
1834: /* arrange atbarray into sendbuf */
1835: k = 0;
1836: for (proc=0; proc<size; proc++) {
1837: for (j=0; j<cN; j++) {
1838: for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1839: }
1840: }
1841: /* sum all atbarray to local values of C */
1842: MatDenseGetArray(c->A,&carray);
1843: MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);
1844: MatDenseRestoreArray(c->A,&carray);
1845: return(0);
1846: }
1850: PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1851: {
1852: PetscErrorCode ierr;
1853: Mat Cdense;
1854: MPI_Comm comm;
1855: PetscMPIInt size;
1856: PetscInt cm=A->cmap->n,cM,cN=B->cmap->N;
1857: Mat_MPIDense *c;
1858: Mat_TransMatMultDense *atb;
1861: PetscObjectGetComm((PetscObject)A,&comm);
1862: if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1863: SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1864: }
1866: /* create matrix product Cdense */
1867: MatCreate(comm,&Cdense);
1868: MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
1869: MatSetType(Cdense,MATMPIDENSE);
1870: MatMPIDenseSetPreallocation(Cdense,NULL);
1871: MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);
1872: MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);
1873: *C = Cdense;
1875: /* create data structure so Cdense can be reused */
1876: MPI_Comm_size(comm,&size);
1877: PetscNew(&atb);
1878: cM = Cdense->rmap->N;
1879: PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);
1880:
1881: c = (Mat_MPIDense*)Cdense->data;
1882: c->atbdense = atb;
1883: atb->destroy = Cdense->ops->destroy;
1884: Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1885: return(0);
1886: }
1890: PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1891: {
1895: if (scall == MAT_INITIAL_MATRIX) {
1896: MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
1897: }
1898: MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);
1899: return(0);
1900: }
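/*
   Illustrative sketch (not part of the original source): forming C = A^T * B for two parallel
   dense matrices with the same row distribution, using the routines above through the generic
   interface. A and B are assumed to be assembled MATMPIDENSE matrices; error checking omitted.

     Mat C;
     MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);   (symbolic + numeric)
     MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);     (reuse C after A or B change)
     MatDestroy(&C);
*/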
1904: PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1905: {
1906: PetscErrorCode ierr;
1907: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1908: Mat_MatMultDense *ab = a->abdense;
1911: MatDestroy(&ab->Ce);
1912: MatDestroy(&ab->Ae);
1913: MatDestroy(&ab->Be);
1915: (ab->destroy)(A);
1916: PetscFree(ab);
1917: return(0);
1918: }
1920: #if defined(PETSC_HAVE_ELEMENTAL)
1923: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1924: {
1925: PetscErrorCode ierr;
1926: Mat_MPIDense *c=(Mat_MPIDense*)C->data;
1927: Mat_MatMultDense *ab=c->abdense;
1930: MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);
1931: MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);
1932: MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);
1933: MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);
1934: return(0);
1935: }
1939: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1940: {
1941: PetscErrorCode ierr;
1942: Mat Ae,Be,Ce;
1943: Mat_MPIDense *c;
1944: Mat_MatMultDense *ab;
1947: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1948: SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1949: }
1951: /* convert A and B to Elemental matrices Ae and Be */
1952: MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);
1953: MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);
1955: /* Ce = Ae*Be */
1956: MatMatMultSymbolic(Ae,Be,fill,&Ce);
1957: MatMatMultNumeric(Ae,Be,Ce);
1958:
1959: /* convert Ce to C */
1960: MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);
1962: /* create data structure so the product matrix can be reused */
1963: PetscNew(&ab);
1964: c = (Mat_MPIDense*)(*C)->data;
1965: c->abdense = ab;
1967: ab->Ae = Ae;
1968: ab->Be = Be;
1969: ab->Ce = Ce;
1970: ab->destroy = (*C)->ops->destroy;
1971: (*C)->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
1972: (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1973: return(0);
1974: }
1978: PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1979: {
1983: if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes the numeric product */
1984: PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);
1985: MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
1986: PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);
1987: } else {
1988: PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);
1989: MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);
1990: PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);
1991: }
1992: return(0);
1993: }
1994: #endif
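/*
   Illustrative sketch (not part of the original source): forming C = A*B for two parallel dense
   matrices. This path requires PETSc configured with Elemental, as guarded above. A and B are
   assumed to be assembled MATMPIDENSE matrices with compatible sizes; error checking omitted.

     Mat C;
     MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);
     MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);    (reuse C after values of A or B change)
     MatDestroy(&C);
*/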