Actual source code: mpidense.c
petsc-3.12.5 2020-03-29
2: /*
3: Basic functions for parallel dense matrices.
4: */
7: #include <../src/mat/impls/dense/mpi/mpidense.h>
8: #include <../src/mat/impls/aij/mpi/mpiaij.h>
9: #include <petscblaslapack.h>
11: /*@
13: MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
14: matrix that represents the operator. For sequential matrices it returns itself.
16: Input Parameter:
17: . A - the Seq or MPI dense matrix
19: Output Parameter:
20: . B - the inner matrix
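
   Example usage (a minimal sketch; A is assumed to be an already created and assembled dense matrix):
.vb
   Mat Aloc;
   MatDenseGetLocalMatrix(A,&Aloc);
   MatView(Aloc,PETSC_VIEWER_STDOUT_SELF);
.ve
   The returned matrix is a borrowed reference; the caller must not destroy it.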
22: Level: intermediate
24: @*/
25: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26: {
27: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
29: PetscBool flg;
32: PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
33: if (flg) *B = mat->A;
34: else *B = A;
35: return(0);
36: }
38: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
39: {
40: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
42: PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
45: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
46: lrow = row - rstart;
47: MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
48: return(0);
49: }
51: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
52: {
56: if (idx) {PetscFree(*idx);}
57: if (v) {PetscFree(*v);}
58: return(0);
59: }
61: PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
62: {
63: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
65: PetscInt m = A->rmap->n,rstart = A->rmap->rstart;
66: PetscScalar *array;
67: MPI_Comm comm;
68: Mat B;
71: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
73: PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
74: if (!B) {
75: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
76: MatCreate(comm,&B);
77: MatSetSizes(B,m,m,m,m);
78: MatSetType(B,((PetscObject)mdn->A)->type_name);
79: MatDenseGetArray(mdn->A,&array);
80: MatSeqDenseSetPreallocation(B,array+m*rstart);
81: MatDenseRestoreArray(mdn->A,&array);
82: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
83: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
84: PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
85: *a = B;
86: MatDestroy(&B);
87: } else *a = B;
88: return(0);
89: }
91: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
92: {
93: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
95: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
96: PetscBool roworiented = A->roworiented;
99: for (i=0; i<m; i++) {
100: if (idxm[i] < 0) continue;
101: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
102: if (idxm[i] >= rstart && idxm[i] < rend) {
103: row = idxm[i] - rstart;
104: if (roworiented) {
105: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
106: } else {
107: for (j=0; j<n; j++) {
108: if (idxn[j] < 0) continue;
109: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
110: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
111: }
112: }
113: } else if (!A->donotstash) {
114: mat->assembled = PETSC_FALSE;
115: if (roworiented) {
116: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
117: } else {
118: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
119: }
120: }
121: }
122: return(0);
123: }
125: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126: {
127: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
129: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
132: for (i=0; i<m; i++) {
133: if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135: if (idxm[i] >= rstart && idxm[i] < rend) {
136: row = idxm[i] - rstart;
137: for (j=0; j<n; j++) {
138: if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
141: }
142: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
143: }
144: return(0);
145: }
147: static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
148: {
149: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
153: MatDenseGetLDA(a->A,lda);
154: return(0);
155: }
157: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
158: {
159: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
163: MatDenseGetArray(a->A,array);
164: return(0);
165: }
167: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
168: {
169: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
173: MatDenseGetArrayRead(a->A,array);
174: return(0);
175: }
177: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
178: {
179: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
183: MatDensePlaceArray(a->A,array);
184: return(0);
185: }
187: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
188: {
189: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
193: MatDenseResetArray(a->A);
194: return(0);
195: }
197: static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
198: {
199: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
200: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
202: PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
203: const PetscInt *irow,*icol;
204: PetscScalar *av,*bv,*v = lmat->v;
205: Mat newmat;
206: IS iscol_local;
207: MPI_Comm comm_is,comm_mat;
210: PetscObjectGetComm((PetscObject)A,&comm_mat);
211: PetscObjectGetComm((PetscObject)iscol,&comm_is);
212: if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
214: ISAllGather(iscol,&iscol_local);
215: ISGetIndices(isrow,&irow);
216: ISGetIndices(iscol_local,&icol);
217: ISGetLocalSize(isrow,&nrows);
218: ISGetLocalSize(iscol,&ncols);
219: ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */
221: /* No parallel redistribution currently supported! Should really check each index set
222: to confirm that it is OK. ... Currently supports only submatrices with the same partitioning
223: as the original matrix! */
225: MatGetLocalSize(A,&nlrows,&nlcols);
226: MatGetOwnershipRange(A,&rstart,&rend);
228: /* Check submatrix call */
229: if (scall == MAT_REUSE_MATRIX) {
230: /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
231: /* Really need to test rows and column sizes! */
232: newmat = *B;
233: } else {
234: /* Create and fill new matrix */
235: MatCreate(PetscObjectComm((PetscObject)A),&newmat);
236: MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
237: MatSetType(newmat,((PetscObject)A)->type_name);
238: MatMPIDenseSetPreallocation(newmat,NULL);
239: }
241: /* Now extract the data pointers and do the copy, column at a time */
242: newmatd = (Mat_MPIDense*)newmat->data;
243: bv = ((Mat_SeqDense*)newmatd->A->data)->v;
245: for (i=0; i<Ncols; i++) {
246: av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
247: for (j=0; j<nrows; j++) {
248: *bv++ = av[irow[j] - rstart];
249: }
250: }
252: /* Assemble the matrices so that the correct flags are set */
253: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
254: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
256: /* Free work space */
257: ISRestoreIndices(isrow,&irow);
258: ISRestoreIndices(iscol_local,&icol);
259: ISDestroy(&iscol_local);
260: *B = newmat;
261: return(0);
262: }
264: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
265: {
266: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
270: MatDenseRestoreArray(a->A,array);
271: return(0);
272: }
274: PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
275: {
276: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
280: MatDenseRestoreArrayRead(a->A,array);
281: return(0);
282: }
284: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
285: {
286: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
287: MPI_Comm comm;
289: PetscInt nstash,reallocs;
290: InsertMode addv;
293: PetscObjectGetComm((PetscObject)mat,&comm);
294: /* make sure all processors are either in INSERTMODE or ADDMODE */
295: MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);
296: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
297: mat->insertmode = addv; /* in case this processor had no cache */
299: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
300: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
301: PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
302: return(0);
303: }
305: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
306: {
307: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
309: PetscInt i,*row,*col,flg,j,rstart,ncols;
310: PetscMPIInt n;
311: PetscScalar *val;
314: /* wait on receives */
315: while (1) {
316: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
317: if (!flg) break;
319: for (i=0; i<n;) {
320: /* Now identify the consecutive vals belonging to the same row */
321: for (j=i,rstart=row[j]; j<n; j++) {
322: if (row[j] != rstart) break;
323: }
324: if (j < n) ncols = j-i;
325: else ncols = n-i;
326: /* Now assemble all these values with a single function call */
327: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
328: i = j;
329: }
330: }
331: MatStashScatterEnd_Private(&mat->stash);
333: MatAssemblyBegin(mdn->A,mode);
334: MatAssemblyEnd(mdn->A,mode);
336: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
337: MatSetUpMultiply_MPIDense(mat);
338: }
339: return(0);
340: }
342: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
343: {
345: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
348: MatZeroEntries(l->A);
349: return(0);
350: }
352: /* the code does not do the diagonal entries correctly unless the
353: matrix is square and the column and row ownerships are identical.
354: This is a BUG. The only way to fix it seems to be to access
355: mdn->A and mdn->B directly and not through the MatZeroRows()
356: routine.
357: */
358: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
359: {
360: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
361: PetscErrorCode ierr;
362: PetscInt i,*owners = A->rmap->range;
363: PetscInt *sizes,j,idx,nsends;
364: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
365: PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
366: PetscInt *lens,*lrows,*values;
367: PetscMPIInt n,imdex,rank = l->rank,size = l->size;
368: MPI_Comm comm;
369: MPI_Request *send_waits,*recv_waits;
370: MPI_Status recv_status,*send_status;
371: PetscBool found;
372: const PetscScalar *xx;
373: PetscScalar *bb;
376: PetscObjectGetComm((PetscObject)A,&comm);
377: if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
378: if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
379: /* first count number of contributors to each processor */
380: PetscCalloc1(2*size,&sizes);
381: PetscMalloc1(N+1,&owner); /* see note*/
382: for (i=0; i<N; i++) {
383: idx = rows[i];
384: found = PETSC_FALSE;
385: for (j=0; j<size; j++) {
386: if (idx >= owners[j] && idx < owners[j+1]) {
387: sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
388: }
389: }
390: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
391: }
392: nsends = 0;
393: for (i=0; i<size; i++) nsends += sizes[2*i+1];
395: /* inform other processors of number of messages and max length*/
396: PetscMaxSum(comm,sizes,&nmax,&nrecvs);
398: /* post receives: */
399: PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);
400: PetscMalloc1(nrecvs+1,&recv_waits);
401: for (i=0; i<nrecvs; i++) {
402: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
403: }
405: /* do sends:
406: 1) starts[i] gives the starting index in svalues for stuff going to
407: the ith processor
408: */
409: PetscMalloc1(N+1,&svalues);
410: PetscMalloc1(nsends+1,&send_waits);
411: PetscMalloc1(size+1,&starts);
413: starts[0] = 0;
414: for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
415: for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
417: starts[0] = 0;
418: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
419: count = 0;
420: for (i=0; i<size; i++) {
421: if (sizes[2*i+1]) {
422: MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
423: }
424: }
425: PetscFree(starts);
427: base = owners[rank];
429: /* wait on receives */
430: PetscMalloc2(nrecvs,&lens,nrecvs,&source);
431: count = nrecvs;
432: slen = 0;
433: while (count) {
434: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
435: /* unpack receives into our local space */
436: MPI_Get_count(&recv_status,MPIU_INT,&n);
438: source[imdex] = recv_status.MPI_SOURCE;
439: lens[imdex] = n;
440: slen += n;
441: count--;
442: }
443: PetscFree(recv_waits);
445: /* move the received row indices into the local list */
446: PetscMalloc1(slen+1,&lrows);
447: count = 0;
448: for (i=0; i<nrecvs; i++) {
449: values = rvalues + i*nmax;
450: for (j=0; j<lens[i]; j++) {
451: lrows[count++] = values[j] - base;
452: }
453: }
454: PetscFree(rvalues);
455: PetscFree2(lens,source);
456: PetscFree(owner);
457: PetscFree(sizes);
459: /* fix right hand side if needed */
460: if (x && b) {
461: VecGetArrayRead(x,&xx);
462: VecGetArray(b,&bb);
463: for (i=0; i<slen; i++) {
464: bb[lrows[i]] = diag*xx[lrows[i]];
465: }
466: VecRestoreArrayRead(x,&xx);
467: VecRestoreArray(b,&bb);
468: }
470: /* actually zap the local rows */
471: MatZeroRows(l->A,slen,lrows,0.0,0,0);
472: if (diag != 0.0) {
473: Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
474: PetscInt m = ll->lda, i;
476: for (i=0; i<slen; i++) {
477: ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
478: }
479: }
480: PetscFree(lrows);
482: /* wait on sends */
483: if (nsends) {
484: PetscMalloc1(nsends,&send_status);
485: MPI_Waitall(nsends,send_waits,send_status);
486: PetscFree(send_status);
487: }
488: PetscFree(send_waits);
489: PetscFree(svalues);
490: return(0);
491: }
493: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
494: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
495: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
496: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
498: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
499: {
500: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
504: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
505: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
506: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
507: return(0);
508: }
510: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
511: {
512: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
516: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
517: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
518: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
519: return(0);
520: }
522: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
523: {
524: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
526: PetscScalar zero = 0.0;
529: VecSet(yy,zero);
530: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
531: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
532: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
533: return(0);
534: }
536: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
537: {
538: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
542: VecCopy(yy,zz);
543: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
544: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
545: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
546: return(0);
547: }
549: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
550: {
551: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
552: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
554: PetscInt len,i,n,m = A->rmap->n,radd;
555: PetscScalar *x,zero = 0.0;
558: VecSet(v,zero);
559: VecGetArray(v,&x);
560: VecGetSize(v,&n);
561: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
562: len = PetscMin(a->A->rmap->n,a->A->cmap->n);
563: radd = A->rmap->rstart*m;
564: for (i=0; i<len; i++) {
565: x[i] = aloc->v[radd + i*m + i];
566: }
567: VecRestoreArray(v,&x);
568: return(0);
569: }
571: PetscErrorCode MatDestroy_MPIDense(Mat mat)
572: {
573: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
577: #if defined(PETSC_USE_LOG)
578: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
579: #endif
580: MatStashDestroy_Private(&mat->stash);
581: MatDestroy(&mdn->A);
582: VecDestroy(&mdn->lvec);
583: VecScatterDestroy(&mdn->Mvctx);
585: PetscFree(mat->data);
586: PetscObjectChangeTypeName((PetscObject)mat,0);
588: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);
589: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);
590: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);
591: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);
592: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);
593: PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);
594: PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);
595: #if defined(PETSC_HAVE_ELEMENTAL)
596: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);
597: #endif
598: PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);
599: PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);
600: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);
601: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);
602: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);
603: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);
604: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);
605: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);
606: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);
607: return(0);
608: }
610: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
611: {
612: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
613: PetscErrorCode ierr;
614: PetscViewerFormat format;
615: int fd;
616: PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k;
617: PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size;
618: PetscScalar *work,*v,*vv;
619: Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data;
622: if (mdn->size == 1) {
623: MatView(mdn->A,viewer);
624: } else {
625: PetscViewerBinaryGetDescriptor(viewer,&fd);
626: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
627: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
629: PetscViewerGetFormat(viewer,&format);
630: if (format == PETSC_VIEWER_NATIVE) {
632: if (!rank) {
633: /* store the matrix as a dense matrix */
634: header[0] = MAT_FILE_CLASSID;
635: header[1] = mat->rmap->N;
636: header[2] = N;
637: header[3] = MATRIX_BINARY_FORMAT_DENSE;
638: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
640: /* get largest work array needed for transposing array */
641: mmax = mat->rmap->n;
642: for (i=1; i<size; i++) {
643: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
644: }
645: PetscMalloc1(mmax*N,&work);
647: /* write out local array, by rows */
648: m = mat->rmap->n;
649: v = a->v;
650: for (j=0; j<N; j++) {
651: for (i=0; i<m; i++) {
652: work[j + i*N] = *v++;
653: }
654: }
655: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
656: /* get largest work array to receive messages from other processes, excludes process zero */
657: mmax = 0;
658: for (i=1; i<size; i++) {
659: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
660: }
661: PetscMalloc1(mmax*N,&vv);
662: for (k = 1; k < size; k++) {
663: v = vv;
664: m = mat->rmap->range[k+1] - mat->rmap->range[k];
665: MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));
667: for (j = 0; j < N; j++) {
668: for (i = 0; i < m; i++) {
669: work[j + i*N] = *v++;
670: }
671: }
672: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
673: }
674: PetscFree(work);
675: PetscFree(vv);
676: } else {
677: MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
678: }
679: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
680: }
681: return(0);
682: }
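
/*
   Usage sketch (illustrative, not part of the implementation): as the error message above indicates,
   writing a parallel dense matrix to a binary viewer requires pushing the native format first.
   Here A is an assumed assembled MATMPIDENSE matrix and "mat.dat" an assumed output file name.

      PetscViewer viewer;
      PetscViewerBinaryOpen(PETSC_COMM_WORLD,"mat.dat",FILE_MODE_WRITE,&viewer);
      PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE);
      MatView(A,viewer);
      PetscViewerPopFormat(viewer);
      PetscViewerDestroy(&viewer);
*/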
684: extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
685: #include <petscdraw.h>
686: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
687: {
688: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
689: PetscErrorCode ierr;
690: PetscMPIInt rank = mdn->rank;
691: PetscViewerType vtype;
692: PetscBool iascii,isdraw;
693: PetscViewer sviewer;
694: PetscViewerFormat format;
697: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
698: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
699: if (iascii) {
700: PetscViewerGetType(viewer,&vtype);
701: PetscViewerGetFormat(viewer,&format);
702: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
703: MatInfo info;
704: MatGetInfo(mat,MAT_LOCAL,&info);
705: PetscViewerASCIIPushSynchronized(viewer);
706: PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
707: PetscViewerFlush(viewer);
708: PetscViewerASCIIPopSynchronized(viewer);
709: VecScatterView(mdn->Mvctx,viewer);
710: return(0);
711: } else if (format == PETSC_VIEWER_ASCII_INFO) {
712: return(0);
713: }
714: } else if (isdraw) {
715: PetscDraw draw;
716: PetscBool isnull;
718: PetscViewerDrawGetDraw(viewer,0,&draw);
719: PetscDrawIsNull(draw,&isnull);
720: if (isnull) return(0);
721: }
723: {
724: /* assemble the entire matrix onto first processor. */
725: Mat A;
726: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
727: PetscInt *cols;
728: PetscScalar *vals;
730: MatCreate(PetscObjectComm((PetscObject)mat),&A);
731: if (!rank) {
732: MatSetSizes(A,M,N,M,N);
733: } else {
734: MatSetSizes(A,0,0,M,N);
735: }
736: /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
737: MatSetType(A,MATMPIDENSE);
738: MatMPIDenseSetPreallocation(A,NULL);
739: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
741: /* Copy the matrix ... This isn't the most efficient means,
742: but it's quick for now */
743: A->insertmode = INSERT_VALUES;
745: row = mat->rmap->rstart;
746: m = mdn->A->rmap->n;
747: for (i=0; i<m; i++) {
748: MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
749: MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
750: MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
751: row++;
752: }
754: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
755: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
756: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
757: if (!rank) {
758: PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);
759: MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);
760: }
761: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
762: PetscViewerFlush(viewer);
763: MatDestroy(&A);
764: }
765: return(0);
766: }
768: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
769: {
771: PetscBool iascii,isbinary,isdraw,issocket;
774: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
775: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
776: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
777: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
779: if (iascii || issocket || isdraw) {
780: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
781: } else if (isbinary) {
782: MatView_MPIDense_Binary(mat,viewer);
783: }
784: return(0);
785: }
787: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
788: {
789: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
790: Mat mdn = mat->A;
792: PetscLogDouble isend[5],irecv[5];
795: info->block_size = 1.0;
797: MatGetInfo(mdn,MAT_LOCAL,info);
799: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
800: isend[3] = info->memory; isend[4] = info->mallocs;
801: if (flag == MAT_LOCAL) {
802: info->nz_used = isend[0];
803: info->nz_allocated = isend[1];
804: info->nz_unneeded = isend[2];
805: info->memory = isend[3];
806: info->mallocs = isend[4];
807: } else if (flag == MAT_GLOBAL_MAX) {
808: MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));
810: info->nz_used = irecv[0];
811: info->nz_allocated = irecv[1];
812: info->nz_unneeded = irecv[2];
813: info->memory = irecv[3];
814: info->mallocs = irecv[4];
815: } else if (flag == MAT_GLOBAL_SUM) {
816: MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));
818: info->nz_used = irecv[0];
819: info->nz_allocated = irecv[1];
820: info->nz_unneeded = irecv[2];
821: info->memory = irecv[3];
822: info->mallocs = irecv[4];
823: }
824: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
825: info->fill_ratio_needed = 0;
826: info->factor_mallocs = 0;
827: return(0);
828: }
830: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
831: {
832: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
836: switch (op) {
837: case MAT_NEW_NONZERO_LOCATIONS:
838: case MAT_NEW_NONZERO_LOCATION_ERR:
839: case MAT_NEW_NONZERO_ALLOCATION_ERR:
840: MatCheckPreallocated(A,1);
841: MatSetOption(a->A,op,flg);
842: break;
843: case MAT_ROW_ORIENTED:
844: MatCheckPreallocated(A,1);
845: a->roworiented = flg;
846: MatSetOption(a->A,op,flg);
847: break;
848: case MAT_NEW_DIAGONALS:
849: case MAT_KEEP_NONZERO_PATTERN:
850: case MAT_USE_HASH_TABLE:
851: case MAT_SORTED_FULL:
852: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
853: break;
854: case MAT_IGNORE_OFF_PROC_ENTRIES:
855: a->donotstash = flg;
856: break;
857: case MAT_SYMMETRIC:
858: case MAT_STRUCTURALLY_SYMMETRIC:
859: case MAT_HERMITIAN:
860: case MAT_SYMMETRY_ETERNAL:
861: case MAT_IGNORE_LOWER_TRIANGULAR:
862: case MAT_IGNORE_ZERO_ENTRIES:
863: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
864: break;
865: default:
866: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
867: }
868: return(0);
869: }
872: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
873: {
874: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
875: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
876: const PetscScalar *l,*r;
877: PetscScalar x,*v;
878: PetscErrorCode ierr;
879: PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
882: MatGetLocalSize(A,&s2,&s3);
883: if (ll) {
884: VecGetLocalSize(ll,&s2a);
885: if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
886: VecGetArrayRead(ll,&l);
887: for (i=0; i<m; i++) {
888: x = l[i];
889: v = mat->v + i;
890: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
891: }
892: VecRestoreArrayRead(ll,&l);
893: PetscLogFlops(n*m);
894: }
895: if (rr) {
896: VecGetLocalSize(rr,&s3a);
897: if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
898: VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
899: VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
900: VecGetArrayRead(mdn->lvec,&r);
901: for (i=0; i<n; i++) {
902: x = r[i];
903: v = mat->v + i*m;
904: for (j=0; j<m; j++) (*v++) *= x;
905: }
906: VecRestoreArrayRead(mdn->lvec,&r);
907: PetscLogFlops(n*m);
908: }
909: return(0);
910: }
912: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
913: {
914: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
915: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
917: PetscInt i,j;
918: PetscReal sum = 0.0;
919: PetscScalar *v = mat->v;
922: if (mdn->size == 1) {
923: MatNorm(mdn->A,type,nrm);
924: } else {
925: if (type == NORM_FROBENIUS) {
926: for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
927: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
928: }
929: MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
930: *nrm = PetscSqrtReal(*nrm);
931: PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
932: } else if (type == NORM_1) {
933: PetscReal *tmp,*tmp2;
934: PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);
935: *nrm = 0.0;
936: v = mat->v;
937: for (j=0; j<mdn->A->cmap->n; j++) {
938: for (i=0; i<mdn->A->rmap->n; i++) {
939: tmp[j] += PetscAbsScalar(*v); v++;
940: }
941: }
942: MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
943: for (j=0; j<A->cmap->N; j++) {
944: if (tmp2[j] > *nrm) *nrm = tmp2[j];
945: }
946: PetscFree2(tmp,tmp2);
947: PetscLogFlops(A->cmap->n*A->rmap->n);
948: } else if (type == NORM_INFINITY) { /* max row norm */
949: PetscReal ntemp;
950: MatNorm(mdn->A,type,&ntemp);
951: MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
952: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
953: }
954: return(0);
955: }
957: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
958: {
959: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
960: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
961: Mat B;
962: PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
964: PetscInt j,i;
965: PetscScalar *v;
968: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
969: MatCreate(PetscObjectComm((PetscObject)A),&B);
970: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
971: MatSetType(B,((PetscObject)A)->type_name);
972: MatMPIDenseSetPreallocation(B,NULL);
973: } else {
974: B = *matout;
975: }
977: m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
978: PetscMalloc1(m,&rwork);
979: for (i=0; i<m; i++) rwork[i] = rstart + i;
980: for (j=0; j<n; j++) {
981: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
982: v += m;
983: }
984: PetscFree(rwork);
985: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
986: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
987: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
988: *matout = B;
989: } else {
990: MatHeaderMerge(A,&B);
991: }
992: return(0);
993: }
996: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
997: extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
999: PetscErrorCode MatSetUp_MPIDense(Mat A)
1000: {
1004: MatMPIDenseSetPreallocation(A,0);
1005: return(0);
1006: }
1008: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1009: {
1011: Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1014: MatAXPY(A->A,alpha,B->A,str);
1015: PetscObjectStateIncrease((PetscObject)Y);
1016: return(0);
1017: }
1019: PetscErrorCode MatConjugate_MPIDense(Mat mat)
1020: {
1021: Mat_MPIDense *a = (Mat_MPIDense*)mat->data;
1025: MatConjugate(a->A);
1026: return(0);
1027: }
1029: PetscErrorCode MatRealPart_MPIDense(Mat A)
1030: {
1031: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1035: MatRealPart(a->A);
1036: return(0);
1037: }
1039: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1040: {
1041: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1045: MatImaginaryPart(a->A);
1046: return(0);
1047: }
1049: static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1050: {
1051: PetscErrorCode ierr;
1052: PetscScalar *x;
1053: const PetscScalar *a;
1054: PetscInt lda;
1057: if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1058: MatDenseGetLDA(A,&lda);
1059: MatDenseGetArrayRead(A,&a);
1060: VecGetArray(v,&x);
1061: PetscArraycpy(x,a+col*lda,A->rmap->n);
1062: VecRestoreArray(v,&x);
1063: MatDenseRestoreArrayRead(A,&a);
1064: return(0);
1065: }
1067: extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1068: PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1069: {
1071: PetscInt i,n;
1072: Mat_MPIDense *a = (Mat_MPIDense*) A->data;
1073: PetscReal *work;
1076: MatGetSize(A,NULL,&n);
1077: PetscMalloc1(n,&work);
1078: MatGetColumnNorms_SeqDense(a->A,type,work);
1079: if (type == NORM_2) {
1080: for (i=0; i<n; i++) work[i] *= work[i];
1081: }
1082: if (type == NORM_INFINITY) {
1083: MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
1084: } else {
1085: MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
1086: }
1087: PetscFree(work);
1088: if (type == NORM_2) {
1089: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1090: }
1091: return(0);
1092: }
1094: static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1095: {
1096: Mat_MPIDense *d = (Mat_MPIDense*)x->data;
1098: PetscScalar *a;
1099: PetscInt m,n,i;
1102: MatGetSize(d->A,&m,&n);
1103: MatDenseGetArray(d->A,&a);
1104: for (i=0; i<m*n; i++) {
1105: PetscRandomGetValue(rctx,a+i);
1106: }
1107: MatDenseRestoreArray(d->A,&a);
1108: return(0);
1109: }
1111: extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1113: static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
1114: {
1116: *missing = PETSC_FALSE;
1117: return(0);
1118: }
1120: static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
1121: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
1122: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1124: /* -------------------------------------------------------------------*/
1125: static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1126: MatGetRow_MPIDense,
1127: MatRestoreRow_MPIDense,
1128: MatMult_MPIDense,
1129: /* 4*/ MatMultAdd_MPIDense,
1130: MatMultTranspose_MPIDense,
1131: MatMultTransposeAdd_MPIDense,
1132: 0,
1133: 0,
1134: 0,
1135: /* 10*/ 0,
1136: 0,
1137: 0,
1138: 0,
1139: MatTranspose_MPIDense,
1140: /* 15*/ MatGetInfo_MPIDense,
1141: MatEqual_MPIDense,
1142: MatGetDiagonal_MPIDense,
1143: MatDiagonalScale_MPIDense,
1144: MatNorm_MPIDense,
1145: /* 20*/ MatAssemblyBegin_MPIDense,
1146: MatAssemblyEnd_MPIDense,
1147: MatSetOption_MPIDense,
1148: MatZeroEntries_MPIDense,
1149: /* 24*/ MatZeroRows_MPIDense,
1150: 0,
1151: 0,
1152: 0,
1153: 0,
1154: /* 29*/ MatSetUp_MPIDense,
1155: 0,
1156: 0,
1157: MatGetDiagonalBlock_MPIDense,
1158: 0,
1159: /* 34*/ MatDuplicate_MPIDense,
1160: 0,
1161: 0,
1162: 0,
1163: 0,
1164: /* 39*/ MatAXPY_MPIDense,
1165: MatCreateSubMatrices_MPIDense,
1166: 0,
1167: MatGetValues_MPIDense,
1168: 0,
1169: /* 44*/ 0,
1170: MatScale_MPIDense,
1171: MatShift_Basic,
1172: 0,
1173: 0,
1174: /* 49*/ MatSetRandom_MPIDense,
1175: 0,
1176: 0,
1177: 0,
1178: 0,
1179: /* 54*/ 0,
1180: 0,
1181: 0,
1182: 0,
1183: 0,
1184: /* 59*/ MatCreateSubMatrix_MPIDense,
1185: MatDestroy_MPIDense,
1186: MatView_MPIDense,
1187: 0,
1188: 0,
1189: /* 64*/ 0,
1190: 0,
1191: 0,
1192: 0,
1193: 0,
1194: /* 69*/ 0,
1195: 0,
1196: 0,
1197: 0,
1198: 0,
1199: /* 74*/ 0,
1200: 0,
1201: 0,
1202: 0,
1203: 0,
1204: /* 79*/ 0,
1205: 0,
1206: 0,
1207: 0,
1208: /* 83*/ MatLoad_MPIDense,
1209: 0,
1210: 0,
1211: 0,
1212: 0,
1213: 0,
1214: #if defined(PETSC_HAVE_ELEMENTAL)
1215: /* 89*/ MatMatMult_MPIDense_MPIDense,
1216: MatMatMultSymbolic_MPIDense_MPIDense,
1217: #else
1218: /* 89*/ 0,
1219: 0,
1220: #endif
1221: MatMatMultNumeric_MPIDense,
1222: 0,
1223: 0,
1224: /* 94*/ 0,
1225: MatMatTransposeMult_MPIDense_MPIDense,
1226: MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1227: MatMatTransposeMultNumeric_MPIDense_MPIDense,
1228: 0,
1229: /* 99*/ 0,
1230: 0,
1231: 0,
1232: MatConjugate_MPIDense,
1233: 0,
1234: /*104*/ 0,
1235: MatRealPart_MPIDense,
1236: MatImaginaryPart_MPIDense,
1237: 0,
1238: 0,
1239: /*109*/ 0,
1240: 0,
1241: 0,
1242: MatGetColumnVector_MPIDense,
1243: MatMissingDiagonal_MPIDense,
1244: /*114*/ 0,
1245: 0,
1246: 0,
1247: 0,
1248: 0,
1249: /*119*/ 0,
1250: 0,
1251: 0,
1252: 0,
1253: 0,
1254: /*124*/ 0,
1255: MatGetColumnNorms_MPIDense,
1256: 0,
1257: 0,
1258: 0,
1259: /*129*/ 0,
1260: MatTransposeMatMult_MPIDense_MPIDense,
1261: MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1262: MatTransposeMatMultNumeric_MPIDense_MPIDense,
1263: 0,
1264: /*134*/ 0,
1265: 0,
1266: 0,
1267: 0,
1268: 0,
1269: /*139*/ 0,
1270: 0,
1271: 0,
1272: 0,
1273: 0,
1274: /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
1275: };
1277: PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1278: {
1279: Mat_MPIDense *a;
1283: if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
1284: mat->preallocated = PETSC_TRUE;
1285: /* Note: For now, when data is specified above, this assumes the user correctly
1286: allocates the local dense storage space. We should add error checking. */
1288: a = (Mat_MPIDense*)mat->data;
1289: PetscLayoutSetUp(mat->rmap);
1290: PetscLayoutSetUp(mat->cmap);
1291: a->nvec = mat->cmap->n;
1293: MatCreate(PETSC_COMM_SELF,&a->A);
1294: MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1295: MatSetType(a->A,MATSEQDENSE);
1296: MatSeqDenseSetPreallocation(a->A,data);
1297: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1298: return(0);
1299: }
1301: #if defined(PETSC_HAVE_ELEMENTAL)
1302: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1303: {
1304: Mat mat_elemental;
1306: PetscScalar *v;
1307: PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1308:
1310: if (reuse == MAT_REUSE_MATRIX) {
1311: mat_elemental = *newmat;
1312: MatZeroEntries(*newmat);
1313: } else {
1314: MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
1315: MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);
1316: MatSetType(mat_elemental,MATELEMENTAL);
1317: MatSetUp(mat_elemental);
1318: MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);
1319: }
1321: PetscMalloc2(m,&rows,N,&cols);
1322: for (i=0; i<N; i++) cols[i] = i;
1323: for (i=0; i<m; i++) rows[i] = rstart + i;
1324:
1325: /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1326: MatDenseGetArray(A,&v);
1327: MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);
1328: MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
1329: MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
1330: MatDenseRestoreArray(A,&v);
1331: PetscFree2(rows,cols);
1333: if (reuse == MAT_INPLACE_MATRIX) {
1334: MatHeaderReplace(A,&mat_elemental);
1335: } else {
1336: *newmat = mat_elemental;
1337: }
1338: return(0);
1339: }
1340: #endif
1342: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1343: {
1344: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
1348: MatDenseGetColumn(mat->A,col,vals);
1349: return(0);
1350: }
1352: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1353: {
1354: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
1358: MatDenseRestoreColumn(mat->A,vals);
1359: return(0);
1360: }
1362: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1363: {
1365: Mat_MPIDense *mat;
1366: PetscInt m,nloc,N;
1369: MatGetSize(inmat,&m,&N);
1370: MatGetLocalSize(inmat,NULL,&nloc);
1371: if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1372: PetscInt sum;
1374: if (n == PETSC_DECIDE) {
1375: PetscSplitOwnership(comm,&n,&N);
1376: }
1377: /* Check sum(n) = N */
1378: MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
1379: if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1381: MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);
1382: }
1384: /* numeric phase */
1385: mat = (Mat_MPIDense*)(*outmat)->data;
1386: MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);
1387: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
1388: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
1389: return(0);
1390: }
1392: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1393: {
1394: Mat_MPIDense *a;
1398: PetscNewLog(mat,&a);
1399: mat->data = (void*)a;
1400: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1402: mat->insertmode = NOT_SET_VALUES;
1403: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);
1404: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);
1406: /* build cache for off-processor entries formed */
1407: a->donotstash = PETSC_FALSE;
1409: MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);
1411: /* stuff used for matrix vector multiply */
1412: a->lvec = 0;
1413: a->Mvctx = 0;
1414: a->roworiented = PETSC_TRUE;
1416: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);
1417: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);
1418: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);
1419: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);
1420: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);
1421: PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);
1422: PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);
1423: #if defined(PETSC_HAVE_ELEMENTAL)
1424: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);
1425: #endif
1426: PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);
1427: PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);
1428: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);
1429: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);
1431: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);
1432: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);
1433: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);
1434: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);
1435: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);
1436: PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1437: return(0);
1438: }
1440: /*MC
1441: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1443: This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1444: and MATMPIDENSE otherwise.
1446: Options Database Keys:
1447: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
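
   Example usage (a minimal sketch; M and N are assumed global dimensions, and the type can then be chosen at run time with -mat_type dense):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetFromOptions(A);
   MatSetUp(A);
.ve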
1449: Level: beginner
1452: .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1453: M*/
1455: /*@C
1456: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1458: Not collective
1460: Input Parameters:
1461: + B - the matrix
1462: - data - optional location of matrix data. Set data=NULL for PETSc
1463: to control all matrix memory allocation.
1465: Notes:
1466: The dense format is fully compatible with standard Fortran 77
1467: storage by columns.
1469: The data input variable is intended primarily for Fortran programmers
1470: who wish to allocate their own matrix memory space. Most users should
1471: set data=NULL.
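
   Example usage (a minimal sketch; comm is an assumed MPI communicator and M, N assumed global dimensions):
.vb
   Mat B;
   MatCreate(comm,&B);
   MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(B,MATMPIDENSE);
   MatMPIDenseSetPreallocation(B,NULL);
.ve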
1473: Level: intermediate
1475: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1476: @*/
1477: PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1478: {
1482: PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
1483: return(0);
1484: }
1486: /*@
1487: MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1488: array provided by the user. This is useful to avoid copying an array
1489: into a matrix.
1491: Not Collective
1493: Input Parameters:
1494: + mat - the matrix
1495: - array - the array in column major order
1497: Notes:
1498: You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1499: freed when the matrix is destroyed.
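
   Example usage (a minimal sketch; myarray is an assumed user buffer in column-major order with at least lda*n local entries, and x, y are assumed conforming vectors):
.vb
   MatDensePlaceArray(mat,myarray);
   MatMult(mat,x,y);
   MatDenseResetArray(mat);
.ve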
1501: Level: developer
1503: .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1505: @*/
1506: PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[])
1507: {
1510: PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
1511: PetscObjectStateIncrease((PetscObject)mat);
1512: return(0);
1513: }
1515: /*@
1516: MatDenseResetArray - Resets the matrix array to the array it had before the call to MatDensePlaceArray()
1518: Not Collective
1520: Input Parameters:
1521: . mat - the matrix
1523: Notes:
1524: You can only call this after a call to MatDensePlaceArray()
1526: Level: developer
1528: .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1530: @*/
1531: PetscErrorCode MatDenseResetArray(Mat mat)
1532: {
1535: PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
1536: PetscObjectStateIncrease((PetscObject)mat);
1537: return(0);
1538: }
1540: /*@C
1541: MatCreateDense - Creates a parallel matrix in dense format.
1543: Collective
1545: Input Parameters:
1546: + comm - MPI communicator
1547: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1548: . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1549: . M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1550: . N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1551: - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1552: to control all matrix memory allocation.
1554: Output Parameter:
1555: . A - the matrix
1557: Notes:
1558: The dense format is fully compatible with standard Fortran 77
1559: storage by columns.
1561: The data input variable is intended primarily for Fortran programmers
1562: who wish to allocate their own matrix memory space. Most users should
1563: set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1565: The user MUST specify either the local or global matrix dimensions
1566: (possibly both).
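
   Example usage (a minimal sketch that lets PETSc allocate the storage; M and N are assumed global dimensions):
.vb
   Mat      A;
   PetscInt rstart,rend,i;
   MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);
   MatGetOwnershipRange(A,&rstart,&rend);
   for (i=rstart; i<rend; i++) MatSetValue(A,i,i%N,1.0,INSERT_VALUES);
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve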
1568: Level: intermediate
1570: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1571: @*/
1572: PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1573: {
1575: PetscMPIInt size;
1578: MatCreate(comm,A);
1579: MatSetSizes(*A,m,n,M,N);
1580: MPI_Comm_size(comm,&size);
1581: if (size > 1) {
1582: MatSetType(*A,MATMPIDENSE);
1583: MatMPIDenseSetPreallocation(*A,data);
1584: if (data) { /* user provided data array, so no need to assemble */
1585: MatSetUpMultiply_MPIDense(*A);
1586: (*A)->assembled = PETSC_TRUE;
1587: }
1588: } else {
1589: MatSetType(*A,MATSEQDENSE);
1590: MatSeqDenseSetPreallocation(*A,data);
1591: }
1592: return(0);
1593: }
1595: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1596: {
1597: Mat mat;
1598: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1602: *newmat = 0;
1603: MatCreate(PetscObjectComm((PetscObject)A),&mat);
1604: MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1605: MatSetType(mat,((PetscObject)A)->type_name);
1606: a = (Mat_MPIDense*)mat->data;
1608: mat->factortype = A->factortype;
1609: mat->assembled = PETSC_TRUE;
1610: mat->preallocated = PETSC_TRUE;
1612: a->size = oldmat->size;
1613: a->rank = oldmat->rank;
1614: mat->insertmode = NOT_SET_VALUES;
1615: a->nvec = oldmat->nvec;
1616: a->donotstash = oldmat->donotstash;
1618: PetscLayoutReference(A->rmap,&mat->rmap);
1619: PetscLayoutReference(A->cmap,&mat->cmap);
1621: MatSetUpMultiply_MPIDense(mat);
1622: MatDuplicate(oldmat->A,cpvalues,&a->A);
1623: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1625: *newmat = mat;
1626: return(0);
1627: }
1629: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1630: {
1632: PetscMPIInt rank,size;
1633: const PetscInt *rowners;
1634: PetscInt i,m,n,nz,j,mMax;
1635: PetscScalar *array,*vals,*vals_ptr;
1636: Mat_MPIDense *a = (Mat_MPIDense*)newmat->data;
1639: MPI_Comm_rank(comm,&rank);
1640: MPI_Comm_size(comm,&size);
1642: /* determine ownership of rows and columns */
1643: m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1644: n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1646: MatSetSizes(newmat,m,n,M,N);
1647: if (!a->A) {
1648: MatMPIDenseSetPreallocation(newmat,NULL);
1649: }
1650: MatDenseGetArray(newmat,&array);
1651: MatGetLocalSize(newmat,&m,NULL);
1652: MatGetOwnershipRanges(newmat,&rowners);
1653: MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);
1654: if (!rank) {
1655: PetscMalloc1(mMax*N,&vals);
1657: /* read in my part of the matrix numerical values */
1658: PetscBinaryRead(fd,vals,m*N,NULL,PETSC_SCALAR);
1660: /* insert into matrix by row (this is why we cannot read directly into the array) */
1661: vals_ptr = vals;
1662: for (i=0; i<m; i++) {
1663: for (j=0; j<N; j++) {
1664: array[i + j*m] = *vals_ptr++;
1665: }
1666: }
1668: /* read in the parts owned by other processes and ship them out */
1669: for (i=1; i<size; i++) {
1670: nz = (rowners[i+1] - rowners[i])*N;
1671: PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);
1672: MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);
1673: }
1674: } else {
1675: /* receive numeric values */
1676: PetscMalloc1(m*N,&vals);
1678: /* receive message of values*/
1679: MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);
1681: /* insert into matrix by row (this is why we cannot read directly into the array) */
1682: vals_ptr = vals;
1683: for (i=0; i<m; i++) {
1684: for (j=0; j<N; j++) {
1685: array[i + j*m] = *vals_ptr++;
1686: }
1687: }
1688: }
1689: MatDenseRestoreArray(newmat,&array);
1690: PetscFree(vals);
1691: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1692: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1693: return(0);
1694: }
1696: static PetscErrorCode MatLoad_MPIDense_Binary(Mat newmat,PetscViewer viewer)
1697: {
1698: Mat_MPIDense *a;
1699: PetscScalar *vals,*svals;
1700: MPI_Comm comm;
1701: MPI_Status status;
1702: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1703: PetscInt header[4],*rowlengths = 0,M,N,*cols;
1704: PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1705: PetscInt i,nz,j,rstart,rend;
1706: int fd;
1710: PetscObjectGetComm((PetscObject)viewer,&comm);
1711: MPI_Comm_size(comm,&size);
1712: MPI_Comm_rank(comm,&rank);
1713: PetscViewerBinaryGetDescriptor(viewer,&fd);
1714: if (!rank) {
1715: PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);
1716: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1717: }
1718: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1719: M = header[1]; N = header[2]; nz = header[3];
1721: /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1722: if (newmat->rmap->N < 0) newmat->rmap->N = M;
1723: if (newmat->cmap->N < 0) newmat->cmap->N = N;
1725: if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1726: if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1728: /*
1729: Handle case where matrix is stored on disk as a dense matrix
1730: */
1731: if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1732: MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1733: return(0);
1734: }
1736: /* determine ownership of all rows */
1737: if (newmat->rmap->n < 0) {
1738: PetscMPIIntCast(M/size + ((M % size) > rank),&m);
1739: } else {
1740: PetscMPIIntCast(newmat->rmap->n,&m);
1741: }
1742: if (newmat->cmap->n < 0) {
1743: n = PETSC_DECIDE;
1744: } else {
1745: PetscMPIIntCast(newmat->cmap->n,&n);
1746: }
1748: PetscMalloc1(size+2,&rowners);
1749: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1750: rowners[0] = 0;
1751: for (i=2; i<=size; i++) {
1752: rowners[i] += rowners[i-1];
1753: }
1754: rstart = rowners[rank];
1755: rend = rowners[rank+1];
1757: /* distribute row lengths to all processors */
1758: PetscMalloc1(rend-rstart,&ourlens);
1759: if (!rank) {
1760: PetscMalloc1(M,&rowlengths);
1761: PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);
1762: PetscMalloc1(size,&sndcounts);
1763: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1764: MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1765: PetscFree(sndcounts);
1766: } else {
1767: MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1768: }
1770: if (!rank) {
1771: /* calculate the number of nonzeros on each processor */
1772: PetscCalloc1(size,&procsnz);
1773: for (i=0; i<size; i++) {
1774: for (j=rowners[i]; j< rowners[i+1]; j++) {
1775: procsnz[i] += rowlengths[j];
1776: }
1777: }
1778: PetscFree(rowlengths);
1780: /* determine max buffer needed and allocate it */
1781: maxnz = 0;
1782: for (i=0; i<size; i++) {
1783: maxnz = PetscMax(maxnz,procsnz[i]);
1784: }
1785: PetscMalloc1(maxnz,&cols);
1787: /* read in my part of the matrix column indices */
1788: nz = procsnz[0];
1789: PetscMalloc1(nz,&mycols);
1790: PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);
1792: /* read in everyone else's part and ship it off */
1793: for (i=1; i<size; i++) {
1794: nz = procsnz[i];
1795: PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);
1796: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1797: }
1798: PetscFree(cols);
1799: } else {
1800: /* determine buffer space needed for message */
1801: nz = 0;
1802: for (i=0; i<m; i++) {
1803: nz += ourlens[i];
1804: }
1805: PetscMalloc1(nz+1,&mycols);
1807: /* receive message of column indices*/
1808: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1809: MPI_Get_count(&status,MPIU_INT,&maxnz);
1810: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1811: }
1813: MatSetSizes(newmat,m,n,M,N);
1814: a = (Mat_MPIDense*)newmat->data;
1815: if (!a->A) {
1816: MatMPIDenseSetPreallocation(newmat,NULL);
1817: }
1819: if (!rank) {
1820: PetscMalloc1(maxnz,&vals);
1822: /* read in my part of the matrix numerical values */
1823: nz = procsnz[0];
1824: PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);
1826: /* insert into matrix */
1827: jj = rstart;
1828: smycols = mycols;
1829: svals = vals;
1830: for (i=0; i<m; i++) {
1831: MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1832: smycols += ourlens[i];
1833: svals += ourlens[i];
1834: jj++;
1835: }
1837: /* read in the parts owned by other processes and ship them out */
1838: for (i=1; i<size; i++) {
1839: nz = procsnz[i];
1840: PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);
1841: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
1842: }
1843: PetscFree(procsnz);
1844: } else {
1845: /* receive numeric values */
1846: PetscMalloc1(nz+1,&vals);
1848: /* receive message of values*/
1849: MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
1850: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1851: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Number of values received does not match the row lengths read from the file");
1853: /* insert into matrix */
1854: jj = rstart;
1855: smycols = mycols;
1856: svals = vals;
1857: for (i=0; i<m; i++) {
1858: MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1859: smycols += ourlens[i];
1860: svals += ourlens[i];
1861: jj++;
1862: }
1863: }
1864: PetscFree(ourlens);
1865: PetscFree(vals);
1866: PetscFree(mycols);
1867: PetscFree(rowners);
1869: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1870: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1871: return(0);
1872: }
1874: PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1875: {
1877: PetscBool isbinary;
1878: #if defined(PETSC_HAVE_HDF5)
1879: PetscBool ishdf5;
1880: #endif
1885: /* force binary viewer to load .info file if it has not yet done so */
1886: PetscViewerSetUp(viewer);
1887: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1888: #if defined(PETSC_HAVE_HDF5)
1889: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);
1890: #endif
1891: if (isbinary) {
1892: MatLoad_MPIDense_Binary(newMat,viewer);
1893: #if defined(PETSC_HAVE_HDF5)
1894: } else if (ishdf5) {
1895: MatLoad_Dense_HDF5(newMat,viewer);
1896: #endif
1897: } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
1898: return(0);
1899: }
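/*
   A minimal usage sketch for the loader above via the public interface, assuming a
   matrix was previously written to the (hypothetical) file "matrix.dat" with MatView()
   and a binary viewer:

     Mat         A;
     PetscViewer viewer;

     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATMPIDENSE);
     MatLoad(A,viewer);
     PetscViewerDestroy(&viewer);
     ... use A ...
     MatDestroy(&A);
*/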
1901: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
1902: {
1903: Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1904: Mat a,b;
1905: PetscBool flg;
1909: a = matA->A;
1910: b = matB->A;
1911: MatEqual(a,b,&flg);
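   /* each rank has compared only its local block; a logical AND across the communicator makes *flag PETSC_TRUE only if every rank agrees */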
1912: MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1913: return(0);
1914: }
1916: PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1917: {
1918: PetscErrorCode ierr;
1919: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1920: Mat_TransMatMultDense *atb = a->atbdense;
1923: PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);
1924: (atb->destroy)(A);
1925: PetscFree(atb);
1926: return(0);
1927: }
1929: PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1930: {
1931: PetscErrorCode ierr;
1932: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1933: Mat_MatTransMultDense *abt = a->abtdense;
1936: PetscFree2(abt->buf[0],abt->buf[1]);
1937: PetscFree2(abt->recvcounts,abt->recvdispls);
1938: (abt->destroy)(A);
1939: PetscFree(abt);
1940: return(0);
1941: }
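/*
   The numeric kernel below computes C = A^T * B for MPI dense matrices. In outline:
   each rank forms the full-sized partial product (A_local)^T * (B_local) with a single
   sequential BLAS GEMM, packs the result by destination rank, and MPI_Reduce_scatter
   then sums the partial products and leaves each rank with its own rows of C.
*/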
1943: PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1944: {
1945: Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1946: Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1947: Mat_TransMatMultDense *atb = c->atbdense;
1949: MPI_Comm comm;
1950: PetscMPIInt rank,size,*recvcounts=atb->recvcounts;
1951: PetscScalar *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1952: PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1953: PetscScalar _DOne=1.0,_DZero=0.0;
1954: PetscBLASInt am,an,bn,aN;
1955: const PetscInt *ranges;
1958: PetscObjectGetComm((PetscObject)A,&comm);
1959: MPI_Comm_rank(comm,&rank);
1960: MPI_Comm_size(comm,&size);
1962: /* compute atbarray = aseq^T * bseq */
1963: PetscBLASIntCast(a->A->cmap->n,&an);
1964: PetscBLASIntCast(b->A->cmap->n,&bn);
1965: PetscBLASIntCast(a->A->rmap->n,&am);
1966: PetscBLASIntCast(A->cmap->N,&aN);
1967: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1969: MatGetOwnershipRanges(C,&ranges);
1970: for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1972: /* pack atbarray into sendbuf, grouped by the destination rank that owns the corresponding rows of C */
1973: k = 0;
1974: for (proc=0; proc<size; proc++) {
1975: for (j=0; j<cN; j++) {
1976: for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1977: }
1978: }
1979: /* sum the partial products across all ranks into the local values of C */
1980: MatDenseGetArray(c->A,&carray);
1981: MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);
1982: MatDenseRestoreArray(c->A,&carray);
1983: return(0);
1984: }
1986: PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1987: {
1988: PetscErrorCode ierr;
1989: Mat Cdense;
1990: MPI_Comm comm;
1991: PetscMPIInt size;
1992: PetscInt cm=A->cmap->n,cM,cN=B->cmap->N;
1993: Mat_MPIDense *c;
1994: Mat_TransMatMultDense *atb;
1997: PetscObjectGetComm((PetscObject)A,&comm);
1998: if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1999: SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2000: }
2002: /* create matrix product Cdense */
2003: MatCreate(comm,&Cdense);
2004: MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
2005: MatSetType(Cdense,MATMPIDENSE);
2006: MatMPIDenseSetPreallocation(Cdense,NULL);
2007: MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);
2008: MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);
2009: *C = Cdense;
2011: /* create data structure for reusing Cdense */
2012: MPI_Comm_size(comm,&size);
2013: PetscNew(&atb);
2014: cM = Cdense->rmap->N;
2015: PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);
2016:
2017: c = (Mat_MPIDense*)Cdense->data;
2018: c->atbdense = atb;
2019: atb->destroy = Cdense->ops->destroy;
2020: Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2021: return(0);
2022: }
2024: PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2025: {
2029: if (scall == MAT_INITIAL_MATRIX) {
2030: MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
2031: }
2032: MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);
2033: return(0);
2034: }
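/*
   A minimal usage sketch for the routine above, assuming A and B are assembled
   MATMPIDENSE matrices with identical row layouts (PETSC_DEFAULT stands in for the
   fill estimate):

     Mat C;

     MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);    first call: C = A^T * B
     MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);      recompute C after A or B changed
     MatDestroy(&C);
*/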
2036: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C)
2037: {
2038: PetscErrorCode ierr;
2039: Mat Cdense;
2040: MPI_Comm comm;
2041: PetscMPIInt i, size;
2042: PetscInt maxRows, bufsiz;
2043: Mat_MPIDense *c;
2044: PetscMPIInt tag;
2045: const char *algTypes[2] = {"allgatherv","cyclic"};
2046: PetscInt alg, nalg = 2;
2047: Mat_MatTransMultDense *abt;
2050: PetscObjectGetComm((PetscObject)A,&comm);
2051: alg = 0; /* default is allgatherv */
2052: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");
2053: PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);
2054: PetscOptionsEnd();
2055: if (A->cmap->N != B->cmap->N) {
2056: SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
2057: }
2059: /* create matrix product Cdense */
2060: MatCreate(comm,&Cdense);
2061: MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);
2062: MatSetType(Cdense,MATMPIDENSE);
2063: MatMPIDenseSetPreallocation(Cdense,NULL);
2064: PetscObjectGetNewTag((PetscObject)Cdense, &tag);
2065: MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);
2066: MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);
2067: *C = Cdense;
2069: /* create data structure for reusing Cdense */
2070: MPI_Comm_size(comm,&size);
2071: PetscNew(&abt);
2072: abt->tag = tag;
2073: abt->alg = alg;
2074: switch (alg) {
2075: case 1:
2076: for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2077: bufsiz = A->cmap->N * maxRows;
2078: PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));
2079: break;
2080: default:
2081: PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));
2082: PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));
2083: for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2084: for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2085: break;
2086: }
2088: c = (Mat_MPIDense*)Cdense->data;
2089: c->abtdense = abt;
2090: abt->destroy = Cdense->ops->destroy;
2091: Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2092: return(0);
2093: }
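/*
   The symbolic routine above selects between the two numeric kernels defined below:
   the default "allgatherv" variant gathers all of B^T on every rank and performs one
   local GEMM, while the "cyclic" variant passes blocks of B^T around a ring and
   overlaps one GEMM per block with the communication. A sketch of choosing the
   algorithm at run time ("./app" stands for any PETSc program):

     ./app -matmattransmult_mpidense_mpidense_via allgatherv
     ./app -matmattransmult_mpidense_mpidense_via cyclic
*/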
2095: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2096: {
2097: Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2098: Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2099: Mat_SeqDense *cseq=(Mat_SeqDense*)(c->A)->data;
2100: Mat_MatTransMultDense *abt = c->abtdense;
2102: MPI_Comm comm;
2103: PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2104: PetscScalar *sendbuf, *recvbuf=0, *carray;
2105: PetscInt i,cK=A->cmap->N,k,j,bn;
2106: PetscScalar _DOne=1.0,_DZero=0.0;
2107: PetscBLASInt cm, cn, ck;
2108: MPI_Request reqs[2];
2109: const PetscInt *ranges;
2112: PetscObjectGetComm((PetscObject)A,&comm);
2113: MPI_Comm_rank(comm,&rank);
2114: MPI_Comm_size(comm,&size);
2116: MatGetOwnershipRanges(B,&ranges);
2117: bn = B->rmap->n;
2118: if (bseq->lda == bn) {
2119: sendbuf = bseq->v;
2120: } else {
2121: sendbuf = abt->buf[0];
2122: for (k = 0, i = 0; i < cK; i++) {
2123: for (j = 0; j < bn; j++, k++) {
2124: sendbuf[k] = bseq->v[i * bseq->lda + j];
2125: }
2126: }
2127: }
2128: if (size > 1) {
2129: sendto = (rank + size - 1) % size;
2130: recvfrom = (rank + size + 1) % size;
2131: } else {
2132: sendto = recvfrom = 0;
2133: }
2134: PetscBLASIntCast(cK,&ck);
2135: PetscBLASIntCast(c->A->rmap->n,&cm);
2136: recvisfrom = rank;
2137: for (i = 0; i < size; i++) {
2138: /* we have finished receiving and sending, so bufs can be read/modified */
2139: PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate from */
2140: PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2142: if (nextrecvisfrom != rank) {
2143: /* start the cyclic sends from sendbuf into recvbuf (which becomes the next sendbuf) */
2144: sendsiz = cK * bn;
2145: recvsiz = cK * nextbn;
2146: recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2147: MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);
2148: MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);
2149: }
2151: /* local aseq * sendbuf^T */
2152: PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);
2153: carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
2154: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));
2156: if (nextrecvisfrom != rank) {
2157: /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2158: MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
2159: }
2160: bn = nextbn;
2161: recvisfrom = nextrecvisfrom;
2162: sendbuf = recvbuf;
2163: }
2164: return(0);
2165: }
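/*
   Allgatherv variant: every rank packs the transpose of its local rows of B, gathers the
   complete A->cmap->N by B->rmap->N matrix B^T with MPI_Allgatherv, and then forms its
   local rows of C = A * B^T with a single sequential GEMM.
*/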
2167: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2168: {
2169: Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2170: Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2171: Mat_SeqDense *cseq=(Mat_SeqDense*)(c->A)->data;
2172: Mat_MatTransMultDense *abt = c->abtdense;
2174: MPI_Comm comm;
2175: PetscMPIInt rank,size;
2176: PetscScalar *sendbuf, *recvbuf;
2177: PetscInt i,cK=A->cmap->N,k,j,bn;
2178: PetscScalar _DOne=1.0,_DZero=0.0;
2179: PetscBLASInt cm, cn, ck;
2182: PetscObjectGetComm((PetscObject)A,&comm);
2183: MPI_Comm_rank(comm,&rank);
2184: MPI_Comm_size(comm,&size);
2186: /* copy transpose of B into buf[0] */
2187: bn = B->rmap->n;
2188: sendbuf = abt->buf[0];
2189: recvbuf = abt->buf[1];
2190: for (k = 0, j = 0; j < bn; j++) {
2191: for (i = 0; i < cK; i++, k++) {
2192: sendbuf[k] = bseq->v[i * bseq->lda + j];
2193: }
2194: }
2195: MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);
2196: PetscBLASIntCast(cK,&ck);
2197: PetscBLASIntCast(c->A->rmap->n,&cm);
2198: PetscBLASIntCast(c->A->cmap->n,&cn);
2199: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
2200: return(0);
2201: }
2203: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2204: {
2205: Mat_MPIDense *c=(Mat_MPIDense*)C->data;
2206: Mat_MatTransMultDense *abt = c->abtdense;
2210: switch (abt->alg) {
2211: case 1:
2212: MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);
2213: break;
2214: default:
2215: MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);
2216: break;
2217: }
2218: return(0);
2219: }
2221: static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C)
2222: {
2226: if (scall == MAT_INITIAL_MATRIX) {
2227: MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
2228: }
2229: MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);
2230: return(0);
2231: }
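/*
   A minimal usage sketch for the routine above, assuming A and B are assembled
   MATMPIDENSE matrices with the same global number of columns:

     Mat C;

     MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);    computes C = A * B^T
     MatDestroy(&C);
*/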
2233: PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2234: {
2235: PetscErrorCode ierr;
2236: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
2237: Mat_MatMultDense *ab = a->abdense;
2240: MatDestroy(&ab->Ce);
2241: MatDestroy(&ab->Ae);
2242: MatDestroy(&ab->Be);
2244: (ab->destroy)(A);
2245: PetscFree(ab);
2246: return(0);
2247: }
2249: #if defined(PETSC_HAVE_ELEMENTAL)
2250: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2251: {
2252: PetscErrorCode ierr;
2253: Mat_MPIDense *c=(Mat_MPIDense*)C->data;
2254: Mat_MatMultDense *ab=c->abdense;
2257: MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);
2258: MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);
2259: MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);
2260: MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);
2261: return(0);
2262: }
2264: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2265: {
2266: PetscErrorCode ierr;
2267: Mat Ae,Be,Ce;
2268: Mat_MPIDense *c;
2269: Mat_MatMultDense *ab;
2272: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2273: SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2274: }
2276: /* convert A and B to Elemental matrices Ae and Be */
2277: MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);
2278: MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);
2280: /* Ce = Ae*Be */
2281: MatMatMultSymbolic(Ae,Be,fill,&Ce);
2282: MatMatMultNumeric(Ae,Be,Ce);
2283:
2284: /* convert Ce to C */
2285: MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);
2287: /* create data structure for reusing C */
2288: PetscNew(&ab);
2289: c = (Mat_MPIDense*)(*C)->data;
2290: c->abdense = ab;
2292: ab->Ae = Ae;
2293: ab->Be = Be;
2294: ab->Ce = Ce;
2295: ab->destroy = (*C)->ops->destroy;
2296: (*C)->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2297: (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2298: return(0);
2299: }
2301: PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2302: {
2306: if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
2307: PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);
2308: MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
2309: PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);
2310: } else {
2311: PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);
2312: MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);
2313: PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);
2314: }
2315: return(0);
2316: }
2317: #endif
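/*
   The MatMatMult kernels above are compiled only when PETSc is configured with Elemental
   (e.g. --download-elemental). A minimal usage sketch for such a build, assuming A and B
   are assembled MATMPIDENSE matrices with compatible layouts:

     Mat C;

     MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);    computes C = A * B
     MatDestroy(&C);
*/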