Actual source code: mpidense.c
petsc-3.11.4 2019-09-28
2: /*
3: Basic functions for basic parallel dense matrices.
4: */
7: #include <../src/mat/impls/dense/mpi/mpidense.h>
8: #include <../src/mat/impls/aij/mpi/mpiaij.h>
9: #include <petscblaslapack.h>
11: /*@
13: MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
14: matrix that represents the operator. For sequential matrices it returns itself.
16: Input Parameter:
17: . A - the Seq or MPI dense matrix
19: Output Parameter:
20: . B - the inner matrix
22: Level: intermediate
24: @*/
25: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26: {
27: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
29: PetscBool flg;
32: PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
33: if (flg) *B = mat->A;
34: else *B = A;
35: return(0);
36: }
38: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
39: {
40: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
42: PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
45: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
46: lrow = row - rstart;
47: MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
48: return(0);
49: }
51: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
52: {
56: if (idx) {PetscFree(*idx);}
57: if (v) {PetscFree(*v);}
58: return(0);
59: }
61: PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
62: {
63: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
65: PetscInt m = A->rmap->n,rstart = A->rmap->rstart;
66: PetscScalar *array;
67: MPI_Comm comm;
68: Mat B;
71: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
73: PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
74: if (!B) {
75: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
76: MatCreate(comm,&B);
77: MatSetSizes(B,m,m,m,m);
78: MatSetType(B,((PetscObject)mdn->A)->type_name);
79: MatDenseGetArray(mdn->A,&array);
80: MatSeqDenseSetPreallocation(B,array+m*rstart);
81: MatDenseRestoreArray(mdn->A,&array);
82: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
83: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
84: PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
85: *a = B;
86: MatDestroy(&B);
87: } else *a = B;
88: return(0);
89: }
91: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
92: {
93: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
95: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
96: PetscBool roworiented = A->roworiented;
99: for (i=0; i<m; i++) {
100: if (idxm[i] < 0) continue;
101: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
102: if (idxm[i] >= rstart && idxm[i] < rend) {
103: row = idxm[i] - rstart;
104: if (roworiented) {
105: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
106: } else {
107: for (j=0; j<n; j++) {
108: if (idxn[j] < 0) continue;
109: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
110: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
111: }
112: }
113: } else if (!A->donotstash) {
114: mat->assembled = PETSC_FALSE;
115: if (roworiented) {
116: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
117: } else {
118: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
119: }
120: }
121: }
122: return(0);
123: }
125: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126: {
127: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
129: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
132: for (i=0; i<m; i++) {
133: if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135: if (idxm[i] >= rstart && idxm[i] < rend) {
136: row = idxm[i] - rstart;
137: for (j=0; j<n; j++) {
138: if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
141: }
142: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
143: }
144: return(0);
145: }
147: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
148: {
149: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
153: MatDenseGetArray(a->A,array);
154: return(0);
155: }
157: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
158: {
159: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
163: MatDenseGetArrayRead(a->A,array);
164: return(0);
165: }
167: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
168: {
169: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
173: MatDensePlaceArray(a->A,array);
174: return(0);
175: }
177: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
178: {
179: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
183: MatDenseResetArray(a->A);
184: return(0);
185: }
187: static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
188: {
189: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
190: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
192: PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
193: const PetscInt *irow,*icol;
194: PetscScalar *av,*bv,*v = lmat->v;
195: Mat newmat;
196: IS iscol_local;
197: MPI_Comm comm_is,comm_mat;
200: PetscObjectGetComm((PetscObject)A,&comm_mat);
201: PetscObjectGetComm((PetscObject)iscol,&comm_is);
202: if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
204: ISAllGather(iscol,&iscol_local);
205: ISGetIndices(isrow,&irow);
206: ISGetIndices(iscol_local,&icol);
207: ISGetLocalSize(isrow,&nrows);
208: ISGetLocalSize(iscol,&ncols);
209: ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */
211: /* No parallel redistribution currently supported! Should really check each index set
212: to comfirm that it is OK. ... Currently supports only submatrix same partitioning as
213: original matrix! */
215: MatGetLocalSize(A,&nlrows,&nlcols);
216: MatGetOwnershipRange(A,&rstart,&rend);
218: /* Check submatrix call */
219: if (scall == MAT_REUSE_MATRIX) {
220: /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
221: /* Really need to test rows and column sizes! */
222: newmat = *B;
223: } else {
224: /* Create and fill new matrix */
225: MatCreate(PetscObjectComm((PetscObject)A),&newmat);
226: MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
227: MatSetType(newmat,((PetscObject)A)->type_name);
228: MatMPIDenseSetPreallocation(newmat,NULL);
229: }
231: /* Now extract the data pointers and do the copy, column at a time */
232: newmatd = (Mat_MPIDense*)newmat->data;
233: bv = ((Mat_SeqDense*)newmatd->A->data)->v;
235: for (i=0; i<Ncols; i++) {
236: av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
237: for (j=0; j<nrows; j++) {
238: *bv++ = av[irow[j] - rstart];
239: }
240: }
242: /* Assemble the matrices so that the correct flags are set */
243: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
244: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
246: /* Free work space */
247: ISRestoreIndices(isrow,&irow);
248: ISRestoreIndices(iscol_local,&icol);
249: ISDestroy(&iscol_local);
250: *B = newmat;
251: return(0);
252: }
254: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
255: {
256: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
260: MatDenseRestoreArray(a->A,array);
261: return(0);
262: }
264: PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
265: {
266: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
270: MatDenseRestoreArrayRead(a->A,array);
271: return(0);
272: }
274: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
275: {
276: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
277: MPI_Comm comm;
279: PetscInt nstash,reallocs;
280: InsertMode addv;
283: PetscObjectGetComm((PetscObject)mat,&comm);
284: /* make sure all processors are either in INSERTMODE or ADDMODE */
285: MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);
286: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
287: mat->insertmode = addv; /* in case this processor had no cache */
289: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
290: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
291: PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
292: return(0);
293: }
295: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
296: {
297: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
299: PetscInt i,*row,*col,flg,j,rstart,ncols;
300: PetscMPIInt n;
301: PetscScalar *val;
304: /* wait on receives */
305: while (1) {
306: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
307: if (!flg) break;
309: for (i=0; i<n;) {
310: /* Now identify the consecutive vals belonging to the same row */
311: for (j=i,rstart=row[j]; j<n; j++) {
312: if (row[j] != rstart) break;
313: }
314: if (j < n) ncols = j-i;
315: else ncols = n-i;
316: /* Now assemble all these values with a single function call */
317: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
318: i = j;
319: }
320: }
321: MatStashScatterEnd_Private(&mat->stash);
323: MatAssemblyBegin(mdn->A,mode);
324: MatAssemblyEnd(mdn->A,mode);
326: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
327: MatSetUpMultiply_MPIDense(mat);
328: }
329: return(0);
330: }
332: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
333: {
335: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
338: MatZeroEntries(l->A);
339: return(0);
340: }
342: /* the code does not do the diagonal entries correctly unless the
343: matrix is square and the column and row owerships are identical.
344: This is a BUG. The only way to fix it seems to be to access
345: mdn->A and mdn->B directly and not through the MatZeroRows()
346: routine.
347: */
348: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
349: {
350: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
351: PetscErrorCode ierr;
352: PetscInt i,*owners = A->rmap->range;
353: PetscInt *sizes,j,idx,nsends;
354: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
355: PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
356: PetscInt *lens,*lrows,*values;
357: PetscMPIInt n,imdex,rank = l->rank,size = l->size;
358: MPI_Comm comm;
359: MPI_Request *send_waits,*recv_waits;
360: MPI_Status recv_status,*send_status;
361: PetscBool found;
362: const PetscScalar *xx;
363: PetscScalar *bb;
366: PetscObjectGetComm((PetscObject)A,&comm);
367: if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
368: if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
369: /* first count number of contributors to each processor */
370: PetscCalloc1(2*size,&sizes);
371: PetscMalloc1(N+1,&owner); /* see note*/
372: for (i=0; i<N; i++) {
373: idx = rows[i];
374: found = PETSC_FALSE;
375: for (j=0; j<size; j++) {
376: if (idx >= owners[j] && idx < owners[j+1]) {
377: sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
378: }
379: }
380: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
381: }
382: nsends = 0;
383: for (i=0; i<size; i++) nsends += sizes[2*i+1];
385: /* inform other processors of number of messages and max length*/
386: PetscMaxSum(comm,sizes,&nmax,&nrecvs);
388: /* post receives: */
389: PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);
390: PetscMalloc1(nrecvs+1,&recv_waits);
391: for (i=0; i<nrecvs; i++) {
392: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
393: }
395: /* do sends:
396: 1) starts[i] gives the starting index in svalues for stuff going to
397: the ith processor
398: */
399: PetscMalloc1(N+1,&svalues);
400: PetscMalloc1(nsends+1,&send_waits);
401: PetscMalloc1(size+1,&starts);
403: starts[0] = 0;
404: for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
405: for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
407: starts[0] = 0;
408: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
409: count = 0;
410: for (i=0; i<size; i++) {
411: if (sizes[2*i+1]) {
412: MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
413: }
414: }
415: PetscFree(starts);
417: base = owners[rank];
419: /* wait on receives */
420: PetscMalloc2(nrecvs,&lens,nrecvs,&source);
421: count = nrecvs;
422: slen = 0;
423: while (count) {
424: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
425: /* unpack receives into our local space */
426: MPI_Get_count(&recv_status,MPIU_INT,&n);
428: source[imdex] = recv_status.MPI_SOURCE;
429: lens[imdex] = n;
430: slen += n;
431: count--;
432: }
433: PetscFree(recv_waits);
435: /* move the data into the send scatter */
436: PetscMalloc1(slen+1,&lrows);
437: count = 0;
438: for (i=0; i<nrecvs; i++) {
439: values = rvalues + i*nmax;
440: for (j=0; j<lens[i]; j++) {
441: lrows[count++] = values[j] - base;
442: }
443: }
444: PetscFree(rvalues);
445: PetscFree2(lens,source);
446: PetscFree(owner);
447: PetscFree(sizes);
449: /* fix right hand side if needed */
450: if (x && b) {
451: VecGetArrayRead(x,&xx);
452: VecGetArray(b,&bb);
453: for (i=0; i<slen; i++) {
454: bb[lrows[i]] = diag*xx[lrows[i]];
455: }
456: VecRestoreArrayRead(x,&xx);
457: VecRestoreArray(b,&bb);
458: }
460: /* actually zap the local rows */
461: MatZeroRows(l->A,slen,lrows,0.0,0,0);
462: if (diag != 0.0) {
463: Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
464: PetscInt m = ll->lda, i;
466: for (i=0; i<slen; i++) {
467: ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
468: }
469: }
470: PetscFree(lrows);
472: /* wait on sends */
473: if (nsends) {
474: PetscMalloc1(nsends,&send_status);
475: MPI_Waitall(nsends,send_waits,send_status);
476: PetscFree(send_status);
477: }
478: PetscFree(send_waits);
479: PetscFree(svalues);
480: return(0);
481: }
483: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
484: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
485: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
486: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
488: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
489: {
490: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
494: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
495: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
496: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
497: return(0);
498: }
500: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
501: {
502: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
506: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
507: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
508: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
509: return(0);
510: }
512: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
513: {
514: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
516: PetscScalar zero = 0.0;
519: VecSet(yy,zero);
520: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
521: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
522: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
523: return(0);
524: }
526: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
527: {
528: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
532: VecCopy(yy,zz);
533: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
534: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
535: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
536: return(0);
537: }
539: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
540: {
541: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
542: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
544: PetscInt len,i,n,m = A->rmap->n,radd;
545: PetscScalar *x,zero = 0.0;
548: VecSet(v,zero);
549: VecGetArray(v,&x);
550: VecGetSize(v,&n);
551: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
552: len = PetscMin(a->A->rmap->n,a->A->cmap->n);
553: radd = A->rmap->rstart*m;
554: for (i=0; i<len; i++) {
555: x[i] = aloc->v[radd + i*m + i];
556: }
557: VecRestoreArray(v,&x);
558: return(0);
559: }
561: PetscErrorCode MatDestroy_MPIDense(Mat mat)
562: {
563: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
567: #if defined(PETSC_USE_LOG)
568: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
569: #endif
570: MatStashDestroy_Private(&mat->stash);
571: MatDestroy(&mdn->A);
572: VecDestroy(&mdn->lvec);
573: VecScatterDestroy(&mdn->Mvctx);
575: PetscFree(mat->data);
576: PetscObjectChangeTypeName((PetscObject)mat,0);
578: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);
579: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);
580: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);
581: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);
582: PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);
583: PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);
584: #if defined(PETSC_HAVE_ELEMENTAL)
585: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);
586: #endif
587: PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);
588: PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);
589: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);
590: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);
591: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);
592: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);
593: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);
594: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);
595: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);
596: return(0);
597: }
599: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
600: {
601: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
602: PetscErrorCode ierr;
603: PetscViewerFormat format;
604: int fd;
605: PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k;
606: PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size;
607: PetscScalar *work,*v,*vv;
608: Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data;
611: if (mdn->size == 1) {
612: MatView(mdn->A,viewer);
613: } else {
614: PetscViewerBinaryGetDescriptor(viewer,&fd);
615: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
616: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
618: PetscViewerGetFormat(viewer,&format);
619: if (format == PETSC_VIEWER_NATIVE) {
621: if (!rank) {
622: /* store the matrix as a dense matrix */
623: header[0] = MAT_FILE_CLASSID;
624: header[1] = mat->rmap->N;
625: header[2] = N;
626: header[3] = MATRIX_BINARY_FORMAT_DENSE;
627: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
629: /* get largest work array needed for transposing array */
630: mmax = mat->rmap->n;
631: for (i=1; i<size; i++) {
632: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
633: }
634: PetscMalloc1(mmax*N,&work);
636: /* write out local array, by rows */
637: m = mat->rmap->n;
638: v = a->v;
639: for (j=0; j<N; j++) {
640: for (i=0; i<m; i++) {
641: work[j + i*N] = *v++;
642: }
643: }
644: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
645: /* get largest work array to receive messages from other processes, excludes process zero */
646: mmax = 0;
647: for (i=1; i<size; i++) {
648: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
649: }
650: PetscMalloc1(mmax*N,&vv);
651: for (k = 1; k < size; k++) {
652: v = vv;
653: m = mat->rmap->range[k+1] - mat->rmap->range[k];
654: MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));
656: for (j = 0; j < N; j++) {
657: for (i = 0; i < m; i++) {
658: work[j + i*N] = *v++;
659: }
660: }
661: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
662: }
663: PetscFree(work);
664: PetscFree(vv);
665: } else {
666: MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
667: }
668: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
669: }
670: return(0);
671: }
673: extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
674: #include <petscdraw.h>
675: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
676: {
677: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
678: PetscErrorCode ierr;
679: PetscMPIInt rank = mdn->rank;
680: PetscViewerType vtype;
681: PetscBool iascii,isdraw;
682: PetscViewer sviewer;
683: PetscViewerFormat format;
686: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
687: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
688: if (iascii) {
689: PetscViewerGetType(viewer,&vtype);
690: PetscViewerGetFormat(viewer,&format);
691: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
692: MatInfo info;
693: MatGetInfo(mat,MAT_LOCAL,&info);
694: PetscViewerASCIIPushSynchronized(viewer);
695: PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
696: PetscViewerFlush(viewer);
697: PetscViewerASCIIPopSynchronized(viewer);
698: VecScatterView(mdn->Mvctx,viewer);
699: return(0);
700: } else if (format == PETSC_VIEWER_ASCII_INFO) {
701: return(0);
702: }
703: } else if (isdraw) {
704: PetscDraw draw;
705: PetscBool isnull;
707: PetscViewerDrawGetDraw(viewer,0,&draw);
708: PetscDrawIsNull(draw,&isnull);
709: if (isnull) return(0);
710: }
712: {
713: /* assemble the entire matrix onto first processor. */
714: Mat A;
715: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
716: PetscInt *cols;
717: PetscScalar *vals;
719: MatCreate(PetscObjectComm((PetscObject)mat),&A);
720: if (!rank) {
721: MatSetSizes(A,M,N,M,N);
722: } else {
723: MatSetSizes(A,0,0,M,N);
724: }
725: /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
726: MatSetType(A,MATMPIDENSE);
727: MatMPIDenseSetPreallocation(A,NULL);
728: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
730: /* Copy the matrix ... This isn't the most efficient means,
731: but it's quick for now */
732: A->insertmode = INSERT_VALUES;
734: row = mat->rmap->rstart;
735: m = mdn->A->rmap->n;
736: for (i=0; i<m; i++) {
737: MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
738: MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
739: MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
740: row++;
741: }
743: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
744: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
745: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
746: if (!rank) {
747: PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);
748: MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);
749: }
750: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
751: PetscViewerFlush(viewer);
752: MatDestroy(&A);
753: }
754: return(0);
755: }
757: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
758: {
760: PetscBool iascii,isbinary,isdraw,issocket;
763: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
764: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
765: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
766: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
768: if (iascii || issocket || isdraw) {
769: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
770: } else if (isbinary) {
771: MatView_MPIDense_Binary(mat,viewer);
772: }
773: return(0);
774: }
776: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
777: {
778: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
779: Mat mdn = mat->A;
781: PetscReal isend[5],irecv[5];
784: info->block_size = 1.0;
786: MatGetInfo(mdn,MAT_LOCAL,info);
788: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
789: isend[3] = info->memory; isend[4] = info->mallocs;
790: if (flag == MAT_LOCAL) {
791: info->nz_used = isend[0];
792: info->nz_allocated = isend[1];
793: info->nz_unneeded = isend[2];
794: info->memory = isend[3];
795: info->mallocs = isend[4];
796: } else if (flag == MAT_GLOBAL_MAX) {
797: MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
799: info->nz_used = irecv[0];
800: info->nz_allocated = irecv[1];
801: info->nz_unneeded = irecv[2];
802: info->memory = irecv[3];
803: info->mallocs = irecv[4];
804: } else if (flag == MAT_GLOBAL_SUM) {
805: MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
807: info->nz_used = irecv[0];
808: info->nz_allocated = irecv[1];
809: info->nz_unneeded = irecv[2];
810: info->memory = irecv[3];
811: info->mallocs = irecv[4];
812: }
813: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
814: info->fill_ratio_needed = 0;
815: info->factor_mallocs = 0;
816: return(0);
817: }
819: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
820: {
821: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
825: switch (op) {
826: case MAT_NEW_NONZERO_LOCATIONS:
827: case MAT_NEW_NONZERO_LOCATION_ERR:
828: case MAT_NEW_NONZERO_ALLOCATION_ERR:
829: MatCheckPreallocated(A,1);
830: MatSetOption(a->A,op,flg);
831: break;
832: case MAT_ROW_ORIENTED:
833: MatCheckPreallocated(A,1);
834: a->roworiented = flg;
835: MatSetOption(a->A,op,flg);
836: break;
837: case MAT_NEW_DIAGONALS:
838: case MAT_KEEP_NONZERO_PATTERN:
839: case MAT_USE_HASH_TABLE:
840: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
841: break;
842: case MAT_IGNORE_OFF_PROC_ENTRIES:
843: a->donotstash = flg;
844: break;
845: case MAT_SYMMETRIC:
846: case MAT_STRUCTURALLY_SYMMETRIC:
847: case MAT_HERMITIAN:
848: case MAT_SYMMETRY_ETERNAL:
849: case MAT_IGNORE_LOWER_TRIANGULAR:
850: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
851: break;
852: default:
853: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
854: }
855: return(0);
856: }
859: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
860: {
861: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
862: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
863: const PetscScalar *l,*r;
864: PetscScalar x,*v;
865: PetscErrorCode ierr;
866: PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
869: MatGetLocalSize(A,&s2,&s3);
870: if (ll) {
871: VecGetLocalSize(ll,&s2a);
872: if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
873: VecGetArrayRead(ll,&l);
874: for (i=0; i<m; i++) {
875: x = l[i];
876: v = mat->v + i;
877: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
878: }
879: VecRestoreArrayRead(ll,&l);
880: PetscLogFlops(n*m);
881: }
882: if (rr) {
883: VecGetLocalSize(rr,&s3a);
884: if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
885: VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
886: VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
887: VecGetArrayRead(mdn->lvec,&r);
888: for (i=0; i<n; i++) {
889: x = r[i];
890: v = mat->v + i*m;
891: for (j=0; j<m; j++) (*v++) *= x;
892: }
893: VecRestoreArrayRead(mdn->lvec,&r);
894: PetscLogFlops(n*m);
895: }
896: return(0);
897: }
899: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
900: {
901: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
902: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
904: PetscInt i,j;
905: PetscReal sum = 0.0;
906: PetscScalar *v = mat->v;
909: if (mdn->size == 1) {
910: MatNorm(mdn->A,type,nrm);
911: } else {
912: if (type == NORM_FROBENIUS) {
913: for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
914: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
915: }
916: MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
917: *nrm = PetscSqrtReal(*nrm);
918: PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
919: } else if (type == NORM_1) {
920: PetscReal *tmp,*tmp2;
921: PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);
922: PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));
923: PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));
924: *nrm = 0.0;
925: v = mat->v;
926: for (j=0; j<mdn->A->cmap->n; j++) {
927: for (i=0; i<mdn->A->rmap->n; i++) {
928: tmp[j] += PetscAbsScalar(*v); v++;
929: }
930: }
931: MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
932: for (j=0; j<A->cmap->N; j++) {
933: if (tmp2[j] > *nrm) *nrm = tmp2[j];
934: }
935: PetscFree2(tmp,tmp2);
936: PetscLogFlops(A->cmap->n*A->rmap->n);
937: } else if (type == NORM_INFINITY) { /* max row norm */
938: PetscReal ntemp;
939: MatNorm(mdn->A,type,&ntemp);
940: MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
941: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
942: }
943: return(0);
944: }
946: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
947: {
948: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
949: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
950: Mat B;
951: PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
953: PetscInt j,i;
954: PetscScalar *v;
957: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
958: MatCreate(PetscObjectComm((PetscObject)A),&B);
959: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
960: MatSetType(B,((PetscObject)A)->type_name);
961: MatMPIDenseSetPreallocation(B,NULL);
962: } else {
963: B = *matout;
964: }
966: m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
967: PetscMalloc1(m,&rwork);
968: for (i=0; i<m; i++) rwork[i] = rstart + i;
969: for (j=0; j<n; j++) {
970: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
971: v += m;
972: }
973: PetscFree(rwork);
974: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
975: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
976: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
977: *matout = B;
978: } else {
979: MatHeaderMerge(A,&B);
980: }
981: return(0);
982: }
985: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
986: extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
988: PetscErrorCode MatSetUp_MPIDense(Mat A)
989: {
993: MatMPIDenseSetPreallocation(A,0);
994: return(0);
995: }
997: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
998: {
1000: Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1003: MatAXPY(A->A,alpha,B->A,str);
1004: PetscObjectStateIncrease((PetscObject)Y);
1005: return(0);
1006: }
1008: PetscErrorCode MatConjugate_MPIDense(Mat mat)
1009: {
1010: Mat_MPIDense *a = (Mat_MPIDense*)mat->data;
1014: MatConjugate(a->A);
1015: return(0);
1016: }
1018: PetscErrorCode MatRealPart_MPIDense(Mat A)
1019: {
1020: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1024: MatRealPart(a->A);
1025: return(0);
1026: }
1028: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1029: {
1030: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1034: MatImaginaryPart(a->A);
1035: return(0);
1036: }
1038: extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1039: PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1040: {
1042: PetscInt i,n;
1043: Mat_MPIDense *a = (Mat_MPIDense*) A->data;
1044: PetscReal *work;
1047: MatGetSize(A,NULL,&n);
1048: PetscMalloc1(n,&work);
1049: MatGetColumnNorms_SeqDense(a->A,type,work);
1050: if (type == NORM_2) {
1051: for (i=0; i<n; i++) work[i] *= work[i];
1052: }
1053: if (type == NORM_INFINITY) {
1054: MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
1055: } else {
1056: MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
1057: }
1058: PetscFree(work);
1059: if (type == NORM_2) {
1060: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1061: }
1062: return(0);
1063: }
1065: static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1066: {
1067: Mat_MPIDense *d = (Mat_MPIDense*)x->data;
1069: PetscScalar *a;
1070: PetscInt m,n,i;
1073: MatGetSize(d->A,&m,&n);
1074: MatDenseGetArray(d->A,&a);
1075: for (i=0; i<m*n; i++) {
1076: PetscRandomGetValue(rctx,a+i);
1077: }
1078: MatDenseRestoreArray(d->A,&a);
1079: return(0);
1080: }
1082: extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1084: static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
1085: {
1087: *missing = PETSC_FALSE;
1088: return(0);
1089: }
1091: static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
1092: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
1093: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1095: /* -------------------------------------------------------------------*/
1096: static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1097: MatGetRow_MPIDense,
1098: MatRestoreRow_MPIDense,
1099: MatMult_MPIDense,
1100: /* 4*/ MatMultAdd_MPIDense,
1101: MatMultTranspose_MPIDense,
1102: MatMultTransposeAdd_MPIDense,
1103: 0,
1104: 0,
1105: 0,
1106: /* 10*/ 0,
1107: 0,
1108: 0,
1109: 0,
1110: MatTranspose_MPIDense,
1111: /* 15*/ MatGetInfo_MPIDense,
1112: MatEqual_MPIDense,
1113: MatGetDiagonal_MPIDense,
1114: MatDiagonalScale_MPIDense,
1115: MatNorm_MPIDense,
1116: /* 20*/ MatAssemblyBegin_MPIDense,
1117: MatAssemblyEnd_MPIDense,
1118: MatSetOption_MPIDense,
1119: MatZeroEntries_MPIDense,
1120: /* 24*/ MatZeroRows_MPIDense,
1121: 0,
1122: 0,
1123: 0,
1124: 0,
1125: /* 29*/ MatSetUp_MPIDense,
1126: 0,
1127: 0,
1128: MatGetDiagonalBlock_MPIDense,
1129: 0,
1130: /* 34*/ MatDuplicate_MPIDense,
1131: 0,
1132: 0,
1133: 0,
1134: 0,
1135: /* 39*/ MatAXPY_MPIDense,
1136: MatCreateSubMatrices_MPIDense,
1137: 0,
1138: MatGetValues_MPIDense,
1139: 0,
1140: /* 44*/ 0,
1141: MatScale_MPIDense,
1142: MatShift_Basic,
1143: 0,
1144: 0,
1145: /* 49*/ MatSetRandom_MPIDense,
1146: 0,
1147: 0,
1148: 0,
1149: 0,
1150: /* 54*/ 0,
1151: 0,
1152: 0,
1153: 0,
1154: 0,
1155: /* 59*/ MatCreateSubMatrix_MPIDense,
1156: MatDestroy_MPIDense,
1157: MatView_MPIDense,
1158: 0,
1159: 0,
1160: /* 64*/ 0,
1161: 0,
1162: 0,
1163: 0,
1164: 0,
1165: /* 69*/ 0,
1166: 0,
1167: 0,
1168: 0,
1169: 0,
1170: /* 74*/ 0,
1171: 0,
1172: 0,
1173: 0,
1174: 0,
1175: /* 79*/ 0,
1176: 0,
1177: 0,
1178: 0,
1179: /* 83*/ MatLoad_MPIDense,
1180: 0,
1181: 0,
1182: 0,
1183: 0,
1184: 0,
1185: #if defined(PETSC_HAVE_ELEMENTAL)
1186: /* 89*/ MatMatMult_MPIDense_MPIDense,
1187: MatMatMultSymbolic_MPIDense_MPIDense,
1188: #else
1189: /* 89*/ 0,
1190: 0,
1191: #endif
1192: MatMatMultNumeric_MPIDense,
1193: 0,
1194: 0,
1195: /* 94*/ 0,
1196: MatMatTransposeMult_MPIDense_MPIDense,
1197: MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1198: MatMatTransposeMultNumeric_MPIDense_MPIDense,
1199: 0,
1200: /* 99*/ 0,
1201: 0,
1202: 0,
1203: MatConjugate_MPIDense,
1204: 0,
1205: /*104*/ 0,
1206: MatRealPart_MPIDense,
1207: MatImaginaryPart_MPIDense,
1208: 0,
1209: 0,
1210: /*109*/ 0,
1211: 0,
1212: 0,
1213: 0,
1214: MatMissingDiagonal_MPIDense,
1215: /*114*/ 0,
1216: 0,
1217: 0,
1218: 0,
1219: 0,
1220: /*119*/ 0,
1221: 0,
1222: 0,
1223: 0,
1224: 0,
1225: /*124*/ 0,
1226: MatGetColumnNorms_MPIDense,
1227: 0,
1228: 0,
1229: 0,
1230: /*129*/ 0,
1231: MatTransposeMatMult_MPIDense_MPIDense,
1232: MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1233: MatTransposeMatMultNumeric_MPIDense_MPIDense,
1234: 0,
1235: /*134*/ 0,
1236: 0,
1237: 0,
1238: 0,
1239: 0,
1240: /*139*/ 0,
1241: 0,
1242: 0,
1243: 0,
1244: 0,
1245: /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
1246: };
1248: PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1249: {
1250: Mat_MPIDense *a;
1254: if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
1255: mat->preallocated = PETSC_TRUE;
1256: /* Note: For now, when data is specified above, this assumes the user correctly
1257: allocates the local dense storage space. We should add error checking. */
1259: a = (Mat_MPIDense*)mat->data;
1260: PetscLayoutSetUp(mat->rmap);
1261: PetscLayoutSetUp(mat->cmap);
1262: a->nvec = mat->cmap->n;
1264: MatCreate(PETSC_COMM_SELF,&a->A);
1265: MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1266: MatSetType(a->A,MATSEQDENSE);
1267: MatSeqDenseSetPreallocation(a->A,data);
1268: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1269: return(0);
1270: }
1272: #if defined(PETSC_HAVE_ELEMENTAL)
1273: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1274: {
1275: Mat mat_elemental;
1277: PetscScalar *v;
1278: PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1279:
1281: if (reuse == MAT_REUSE_MATRIX) {
1282: mat_elemental = *newmat;
1283: MatZeroEntries(*newmat);
1284: } else {
1285: MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
1286: MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);
1287: MatSetType(mat_elemental,MATELEMENTAL);
1288: MatSetUp(mat_elemental);
1289: MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);
1290: }
1292: PetscMalloc2(m,&rows,N,&cols);
1293: for (i=0; i<N; i++) cols[i] = i;
1294: for (i=0; i<m; i++) rows[i] = rstart + i;
1295:
1296: /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1297: MatDenseGetArray(A,&v);
1298: MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);
1299: MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
1300: MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
1301: MatDenseRestoreArray(A,&v);
1302: PetscFree2(rows,cols);
1304: if (reuse == MAT_INPLACE_MATRIX) {
1305: MatHeaderReplace(A,&mat_elemental);
1306: } else {
1307: *newmat = mat_elemental;
1308: }
1309: return(0);
1310: }
1311: #endif
1313: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1314: {
1315: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
1319: MatDenseGetColumn(mat->A,col,vals);
1320: return(0);
1321: }
1323: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1324: {
1325: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
1329: MatDenseRestoreColumn(mat->A,vals);
1330: return(0);
1331: }
1333: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1334: {
1336: Mat_MPIDense *mat;
1337: PetscInt m,nloc,N;
1340: MatGetSize(inmat,&m,&N);
1341: MatGetLocalSize(inmat,NULL,&nloc);
1342: if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1343: PetscInt sum;
1345: if (n == PETSC_DECIDE) {
1346: PetscSplitOwnership(comm,&n,&N);
1347: }
1348: /* Check sum(n) = N */
1349: MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
1350: if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1352: MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);
1353: }
1355: /* numeric phase */
1356: mat = (Mat_MPIDense*)(*outmat)->data;
1357: MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);
1358: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
1359: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
1360: return(0);
1361: }
1363: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1364: {
1365: Mat_MPIDense *a;
1369: PetscNewLog(mat,&a);
1370: mat->data = (void*)a;
1371: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1373: mat->insertmode = NOT_SET_VALUES;
1374: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);
1375: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);
1377: /* build cache for off array entries formed */
1378: a->donotstash = PETSC_FALSE;
1380: MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);
1382: /* stuff used for matrix vector multiply */
1383: a->lvec = 0;
1384: a->Mvctx = 0;
1385: a->roworiented = PETSC_TRUE;
1387: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);
1388: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);
1389: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);
1390: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);
1391: PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);
1392: PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);
1393: #if defined(PETSC_HAVE_ELEMENTAL)
1394: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);
1395: #endif
1396: PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);
1397: PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);
1398: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);
1399: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);
1401: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);
1402: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);
1403: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);
1404: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);
1405: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);
1406: PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1407: return(0);
1408: }
1410: /*MC
1411: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1413: This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1414: and MATMPIDENSE otherwise.
1416: Options Database Keys:
1417: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1419: Level: beginner
1422: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1423: M*/
1425: /*@C
1426: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1428: Not collective
1430: Input Parameters:
1431: . B - the matrix
1432: - data - optional location of matrix data. Set data=NULL for PETSc
1433: to control all matrix memory allocation.
1435: Notes:
1436: The dense format is fully compatible with standard Fortran 77
1437: storage by columns.
1439: The data input variable is intended primarily for Fortran programmers
1440: who wish to allocate their own matrix memory space. Most users should
1441: set data=NULL.
1443: Level: intermediate
1445: .keywords: matrix,dense, parallel
1447: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1448: @*/
1449: PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1450: {
1454: PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
1455: return(0);
1456: }
1458: /*@
1459: MatDensePlaceArray - Allows one to replace the array in a dense array with an
1460: array provided by the user. This is useful to avoid copying an array
1461: into a matrix
1463: Not Collective
1465: Input Parameters:
1466: + mat - the matrix
1467: - array - the array in column major order
1469: Notes:
1470: You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1471: freed when the matrix is destroyed.
1473: Level: developer
1475: .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1477: @*/
1478: PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[])
1479: {
1482: PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
1483: PetscObjectStateIncrease((PetscObject)mat);
1484: return(0);
1485: }
1487: /*@
1488: MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
1490: Not Collective
1492: Input Parameters:
1493: . mat - the matrix
1495: Notes:
1496: You can only call this after a call to MatDensePlaceArray()
1498: Level: developer
1500: .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1502: @*/
1503: PetscErrorCode MatDenseResetArray(Mat mat)
1504: {
1507: PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
1508: PetscObjectStateIncrease((PetscObject)mat);
1509: return(0);
1510: }
1512: /*@C
1513: MatCreateDense - Creates a parallel matrix in dense format.
1515: Collective on MPI_Comm
1517: Input Parameters:
1518: + comm - MPI communicator
1519: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1520: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1521: . M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1522: . N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1523: - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1524: to control all matrix memory allocation.
1526: Output Parameter:
1527: . A - the matrix
1529: Notes:
1530: The dense format is fully compatible with standard Fortran 77
1531: storage by columns.
1533: The data input variable is intended primarily for Fortran programmers
1534: who wish to allocate their own matrix memory space. Most users should
1535: set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1537: The user MUST specify either the local or global matrix dimensions
1538: (possibly both).
1540: Level: intermediate
1542: .keywords: matrix,dense, parallel
1544: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1545: @*/
1546: PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1547: {
1549: PetscMPIInt size;
1552: MatCreate(comm,A);
1553: MatSetSizes(*A,m,n,M,N);
1554: MPI_Comm_size(comm,&size);
1555: if (size > 1) {
1556: MatSetType(*A,MATMPIDENSE);
1557: MatMPIDenseSetPreallocation(*A,data);
1558: if (data) { /* user provided data array, so no need to assemble */
1559: MatSetUpMultiply_MPIDense(*A);
1560: (*A)->assembled = PETSC_TRUE;
1561: }
1562: } else {
1563: MatSetType(*A,MATSEQDENSE);
1564: MatSeqDenseSetPreallocation(*A,data);
1565: }
1566: return(0);
1567: }
1569: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1570: {
1571: Mat mat;
1572: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1576: *newmat = 0;
1577: MatCreate(PetscObjectComm((PetscObject)A),&mat);
1578: MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1579: MatSetType(mat,((PetscObject)A)->type_name);
1580: a = (Mat_MPIDense*)mat->data;
1582: mat->factortype = A->factortype;
1583: mat->assembled = PETSC_TRUE;
1584: mat->preallocated = PETSC_TRUE;
1586: a->size = oldmat->size;
1587: a->rank = oldmat->rank;
1588: mat->insertmode = NOT_SET_VALUES;
1589: a->nvec = oldmat->nvec;
1590: a->donotstash = oldmat->donotstash;
1592: PetscLayoutReference(A->rmap,&mat->rmap);
1593: PetscLayoutReference(A->cmap,&mat->cmap);
1595: MatSetUpMultiply_MPIDense(mat);
1596: MatDuplicate(oldmat->A,cpvalues,&a->A);
1597: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1599: *newmat = mat;
1600: return(0);
1601: }
1603: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1604: {
1606: PetscMPIInt rank,size;
1607: const PetscInt *rowners;
1608: PetscInt i,m,n,nz,j,mMax;
1609: PetscScalar *array,*vals,*vals_ptr;
1610: Mat_MPIDense *a = (Mat_MPIDense*)newmat->data;
1613: MPI_Comm_rank(comm,&rank);
1614: MPI_Comm_size(comm,&size);
1616: /* determine ownership of rows and columns */
1617: m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1618: n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1620: MatSetSizes(newmat,m,n,M,N);
1621: if (!a->A) {
1622: MatMPIDenseSetPreallocation(newmat,NULL);
1623: }
1624: MatDenseGetArray(newmat,&array);
1625: MatGetLocalSize(newmat,&m,NULL);
1626: MatGetOwnershipRanges(newmat,&rowners);
1627: MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);
1628: if (!rank) {
1629: PetscMalloc1(mMax*N,&vals);
1631: /* read in my part of the matrix numerical values */
1632: PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1634: /* insert into matrix-by row (this is why cannot directly read into array */
1635: vals_ptr = vals;
1636: for (i=0; i<m; i++) {
1637: for (j=0; j<N; j++) {
1638: array[i + j*m] = *vals_ptr++;
1639: }
1640: }
1642: /* read in other processors and ship out */
1643: for (i=1; i<size; i++) {
1644: nz = (rowners[i+1] - rowners[i])*N;
1645: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1646: MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);
1647: }
1648: } else {
1649: /* receive numeric values */
1650: PetscMalloc1(m*N,&vals);
1652: /* receive message of values*/
1653: MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);
1655: /* insert into matrix-by row (this is why cannot directly read into array */
1656: vals_ptr = vals;
1657: for (i=0; i<m; i++) {
1658: for (j=0; j<N; j++) {
1659: array[i + j*m] = *vals_ptr++;
1660: }
1661: }
1662: }
1663: MatDenseRestoreArray(newmat,&array);
1664: PetscFree(vals);
1665: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1666: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1667: return(0);
1668: }
1670: PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1671: {
1672: Mat_MPIDense *a;
1673: PetscScalar *vals,*svals;
1674: MPI_Comm comm;
1675: MPI_Status status;
1676: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1677: PetscInt header[4],*rowlengths = 0,M,N,*cols;
1678: PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1679: PetscInt i,nz,j,rstart,rend;
1680: int fd;
1681: PetscBool isbinary;
1685: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1686: if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
1688: /* force binary viewer to load .info file if it has not yet done so */
1689: PetscViewerSetUp(viewer);
1690: PetscObjectGetComm((PetscObject)viewer,&comm);
1691: MPI_Comm_size(comm,&size);
1692: MPI_Comm_rank(comm,&rank);
1693: PetscViewerBinaryGetDescriptor(viewer,&fd);
1694: if (!rank) {
1695: PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
1696: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1697: }
1698: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1699: M = header[1]; N = header[2]; nz = header[3];
1701: /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1702: if (newmat->rmap->N < 0) newmat->rmap->N = M;
1703: if (newmat->cmap->N < 0) newmat->cmap->N = N;
1705: if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1706: if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1708: /*
1709: Handle case where matrix is stored on disk as a dense matrix
1710: */
1711: if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1712: MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1713: return(0);
1714: }
1716: /* determine ownership of all rows */
1717: if (newmat->rmap->n < 0) {
1718: PetscMPIIntCast(M/size + ((M % size) > rank),&m);
1719: } else {
1720: PetscMPIIntCast(newmat->rmap->n,&m);
1721: }
1722: if (newmat->cmap->n < 0) {
1723: n = PETSC_DECIDE;
1724: } else {
1725: PetscMPIIntCast(newmat->cmap->n,&n);
1726: }
1728: PetscMalloc1(size+2,&rowners);
1729: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1730: rowners[0] = 0;
1731: for (i=2; i<=size; i++) {
1732: rowners[i] += rowners[i-1];
1733: }
1734: rstart = rowners[rank];
1735: rend = rowners[rank+1];
1737: /* distribute row lengths to all processors */
1738: PetscMalloc1(rend-rstart,&ourlens);
1739: if (!rank) {
1740: PetscMalloc1(M,&rowlengths);
1741: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1742: PetscMalloc1(size,&sndcounts);
1743: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1744: MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1745: PetscFree(sndcounts);
1746: } else {
1747: MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1748: }
1750: if (!rank) {
1751: /* calculate the number of nonzeros on each processor */
1752: PetscMalloc1(size,&procsnz);
1753: PetscMemzero(procsnz,size*sizeof(PetscInt));
1754: for (i=0; i<size; i++) {
1755: for (j=rowners[i]; j< rowners[i+1]; j++) {
1756: procsnz[i] += rowlengths[j];
1757: }
1758: }
1759: PetscFree(rowlengths);
1761: /* determine max buffer needed and allocate it */
1762: maxnz = 0;
1763: for (i=0; i<size; i++) {
1764: maxnz = PetscMax(maxnz,procsnz[i]);
1765: }
1766: PetscMalloc1(maxnz,&cols);
1768: /* read in my part of the matrix column indices */
1769: nz = procsnz[0];
1770: PetscMalloc1(nz,&mycols);
1771: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1773: /* read in every one elses and ship off */
1774: for (i=1; i<size; i++) {
1775: nz = procsnz[i];
1776: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1777: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1778: }
1779: PetscFree(cols);
1780: } else {
1781: /* determine buffer space needed for message */
1782: nz = 0;
1783: for (i=0; i<m; i++) {
1784: nz += ourlens[i];
1785: }
1786: PetscMalloc1(nz+1,&mycols);
1788: /* receive message of column indices*/
1789: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1790: MPI_Get_count(&status,MPIU_INT,&maxnz);
1791: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1792: }
1794: MatSetSizes(newmat,m,n,M,N);
1795: a = (Mat_MPIDense*)newmat->data;
1796: if (!a->A) {
1797: MatMPIDenseSetPreallocation(newmat,NULL);
1798: }
1800: if (!rank) {
1801: PetscMalloc1(maxnz,&vals);
1803: /* read in my part of the matrix numerical values */
1804: nz = procsnz[0];
1805: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1807: /* insert into matrix */
1808: jj = rstart;
1809: smycols = mycols;
1810: svals = vals;
1811: for (i=0; i<m; i++) {
1812: MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1813: smycols += ourlens[i];
1814: svals += ourlens[i];
1815: jj++;
1816: }
1818: /* read in other processors and ship out */
1819: for (i=1; i<size; i++) {
1820: nz = procsnz[i];
1821: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1822: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
1823: }
1824: PetscFree(procsnz);
1825: } else {
1826: /* receive numeric values */
1827: PetscMalloc1(nz+1,&vals);
1829: /* receive message of values*/
1830: MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
1831: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1832: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1834: /* insert into matrix */
1835: jj = rstart;
1836: smycols = mycols;
1837: svals = vals;
1838: for (i=0; i<m; i++) {
1839: MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1840: smycols += ourlens[i];
1841: svals += ourlens[i];
1842: jj++;
1843: }
1844: }
1845: PetscFree(ourlens);
1846: PetscFree(vals);
1847: PetscFree(mycols);
1848: PetscFree(rowners);
1850: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1851: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1852: return(0);
1853: }
1855: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
1856: {
1857: Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1858: Mat a,b;
1859: PetscBool flg;
1863: a = matA->A;
1864: b = matB->A;
1865: MatEqual(a,b,&flg);
1866: MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1867: return(0);
1868: }
1870: PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1871: {
1872: PetscErrorCode ierr;
1873: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1874: Mat_TransMatMultDense *atb = a->atbdense;
1877: PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);
1878: (atb->destroy)(A);
1879: PetscFree(atb);
1880: return(0);
1881: }
1883: PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1884: {
1885: PetscErrorCode ierr;
1886: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1887: Mat_MatTransMultDense *abt = a->abtdense;
1890: PetscFree2(abt->buf[0],abt->buf[1]);
1891: PetscFree2(abt->recvcounts,abt->recvdispls);
1892: (abt->destroy)(A);
1893: PetscFree(abt);
1894: return(0);
1895: }
1897: PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1898: {
1899: Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1900: Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1901: Mat_TransMatMultDense *atb = c->atbdense;
1903: MPI_Comm comm;
1904: PetscMPIInt rank,size,*recvcounts=atb->recvcounts;
1905: PetscScalar *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1906: PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1907: PetscScalar _DOne=1.0,_DZero=0.0;
1908: PetscBLASInt am,an,bn,aN;
1909: const PetscInt *ranges;
1912: PetscObjectGetComm((PetscObject)A,&comm);
1913: MPI_Comm_rank(comm,&rank);
1914: MPI_Comm_size(comm,&size);
1916: /* compute atbarray = aseq^T * bseq */
1917: PetscBLASIntCast(a->A->cmap->n,&an);
1918: PetscBLASIntCast(b->A->cmap->n,&bn);
1919: PetscBLASIntCast(a->A->rmap->n,&am);
1920: PetscBLASIntCast(A->cmap->N,&aN);
1921: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1923: MatGetOwnershipRanges(C,&ranges);
1924: for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1926: /* arrange atbarray into sendbuf */
1927: k = 0;
1928: for (proc=0; proc<size; proc++) {
1929: for (j=0; j<cN; j++) {
1930: for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1931: }
1932: }
1933: /* sum all atbarray to local values of C */
1934: MatDenseGetArray(c->A,&carray);
1935: MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);
1936: MatDenseRestoreArray(c->A,&carray);
1937: return(0);
1938: }
1940: PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1941: {
1942: PetscErrorCode ierr;
1943: Mat Cdense;
1944: MPI_Comm comm;
1945: PetscMPIInt size;
1946: PetscInt cm=A->cmap->n,cM,cN=B->cmap->N;
1947: Mat_MPIDense *c;
1948: Mat_TransMatMultDense *atb;
1951: PetscObjectGetComm((PetscObject)A,&comm);
1952: if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1953: SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1954: }
1956: /* create matrix product Cdense */
1957: MatCreate(comm,&Cdense);
1958: MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
1959: MatSetType(Cdense,MATMPIDENSE);
1960: MatMPIDenseSetPreallocation(Cdense,NULL);
1961: MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);
1962: MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);
1963: *C = Cdense;
1965: /* create data structure for reuse Cdense */
1966: MPI_Comm_size(comm,&size);
1967: PetscNew(&atb);
1968: cM = Cdense->rmap->N;
1969: PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);
1970:
1971: c = (Mat_MPIDense*)Cdense->data;
1972: c->atbdense = atb;
1973: atb->destroy = Cdense->ops->destroy;
1974: Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1975: return(0);
1976: }
1978: PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1979: {
1983: if (scall == MAT_INITIAL_MATRIX) {
1984: MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
1985: }
1986: MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);
1987: return(0);
1988: }
1990: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C)
1991: {
1992: PetscErrorCode ierr;
1993: Mat Cdense;
1994: MPI_Comm comm;
1995: PetscMPIInt i, size;
1996: PetscInt maxRows, bufsiz;
1997: Mat_MPIDense *c;
1998: PetscMPIInt tag;
1999: const char *algTypes[2] = {"allgatherv","cyclic"};
2000: PetscInt alg, nalg = 2;
2001: Mat_MatTransMultDense *abt;
2004: PetscObjectGetComm((PetscObject)A,&comm);
2005: alg = 0; /* default is allgatherv */
2006: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");
2007: PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);
2008: PetscOptionsEnd();
2009: if (A->cmap->N != B->cmap->N) {
2010: SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
2011: }
2013: /* create matrix product Cdense */
2014: MatCreate(comm,&Cdense);
2015: MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);
2016: MatSetType(Cdense,MATMPIDENSE);
2017: MatMPIDenseSetPreallocation(Cdense,NULL);
2018: PetscObjectGetNewTag((PetscObject)Cdense, &tag);
2019: MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);
2020: MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);
2021: *C = Cdense;
2023: /* create data structure for reuse Cdense */
2024: MPI_Comm_size(comm,&size);
2025: PetscNew(&abt);
2026: abt->tag = tag;
2027: abt->alg = alg;
2028: switch (alg) {
2029: case 1:
2030: for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2031: bufsiz = A->cmap->N * maxRows;
2032: PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));
2033: break;
2034: default:
2035: PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));
2036: PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));
2037: for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2038: for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2039: break;
2040: }
2042: c = (Mat_MPIDense*)Cdense->data;
2043: c->abtdense = abt;
2044: abt->destroy = Cdense->ops->destroy;
2045: Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2046: return(0);
2047: }
2049: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2050: {
2051: Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2052: Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2053: Mat_SeqDense *cseq=(Mat_SeqDense*)(c->A)->data;
2054: Mat_MatTransMultDense *abt = c->abtdense;
2056: MPI_Comm comm;
2057: PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2058: PetscScalar *sendbuf, *recvbuf=0, *carray;
2059: PetscInt i,cK=A->cmap->N,k,j,bn;
2060: PetscScalar _DOne=1.0,_DZero=0.0;
2061: PetscBLASInt cm, cn, ck;
2062: MPI_Request reqs[2];
2063: const PetscInt *ranges;
2066: PetscObjectGetComm((PetscObject)A,&comm);
2067: MPI_Comm_rank(comm,&rank);
2068: MPI_Comm_size(comm,&size);
2070: MatGetOwnershipRanges(B,&ranges);
2071: bn = B->rmap->n;
2072: if (bseq->lda == bn) {
2073: sendbuf = bseq->v;
2074: } else {
2075: sendbuf = abt->buf[0];
2076: for (k = 0, i = 0; i < cK; i++) {
2077: for (j = 0; j < bn; j++, k++) {
2078: sendbuf[k] = bseq->v[i * bseq->lda + j];
2079: }
2080: }
2081: }
2082: if (size > 1) {
2083: sendto = (rank + size - 1) % size;
2084: recvfrom = (rank + size + 1) % size;
2085: } else {
2086: sendto = recvfrom = 0;
2087: }
2088: PetscBLASIntCast(cK,&ck);
2089: PetscBLASIntCast(c->A->rmap->n,&cm);
2090: recvisfrom = rank;
2091: for (i = 0; i < size; i++) {
2092: /* we have finished receiving in sending, bufs can be read/modified */
2093: PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2094: PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2096: if (nextrecvisfrom != rank) {
2097: /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2098: sendsiz = cK * bn;
2099: recvsiz = cK * nextbn;
2100: recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2101: MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);
2102: MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);
2103: }
2105: /* local aseq * sendbuf^T */
2106: PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);
2107: carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
2108: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));
2110: if (nextrecvisfrom != rank) {
2111: /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2112: MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
2113: }
2114: bn = nextbn;
2115: recvisfrom = nextrecvisfrom;
2116: sendbuf = recvbuf;
2117: }
2118: return(0);
2119: }
2121: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2122: {
2123: Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2124: Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2125: Mat_SeqDense *cseq=(Mat_SeqDense*)(c->A)->data;
2126: Mat_MatTransMultDense *abt = c->abtdense;
2128: MPI_Comm comm;
2129: PetscMPIInt rank,size;
2130: PetscScalar *sendbuf, *recvbuf;
2131: PetscInt i,cK=A->cmap->N,k,j,bn;
2132: PetscScalar _DOne=1.0,_DZero=0.0;
2133: PetscBLASInt cm, cn, ck;
2136: PetscObjectGetComm((PetscObject)A,&comm);
2137: MPI_Comm_rank(comm,&rank);
2138: MPI_Comm_size(comm,&size);
2140: /* copy transpose of B into buf[0] */
2141: bn = B->rmap->n;
2142: sendbuf = abt->buf[0];
2143: recvbuf = abt->buf[1];
2144: for (k = 0, j = 0; j < bn; j++) {
2145: for (i = 0; i < cK; i++, k++) {
2146: sendbuf[k] = bseq->v[i * bseq->lda + j];
2147: }
2148: }
2149: MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);
2150: PetscBLASIntCast(cK,&ck);
2151: PetscBLASIntCast(c->A->rmap->n,&cm);
2152: PetscBLASIntCast(c->A->cmap->n,&cn);
2153: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
2154: return(0);
2155: }
2157: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2158: {
2159: Mat_MPIDense *c=(Mat_MPIDense*)C->data;
2160: Mat_MatTransMultDense *abt = c->abtdense;
2164: switch (abt->alg) {
2165: case 1:
2166: MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);
2167: break;
2168: default:
2169: MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);
2170: break;
2171: }
2172: return(0);
2173: }
2175: static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C)
2176: {
2180: if (scall == MAT_INITIAL_MATRIX) {
2181: MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
2182: }
2183: MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);
2184: return(0);
2185: }
2187: PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2188: {
2189: PetscErrorCode ierr;
2190: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
2191: Mat_MatMultDense *ab = a->abdense;
2194: MatDestroy(&ab->Ce);
2195: MatDestroy(&ab->Ae);
2196: MatDestroy(&ab->Be);
2198: (ab->destroy)(A);
2199: PetscFree(ab);
2200: return(0);
2201: }
2203: #if defined(PETSC_HAVE_ELEMENTAL)
2204: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2205: {
2206: PetscErrorCode ierr;
2207: Mat_MPIDense *c=(Mat_MPIDense*)C->data;
2208: Mat_MatMultDense *ab=c->abdense;
2211: MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);
2212: MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);
2213: MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);
2214: MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);
2215: return(0);
2216: }
2218: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2219: {
2220: PetscErrorCode ierr;
2221: Mat Ae,Be,Ce;
2222: Mat_MPIDense *c;
2223: Mat_MatMultDense *ab;
2226: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2227: SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2228: }
2230: /* convert A and B to Elemental matrices Ae and Be */
2231: MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);
2232: MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);
2234: /* Ce = Ae*Be */
2235: MatMatMultSymbolic(Ae,Be,fill,&Ce);
2236: MatMatMultNumeric(Ae,Be,Ce);
2237:
2238: /* convert Ce to C */
2239: MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);
2241: /* create data structure for reuse Cdense */
2242: PetscNew(&ab);
2243: c = (Mat_MPIDense*)(*C)->data;
2244: c->abdense = ab;
2246: ab->Ae = Ae;
2247: ab->Be = Be;
2248: ab->Ce = Ce;
2249: ab->destroy = (*C)->ops->destroy;
2250: (*C)->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2251: (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2252: return(0);
2253: }
2255: PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2256: {
2260: if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */
2261: PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);
2262: MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
2263: PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);
2264: } else {
2265: PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);
2266: MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);
2267: PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);
2268: }
2269: return(0);
2270: }
2271: #endif