Actual source code: mpidense.c
petsc-3.8.4 2018-03-24
2: /*
3:    Basic functions for parallel dense matrices.
4: */
7: #include <../src/mat/impls/dense/mpi/mpidense.h>
8: #include <../src/mat/impls/aij/mpi/mpiaij.h>
9: #include <petscblaslapack.h>
11: /*@
13:       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
14:               matrix that represents the locally owned part of the operator. For a sequential dense matrix it returns the matrix itself.
16: Input Parameter:
17: . A - the Seq or MPI dense matrix
19: Output Parameter:
20: . B - the inner matrix
22: Level: intermediate
24: @*/
25: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26: {
27: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
29: PetscBool flg;
32: PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
33: if (flg) *B = mat->A;
34: else *B = A;
35: return(0);
36: }
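/*
   A minimal usage sketch (not part of the original source): for an assembled MATMPIDENSE
   matrix A, obtain the sequential matrix that stores the locally owned rows (local rows by
   global columns) and access its column-major array directly.  Variable names are illustrative.

     Mat         Aloc;
     PetscScalar *vals;

     MatDenseGetLocalMatrix(A,&Aloc);
     MatDenseGetArray(Aloc,&vals);
     MatDenseRestoreArray(Aloc,&vals);
*/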
38: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
39: {
40: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
42: PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
45: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
46: lrow = row - rstart;
47: MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
48: return(0);
49: }
51: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
52: {
56: if (idx) {PetscFree(*idx);}
57: if (v) {PetscFree(*v);}
58: return(0);
59: }
61: PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
62: {
63: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
65: PetscInt m = A->rmap->n,rstart = A->rmap->rstart;
66: PetscScalar *array;
67: MPI_Comm comm;
68: Mat B;
71: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
73: PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
74: if (!B) {
75: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
76: MatCreate(comm,&B);
77: MatSetSizes(B,m,m,m,m);
78: MatSetType(B,((PetscObject)mdn->A)->type_name);
79: MatDenseGetArray(mdn->A,&array);
80: MatSeqDenseSetPreallocation(B,array+m*rstart);
81: MatDenseRestoreArray(mdn->A,&array);
82: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
83: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
84: PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
85: *a = B;
86: MatDestroy(&B);
87: } else *a = B;
88: return(0);
89: }
91: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
92: {
93: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
95: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
96: PetscBool roworiented = A->roworiented;
99: for (i=0; i<m; i++) {
100: if (idxm[i] < 0) continue;
101: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
102: if (idxm[i] >= rstart && idxm[i] < rend) {
103: row = idxm[i] - rstart;
104: if (roworiented) {
105: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
106: } else {
107: for (j=0; j<n; j++) {
108: if (idxn[j] < 0) continue;
109: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
110: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
111: }
112: }
113: } else if (!A->donotstash) {
114: mat->assembled = PETSC_FALSE;
115: if (roworiented) {
116: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
117: } else {
118: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
119: }
120: }
121: }
122: return(0);
123: }
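/*
   Added usage sketch (not part of the original source): entries for rows owned by this
   process are written straight into the local sequential block above, while entries for rows
   owned elsewhere are kept in the stash and communicated during assembly (unless
   MAT_IGNORE_OFF_PROC_ENTRIES was set).  Illustrative caller-side code, with A an already
   preallocated MATMPIDENSE matrix; every process may use the same global indices, and
   off-process contributions are summed at assembly time.

     PetscInt    row = 0,col = 0;
     PetscScalar v   = 1.0;

     MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/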
125: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126: {
127: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
129: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
132: for (i=0; i<m; i++) {
133: if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135: if (idxm[i] >= rstart && idxm[i] < rend) {
136: row = idxm[i] - rstart;
137: for (j=0; j<n; j++) {
138: if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
141: }
142: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
143: }
144: return(0);
145: }
147: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
148: {
149: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
153: MatDenseGetArray(a->A,array);
154: return(0);
155: }
157: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
158: {
159: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
163: MatDensePlaceArray(a->A,array);
164: return(0);
165: }
167: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
168: {
169: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
173: MatDenseResetArray(a->A);
174: return(0);
175: }
177: static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
178: {
179: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
180: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
182: PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
183: const PetscInt *irow,*icol;
184: PetscScalar *av,*bv,*v = lmat->v;
185: Mat newmat;
186: IS iscol_local;
189: ISAllGather(iscol,&iscol_local);
190: ISGetIndices(isrow,&irow);
191: ISGetIndices(iscol_local,&icol);
192: ISGetLocalSize(isrow,&nrows);
193: ISGetLocalSize(iscol,&ncols);
194: ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */
196:   /* No parallel redistribution currently supported! Should really check each index set
197:      to confirm that it is OK.  ... Currently supports only submatrices with the same row
198:      partitioning as the original matrix! */
200: MatGetLocalSize(A,&nlrows,&nlcols);
201: MatGetOwnershipRange(A,&rstart,&rend);
203: /* Check submatrix call */
204: if (scall == MAT_REUSE_MATRIX) {
205: /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
206:     /* Really need to test row and column sizes! */
207: newmat = *B;
208: } else {
209: /* Create and fill new matrix */
210: MatCreate(PetscObjectComm((PetscObject)A),&newmat);
211: MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
212: MatSetType(newmat,((PetscObject)A)->type_name);
213: MatMPIDenseSetPreallocation(newmat,NULL);
214: }
216: /* Now extract the data pointers and do the copy, column at a time */
217: newmatd = (Mat_MPIDense*)newmat->data;
218: bv = ((Mat_SeqDense*)newmatd->A->data)->v;
220: for (i=0; i<Ncols; i++) {
221: av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
222: for (j=0; j<nrows; j++) {
223: *bv++ = av[irow[j] - rstart];
224: }
225: }
227: /* Assemble the matrices so that the correct flags are set */
228: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
229: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
231: /* Free work space */
232: ISRestoreIndices(isrow,&irow);
233: ISRestoreIndices(iscol_local,&icol);
234: ISDestroy(&iscol_local);
235: *B = newmat;
236: return(0);
237: }
239: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
240: {
241: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
245: MatDenseRestoreArray(a->A,array);
246: return(0);
247: }
249: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
250: {
251: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
252: MPI_Comm comm;
254: PetscInt nstash,reallocs;
255: InsertMode addv;
258: PetscObjectGetComm((PetscObject)mat,&comm);
259: /* make sure all processors are either in INSERTMODE or ADDMODE */
260: MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);
261: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
262: mat->insertmode = addv; /* in case this processor had no cache */
264: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
265: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
266: PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
267: return(0);
268: }
270: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
271: {
272: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
274: PetscInt i,*row,*col,flg,j,rstart,ncols;
275: PetscMPIInt n;
276: PetscScalar *val;
279: /* wait on receives */
280: while (1) {
281: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
282: if (!flg) break;
284: for (i=0; i<n;) {
285: /* Now identify the consecutive vals belonging to the same row */
286: for (j=i,rstart=row[j]; j<n; j++) {
287: if (row[j] != rstart) break;
288: }
289: if (j < n) ncols = j-i;
290: else ncols = n-i;
291: /* Now assemble all these values with a single function call */
292: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
293: i = j;
294: }
295: }
296: MatStashScatterEnd_Private(&mat->stash);
298: MatAssemblyBegin(mdn->A,mode);
299: MatAssemblyEnd(mdn->A,mode);
301: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
302: MatSetUpMultiply_MPIDense(mat);
303: }
304: return(0);
305: }
307: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
308: {
310: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
313: MatZeroEntries(l->A);
314: return(0);
315: }
317: /* the code does not do the diagonal entries correctly unless the
318:    matrix is square and the column and row ownerships are identical.
319: This is a BUG. The only way to fix it seems to be to access
320: mdn->A and mdn->B directly and not through the MatZeroRows()
321: routine.
322: */
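/*
   Added note (not in the original source): the routine below first routes every requested
   global row index to its owning process (per-destination counts, PetscMaxSum() to exchange
   message counts and lengths, then MPI_Irecv()/MPI_Isend() of the index lists).  Each process
   converts the indices it receives to local row numbers, optionally fixes the right-hand side
   (b = diag*x on those rows), zeroes the rows in its sequential block, and finally writes diag
   onto the diagonal entries.
*/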
323: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
324: {
325: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
326: PetscErrorCode ierr;
327: PetscInt i,*owners = A->rmap->range;
328: PetscInt *sizes,j,idx,nsends;
329: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
330: PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
331: PetscInt *lens,*lrows,*values;
332: PetscMPIInt n,imdex,rank = l->rank,size = l->size;
333: MPI_Comm comm;
334: MPI_Request *send_waits,*recv_waits;
335: MPI_Status recv_status,*send_status;
336: PetscBool found;
337: const PetscScalar *xx;
338: PetscScalar *bb;
341: PetscObjectGetComm((PetscObject)A,&comm);
342: if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
343: if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
344: /* first count number of contributors to each processor */
345: PetscCalloc1(2*size,&sizes);
346: PetscMalloc1(N+1,&owner); /* see note*/
347: for (i=0; i<N; i++) {
348: idx = rows[i];
349: found = PETSC_FALSE;
350: for (j=0; j<size; j++) {
351: if (idx >= owners[j] && idx < owners[j+1]) {
352: sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
353: }
354: }
355: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
356: }
357: nsends = 0;
358: for (i=0; i<size; i++) nsends += sizes[2*i+1];
360: /* inform other processors of number of messages and max length*/
361: PetscMaxSum(comm,sizes,&nmax,&nrecvs);
363: /* post receives: */
364: PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);
365: PetscMalloc1(nrecvs+1,&recv_waits);
366: for (i=0; i<nrecvs; i++) {
367: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
368: }
370: /* do sends:
371: 1) starts[i] gives the starting index in svalues for stuff going to
372: the ith processor
373: */
374: PetscMalloc1(N+1,&svalues);
375: PetscMalloc1(nsends+1,&send_waits);
376: PetscMalloc1(size+1,&starts);
378: starts[0] = 0;
379: for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
380: for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
382: starts[0] = 0;
383: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
384: count = 0;
385: for (i=0; i<size; i++) {
386: if (sizes[2*i+1]) {
387: MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
388: }
389: }
390: PetscFree(starts);
392: base = owners[rank];
394: /* wait on receives */
395: PetscMalloc2(nrecvs,&lens,nrecvs,&source);
396: count = nrecvs;
397: slen = 0;
398: while (count) {
399: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
400: /* unpack receives into our local space */
401: MPI_Get_count(&recv_status,MPIU_INT,&n);
403: source[imdex] = recv_status.MPI_SOURCE;
404: lens[imdex] = n;
405: slen += n;
406: count--;
407: }
408: PetscFree(recv_waits);
410: /* move the data into the send scatter */
411: PetscMalloc1(slen+1,&lrows);
412: count = 0;
413: for (i=0; i<nrecvs; i++) {
414: values = rvalues + i*nmax;
415: for (j=0; j<lens[i]; j++) {
416: lrows[count++] = values[j] - base;
417: }
418: }
419: PetscFree(rvalues);
420: PetscFree2(lens,source);
421: PetscFree(owner);
422: PetscFree(sizes);
424: /* fix right hand side if needed */
425: if (x && b) {
426: VecGetArrayRead(x,&xx);
427: VecGetArray(b,&bb);
428: for (i=0; i<slen; i++) {
429: bb[lrows[i]] = diag*xx[lrows[i]];
430: }
431: VecRestoreArrayRead(x,&xx);
432: VecRestoreArray(b,&bb);
433: }
435: /* actually zap the local rows */
436: MatZeroRows(l->A,slen,lrows,0.0,0,0);
437: if (diag != 0.0) {
438: Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
439: PetscInt m = ll->lda, i;
441: for (i=0; i<slen; i++) {
442: ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
443: }
444: }
445: PetscFree(lrows);
447: /* wait on sends */
448: if (nsends) {
449: PetscMalloc1(nsends,&send_status);
450: MPI_Waitall(nsends,send_waits,send_status);
451: PetscFree(send_status);
452: }
453: PetscFree(send_waits);
454: PetscFree(svalues);
455: return(0);
456: }
458: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
459: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
460: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
461: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
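/*
   Added note (not in the original source): each process of a MATMPIDENSE matrix stores its
   complete rows (all global columns) in the sequential block mdn->A.  A forward multiply
   therefore gathers the entire input vector into the local work vector mdn->lvec through the
   scatter mdn->Mvctx and then calls the sequential dense kernel; the transpose variants run
   the same scatter in reverse with ADD_VALUES to accumulate the result.
*/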
463: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
464: {
465: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
469: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
470: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
471: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
472: return(0);
473: }
475: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
476: {
477: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
481: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
482: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
483: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
484: return(0);
485: }
487: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
488: {
489: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
491: PetscScalar zero = 0.0;
494: VecSet(yy,zero);
495: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
496: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
497: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
498: return(0);
499: }
501: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
502: {
503: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
507: VecCopy(yy,zz);
508: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
509: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
510: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
511: return(0);
512: }
514: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
515: {
516: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
517: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
519: PetscInt len,i,n,m = A->rmap->n,radd;
520: PetscScalar *x,zero = 0.0;
523: VecSet(v,zero);
524: VecGetArray(v,&x);
525: VecGetSize(v,&n);
526: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
527: len = PetscMin(a->A->rmap->n,a->A->cmap->n);
528: radd = A->rmap->rstart*m;
529: for (i=0; i<len; i++) {
530: x[i] = aloc->v[radd + i*m + i];
531: }
532: VecRestoreArray(v,&x);
533: return(0);
534: }
536: PetscErrorCode MatDestroy_MPIDense(Mat mat)
537: {
538: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
542: #if defined(PETSC_USE_LOG)
543: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
544: #endif
545: MatStashDestroy_Private(&mat->stash);
546: MatDestroy(&mdn->A);
547: VecDestroy(&mdn->lvec);
548: VecScatterDestroy(&mdn->Mvctx);
550: PetscFree(mat->data);
551: PetscObjectChangeTypeName((PetscObject)mat,0);
553: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);
554: PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);
555: PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);
557: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);
558: #if defined(PETSC_HAVE_ELEMENTAL)
559: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);
560: #endif
561: PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);
562: PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);
563: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);
564: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);
565: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);
566: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);
567: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);
568: return(0);
569: }
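/*
   Added note (not in the original source): with the PETSC_VIEWER_NATIVE format the binary
   viewer below writes a four-entry header (MAT_FILE_CLASSID, global rows, global columns,
   MATRIX_BINARY_FORMAT_DENSE) followed by all matrix values in row-major order; process 0
   transposes and writes its own column-major block, then receives and writes the block of
   every other process in rank order.
*/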
571: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
572: {
573: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
574: PetscErrorCode ierr;
575: PetscViewerFormat format;
576: int fd;
577: PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k;
578: PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size;
579: PetscScalar *work,*v,*vv;
580: Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data;
583: if (mdn->size == 1) {
584: MatView(mdn->A,viewer);
585: } else {
586: PetscViewerBinaryGetDescriptor(viewer,&fd);
587: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
588: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
590: PetscViewerGetFormat(viewer,&format);
591: if (format == PETSC_VIEWER_NATIVE) {
593: if (!rank) {
594: /* store the matrix as a dense matrix */
595: header[0] = MAT_FILE_CLASSID;
596: header[1] = mat->rmap->N;
597: header[2] = N;
598: header[3] = MATRIX_BINARY_FORMAT_DENSE;
599: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
601: /* get largest work array needed for transposing array */
602: mmax = mat->rmap->n;
603: for (i=1; i<size; i++) {
604: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
605: }
606: PetscMalloc1(mmax*N,&work);
608: /* write out local array, by rows */
609: m = mat->rmap->n;
610: v = a->v;
611: for (j=0; j<N; j++) {
612: for (i=0; i<m; i++) {
613: work[j + i*N] = *v++;
614: }
615: }
616: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
617: /* get largest work array to receive messages from other processes, excludes process zero */
618: mmax = 0;
619: for (i=1; i<size; i++) {
620: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
621: }
622: PetscMalloc1(mmax*N,&vv);
623: for (k = 1; k < size; k++) {
624: v = vv;
625: m = mat->rmap->range[k+1] - mat->rmap->range[k];
626: MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));
628: for (j = 0; j < N; j++) {
629: for (i = 0; i < m; i++) {
630: work[j + i*N] = *v++;
631: }
632: }
633: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
634: }
635: PetscFree(work);
636: PetscFree(vv);
637: } else {
638: MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
639: }
640: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
641: }
642: return(0);
643: }
645: extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
646: #include <petscdraw.h>
647: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
648: {
649: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
650: PetscErrorCode ierr;
651: PetscMPIInt rank = mdn->rank;
652: PetscViewerType vtype;
653: PetscBool iascii,isdraw;
654: PetscViewer sviewer;
655: PetscViewerFormat format;
658: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
659: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
660: if (iascii) {
661: PetscViewerGetType(viewer,&vtype);
662: PetscViewerGetFormat(viewer,&format);
663: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
664: MatInfo info;
665: MatGetInfo(mat,MAT_LOCAL,&info);
666: PetscViewerASCIIPushSynchronized(viewer);
667: PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
668: PetscViewerFlush(viewer);
669: PetscViewerASCIIPopSynchronized(viewer);
670: VecScatterView(mdn->Mvctx,viewer);
671: return(0);
672: } else if (format == PETSC_VIEWER_ASCII_INFO) {
673: return(0);
674: }
675: } else if (isdraw) {
676: PetscDraw draw;
677: PetscBool isnull;
679: PetscViewerDrawGetDraw(viewer,0,&draw);
680: PetscDrawIsNull(draw,&isnull);
681: if (isnull) return(0);
682: }
684: {
685: /* assemble the entire matrix onto first processor. */
686: Mat A;
687: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
688: PetscInt *cols;
689: PetscScalar *vals;
691: MatCreate(PetscObjectComm((PetscObject)mat),&A);
692: if (!rank) {
693: MatSetSizes(A,M,N,M,N);
694: } else {
695: MatSetSizes(A,0,0,M,N);
696: }
697: /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
698: MatSetType(A,MATMPIDENSE);
699: MatMPIDenseSetPreallocation(A,NULL);
700: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
702: /* Copy the matrix ... This isn't the most efficient means,
703: but it's quick for now */
704: A->insertmode = INSERT_VALUES;
706: row = mat->rmap->rstart;
707: m = mdn->A->rmap->n;
708: for (i=0; i<m; i++) {
709: MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
710: MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
711: MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
712: row++;
713: }
715: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
716: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
717: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
718: if (!rank) {
719: PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);
720: MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);
721: }
722: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
723: PetscViewerFlush(viewer);
724: MatDestroy(&A);
725: }
726: return(0);
727: }
729: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
730: {
732: PetscBool iascii,isbinary,isdraw,issocket;
735: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
736: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
737: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
738: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
740: if (iascii || issocket || isdraw) {
741: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
742: } else if (isbinary) {
743: MatView_MPIDense_Binary(mat,viewer);
744: }
745: return(0);
746: }
748: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
749: {
750: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
751: Mat mdn = mat->A;
753: PetscReal isend[5],irecv[5];
756: info->block_size = 1.0;
758: MatGetInfo(mdn,MAT_LOCAL,info);
760: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
761: isend[3] = info->memory; isend[4] = info->mallocs;
762: if (flag == MAT_LOCAL) {
763: info->nz_used = isend[0];
764: info->nz_allocated = isend[1];
765: info->nz_unneeded = isend[2];
766: info->memory = isend[3];
767: info->mallocs = isend[4];
768: } else if (flag == MAT_GLOBAL_MAX) {
769: MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
771: info->nz_used = irecv[0];
772: info->nz_allocated = irecv[1];
773: info->nz_unneeded = irecv[2];
774: info->memory = irecv[3];
775: info->mallocs = irecv[4];
776: } else if (flag == MAT_GLOBAL_SUM) {
777: MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
779: info->nz_used = irecv[0];
780: info->nz_allocated = irecv[1];
781: info->nz_unneeded = irecv[2];
782: info->memory = irecv[3];
783: info->mallocs = irecv[4];
784: }
785: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
786: info->fill_ratio_needed = 0;
787: info->factor_mallocs = 0;
788: return(0);
789: }
791: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
792: {
793: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
797: switch (op) {
798: case MAT_NEW_NONZERO_LOCATIONS:
799: case MAT_NEW_NONZERO_LOCATION_ERR:
800: case MAT_NEW_NONZERO_ALLOCATION_ERR:
801: MatCheckPreallocated(A,1);
802: MatSetOption(a->A,op,flg);
803: break;
804: case MAT_ROW_ORIENTED:
805: MatCheckPreallocated(A,1);
806: a->roworiented = flg;
807: MatSetOption(a->A,op,flg);
808: break;
809: case MAT_NEW_DIAGONALS:
810: case MAT_KEEP_NONZERO_PATTERN:
811: case MAT_USE_HASH_TABLE:
812: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
813: break;
814: case MAT_IGNORE_OFF_PROC_ENTRIES:
815: a->donotstash = flg;
816: break;
817: case MAT_SYMMETRIC:
818: case MAT_STRUCTURALLY_SYMMETRIC:
819: case MAT_HERMITIAN:
820: case MAT_SYMMETRY_ETERNAL:
821: case MAT_IGNORE_LOWER_TRIANGULAR:
822: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
823: break;
824: default:
825: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
826: }
827: return(0);
828: }
831: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
832: {
833: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
834: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
835: const PetscScalar *l,*r;
836: PetscScalar x,*v;
837: PetscErrorCode ierr;
838: PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
841: MatGetLocalSize(A,&s2,&s3);
842: if (ll) {
843: VecGetLocalSize(ll,&s2a);
844: if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
845: VecGetArrayRead(ll,&l);
846: for (i=0; i<m; i++) {
847: x = l[i];
848: v = mat->v + i;
849: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
850: }
851: VecRestoreArrayRead(ll,&l);
852: PetscLogFlops(n*m);
853: }
854: if (rr) {
855: VecGetLocalSize(rr,&s3a);
856: if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
857: VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
858: VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
859: VecGetArrayRead(mdn->lvec,&r);
860: for (i=0; i<n; i++) {
861: x = r[i];
862: v = mat->v + i*m;
863: for (j=0; j<m; j++) (*v++) *= x;
864: }
865: VecRestoreArrayRead(mdn->lvec,&r);
866: PetscLogFlops(n*m);
867: }
868: return(0);
869: }
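/*
   Added note (not in the original source): in the parallel branch below the Frobenius norm is
   the square root of the globally summed squares of the local entries, the 1-norm is the
   maximum over global columns of the globally summed column absolute-value sums, and the
   infinity norm is the global maximum of the local row sums; the 2-norm is not supported.
*/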
871: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
872: {
873: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
874: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
876: PetscInt i,j;
877: PetscReal sum = 0.0;
878: PetscScalar *v = mat->v;
881: if (mdn->size == 1) {
882: MatNorm(mdn->A,type,nrm);
883: } else {
884: if (type == NORM_FROBENIUS) {
885: for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
886: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
887: }
888: MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
889: *nrm = PetscSqrtReal(*nrm);
890: PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
891: } else if (type == NORM_1) {
892: PetscReal *tmp,*tmp2;
893: PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);
894: PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));
895: PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));
896: *nrm = 0.0;
897: v = mat->v;
898: for (j=0; j<mdn->A->cmap->n; j++) {
899: for (i=0; i<mdn->A->rmap->n; i++) {
900: tmp[j] += PetscAbsScalar(*v); v++;
901: }
902: }
903: MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
904: for (j=0; j<A->cmap->N; j++) {
905: if (tmp2[j] > *nrm) *nrm = tmp2[j];
906: }
907: PetscFree2(tmp,tmp2);
908: PetscLogFlops(A->cmap->n*A->rmap->n);
909: } else if (type == NORM_INFINITY) { /* max row norm */
910: PetscReal ntemp;
911: MatNorm(mdn->A,type,&ntemp);
912: MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
913: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
914: }
915: return(0);
916: }
918: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
919: {
920: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
921: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
922: Mat B;
923: PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
925: PetscInt j,i;
926: PetscScalar *v;
929: if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
930: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
931: MatCreate(PetscObjectComm((PetscObject)A),&B);
932: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
933: MatSetType(B,((PetscObject)A)->type_name);
934: MatMPIDenseSetPreallocation(B,NULL);
935: } else {
936: B = *matout;
937: }
939: m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
940: PetscMalloc1(m,&rwork);
941: for (i=0; i<m; i++) rwork[i] = rstart + i;
942: for (j=0; j<n; j++) {
943: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
944: v += m;
945: }
946: PetscFree(rwork);
947: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
948: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
949: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
950: *matout = B;
951: } else {
952: MatHeaderMerge(A,&B);
953: }
954: return(0);
955: }
958: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
959: extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
961: PetscErrorCode MatSetUp_MPIDense(Mat A)
962: {
966: MatMPIDenseSetPreallocation(A,0);
967: return(0);
968: }
970: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
971: {
973: Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
976: MatAXPY(A->A,alpha,B->A,str);
977: PetscObjectStateIncrease((PetscObject)Y);
978: return(0);
979: }
981: PetscErrorCode MatConjugate_MPIDense(Mat mat)
982: {
983: Mat_MPIDense *a = (Mat_MPIDense*)mat->data;
987: MatConjugate(a->A);
988: return(0);
989: }
991: PetscErrorCode MatRealPart_MPIDense(Mat A)
992: {
993: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
997: MatRealPart(a->A);
998: return(0);
999: }
1001: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1002: {
1003: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1007: MatImaginaryPart(a->A);
1008: return(0);
1009: }
1011: extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1012: PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1013: {
1015: PetscInt i,n;
1016: Mat_MPIDense *a = (Mat_MPIDense*) A->data;
1017: PetscReal *work;
1020: MatGetSize(A,NULL,&n);
1021: PetscMalloc1(n,&work);
1022: MatGetColumnNorms_SeqDense(a->A,type,work);
1023: if (type == NORM_2) {
1024: for (i=0; i<n; i++) work[i] *= work[i];
1025: }
1026: if (type == NORM_INFINITY) {
1027: MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
1028: } else {
1029: MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
1030: }
1031: PetscFree(work);
1032: if (type == NORM_2) {
1033: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1034: }
1035: return(0);
1036: }
1038: static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1039: {
1040: Mat_MPIDense *d = (Mat_MPIDense*)x->data;
1042: PetscScalar *a;
1043: PetscInt m,n,i;
1046: MatGetSize(d->A,&m,&n);
1047: MatDenseGetArray(d->A,&a);
1048: for (i=0; i<m*n; i++) {
1049: PetscRandomGetValue(rctx,a+i);
1050: }
1051: MatDenseRestoreArray(d->A,&a);
1052: return(0);
1053: }
1055: extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1057: static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
1058: {
1060: *missing = PETSC_FALSE;
1061: return(0);
1062: }
1064: /* -------------------------------------------------------------------*/
1065: static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1066: MatGetRow_MPIDense,
1067: MatRestoreRow_MPIDense,
1068: MatMult_MPIDense,
1069: /* 4*/ MatMultAdd_MPIDense,
1070: MatMultTranspose_MPIDense,
1071: MatMultTransposeAdd_MPIDense,
1072: 0,
1073: 0,
1074: 0,
1075: /* 10*/ 0,
1076: 0,
1077: 0,
1078: 0,
1079: MatTranspose_MPIDense,
1080: /* 15*/ MatGetInfo_MPIDense,
1081: MatEqual_MPIDense,
1082: MatGetDiagonal_MPIDense,
1083: MatDiagonalScale_MPIDense,
1084: MatNorm_MPIDense,
1085: /* 20*/ MatAssemblyBegin_MPIDense,
1086: MatAssemblyEnd_MPIDense,
1087: MatSetOption_MPIDense,
1088: MatZeroEntries_MPIDense,
1089: /* 24*/ MatZeroRows_MPIDense,
1090: 0,
1091: 0,
1092: 0,
1093: 0,
1094: /* 29*/ MatSetUp_MPIDense,
1095: 0,
1096: 0,
1097: MatGetDiagonalBlock_MPIDense,
1098: 0,
1099: /* 34*/ MatDuplicate_MPIDense,
1100: 0,
1101: 0,
1102: 0,
1103: 0,
1104: /* 39*/ MatAXPY_MPIDense,
1105: MatCreateSubMatrices_MPIDense,
1106: 0,
1107: MatGetValues_MPIDense,
1108: 0,
1109: /* 44*/ 0,
1110: MatScale_MPIDense,
1111: MatShift_Basic,
1112: 0,
1113: 0,
1114: /* 49*/ MatSetRandom_MPIDense,
1115: 0,
1116: 0,
1117: 0,
1118: 0,
1119: /* 54*/ 0,
1120: 0,
1121: 0,
1122: 0,
1123: 0,
1124: /* 59*/ MatCreateSubMatrix_MPIDense,
1125: MatDestroy_MPIDense,
1126: MatView_MPIDense,
1127: 0,
1128: 0,
1129: /* 64*/ 0,
1130: 0,
1131: 0,
1132: 0,
1133: 0,
1134: /* 69*/ 0,
1135: 0,
1136: 0,
1137: 0,
1138: 0,
1139: /* 74*/ 0,
1140: 0,
1141: 0,
1142: 0,
1143: 0,
1144: /* 79*/ 0,
1145: 0,
1146: 0,
1147: 0,
1148: /* 83*/ MatLoad_MPIDense,
1149: 0,
1150: 0,
1151: 0,
1152: 0,
1153: 0,
1154: #if defined(PETSC_HAVE_ELEMENTAL)
1155: /* 89*/ MatMatMult_MPIDense_MPIDense,
1156: MatMatMultSymbolic_MPIDense_MPIDense,
1157: #else
1158: /* 89*/ 0,
1159: 0,
1160: #endif
1161: MatMatMultNumeric_MPIDense,
1162: 0,
1163: 0,
1164: /* 94*/ 0,
1165: 0,
1166: 0,
1167: 0,
1168: 0,
1169: /* 99*/ 0,
1170: 0,
1171: 0,
1172: MatConjugate_MPIDense,
1173: 0,
1174: /*104*/ 0,
1175: MatRealPart_MPIDense,
1176: MatImaginaryPart_MPIDense,
1177: 0,
1178: 0,
1179: /*109*/ 0,
1180: 0,
1181: 0,
1182: 0,
1183: MatMissingDiagonal_MPIDense,
1184: /*114*/ 0,
1185: 0,
1186: 0,
1187: 0,
1188: 0,
1189: /*119*/ 0,
1190: 0,
1191: 0,
1192: 0,
1193: 0,
1194: /*124*/ 0,
1195: MatGetColumnNorms_MPIDense,
1196: 0,
1197: 0,
1198: 0,
1199: /*129*/ 0,
1200: MatTransposeMatMult_MPIDense_MPIDense,
1201: MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1202: MatTransposeMatMultNumeric_MPIDense_MPIDense,
1203: 0,
1204: /*134*/ 0,
1205: 0,
1206: 0,
1207: 0,
1208: 0,
1209: /*139*/ 0,
1210: 0,
1211: 0
1212: };
1214: PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1215: {
1216: Mat_MPIDense *a;
1220: mat->preallocated = PETSC_TRUE;
1221: /* Note: For now, when data is specified above, this assumes the user correctly
1222: allocates the local dense storage space. We should add error checking. */
1224: a = (Mat_MPIDense*)mat->data;
1225: PetscLayoutSetUp(mat->rmap);
1226: PetscLayoutSetUp(mat->cmap);
1227: a->nvec = mat->cmap->n;
1229: MatCreate(PETSC_COMM_SELF,&a->A);
1230: MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1231: MatSetType(a->A,MATSEQDENSE);
1232: MatSeqDenseSetPreallocation(a->A,data);
1233: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1234: return(0);
1235: }
1237: #if defined(PETSC_HAVE_ELEMENTAL)
1238: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1239: {
1240: Mat mat_elemental;
1242: PetscScalar *v;
1243: PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1244:
1246: if (reuse == MAT_REUSE_MATRIX) {
1247: mat_elemental = *newmat;
1248: MatZeroEntries(*newmat);
1249: } else {
1250: MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
1251: MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);
1252: MatSetType(mat_elemental,MATELEMENTAL);
1253: MatSetUp(mat_elemental);
1254: MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);
1255: }
1257: PetscMalloc2(m,&rows,N,&cols);
1258: for (i=0; i<N; i++) cols[i] = i;
1259: for (i=0; i<m; i++) rows[i] = rstart + i;
1260:
1261:   /* PETSc-Elemental interface uses axpy for setting off-processor entries; only ADD_VALUES is allowed */
1262: MatDenseGetArray(A,&v);
1263: MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);
1264: MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
1265: MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
1266: MatDenseRestoreArray(A,&v);
1267: PetscFree2(rows,cols);
1269: if (reuse == MAT_INPLACE_MATRIX) {
1270: MatHeaderReplace(A,&mat_elemental);
1271: } else {
1272: *newmat = mat_elemental;
1273: }
1274: return(0);
1275: }
1276: #endif
1278: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1279: {
1280: Mat_MPIDense *a;
1284: PetscNewLog(mat,&a);
1285: mat->data = (void*)a;
1286: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1288: mat->insertmode = NOT_SET_VALUES;
1289: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);
1290: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);
1292: /* build cache for off array entries formed */
1293: a->donotstash = PETSC_FALSE;
1295: MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);
1297: /* stuff used for matrix vector multiply */
1298: a->lvec = 0;
1299: a->Mvctx = 0;
1300: a->roworiented = PETSC_TRUE;
1302: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);
1303: PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);
1304: PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);
1305: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);
1306: #if defined(PETSC_HAVE_ELEMENTAL)
1307: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);
1308: #endif
1309: PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);
1310: PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);
1311: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);
1312: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);
1314: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);
1315: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);
1316: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);
1317: PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1318: return(0);
1319: }
1321: /*MC
1322: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1324: This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1325: and MATMPIDENSE otherwise.
1327: Options Database Keys:
1328: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1330: Level: beginner
1333: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1334: M*/
1336: /*@C
1337: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1339: Not collective
1341: Input Parameters:
1342: +  B - the matrix
1343: - data - optional location of matrix data. Set data=NULL for PETSc
1344: to control all matrix memory allocation.
1346: Notes:
1347: The dense format is fully compatible with standard Fortran 77
1348: storage by columns.
1350: The data input variable is intended primarily for Fortran programmers
1351: who wish to allocate their own matrix memory space. Most users should
1352: set data=NULL.
1354: Level: intermediate
1356: .keywords: matrix,dense, parallel
1358: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1359: @*/
1360: PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1361: {
1365: PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
1366: return(0);
1367: }
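/*
   A minimal usage sketch (not part of the original source): create an MPI dense matrix
   explicitly and let PETSc allocate the local column-major storage by passing data=NULL.
   The global sizes are illustrative.

     Mat      B;
     PetscInt M = 100,N = 50;

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(B,MATMPIDENSE);
     MatMPIDenseSetPreallocation(B,NULL);
     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
     MatDestroy(&B);
*/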
1369: /*@
1370:    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1371:    array provided by the user. This is useful to avoid copying an array
1372:    into a matrix.
1374: Not Collective
1376: Input Parameters:
1377: + mat - the matrix
1378: - array - the array in column major order
1380: Notes:
1381: You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1382: freed when the matrix is destroyed.
1384: Level: developer
1386: .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1388: @*/
1389: PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[])
1390: {
1393: PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
1394: PetscObjectStateIncrease((PetscObject)mat);
1395: return(0);
1396: }
1398: /*@
1399:    MatDenseResetArray - Resets the matrix array to the one it had before the call to MatDensePlaceArray()
1401: Not Collective
1403: Input Parameters:
1404: . mat - the matrix
1406: Notes:
1407: You can only call this after a call to MatDensePlaceArray()
1409: Level: developer
1411: .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1413: @*/
1414: PetscErrorCode MatDenseResetArray(Mat mat)
1415: {
1418: PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
1419: PetscObjectStateIncrease((PetscObject)mat);
1420: return(0);
1421: }
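/*
   A minimal usage sketch for MatDensePlaceArray()/MatDenseResetArray() (not part of the
   original source): temporarily back the local block of a dense matrix A with a caller-owned
   buffer, use the matrix, then restore the original storage.  Here myvals is an illustrative
   caller-allocated array holding local-rows by global-columns entries in column-major order,
   and x, y are conforming vectors.

     MatDensePlaceArray(A,myvals);
     MatMult(A,x,y);
     MatDenseResetArray(A);
*/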
1423: /*@C
1424: MatCreateDense - Creates a parallel matrix in dense format.
1426: Collective on MPI_Comm
1428: Input Parameters:
1429: + comm - MPI communicator
1430: .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1431: .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1432: .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1433: .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1434: - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1435: to control all matrix memory allocation.
1437: Output Parameter:
1438: . A - the matrix
1440: Notes:
1441: The dense format is fully compatible with standard Fortran 77
1442: storage by columns.
1444: The data input variable is intended primarily for Fortran programmers
1445: who wish to allocate their own matrix memory space. Most users should
1446: set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1448: The user MUST specify either the local or global matrix dimensions
1449: (possibly both).
1451: Level: intermediate
1453: .keywords: matrix,dense, parallel
1455: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1456: @*/
1457: PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1458: {
1460: PetscMPIInt size;
1463: MatCreate(comm,A);
1464: MatSetSizes(*A,m,n,M,N);
1465: MPI_Comm_size(comm,&size);
1466: if (size > 1) {
1467: MatSetType(*A,MATMPIDENSE);
1468: MatMPIDenseSetPreallocation(*A,data);
1469: if (data) { /* user provided data array, so no need to assemble */
1470: MatSetUpMultiply_MPIDense(*A);
1471: (*A)->assembled = PETSC_TRUE;
1472: }
1473: } else {
1474: MatSetType(*A,MATSEQDENSE);
1475: MatSeqDenseSetPreallocation(*A,data);
1476: }
1477: return(0);
1478: }
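/*
   A minimal usage sketch (not part of the original source): create a parallel dense matrix
   with PETSc-chosen local sizes and PETSc-managed storage, fill the locally owned rows, and
   assemble.  Sizes and values are illustrative.

     Mat         A;
     PetscInt    i,j,rstart,rend,M = 10,N = 10;
     PetscScalar v;

     MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);
     MatGetOwnershipRange(A,&rstart,&rend);
     for (i=rstart; i<rend; i++) {
       for (j=0; j<N; j++) {
         v = (PetscScalar)(i+j);
         MatSetValue(A,i,j,v,INSERT_VALUES);
       }
     }
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDestroy(&A);
*/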
1480: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1481: {
1482: Mat mat;
1483: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1487: *newmat = 0;
1488: MatCreate(PetscObjectComm((PetscObject)A),&mat);
1489: MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1490: MatSetType(mat,((PetscObject)A)->type_name);
1491: a = (Mat_MPIDense*)mat->data;
1492: PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));
1494: mat->factortype = A->factortype;
1495: mat->assembled = PETSC_TRUE;
1496: mat->preallocated = PETSC_TRUE;
1498: a->size = oldmat->size;
1499: a->rank = oldmat->rank;
1500: mat->insertmode = NOT_SET_VALUES;
1501: a->nvec = oldmat->nvec;
1502: a->donotstash = oldmat->donotstash;
1504: PetscLayoutReference(A->rmap,&mat->rmap);
1505: PetscLayoutReference(A->cmap,&mat->cmap);
1507: MatSetUpMultiply_MPIDense(mat);
1508: MatDuplicate(oldmat->A,cpvalues,&a->A);
1509: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1511: *newmat = mat;
1512: return(0);
1513: }
1515: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1516: {
1518: PetscMPIInt rank,size;
1519: const PetscInt *rowners;
1520: PetscInt i,m,n,nz,j,mMax;
1521: PetscScalar *array,*vals,*vals_ptr;
1522: Mat_MPIDense *a = (Mat_MPIDense*)newmat->data;
1525: MPI_Comm_rank(comm,&rank);
1526: MPI_Comm_size(comm,&size);
1528: /* determine ownership of rows and columns */
1529: m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1530: n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1532: MatSetSizes(newmat,m,n,M,N);
1533: if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1534: MatMPIDenseSetPreallocation(newmat,NULL);
1535: }
1536: MatDenseGetArray(newmat,&array);
1537: MatGetLocalSize(newmat,&m,NULL);
1538: MatGetOwnershipRanges(newmat,&rowners);
1539: MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);
1540: if (!rank) {
1541: PetscMalloc1(mMax*N,&vals);
1543: /* read in my part of the matrix numerical values */
1544: PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1546:     /* insert into matrix by row (this is why we cannot read directly into the array) */
1547: vals_ptr = vals;
1548: for (i=0; i<m; i++) {
1549: for (j=0; j<N; j++) {
1550: array[i + j*m] = *vals_ptr++;
1551: }
1552: }
1554:     /* read in other processors' parts and ship them out */
1555: for (i=1; i<size; i++) {
1556: nz = (rowners[i+1] - rowners[i])*N;
1557: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1558: MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);
1559: }
1560: } else {
1561: /* receive numeric values */
1562: PetscMalloc1(m*N,&vals);
1564: /* receive message of values*/
1565: MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);
1567:     /* insert into matrix by row (this is why we cannot read directly into the array) */
1568: vals_ptr = vals;
1569: for (i=0; i<m; i++) {
1570: for (j=0; j<N; j++) {
1571: array[i + j*m] = *vals_ptr++;
1572: }
1573: }
1574: }
1575: MatDenseRestoreArray(newmat,&array);
1576: PetscFree(vals);
1577: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1578: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1579: return(0);
1580: }
1582: PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1583: {
1584: Mat_MPIDense *a;
1585: PetscScalar *vals,*svals;
1586: MPI_Comm comm;
1587: MPI_Status status;
1588: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1589: PetscInt header[4],*rowlengths = 0,M,N,*cols;
1590: PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1591: PetscInt i,nz,j,rstart,rend;
1592: int fd;
1596: /* force binary viewer to load .info file if it has not yet done so */
1597: PetscViewerSetUp(viewer);
1598: PetscObjectGetComm((PetscObject)viewer,&comm);
1599: MPI_Comm_size(comm,&size);
1600: MPI_Comm_rank(comm,&rank);
1601: PetscViewerBinaryGetDescriptor(viewer,&fd);
1602: if (!rank) {
1603: PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
1604: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1605: }
1606: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1607: M = header[1]; N = header[2]; nz = header[3];
1609:   /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1610: if (newmat->rmap->N < 0) newmat->rmap->N = M;
1611: if (newmat->cmap->N < 0) newmat->cmap->N = N;
1613: if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1614: if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1616: /*
1617: Handle case where matrix is stored on disk as a dense matrix
1618: */
1619: if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1620: MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1621: return(0);
1622: }
1624: /* determine ownership of all rows */
1625: if (newmat->rmap->n < 0) {
1626: PetscMPIIntCast(M/size + ((M % size) > rank),&m);
1627: } else {
1628: PetscMPIIntCast(newmat->rmap->n,&m);
1629: }
1630: if (newmat->cmap->n < 0) {
1631: n = PETSC_DECIDE;
1632: } else {
1633: PetscMPIIntCast(newmat->cmap->n,&n);
1634: }
1636: PetscMalloc1(size+2,&rowners);
1637: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1638: rowners[0] = 0;
1639: for (i=2; i<=size; i++) {
1640: rowners[i] += rowners[i-1];
1641: }
1642: rstart = rowners[rank];
1643: rend = rowners[rank+1];
1645: /* distribute row lengths to all processors */
1646: PetscMalloc1(rend-rstart,&ourlens);
1647: if (!rank) {
1648: PetscMalloc1(M,&rowlengths);
1649: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1650: PetscMalloc1(size,&sndcounts);
1651: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1652: MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1653: PetscFree(sndcounts);
1654: } else {
1655: MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1656: }
1658: if (!rank) {
1659: /* calculate the number of nonzeros on each processor */
1660: PetscMalloc1(size,&procsnz);
1661: PetscMemzero(procsnz,size*sizeof(PetscInt));
1662: for (i=0; i<size; i++) {
1663: for (j=rowners[i]; j< rowners[i+1]; j++) {
1664: procsnz[i] += rowlengths[j];
1665: }
1666: }
1667: PetscFree(rowlengths);
1669: /* determine max buffer needed and allocate it */
1670: maxnz = 0;
1671: for (i=0; i<size; i++) {
1672: maxnz = PetscMax(maxnz,procsnz[i]);
1673: }
1674: PetscMalloc1(maxnz,&cols);
1676: /* read in my part of the matrix column indices */
1677: nz = procsnz[0];
1678: PetscMalloc1(nz,&mycols);
1679: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1681:     /* read in everyone else's part and ship it off */
1682: for (i=1; i<size; i++) {
1683: nz = procsnz[i];
1684: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1685: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1686: }
1687: PetscFree(cols);
1688: } else {
1689: /* determine buffer space needed for message */
1690: nz = 0;
1691: for (i=0; i<m; i++) {
1692: nz += ourlens[i];
1693: }
1694: PetscMalloc1(nz+1,&mycols);
1696: /* receive message of column indices*/
1697: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1698: MPI_Get_count(&status,MPIU_INT,&maxnz);
1699: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1700: }
1702: MatSetSizes(newmat,m,n,M,N);
1703: a = (Mat_MPIDense*)newmat->data;
1704: if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1705: MatMPIDenseSetPreallocation(newmat,NULL);
1706: }
1708: if (!rank) {
1709: PetscMalloc1(maxnz,&vals);
1711: /* read in my part of the matrix numerical values */
1712: nz = procsnz[0];
1713: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1715: /* insert into matrix */
1716: jj = rstart;
1717: smycols = mycols;
1718: svals = vals;
1719: for (i=0; i<m; i++) {
1720: MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1721: smycols += ourlens[i];
1722: svals += ourlens[i];
1723: jj++;
1724: }
1726:     /* read in other processors' parts and ship them out */
1727: for (i=1; i<size; i++) {
1728: nz = procsnz[i];
1729: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1730: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
1731: }
1732: PetscFree(procsnz);
1733: } else {
1734: /* receive numeric values */
1735: PetscMalloc1(nz+1,&vals);
1737: /* receive message of values*/
1738: MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
1739: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1740: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1742: /* insert into matrix */
1743: jj = rstart;
1744: smycols = mycols;
1745: svals = vals;
1746: for (i=0; i<m; i++) {
1747: MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1748: smycols += ourlens[i];
1749: svals += ourlens[i];
1750: jj++;
1751: }
1752: }
1753: PetscFree(ourlens);
1754: PetscFree(vals);
1755: PetscFree(mycols);
1756: PetscFree(rowners);
1758: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1759: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1760: return(0);
1761: }
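/*
   A minimal usage sketch (not part of the original source): load a dense matrix that was
   previously written in PETSc binary format.  The file name is illustrative.

     Mat         A;
     PetscViewer viewer;

     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATDENSE);
     MatLoad(A,viewer);
     PetscViewerDestroy(&viewer);
*/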
1763: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
1764: {
1765: Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1766: Mat a,b;
1767: PetscBool flg;
1771: a = matA->A;
1772: b = matB->A;
1773: MatEqual(a,b,&flg);
1774: MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1775: return(0);
1776: }
1778: PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1779: {
1780: PetscErrorCode ierr;
1781: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1782: Mat_TransMatMultDense *atb = a->atbdense;
1785: PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);
1786: (atb->destroy)(A);
1787: PetscFree(atb);
1788: return(0);
1789: }
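/*
   Added note (not in the original source): since A and B are partitioned by rows, C = A^T * B
   equals the sum over all processes of Aloc^T * Bloc, where Aloc and Bloc are the local row
   blocks.  The numeric routine below forms its local contribution with one BLAS gemm call into
   the full-size buffer atbarray, packs it by destination process, and lets MPI_Reduce_scatter()
   both sum the contributions and hand each process the rows of C that it owns.
*/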
1791: PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1792: {
1793: Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1794: Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1795: Mat_TransMatMultDense *atb = c->atbdense;
1797: MPI_Comm comm;
1798: PetscMPIInt rank,size,*recvcounts=atb->recvcounts;
1799: PetscScalar *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1800: PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1801: PetscScalar _DOne=1.0,_DZero=0.0;
1802: PetscBLASInt am,an,bn,aN;
1803: const PetscInt *ranges;
1806: PetscObjectGetComm((PetscObject)A,&comm);
1807: MPI_Comm_rank(comm,&rank);
1808: MPI_Comm_size(comm,&size);
1810: /* compute atbarray = aseq^T * bseq */
1811: PetscBLASIntCast(a->A->cmap->n,&an);
1812: PetscBLASIntCast(b->A->cmap->n,&bn);
1813: PetscBLASIntCast(a->A->rmap->n,&am);
1814: PetscBLASIntCast(A->cmap->N,&aN);
1815: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1816:
1817: MatGetOwnershipRanges(C,&ranges);
1818: for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1819:
1820: /* arrange atbarray into sendbuf */
1821: k = 0;
1822: for (proc=0; proc<size; proc++) {
1823: for (j=0; j<cN; j++) {
1824: for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1825: }
1826: }
1827: /* sum all atbarray to local values of C */
1828: MatDenseGetArray(c->A,&carray);
1829: MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);
1830: MatDenseRestoreArray(c->A,&carray);
1831: return(0);
1832: }
1834: PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1835: {
1836: PetscErrorCode ierr;
1837: Mat Cdense;
1838: MPI_Comm comm;
1839: PetscMPIInt size;
1840: PetscInt cm=A->cmap->n,cM,cN=B->cmap->N;
1841: Mat_MPIDense *c;
1842: Mat_TransMatMultDense *atb;
1845: PetscObjectGetComm((PetscObject)A,&comm);
1846: if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1847: SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1848: }
1850: /* create matrix product Cdense */
1851: MatCreate(comm,&Cdense);
1852: MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
1853: MatSetType(Cdense,MATMPIDENSE);
1854: MatMPIDenseSetPreallocation(Cdense,NULL);
1855: MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);
1856: MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);
1857: *C = Cdense;
1859:   /* create data structure for reuse of Cdense */
1860: MPI_Comm_size(comm,&size);
1861: PetscNew(&atb);
1862: cM = Cdense->rmap->N;
1863: PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);
1864:
1865: c = (Mat_MPIDense*)Cdense->data;
1866: c->atbdense = atb;
1867: atb->destroy = Cdense->ops->destroy;
1868: Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1869: return(0);
1870: }
1872: PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1873: {
1877: if (scall == MAT_INITIAL_MATRIX) {
1878: MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
1879: }
1880: MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);
1881: return(0);
1882: }
1884: PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1885: {
1886: PetscErrorCode ierr;
1887: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1888: Mat_MatMultDense *ab = a->abdense;
1891: MatDestroy(&ab->Ce);
1892: MatDestroy(&ab->Ae);
1893: MatDestroy(&ab->Be);
1895: (ab->destroy)(A);
1896: PetscFree(ab);
1897: return(0);
1898: }
1900: #if defined(PETSC_HAVE_ELEMENTAL)
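/*
   Added note (not in the original source): this file provides MatMatMult() for two MPI dense
   matrices only when PETSc is configured with Elemental.  The routines below convert both
   operands to MATELEMENTAL, perform the product there, and convert the result back to
   MATMPIDENSE; the Elemental matrices are kept in a Mat_MatMultDense structure so that
   repeated numeric products can reuse them.
*/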
1901: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1902: {
1903: PetscErrorCode ierr;
1904: Mat_MPIDense *c=(Mat_MPIDense*)C->data;
1905: Mat_MatMultDense *ab=c->abdense;
1908: MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);
1909: MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);
1910: MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);
1911: MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);
1912: return(0);
1913: }
1915: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1916: {
1917: PetscErrorCode ierr;
1918: Mat Ae,Be,Ce;
1919: Mat_MPIDense *c;
1920: Mat_MatMultDense *ab;
1923: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1924: SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1925: }
1927: /* convert A and B to Elemental matrices Ae and Be */
1928: MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);
1929: MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);
1931: /* Ce = Ae*Be */
1932: MatMatMultSymbolic(Ae,Be,fill,&Ce);
1933: MatMatMultNumeric(Ae,Be,Ce);
1934:
1935: /* convert Ce to C */
1936: MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);
1938:   /* create data structure for reuse of Cdense */
1939: PetscNew(&ab);
1940: c = (Mat_MPIDense*)(*C)->data;
1941: c->abdense = ab;
1943: ab->Ae = Ae;
1944: ab->Be = Be;
1945: ab->Ce = Ce;
1946: ab->destroy = (*C)->ops->destroy;
1947: (*C)->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
1948: (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1949: return(0);
1950: }
1952: PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1953: {
1957:   if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
1958: PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);
1959: MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
1960: PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);
1961: } else {
1962: PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);
1963: MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);
1964: PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);
1965: }
1966: return(0);
1967: }
1968: #endif