Actual source code: mpidense.c
petsc-3.6.1 2015-08-06
2: /*
3: Basic functions for parallel dense matrices.
4: */
7: #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
8: #include <../src/mat/impls/aij/mpi/mpiaij.h>
12: /*@
14: MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix that stores
15: the locally owned rows; for a MATSEQDENSE matrix, returns the matrix itself.
17: Input Parameter:
18: . A - the MATSEQDENSE or MATMPIDENSE matrix
20: Output Parameter:
21: . B - the inner matrix
23: Level: intermediate
25: @*/
26: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
27: {
28: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
30: PetscBool flg;
33: PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
34: if (flg) *B = mat->A;
35: else *B = A;
36: return(0);
37: }
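/*
   A minimal usage sketch for MatDenseGetLocalMatrix(); it assumes A is an already assembled
   MATMPIDENSE (or MATSEQDENSE) matrix, and the names A, Aloc, lrows, lcols and vals are
   placeholders introduced here for illustration:

       Mat         Aloc;
       PetscInt    lrows,lcols;
       PetscScalar *vals;

       MatDenseGetLocalMatrix(A,&Aloc);     // Aloc stores the locally owned rows, all global columns
       MatGetSize(Aloc,&lrows,&lcols);
       MatDenseGetArray(Aloc,&vals);        // column-major storage: entry (i,j) is vals[i + j*lrows]
       // ... read or modify vals ...
       MatDenseRestoreArray(Aloc,&vals);
*/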
41: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
42: {
43: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
45: PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
48: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
49: lrow = row - rstart;
50: MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
51: return(0);
52: }
56: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
57: {
61: if (idx) {PetscFree(*idx);}
62: if (v) {PetscFree(*v);}
63: return(0);
64: }
68: PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
69: {
70: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
72: PetscInt m = A->rmap->n,rstart = A->rmap->rstart;
73: PetscScalar *array;
74: MPI_Comm comm;
75: Mat B;
78: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
80: PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
81: if (!B) {
82: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
83: MatCreate(comm,&B);
84: MatSetSizes(B,m,m,m,m);
85: MatSetType(B,((PetscObject)mdn->A)->type_name);
86: MatDenseGetArray(mdn->A,&array);
87: MatSeqDenseSetPreallocation(B,array+m*rstart);
88: MatDenseRestoreArray(mdn->A,&array);
89: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
90: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
91: PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
92: *a = B;
93: MatDestroy(&B);
94: } else *a = B;
95: return(0);
96: }
100: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
101: {
102: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
104: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
105: PetscBool roworiented = A->roworiented;
108: for (i=0; i<m; i++) {
109: if (idxm[i] < 0) continue;
110: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
111: if (idxm[i] >= rstart && idxm[i] < rend) {
112: row = idxm[i] - rstart;
113: if (roworiented) {
114: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
115: } else {
116: for (j=0; j<n; j++) {
117: if (idxn[j] < 0) continue;
118: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
119: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
120: }
121: }
122: } else if (!A->donotstash) {
123: mat->assembled = PETSC_FALSE;
124: if (roworiented) {
125: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
126: } else {
127: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
128: }
129: }
130: }
131: return(0);
132: }
136: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
137: {
138: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
140: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
143: for (i=0; i<m; i++) {
144: if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
145: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
146: if (idxm[i] >= rstart && idxm[i] < rend) {
147: row = idxm[i] - rstart;
148: for (j=0; j<n; j++) {
149: if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
150: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
151: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
152: }
153: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
154: }
155: return(0);
156: }
160: PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
161: {
162: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
166: MatDenseGetArray(a->A,array);
167: return(0);
168: }
172: static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
173: {
174: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
175: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
177: PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
178: const PetscInt *irow,*icol;
179: PetscScalar *av,*bv,*v = lmat->v;
180: Mat newmat;
181: IS iscol_local;
184: ISAllGather(iscol,&iscol_local);
185: ISGetIndices(isrow,&irow);
186: ISGetIndices(iscol_local,&icol);
187: ISGetLocalSize(isrow,&nrows);
188: ISGetLocalSize(iscol,&ncols);
189: ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */
191: /* No parallel redistribution is currently supported! Each index set should really be checked
192: to confirm that it is OK. ... Currently only a submatrix with the same row partitioning as the
193: original matrix is supported! */
195: MatGetLocalSize(A,&nlrows,&nlcols);
196: MatGetOwnershipRange(A,&rstart,&rend);
198: /* Check submatrix call */
199: if (scall == MAT_REUSE_MATRIX) {
200: /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
201: /* Really need to test row and column sizes! */
202: newmat = *B;
203: } else {
204: /* Create and fill new matrix */
205: MatCreate(PetscObjectComm((PetscObject)A),&newmat);
206: MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
207: MatSetType(newmat,((PetscObject)A)->type_name);
208: MatMPIDenseSetPreallocation(newmat,NULL);
209: }
211: /* Now extract the data pointers and do the copy, column at a time */
212: newmatd = (Mat_MPIDense*)newmat->data;
213: bv = ((Mat_SeqDense*)newmatd->A->data)->v;
215: for (i=0; i<Ncols; i++) {
216: av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
217: for (j=0; j<nrows; j++) {
218: *bv++ = av[irow[j] - rstart];
219: }
220: }
222: /* Assemble the matrices so that the correct flags are set */
223: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
224: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
226: /* Free work space */
227: ISRestoreIndices(isrow,&irow);
228: ISRestoreIndices(iscol_local,&icol);
229: ISDestroy(&iscol_local);
230: *B = newmat;
231: return(0);
232: }
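/*
   A minimal sketch of calling MatGetSubMatrix() on a MATMPIDENSE matrix under the restriction
   noted above (the submatrix keeps the same row partitioning as the original matrix); the
   name A is assumed to be an assembled MATMPIDENSE matrix:

       IS       isrow,iscol;
       Mat      Asub;
       PetscInt rstart,rend,cstart,cend;

       MatGetOwnershipRange(A,&rstart,&rend);          // locally owned rows
       MatGetOwnershipRangeColumn(A,&cstart,&cend);    // columns this rank will own in the result
       ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);
       ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&iscol);
       MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&Asub);
       ISDestroy(&isrow);
       ISDestroy(&iscol);
       MatDestroy(&Asub);
*/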
236: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
237: {
238: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
242: MatDenseRestoreArray(a->A,array);
243: return(0);
244: }
248: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
249: {
250: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
251: MPI_Comm comm;
253: PetscInt nstash,reallocs;
254: InsertMode addv;
257: PetscObjectGetComm((PetscObject)mat,&comm);
258: /* make sure all processors are either in INSERTMODE or ADDMODE */
259: MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);
260: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
261: mat->insertmode = addv; /* in case this processor had no cache */
263: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
264: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
265: PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
266: return(0);
267: }
271: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
272: {
273: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
275: PetscInt i,*row,*col,flg,j,rstart,ncols;
276: PetscMPIInt n;
277: PetscScalar *val;
278: InsertMode addv=mat->insertmode;
281: /* wait on receives */
282: while (1) {
283: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
284: if (!flg) break;
286: for (i=0; i<n;) {
287: /* Now identify the consecutive vals belonging to the same row */
288: for (j=i,rstart=row[j]; j<n; j++) {
289: if (row[j] != rstart) break;
290: }
291: if (j < n) ncols = j-i;
292: else ncols = n-i;
293: /* Now assemble all these values with a single function call */
294: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
295: i = j;
296: }
297: }
298: MatStashScatterEnd_Private(&mat->stash);
300: MatAssemblyBegin(mdn->A,mode);
301: MatAssemblyEnd(mdn->A,mode);
303: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
304: MatSetUpMultiply_MPIDense(mat);
305: }
306: return(0);
307: }
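/*
   A minimal sketch of the assembly cycle that MatAssemblyBegin_MPIDense()/MatAssemblyEnd_MPIDense()
   implement: entries set for rows owned by another process are kept in the stash by MatSetValues()
   and only shipped to their owners during assembly. A is assumed to be a MATMPIDENSE matrix and
   grow/gcol are placeholder global indices:

       PetscInt    grow = 0,gcol = 0;
       PetscScalar v    = 1.0;

       MatSetValues(A,1,&grow,1,&gcol,&v,ADD_VALUES);   // stashed if row 0 is owned by another rank
       MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);          // scatters the stashed entries
       MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);            // applies them on the owning ranks
*/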
311: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
312: {
314: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
317: MatZeroEntries(l->A);
318: return(0);
319: }
321: /* the code does not do the diagonal entries correctly unless the
322: matrix is square and the column and row ownerships are identical.
323: This is a BUG. The only way to fix it seems to be to access
324: mdn->A and mdn->B directly and not through the MatZeroRows()
325: routine.
326: */
329: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
330: {
331: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
332: PetscErrorCode ierr;
333: PetscInt i,*owners = A->rmap->range;
334: PetscInt *sizes,j,idx,nsends;
335: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
336: PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
337: PetscInt *lens,*lrows,*values;
338: PetscMPIInt n,imdex,rank = l->rank,size = l->size;
339: MPI_Comm comm;
340: MPI_Request *send_waits,*recv_waits;
341: MPI_Status recv_status,*send_status;
342: PetscBool found;
343: const PetscScalar *xx;
344: PetscScalar *bb;
347: PetscObjectGetComm((PetscObject)A,&comm);
348: if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
349: if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
350: /* first count number of contributors to each processor */
351: PetscCalloc1(2*size,&sizes);
352: PetscMalloc1(N+1,&owner); /* see note*/
353: for (i=0; i<N; i++) {
354: idx = rows[i];
355: found = PETSC_FALSE;
356: for (j=0; j<size; j++) {
357: if (idx >= owners[j] && idx < owners[j+1]) {
358: sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
359: }
360: }
361: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
362: }
363: nsends = 0;
364: for (i=0; i<size; i++) nsends += sizes[2*i+1];
366: /* inform other processors of number of messages and max length*/
367: PetscMaxSum(comm,sizes,&nmax,&nrecvs);
369: /* post receives: */
370: PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);
371: PetscMalloc1(nrecvs+1,&recv_waits);
372: for (i=0; i<nrecvs; i++) {
373: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
374: }
376: /* do sends:
377: 1) starts[i] gives the starting index in svalues for stuff going to
378: the ith processor
379: */
380: PetscMalloc1(N+1,&svalues);
381: PetscMalloc1(nsends+1,&send_waits);
382: PetscMalloc1(size+1,&starts);
384: starts[0] = 0;
385: for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
386: for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
388: starts[0] = 0;
389: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
390: count = 0;
391: for (i=0; i<size; i++) {
392: if (sizes[2*i+1]) {
393: MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
394: }
395: }
396: PetscFree(starts);
398: base = owners[rank];
400: /* wait on receives */
401: PetscMalloc2(nrecvs,&lens,nrecvs,&source);
402: count = nrecvs;
403: slen = 0;
404: while (count) {
405: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
406: /* unpack receives into our local space */
407: MPI_Get_count(&recv_status,MPIU_INT,&n);
409: source[imdex] = recv_status.MPI_SOURCE;
410: lens[imdex] = n;
411: slen += n;
412: count--;
413: }
414: PetscFree(recv_waits);
416: /* move the data into the send scatter */
417: PetscMalloc1(slen+1,&lrows);
418: count = 0;
419: for (i=0; i<nrecvs; i++) {
420: values = rvalues + i*nmax;
421: for (j=0; j<lens[i]; j++) {
422: lrows[count++] = values[j] - base;
423: }
424: }
425: PetscFree(rvalues);
426: PetscFree2(lens,source);
427: PetscFree(owner);
428: PetscFree(sizes);
430: /* fix right hand side if needed */
431: if (x && b) {
432: VecGetArrayRead(x,&xx);
433: VecGetArray(b,&bb);
434: for (i=0; i<slen; i++) {
435: bb[lrows[i]] = diag*xx[lrows[i]];
436: }
437: VecRestoreArrayRead(x,&xx);
438: VecRestoreArray(b,&bb);
439: }
441: /* actually zap the local rows */
442: MatZeroRows(l->A,slen,lrows,0.0,0,0);
443: if (diag != 0.0) {
444: Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
445: PetscInt m = ll->lda, i;
447: for (i=0; i<slen; i++) {
448: ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
449: }
450: }
451: PetscFree(lrows);
453: /* wait on sends */
454: if (nsends) {
455: PetscMalloc1(nsends,&send_status);
456: MPI_Waitall(nsends,send_waits,send_status);
457: PetscFree(send_status);
458: }
459: PetscFree(send_waits);
460: PetscFree(svalues);
461: return(0);
462: }
466: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
467: {
468: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
472: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
473: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
474: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
475: return(0);
476: }
480: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
481: {
482: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
486: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
487: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
488: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
489: return(0);
490: }
494: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
495: {
496: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
498: PetscScalar zero = 0.0;
501: VecSet(yy,zero);
502: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
503: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
504: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
505: return(0);
506: }
510: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
511: {
512: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
516: VecCopy(yy,zz);
517: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
518: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
519: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
520: return(0);
521: }
525: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
526: {
527: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
528: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
530: PetscInt len,i,n,m = A->rmap->n,radd;
531: PetscScalar *x,zero = 0.0;
534: VecSet(v,zero);
535: VecGetArray(v,&x);
536: VecGetSize(v,&n);
537: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
538: len = PetscMin(a->A->rmap->n,a->A->cmap->n);
539: radd = A->rmap->rstart*m;
540: for (i=0; i<len; i++) {
541: x[i] = aloc->v[radd + i*m + i];
542: }
543: VecRestoreArray(v,&x);
544: return(0);
545: }
549: PetscErrorCode MatDestroy_MPIDense(Mat mat)
550: {
551: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
555: #if defined(PETSC_USE_LOG)
556: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
557: #endif
558: MatStashDestroy_Private(&mat->stash);
559: MatDestroy(&mdn->A);
560: VecDestroy(&mdn->lvec);
561: VecScatterDestroy(&mdn->Mvctx);
563: PetscFree(mat->data);
564: PetscObjectChangeTypeName((PetscObject)mat,0);
566: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);
567: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);
568: #if defined(PETSC_HAVE_ELEMENTAL)
569: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);
570: #endif
571: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);
572: PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);
573: PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);
574: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);
575: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);
576: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);
577: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);
578: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);
579: return(0);
580: }
584: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
585: {
586: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
587: PetscErrorCode ierr;
588: PetscViewerFormat format;
589: int fd;
590: PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k;
591: PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size;
592: PetscScalar *work,*v,*vv;
593: Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data;
596: if (mdn->size == 1) {
597: MatView(mdn->A,viewer);
598: } else {
599: PetscViewerBinaryGetDescriptor(viewer,&fd);
600: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
601: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
603: PetscViewerGetFormat(viewer,&format);
604: if (format == PETSC_VIEWER_NATIVE) {
606: if (!rank) {
607: /* store the matrix as a dense matrix */
608: header[0] = MAT_FILE_CLASSID;
609: header[1] = mat->rmap->N;
610: header[2] = N;
611: header[3] = MATRIX_BINARY_FORMAT_DENSE;
612: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
614: /* get largest work array needed for transposing array */
615: mmax = mat->rmap->n;
616: for (i=1; i<size; i++) {
617: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
618: }
619: PetscMalloc1(mmax*N,&work);
621: /* write out local array, by rows */
622: m = mat->rmap->n;
623: v = a->v;
624: for (j=0; j<N; j++) {
625: for (i=0; i<m; i++) {
626: work[j + i*N] = *v++;
627: }
628: }
629: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
630: /* get largest work array to receive messages from other processes, excludes process zero */
631: mmax = 0;
632: for (i=1; i<size; i++) {
633: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
634: }
635: PetscMalloc1(mmax*N,&vv);
636: for (k = 1; k < size; k++) {
637: v = vv;
638: m = mat->rmap->range[k+1] - mat->rmap->range[k];
639: MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));
641: for (j = 0; j < N; j++) {
642: for (i = 0; i < m; i++) {
643: work[j + i*N] = *v++;
644: }
645: }
646: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
647: }
648: PetscFree(work);
649: PetscFree(vv);
650: } else {
651: MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
652: }
653: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
654: }
655: return(0);
656: }
658: extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
659: #include <petscdraw.h>
662: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
663: {
664: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
665: PetscErrorCode ierr;
666: PetscMPIInt rank = mdn->rank;
667: PetscViewerType vtype;
668: PetscBool iascii,isdraw;
669: PetscViewer sviewer;
670: PetscViewerFormat format;
673: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
674: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
675: if (iascii) {
676: PetscViewerGetType(viewer,&vtype);
677: PetscViewerGetFormat(viewer,&format);
678: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
679: MatInfo info;
680: MatGetInfo(mat,MAT_LOCAL,&info);
681: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
682: PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
683: PetscViewerFlush(viewer);
684: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
685: VecScatterView(mdn->Mvctx,viewer);
686: return(0);
687: } else if (format == PETSC_VIEWER_ASCII_INFO) {
688: return(0);
689: }
690: } else if (isdraw) {
691: PetscDraw draw;
692: PetscBool isnull;
694: PetscViewerDrawGetDraw(viewer,0,&draw);
695: PetscDrawIsNull(draw,&isnull);
696: if (isnull) return(0);
697: }
699: {
700: /* assemble the entire matrix onto first processor. */
701: Mat A;
702: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
703: PetscInt *cols;
704: PetscScalar *vals;
706: MatCreate(PetscObjectComm((PetscObject)mat),&A);
707: if (!rank) {
708: MatSetSizes(A,M,N,M,N);
709: } else {
710: MatSetSizes(A,0,0,M,N);
711: }
712: /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
713: MatSetType(A,MATMPIDENSE);
714: MatMPIDenseSetPreallocation(A,NULL);
715: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
717: /* Copy the matrix ... This isn't the most efficient means,
718: but it's quick for now */
719: A->insertmode = INSERT_VALUES;
721: row = mat->rmap->rstart;
722: m = mdn->A->rmap->n;
723: for (i=0; i<m; i++) {
724: MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
725: MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
726: MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
727: row++;
728: }
730: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
731: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
732: PetscViewerGetSingleton(viewer,&sviewer);
733: if (!rank) {
734: MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);
735: }
736: PetscViewerRestoreSingleton(viewer,&sviewer);
737: PetscViewerFlush(viewer);
738: MatDestroy(&A);
739: }
740: return(0);
741: }
745: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
746: {
748: PetscBool iascii,isbinary,isdraw,issocket;
751: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
752: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
753: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
754: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
756: if (iascii || issocket || isdraw) {
757: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
758: } else if (isbinary) {
759: MatView_MPIDense_Binary(mat,viewer);
760: }
761: return(0);
762: }
766: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
767: {
768: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
769: Mat mdn = mat->A;
771: PetscReal isend[5],irecv[5];
774: info->block_size = 1.0;
776: MatGetInfo(mdn,MAT_LOCAL,info);
778: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
779: isend[3] = info->memory; isend[4] = info->mallocs;
780: if (flag == MAT_LOCAL) {
781: info->nz_used = isend[0];
782: info->nz_allocated = isend[1];
783: info->nz_unneeded = isend[2];
784: info->memory = isend[3];
785: info->mallocs = isend[4];
786: } else if (flag == MAT_GLOBAL_MAX) {
787: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
789: info->nz_used = irecv[0];
790: info->nz_allocated = irecv[1];
791: info->nz_unneeded = irecv[2];
792: info->memory = irecv[3];
793: info->mallocs = irecv[4];
794: } else if (flag == MAT_GLOBAL_SUM) {
795: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
797: info->nz_used = irecv[0];
798: info->nz_allocated = irecv[1];
799: info->nz_unneeded = irecv[2];
800: info->memory = irecv[3];
801: info->mallocs = irecv[4];
802: }
803: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
804: info->fill_ratio_needed = 0;
805: info->factor_mallocs = 0;
806: return(0);
807: }
811: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
812: {
813: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
817: switch (op) {
818: case MAT_NEW_NONZERO_LOCATIONS:
819: case MAT_NEW_NONZERO_LOCATION_ERR:
820: case MAT_NEW_NONZERO_ALLOCATION_ERR:
821: MatSetOption(a->A,op,flg);
822: break;
823: case MAT_ROW_ORIENTED:
824: a->roworiented = flg;
826: MatSetOption(a->A,op,flg);
827: break;
828: case MAT_NEW_DIAGONALS:
829: case MAT_KEEP_NONZERO_PATTERN:
830: case MAT_USE_HASH_TABLE:
831: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
832: break;
833: case MAT_IGNORE_OFF_PROC_ENTRIES:
834: a->donotstash = flg;
835: break;
836: case MAT_SYMMETRIC:
837: case MAT_STRUCTURALLY_SYMMETRIC:
838: case MAT_HERMITIAN:
839: case MAT_SYMMETRY_ETERNAL:
840: case MAT_IGNORE_LOWER_TRIANGULAR:
841: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
842: break;
843: default:
844: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
845: }
846: return(0);
847: }
852: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
853: {
854: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
855: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
856: PetscScalar *l,*r,x,*v;
858: PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
861: MatGetLocalSize(A,&s2,&s3);
862: if (ll) {
863: VecGetLocalSize(ll,&s2a);
864: if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
865: VecGetArray(ll,&l);
866: for (i=0; i<m; i++) {
867: x = l[i];
868: v = mat->v + i;
869: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
870: }
871: VecRestoreArray(ll,&l);
872: PetscLogFlops(n*m);
873: }
874: if (rr) {
875: VecGetLocalSize(rr,&s3a);
876: if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
877: VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
878: VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
879: VecGetArray(mdn->lvec,&r);
880: for (i=0; i<n; i++) {
881: x = r[i];
882: v = mat->v + i*m;
883: for (j=0; j<m; j++) (*v++) *= x;
884: }
885: VecRestoreArray(mdn->lvec,&r);
886: PetscLogFlops(n*m);
887: }
888: return(0);
889: }
893: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
894: {
895: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
896: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
898: PetscInt i,j;
899: PetscReal sum = 0.0;
900: PetscScalar *v = mat->v;
903: if (mdn->size == 1) {
904: MatNorm(mdn->A,type,nrm);
905: } else {
906: if (type == NORM_FROBENIUS) {
907: for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
908: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
909: }
910: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
911: *nrm = PetscSqrtReal(*nrm);
912: PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
913: } else if (type == NORM_1) {
914: PetscReal *tmp,*tmp2;
915: PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);
916: PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));
917: PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));
918: *nrm = 0.0;
919: v = mat->v;
920: for (j=0; j<mdn->A->cmap->n; j++) {
921: for (i=0; i<mdn->A->rmap->n; i++) {
922: tmp[j] += PetscAbsScalar(*v); v++;
923: }
924: }
925: MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
926: for (j=0; j<A->cmap->N; j++) {
927: if (tmp2[j] > *nrm) *nrm = tmp2[j];
928: }
929: PetscFree2(tmp,tmp2);
930: PetscLogFlops(A->cmap->n*A->rmap->n);
931: } else if (type == NORM_INFINITY) { /* max row norm */
932: PetscReal ntemp;
933: MatNorm(mdn->A,type,&ntemp);
934: MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
935: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
936: }
937: return(0);
938: }
942: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
943: {
944: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
945: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
946: Mat B;
947: PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
949: PetscInt j,i;
950: PetscScalar *v;
953: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
954: if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
955: MatCreate(PetscObjectComm((PetscObject)A),&B);
956: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
957: MatSetType(B,((PetscObject)A)->type_name);
958: MatMPIDenseSetPreallocation(B,NULL);
959: } else {
960: B = *matout;
961: }
963: m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
964: PetscMalloc1(m,&rwork);
965: for (i=0; i<m; i++) rwork[i] = rstart + i;
966: for (j=0; j<n; j++) {
967: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
968: v += m;
969: }
970: PetscFree(rwork);
971: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
972: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
973: if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
974: *matout = B;
975: } else {
976: MatHeaderMerge(A,B);
977: }
978: return(0);
979: }
982: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
983: extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
987: PetscErrorCode MatSetUp_MPIDense(Mat A)
988: {
992: MatMPIDenseSetPreallocation(A,0);
993: return(0);
994: }
998: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
999: {
1001: Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1004: MatAXPY(A->A,alpha,B->A,str);
1005: PetscObjectStateIncrease((PetscObject)Y);
1006: return(0);
1007: }
1011: PetscErrorCode MatConjugate_MPIDense(Mat mat)
1012: {
1013: Mat_MPIDense *a = (Mat_MPIDense*)mat->data;
1017: MatConjugate(a->A);
1018: return(0);
1019: }
1023: PetscErrorCode MatRealPart_MPIDense(Mat A)
1024: {
1025: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1029: MatRealPart(a->A);
1030: return(0);
1031: }
1035: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1036: {
1037: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1041: MatImaginaryPart(a->A);
1042: return(0);
1043: }
1045: extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1048: PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1049: {
1051: PetscInt i,n;
1052: Mat_MPIDense *a = (Mat_MPIDense*) A->data;
1053: PetscReal *work;
1056: MatGetSize(A,NULL,&n);
1057: PetscMalloc1(n,&work);
1058: MatGetColumnNorms_SeqDense(a->A,type,work);
1059: if (type == NORM_2) {
1060: for (i=0; i<n; i++) work[i] *= work[i];
1061: }
1062: if (type == NORM_INFINITY) {
1063: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
1064: } else {
1065: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
1066: }
1067: PetscFree(work);
1068: if (type == NORM_2) {
1069: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1070: }
1071: return(0);
1072: }
1076: static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1077: {
1078: Mat_MPIDense *d = (Mat_MPIDense*)x->data;
1080: PetscScalar *a;
1081: PetscInt m,n,i;
1084: MatGetSize(d->A,&m,&n);
1085: MatDenseGetArray(d->A,&a);
1086: for (i=0; i<m*n; i++) {
1087: PetscRandomGetValue(rctx,a+i);
1088: }
1089: MatDenseRestoreArray(d->A,&a);
1090: return(0);
1091: }
1093: /* -------------------------------------------------------------------*/
1094: static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1095: MatGetRow_MPIDense,
1096: MatRestoreRow_MPIDense,
1097: MatMult_MPIDense,
1098: /* 4*/ MatMultAdd_MPIDense,
1099: MatMultTranspose_MPIDense,
1100: MatMultTransposeAdd_MPIDense,
1101: 0,
1102: 0,
1103: 0,
1104: /* 10*/ 0,
1105: 0,
1106: 0,
1107: 0,
1108: MatTranspose_MPIDense,
1109: /* 15*/ MatGetInfo_MPIDense,
1110: MatEqual_MPIDense,
1111: MatGetDiagonal_MPIDense,
1112: MatDiagonalScale_MPIDense,
1113: MatNorm_MPIDense,
1114: /* 20*/ MatAssemblyBegin_MPIDense,
1115: MatAssemblyEnd_MPIDense,
1116: MatSetOption_MPIDense,
1117: MatZeroEntries_MPIDense,
1118: /* 24*/ MatZeroRows_MPIDense,
1119: 0,
1120: 0,
1121: 0,
1122: 0,
1123: /* 29*/ MatSetUp_MPIDense,
1124: 0,
1125: 0,
1126: 0,
1127: 0,
1128: /* 34*/ MatDuplicate_MPIDense,
1129: 0,
1130: 0,
1131: 0,
1132: 0,
1133: /* 39*/ MatAXPY_MPIDense,
1134: MatGetSubMatrices_MPIDense,
1135: 0,
1136: MatGetValues_MPIDense,
1137: 0,
1138: /* 44*/ 0,
1139: MatScale_MPIDense,
1140: MatShift_Basic,
1141: 0,
1142: 0,
1143: /* 49*/ MatSetRandom_MPIDense,
1144: 0,
1145: 0,
1146: 0,
1147: 0,
1148: /* 54*/ 0,
1149: 0,
1150: 0,
1151: 0,
1152: 0,
1153: /* 59*/ MatGetSubMatrix_MPIDense,
1154: MatDestroy_MPIDense,
1155: MatView_MPIDense,
1156: 0,
1157: 0,
1158: /* 64*/ 0,
1159: 0,
1160: 0,
1161: 0,
1162: 0,
1163: /* 69*/ 0,
1164: 0,
1165: 0,
1166: 0,
1167: 0,
1168: /* 74*/ 0,
1169: 0,
1170: 0,
1171: 0,
1172: 0,
1173: /* 79*/ 0,
1174: 0,
1175: 0,
1176: 0,
1177: /* 83*/ MatLoad_MPIDense,
1178: 0,
1179: 0,
1180: 0,
1181: 0,
1182: 0,
1183: /* 89*/
1184: 0,
1185: 0,
1186: 0,
1187: 0,
1188: 0,
1189: /* 94*/ 0,
1190: 0,
1191: 0,
1192: 0,
1193: 0,
1194: /* 99*/ 0,
1195: 0,
1196: 0,
1197: MatConjugate_MPIDense,
1198: 0,
1199: /*104*/ 0,
1200: MatRealPart_MPIDense,
1201: MatImaginaryPart_MPIDense,
1202: 0,
1203: 0,
1204: /*109*/ 0,
1205: 0,
1206: 0,
1207: 0,
1208: 0,
1209: /*114*/ 0,
1210: 0,
1211: 0,
1212: 0,
1213: 0,
1214: /*119*/ 0,
1215: 0,
1216: 0,
1217: 0,
1218: 0,
1219: /*124*/ 0,
1220: MatGetColumnNorms_MPIDense,
1221: 0,
1222: 0,
1223: 0,
1224: /*129*/ 0,
1225: 0,
1226: 0,
1227: 0,
1228: 0,
1229: /*134*/ 0,
1230: 0,
1231: 0,
1232: 0,
1233: 0,
1234: /*139*/ 0,
1235: 0,
1236: 0
1237: };
1241: PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1242: {
1243: Mat_MPIDense *a;
1247: mat->preallocated = PETSC_TRUE;
1248: /* Note: For now, when data is specified above, this assumes the user correctly
1249: allocates the local dense storage space. We should add error checking. */
1251: a = (Mat_MPIDense*)mat->data;
1252: PetscLayoutSetUp(mat->rmap);
1253: PetscLayoutSetUp(mat->cmap);
1254: a->nvec = mat->cmap->n;
1256: MatCreate(PETSC_COMM_SELF,&a->A);
1257: MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1258: MatSetType(a->A,MATSEQDENSE);
1259: MatSeqDenseSetPreallocation(a->A,data);
1260: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1261: return(0);
1262: }
1264: #if defined(PETSC_HAVE_ELEMENTAL)
1267: PETSC_EXTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1268: {
1269: Mat mat_elemental;
1271: PetscScalar *array,*v_rowwise;
1272: PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,j,k,*rows,*cols;
1273:
1275: PetscMalloc3(m*N,&v_rowwise,m,&rows,N,&cols);
1276: MatDenseGetArray(A,&array);
1277: /* convert column-wise array into row-wise v_rowwise, see MatSetValues_Elemental() */
1278: k = 0;
1279: for (j=0; j<N; j++) {
1280: cols[j] = j;
1281: for (i=0; i<m; i++) {
1282: v_rowwise[i*N+j] = array[k++];
1283: }
1284: }
1285: for (i=0; i<m; i++) {
1286: rows[i] = rstart + i;
1287: }
1288: MatDenseRestoreArray(A,&array);
1290: MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
1291: MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);
1292: MatSetType(mat_elemental,MATELEMENTAL);
1293: MatSetUp(mat_elemental);
1294:
1295: /* PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
1296: MatSetValues(mat_elemental,m,rows,N,cols,v_rowwise,ADD_VALUES);
1297: MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
1298: MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
1299: PetscFree3(v_rowwise,rows,cols);
1301: if (reuse == MAT_REUSE_MATRIX) {
1302: MatHeaderReplace(A,mat_elemental);
1303: } else {
1304: *newmat = mat_elemental;
1305: }
1306: return(0);
1307: }
1308: #endif
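/*
   When PETSc is configured with Elemental, the conversion above is normally reached through
   MatConvert(); a minimal sketch, assuming A is an assembled MATMPIDENSE matrix:

       Mat Ae;
       MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);
       // ... use Ae, e.g. for dense factorizations provided by Elemental ...
       MatDestroy(&Ae);
*/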
1312: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1313: {
1314: Mat_MPIDense *a;
1318: PetscNewLog(mat,&a);
1319: mat->data = (void*)a;
1320: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1322: mat->insertmode = NOT_SET_VALUES;
1323: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);
1324: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);
1326: /* build cache for off-processor entries formed during MatSetValues() */
1327: a->donotstash = PETSC_FALSE;
1329: MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);
1331: /* stuff used for matrix vector multiply */
1332: a->lvec = 0;
1333: a->Mvctx = 0;
1334: a->roworiented = PETSC_TRUE;
1336: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);
1337: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);
1338: #if defined(PETSC_HAVE_ELEMENTAL)
1339: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);
1340: #endif
1341: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);
1342: PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);
1343: PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);
1344: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);
1345: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);
1347: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);
1348: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);
1349: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);
1350: PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1351: return(0);
1352: }
1354: /*MC
1355: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1357: This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1358: and MATMPIDENSE otherwise.
1360: Options Database Keys:
1361: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1363: Level: beginner
1366: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1367: M*/
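/*
   A minimal creation sketch for the MATDENSE type; comm, M and N are assumed to be an MPI
   communicator and global sizes defined by the caller:

       Mat A;
       MatCreate(comm,&A);
       MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
       MatSetType(A,MATDENSE);        // resolves to MATSEQDENSE on one process, MATMPIDENSE otherwise
       MatSetFromOptions(A);          // honors -mat_type dense
       MatSetUp(A);
*/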
1371: /*@C
1372: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1374: Not collective
1376: Input Parameters:
1377: + B - the matrix
1378: - data - optional location of matrix data. Set data=NULL for PETSc
1379: to control all matrix memory allocation.
1381: Notes:
1382: The dense format is fully compatible with standard Fortran 77
1383: storage by columns.
1385: The data input variable is intended primarily for Fortran programmers
1386: who wish to allocate their own matrix memory space. Most users should
1387: set data=NULL.
1389: Level: intermediate
1391: .keywords: matrix,dense, parallel
1393: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1394: @*/
1395: PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1396: {
1400: PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
1401: return(0);
1402: }
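/*
   A minimal sketch of the usual preallocation sequence with PETSc managing the storage
   (data = NULL); M and N are assumed global sizes supplied by the caller:

       Mat B;
       MatCreate(PETSC_COMM_WORLD,&B);
       MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M,N);
       MatSetType(B,MATMPIDENSE);
       MatMPIDenseSetPreallocation(B,NULL);   // PETSc allocates the local dense block
       // ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual ...
*/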
1406: /*@C
1407: MatCreateDense - Creates a parallel matrix in dense format.
1409: Collective on MPI_Comm
1411: Input Parameters:
1412: + comm - MPI communicator
1413: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1414: . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1415: . M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1416: . N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1417: - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1418: to control all matrix memory allocation.
1420: Output Parameter:
1421: . A - the matrix
1423: Notes:
1424: The dense format is fully compatible with standard Fortran 77
1425: storage by columns.
1427: The data input variable is intended primarily for Fortran programmers
1428: who wish to allocate their own matrix memory space. Most users should
1429: set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1431: The user MUST specify either the local or global matrix dimensions
1432: (possibly both).
1434: Level: intermediate
1436: .keywords: matrix,dense, parallel
1438: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1439: @*/
1440: PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1441: {
1443: PetscMPIInt size;
1446: MatCreate(comm,A);
1447: MatSetSizes(*A,m,n,M,N);
1448: MPI_Comm_size(comm,&size);
1449: if (size > 1) {
1450: MatSetType(*A,MATMPIDENSE);
1451: MatMPIDenseSetPreallocation(*A,data);
1452: if (data) { /* user provided data array, so no need to assemble */
1453: MatSetUpMultiply_MPIDense(*A);
1454: (*A)->assembled = PETSC_TRUE;
1455: }
1456: } else {
1457: MatSetType(*A,MATSEQDENSE);
1458: MatSeqDenseSetPreallocation(*A,data);
1459: }
1460: return(0);
1461: }
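/*
   A minimal sketch of MatCreateDense() with PETSc-managed storage, followed by filling the
   locally owned rows; M and N are assumed global sizes:

       Mat         A;
       PetscInt    i,j,rstart,rend;
       PetscScalar v;

       MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);
       MatGetOwnershipRange(A,&rstart,&rend);
       for (i=rstart; i<rend; i++) {
         for (j=0; j<N; j++) {
           v = (PetscScalar)(i + j);
           MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);
         }
       }
       MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
       MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/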
1465: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1466: {
1467: Mat mat;
1468: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1472: *newmat = 0;
1473: MatCreate(PetscObjectComm((PetscObject)A),&mat);
1474: MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1475: MatSetType(mat,((PetscObject)A)->type_name);
1476: a = (Mat_MPIDense*)mat->data;
1477: PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));
1479: mat->factortype = A->factortype;
1480: mat->assembled = PETSC_TRUE;
1481: mat->preallocated = PETSC_TRUE;
1483: a->size = oldmat->size;
1484: a->rank = oldmat->rank;
1485: mat->insertmode = NOT_SET_VALUES;
1486: a->nvec = oldmat->nvec;
1487: a->donotstash = oldmat->donotstash;
1489: PetscLayoutReference(A->rmap,&mat->rmap);
1490: PetscLayoutReference(A->cmap,&mat->cmap);
1492: MatSetUpMultiply_MPIDense(mat);
1493: MatDuplicate(oldmat->A,cpvalues,&a->A);
1494: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1496: *newmat = mat;
1497: return(0);
1498: }
1502: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1503: {
1505: PetscMPIInt rank,size;
1506: const PetscInt *rowners;
1507: PetscInt i,m,n,nz,j,mMax;
1508: PetscScalar *array,*vals,*vals_ptr;
1509: Mat_MPIDense *a = (Mat_MPIDense*)newmat->data;
1512: MPI_Comm_rank(comm,&rank);
1513: MPI_Comm_size(comm,&size);
1515: /* determine ownership of rows and columns */
1516: m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1517: n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1519: MatSetSizes(newmat,m,n,M,N);
1520: if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1521: MatMPIDenseSetPreallocation(newmat,NULL);
1522: }
1523: MatDenseGetArray(newmat,&array);
1524: MatGetLocalSize(newmat,&m,NULL);
1525: MatGetOwnershipRanges(newmat,&rowners);
1526: MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);
1527: if (!rank) {
1528: PetscMalloc1(mMax*N,&vals);
1530: /* read in my part of the matrix numerical values */
1531: PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1533: /* insert into matrix by row (this is why we cannot read directly into the array) */
1534: vals_ptr = vals;
1535: for (i=0; i<m; i++) {
1536: for (j=0; j<N; j++) {
1537: array[i + j*m] = *vals_ptr++;
1538: }
1539: }
1541: /* read in other processors and ship out */
1542: for (i=1; i<size; i++) {
1543: nz = (rowners[i+1] - rowners[i])*N;
1544: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1545: MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);
1546: }
1547: } else {
1548: /* receive numeric values */
1549: PetscMalloc1(m*N,&vals);
1551: /* receive message of values*/
1552: MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);
1554: /* insert into matrix by row (this is why we cannot read directly into the array) */
1555: vals_ptr = vals;
1556: for (i=0; i<m; i++) {
1557: for (j=0; j<N; j++) {
1558: array[i + j*m] = *vals_ptr++;
1559: }
1560: }
1561: }
1562: MatDenseRestoreArray(newmat,&array);
1563: PetscFree(vals);
1564: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1565: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1566: return(0);
1567: }
1571: PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1572: {
1573: Mat_MPIDense *a;
1574: PetscScalar *vals,*svals;
1575: MPI_Comm comm;
1576: MPI_Status status;
1577: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1578: PetscInt header[4],*rowlengths = 0,M,N,*cols;
1579: PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1580: PetscInt i,nz,j,rstart,rend;
1581: int fd;
1585: /* force binary viewer to load .info file if it has not yet done so */
1586: PetscViewerSetUp(viewer);
1587: PetscObjectGetComm((PetscObject)viewer,&comm);
1588: MPI_Comm_size(comm,&size);
1589: MPI_Comm_rank(comm,&rank);
1590: PetscViewerBinaryGetDescriptor(viewer,&fd);
1591: if (!rank) {
1592: PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
1593: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1594: }
1595: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1596: M = header[1]; N = header[2]; nz = header[3];
1598: /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1599: if (newmat->rmap->N < 0) newmat->rmap->N = M;
1600: if (newmat->cmap->N < 0) newmat->cmap->N = N;
1602: if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1603: if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1605: /*
1606: Handle case where matrix is stored on disk as a dense matrix
1607: */
1608: if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1609: MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1610: return(0);
1611: }
1613: /* determine ownership of all rows */
1614: if (newmat->rmap->n < 0) {
1615: PetscMPIIntCast(M/size + ((M % size) > rank),&m);
1616: } else {
1617: PetscMPIIntCast(newmat->rmap->n,&m);
1618: }
1619: if (newmat->cmap->n < 0) {
1620: n = PETSC_DECIDE;
1621: } else {
1622: PetscMPIIntCast(newmat->cmap->n,&n);
1623: }
1625: PetscMalloc1(size+2,&rowners);
1626: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1627: rowners[0] = 0;
1628: for (i=2; i<=size; i++) {
1629: rowners[i] += rowners[i-1];
1630: }
1631: rstart = rowners[rank];
1632: rend = rowners[rank+1];
1634: /* distribute row lengths to all processors */
1635: PetscMalloc1(rend-rstart,&ourlens);
1636: if (!rank) {
1637: PetscMalloc1(M,&rowlengths);
1638: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1639: PetscMalloc1(size,&sndcounts);
1640: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1641: MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1642: PetscFree(sndcounts);
1643: } else {
1644: MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1645: }
1647: if (!rank) {
1648: /* calculate the number of nonzeros on each processor */
1649: PetscMalloc1(size,&procsnz);
1650: PetscMemzero(procsnz,size*sizeof(PetscInt));
1651: for (i=0; i<size; i++) {
1652: for (j=rowners[i]; j< rowners[i+1]; j++) {
1653: procsnz[i] += rowlengths[j];
1654: }
1655: }
1656: PetscFree(rowlengths);
1658: /* determine max buffer needed and allocate it */
1659: maxnz = 0;
1660: for (i=0; i<size; i++) {
1661: maxnz = PetscMax(maxnz,procsnz[i]);
1662: }
1663: PetscMalloc1(maxnz,&cols);
1665: /* read in my part of the matrix column indices */
1666: nz = procsnz[0];
1667: PetscMalloc1(nz,&mycols);
1668: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1670: /* read in everyone else's part and ship it off */
1671: for (i=1; i<size; i++) {
1672: nz = procsnz[i];
1673: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1674: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1675: }
1676: PetscFree(cols);
1677: } else {
1678: /* determine buffer space needed for message */
1679: nz = 0;
1680: for (i=0; i<m; i++) {
1681: nz += ourlens[i];
1682: }
1683: PetscMalloc1(nz+1,&mycols);
1685: /* receive message of column indices*/
1686: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1687: MPI_Get_count(&status,MPIU_INT,&maxnz);
1688: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1689: }
1691: MatSetSizes(newmat,m,n,M,N);
1692: a = (Mat_MPIDense*)newmat->data;
1693: if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1694: MatMPIDenseSetPreallocation(newmat,NULL);
1695: }
1697: if (!rank) {
1698: PetscMalloc1(maxnz,&vals);
1700: /* read in my part of the matrix numerical values */
1701: nz = procsnz[0];
1702: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1704: /* insert into matrix */
1705: jj = rstart;
1706: smycols = mycols;
1707: svals = vals;
1708: for (i=0; i<m; i++) {
1709: MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1710: smycols += ourlens[i];
1711: svals += ourlens[i];
1712: jj++;
1713: }
1715: /* read in other processors and ship out */
1716: for (i=1; i<size; i++) {
1717: nz = procsnz[i];
1718: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1719: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
1720: }
1721: PetscFree(procsnz);
1722: } else {
1723: /* receive numeric values */
1724: PetscMalloc1(nz+1,&vals);
1726: /* receive message of values*/
1727: MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
1728: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1729: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1731: /* insert into matrix */
1732: jj = rstart;
1733: smycols = mycols;
1734: svals = vals;
1735: for (i=0; i<m; i++) {
1736: MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1737: smycols += ourlens[i];
1738: svals += ourlens[i];
1739: jj++;
1740: }
1741: }
1742: PetscFree(ourlens);
1743: PetscFree(vals);
1744: PetscFree(mycols);
1745: PetscFree(rowners);
1747: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1748: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1749: return(0);
1750: }
1754: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
1755: {
1756: Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1757: Mat a,b;
1758: PetscBool flg;
1762: a = matA->A;
1763: b = matB->A;
1764: MatEqual(a,b,&flg);
1765: MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1766: return(0);
1767: }