Actual source code: mpidense.c
petsc-3.13.6 2020-09-29
2: /*
3: Basic functions for parallel dense matrices.
4: */
6: #include <../src/mat/impls/dense/mpi/mpidense.h>
7: #include <../src/mat/impls/aij/mpi/mpiaij.h>
8: #include <petscblaslapack.h>
10: /*@
12: MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
13: matrix that stores the locally owned part of the operator. For a sequential dense matrix it returns the matrix itself.
15: Input Parameter:
16: . A - the Seq or MPI dense matrix
18: Output Parameter:
19: . B - the inner matrix
21: Level: intermediate
23: @*/
24: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25: {
26: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
28: PetscBool flg;
31: PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
32: if (flg) *B = mat->A;
33: else *B = A;
34: return(0);
35: }
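/*
   Usage sketch (illustrative, not part of this file): obtaining the per-process
   MATSEQDENSE block of a MATMPIDENSE matrix. Aloc is mat->A on this rank (or A
   itself when A is sequential); its entries are addressed with local row indices
   and global column indices. Sizes and names here are arbitrary.

     Mat A,Aloc;
     MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,NULL,&A);
     MatDenseGetLocalMatrix(A,&Aloc);
     MatSetValue(Aloc,0,0,1.0,INSERT_VALUES);
     MatAssemblyBegin(Aloc,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(Aloc,MAT_FINAL_ASSEMBLY);
*/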
37: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
38: {
39: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
41: PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
44: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
45: lrow = row - rstart;
46: MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
47: return(0);
48: }
50: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
51: {
55: if (idx) {PetscFree(*idx);}
56: if (v) {PetscFree(*v);}
57: return(0);
58: }
60: PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
61: {
62: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
64: PetscInt m = A->rmap->n,rstart = A->rmap->rstart;
65: PetscScalar *array;
66: MPI_Comm comm;
67: Mat B;
70: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
72: PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
73: if (!B) {
74: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
75: MatCreate(comm,&B);
76: MatSetSizes(B,m,m,m,m);
77: MatSetType(B,((PetscObject)mdn->A)->type_name);
78: MatDenseGetArray(mdn->A,&array);
79: MatSeqDenseSetPreallocation(B,array+m*rstart);
80: MatDenseRestoreArray(mdn->A,&array);
81: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
82: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
83: PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
84: *a = B;
85: MatDestroy(&B);
86: } else *a = B;
87: return(0);
88: }
90: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
91: {
92: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
94: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
95: PetscBool roworiented = A->roworiented;
98: for (i=0; i<m; i++) {
99: if (idxm[i] < 0) continue;
100: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
101: if (idxm[i] >= rstart && idxm[i] < rend) {
102: row = idxm[i] - rstart;
103: if (roworiented) {
104: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
105: } else {
106: for (j=0; j<n; j++) {
107: if (idxn[j] < 0) continue;
108: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
109: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
110: }
111: }
112: } else if (!A->donotstash) {
113: mat->assembled = PETSC_FALSE;
114: if (roworiented) {
115: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
116: } else {
117: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
118: }
119: }
120: }
121: return(0);
122: }
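/*
   Note (illustrative, not part of this file): the two v[] offsets above encode the
   input layout selected by MAT_ROW_ORIENTED. For an m=2 by n=2 block with logical
   entries a(0,0),a(0,1),a(1,0),a(1,1):

     row oriented:    v = { a(0,0), a(0,1), a(1,0), a(1,1) }   entry (i,j) at v[i*n+j]
     column oriented: v = { a(0,0), a(1,0), a(0,1), a(1,1) }   entry (i,j) at v[i+j*m]

   which matches the v+i*n and v+i+j*m arithmetic in MatSetValues_MPIDense() above.
*/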
124: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
125: {
126: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
128: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
131: for (i=0; i<m; i++) {
132: if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
133: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
134: if (idxm[i] >= rstart && idxm[i] < rend) {
135: row = idxm[i] - rstart;
136: for (j=0; j<n; j++) {
137: if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
138: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
139: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
140: }
141: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
142: }
143: return(0);
144: }
146: static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
147: {
148: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
152: MatDenseGetLDA(a->A,lda);
153: return(0);
154: }
156: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
157: {
158: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
162: MatDenseGetArray(a->A,array);
163: return(0);
164: }
166: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
167: {
168: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
172: MatDenseGetArrayRead(a->A,array);
173: return(0);
174: }
176: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
177: {
178: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
182: MatDensePlaceArray(a->A,array);
183: return(0);
184: }
186: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
187: {
188: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
192: MatDenseResetArray(a->A);
193: return(0);
194: }
196: static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
197: {
198: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
199: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
201: PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
202: const PetscInt *irow,*icol;
203: PetscScalar *av,*bv,*v = lmat->v;
204: Mat newmat;
205: IS iscol_local;
206: MPI_Comm comm_is,comm_mat;
209: PetscObjectGetComm((PetscObject)A,&comm_mat);
210: PetscObjectGetComm((PetscObject)iscol,&comm_is);
211: if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
213: ISAllGather(iscol,&iscol_local);
214: ISGetIndices(isrow,&irow);
215: ISGetIndices(iscol_local,&icol);
216: ISGetLocalSize(isrow,&nrows);
217: ISGetLocalSize(iscol,&ncols);
218: ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */
220: /* No parallel redistribution currently supported! Should really check each index set
221: to confirm that it is OK. ... Currently supports only submatrices with the same row
222: partitioning as the original matrix! */
224: MatGetLocalSize(A,&nlrows,&nlcols);
225: MatGetOwnershipRange(A,&rstart,&rend);
227: /* Check submatrix call */
228: if (scall == MAT_REUSE_MATRIX) {
229: /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
230: /* Really need to test rows and column sizes! */
231: newmat = *B;
232: } else {
233: /* Create and fill new matrix */
234: MatCreate(PetscObjectComm((PetscObject)A),&newmat);
235: MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
236: MatSetType(newmat,((PetscObject)A)->type_name);
237: MatMPIDenseSetPreallocation(newmat,NULL);
238: }
240: /* Now extract the data pointers and do the copy, column at a time */
241: newmatd = (Mat_MPIDense*)newmat->data;
242: bv = ((Mat_SeqDense*)newmatd->A->data)->v;
244: for (i=0; i<Ncols; i++) {
245: av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
246: for (j=0; j<nrows; j++) {
247: *bv++ = av[irow[j] - rstart];
248: }
249: }
251: /* Assemble the matrices so that the correct flags are set */
252: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
253: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
255: /* Free work space */
256: ISRestoreIndices(isrow,&irow);
257: ISRestoreIndices(iscol_local,&icol);
258: ISDestroy(&iscol_local);
259: *B = newmat;
260: return(0);
261: }
263: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
264: {
265: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
269: MatDenseRestoreArray(a->A,array);
270: return(0);
271: }
273: PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
274: {
275: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
279: MatDenseRestoreArrayRead(a->A,array);
280: return(0);
281: }
283: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
284: {
285: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
287: PetscInt nstash,reallocs;
290: if (mdn->donotstash || mat->nooffprocentries) return(0);
292: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
293: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
294: PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
295: return(0);
296: }
298: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
299: {
300: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
302: PetscInt i,*row,*col,flg,j,rstart,ncols;
303: PetscMPIInt n;
304: PetscScalar *val;
307: if (!mdn->donotstash && !mat->nooffprocentries) {
308: /* wait on receives */
309: while (1) {
310: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
311: if (!flg) break;
313: for (i=0; i<n;) {
314: /* Now identify the consecutive vals belonging to the same row */
315: for (j=i,rstart=row[j]; j<n; j++) {
316: if (row[j] != rstart) break;
317: }
318: if (j < n) ncols = j-i;
319: else ncols = n-i;
320: /* Now assemble all these values with a single function call */
321: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
322: i = j;
323: }
324: }
325: MatStashScatterEnd_Private(&mat->stash);
326: }
328: MatAssemblyBegin(mdn->A,mode);
329: MatAssemblyEnd(mdn->A,mode);
331: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
332: MatSetUpMultiply_MPIDense(mat);
333: }
334: return(0);
335: }
337: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
338: {
340: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
343: MatZeroEntries(l->A);
344: return(0);
345: }
347: /* the code does not set the diagonal entries correctly unless the
348: matrix is square and the column and row ownerships are identical.
349: This is a BUG. The only way to fix it seems to be to access
350: mdn->A and mdn->B directly and not through the MatZeroRows()
351: routine.
352: */
353: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
354: {
355: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
356: PetscErrorCode ierr;
357: PetscInt i,*owners = A->rmap->range;
358: PetscInt *sizes,j,idx,nsends;
359: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
360: PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
361: PetscInt *lens,*lrows,*values;
362: PetscMPIInt n,imdex,rank = l->rank,size = l->size;
363: MPI_Comm comm;
364: MPI_Request *send_waits,*recv_waits;
365: MPI_Status recv_status,*send_status;
366: PetscBool found;
367: const PetscScalar *xx;
368: PetscScalar *bb;
371: PetscObjectGetComm((PetscObject)A,&comm);
372: if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
373: if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
374: /* first count number of contributors to each processor */
375: PetscCalloc1(2*size,&sizes);
376: PetscMalloc1(N+1,&owner); /* see note*/
377: for (i=0; i<N; i++) {
378: idx = rows[i];
379: found = PETSC_FALSE;
380: for (j=0; j<size; j++) {
381: if (idx >= owners[j] && idx < owners[j+1]) {
382: sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
383: }
384: }
385: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
386: }
387: nsends = 0;
388: for (i=0; i<size; i++) nsends += sizes[2*i+1];
390: /* inform other processors of number of messages and max length*/
391: PetscMaxSum(comm,sizes,&nmax,&nrecvs);
393: /* post receives: */
394: PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);
395: PetscMalloc1(nrecvs+1,&recv_waits);
396: for (i=0; i<nrecvs; i++) {
397: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
398: }
400: /* do sends:
401: 1) starts[i] gives the starting index in svalues for stuff going to
402: the ith processor
403: */
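/*
   Worked illustration (not part of this file): with 3 ranks and per-rank counts
   sizes[2*i] = {2,0,3}, the prefix sum below gives starts = {0,2,2}: rows owned by
   rank 0 are packed into svalues[0..1] and rows owned by rank 2 into svalues[2..4].
   The packing loop advances starts[owner[i]], so starts[] is rebuilt afterwards
   before the sends are posted.
*/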
404: PetscMalloc1(N+1,&svalues);
405: PetscMalloc1(nsends+1,&send_waits);
406: PetscMalloc1(size+1,&starts);
408: starts[0] = 0;
409: for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
410: for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
412: starts[0] = 0;
413: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
414: count = 0;
415: for (i=0; i<size; i++) {
416: if (sizes[2*i+1]) {
417: MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
418: }
419: }
420: PetscFree(starts);
422: base = owners[rank];
424: /* wait on receives */
425: PetscMalloc2(nrecvs,&lens,nrecvs,&source);
426: count = nrecvs;
427: slen = 0;
428: while (count) {
429: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
430: /* unpack receives into our local space */
431: MPI_Get_count(&recv_status,MPIU_INT,&n);
433: source[imdex] = recv_status.MPI_SOURCE;
434: lens[imdex] = n;
435: slen += n;
436: count--;
437: }
438: PetscFree(recv_waits);
440: /* move the received row indices into the local list lrows[] */
441: PetscMalloc1(slen+1,&lrows);
442: count = 0;
443: for (i=0; i<nrecvs; i++) {
444: values = rvalues + i*nmax;
445: for (j=0; j<lens[i]; j++) {
446: lrows[count++] = values[j] - base;
447: }
448: }
449: PetscFree(rvalues);
450: PetscFree2(lens,source);
451: PetscFree(owner);
452: PetscFree(sizes);
454: /* fix right hand side if needed */
455: if (x && b) {
456: VecGetArrayRead(x,&xx);
457: VecGetArray(b,&bb);
458: for (i=0; i<slen; i++) {
459: bb[lrows[i]] = diag*xx[lrows[i]];
460: }
461: VecRestoreArrayRead(x,&xx);
462: VecRestoreArray(b,&bb);
463: }
465: /* actually zap the local rows */
466: MatZeroRows(l->A,slen,lrows,0.0,0,0);
467: if (diag != 0.0) {
468: Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
469: PetscInt m = ll->lda, i;
471: for (i=0; i<slen; i++) {
472: ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
473: }
474: }
475: PetscFree(lrows);
477: /* wait on sends */
478: if (nsends) {
479: PetscMalloc1(nsends,&send_status);
480: MPI_Waitall(nsends,send_waits,send_status);
481: PetscFree(send_status);
482: }
483: PetscFree(send_waits);
484: PetscFree(svalues);
485: return(0);
486: }
488: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
489: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
490: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
491: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
493: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
494: {
495: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
499: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
500: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
501: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
502: return(0);
503: }
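/*
   Note (illustrative, not part of this file): each process stores all N columns of its
   owned rows, so y = A*x is computed by gathering the entire vector x into mdn->lvec
   with the Mvctx scatter and then performing one local SeqDense product,
   y_local = A_local * lvec. MatMultTranspose_MPIDense() below runs the reverse path:
   it forms A_local^T * x_local in lvec and accumulates the result into y with an
   ADD_VALUES reverse scatter.
*/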
505: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
506: {
507: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
511: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
512: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
513: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
514: return(0);
515: }
517: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
518: {
519: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
521: PetscScalar zero = 0.0;
524: VecSet(yy,zero);
525: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
526: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
527: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
528: return(0);
529: }
531: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
532: {
533: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
537: VecCopy(yy,zz);
538: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
539: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
540: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
541: return(0);
542: }
544: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
545: {
546: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
547: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
549: PetscInt len,i,n,m = A->rmap->n,radd;
550: PetscScalar *x,zero = 0.0;
553: VecSet(v,zero);
554: VecGetArray(v,&x);
555: VecGetSize(v,&n);
556: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
557: len = PetscMin(a->A->rmap->n,a->A->cmap->n);
558: radd = A->rmap->rstart*m;
559: for (i=0; i<len; i++) {
560: x[i] = aloc->v[radd + i*m + i];
561: }
562: VecRestoreArray(v,&x);
563: return(0);
564: }
566: PetscErrorCode MatDestroy_MPIDense(Mat mat)
567: {
568: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
572: #if defined(PETSC_USE_LOG)
573: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
574: #endif
575: MatStashDestroy_Private(&mat->stash);
576: MatDestroy(&mdn->A);
577: VecDestroy(&mdn->lvec);
578: VecScatterDestroy(&mdn->Mvctx);
580: PetscFree(mat->data);
581: PetscObjectChangeTypeName((PetscObject)mat,0);
583: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);
584: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);
585: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);
586: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);
587: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);
588: PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);
589: PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);
590: #if defined(PETSC_HAVE_ELEMENTAL)
591: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);
592: #endif
593: PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);
594: PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);
595: PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);
596: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);
597: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);
598: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);
599: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);
600: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);
601: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);
602: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);
603: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);
604: return(0);
605: }
607: PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
609: #include <petscdraw.h>
610: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
611: {
612: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
613: PetscErrorCode ierr;
614: PetscMPIInt rank = mdn->rank;
615: PetscViewerType vtype;
616: PetscBool iascii,isdraw;
617: PetscViewer sviewer;
618: PetscViewerFormat format;
621: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
622: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
623: if (iascii) {
624: PetscViewerGetType(viewer,&vtype);
625: PetscViewerGetFormat(viewer,&format);
626: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
627: MatInfo info;
628: MatGetInfo(mat,MAT_LOCAL,&info);
629: PetscViewerASCIIPushSynchronized(viewer);
630: PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
631: PetscViewerFlush(viewer);
632: PetscViewerASCIIPopSynchronized(viewer);
633: VecScatterView(mdn->Mvctx,viewer);
634: return(0);
635: } else if (format == PETSC_VIEWER_ASCII_INFO) {
636: return(0);
637: }
638: } else if (isdraw) {
639: PetscDraw draw;
640: PetscBool isnull;
642: PetscViewerDrawGetDraw(viewer,0,&draw);
643: PetscDrawIsNull(draw,&isnull);
644: if (isnull) return(0);
645: }
647: {
648: /* assemble the entire matrix onto first processor. */
649: Mat A;
650: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
651: PetscInt *cols;
652: PetscScalar *vals;
654: MatCreate(PetscObjectComm((PetscObject)mat),&A);
655: if (!rank) {
656: MatSetSizes(A,M,N,M,N);
657: } else {
658: MatSetSizes(A,0,0,M,N);
659: }
660: /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
661: MatSetType(A,MATMPIDENSE);
662: MatMPIDenseSetPreallocation(A,NULL);
663: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
665: /* Copy the matrix ... This isn't the most efficient means,
666: but it's quick for now */
667: A->insertmode = INSERT_VALUES;
669: row = mat->rmap->rstart;
670: m = mdn->A->rmap->n;
671: for (i=0; i<m; i++) {
672: MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
673: MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
674: MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
675: row++;
676: }
678: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
679: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
680: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
681: if (!rank) {
682: PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);
683: MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);
684: }
685: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
686: PetscViewerFlush(viewer);
687: MatDestroy(&A);
688: }
689: return(0);
690: }
692: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
693: {
695: PetscBool iascii,isbinary,isdraw,issocket;
698: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
699: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
700: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
701: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
703: if (iascii || issocket || isdraw) {
704: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
705: } else if (isbinary) {
706: MatView_Dense_Binary(mat,viewer);
707: }
708: return(0);
709: }
711: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
712: {
713: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
714: Mat mdn = mat->A;
716: PetscLogDouble isend[5],irecv[5];
719: info->block_size = 1.0;
721: MatGetInfo(mdn,MAT_LOCAL,info);
723: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
724: isend[3] = info->memory; isend[4] = info->mallocs;
725: if (flag == MAT_LOCAL) {
726: info->nz_used = isend[0];
727: info->nz_allocated = isend[1];
728: info->nz_unneeded = isend[2];
729: info->memory = isend[3];
730: info->mallocs = isend[4];
731: } else if (flag == MAT_GLOBAL_MAX) {
732: MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));
734: info->nz_used = irecv[0];
735: info->nz_allocated = irecv[1];
736: info->nz_unneeded = irecv[2];
737: info->memory = irecv[3];
738: info->mallocs = irecv[4];
739: } else if (flag == MAT_GLOBAL_SUM) {
740: MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));
742: info->nz_used = irecv[0];
743: info->nz_allocated = irecv[1];
744: info->nz_unneeded = irecv[2];
745: info->memory = irecv[3];
746: info->mallocs = irecv[4];
747: }
748: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
749: info->fill_ratio_needed = 0;
750: info->factor_mallocs = 0;
751: return(0);
752: }
754: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
755: {
756: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
760: switch (op) {
761: case MAT_NEW_NONZERO_LOCATIONS:
762: case MAT_NEW_NONZERO_LOCATION_ERR:
763: case MAT_NEW_NONZERO_ALLOCATION_ERR:
764: MatCheckPreallocated(A,1);
765: MatSetOption(a->A,op,flg);
766: break;
767: case MAT_ROW_ORIENTED:
768: MatCheckPreallocated(A,1);
769: a->roworiented = flg;
770: MatSetOption(a->A,op,flg);
771: break;
772: case MAT_NEW_DIAGONALS:
773: case MAT_KEEP_NONZERO_PATTERN:
774: case MAT_USE_HASH_TABLE:
775: case MAT_SORTED_FULL:
776: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
777: break;
778: case MAT_IGNORE_OFF_PROC_ENTRIES:
779: a->donotstash = flg;
780: break;
781: case MAT_SYMMETRIC:
782: case MAT_STRUCTURALLY_SYMMETRIC:
783: case MAT_HERMITIAN:
784: case MAT_SYMMETRY_ETERNAL:
785: case MAT_IGNORE_LOWER_TRIANGULAR:
786: case MAT_IGNORE_ZERO_ENTRIES:
787: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
788: break;
789: default:
790: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
791: }
792: return(0);
793: }
796: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
797: {
798: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
799: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
800: const PetscScalar *l,*r;
801: PetscScalar x,*v;
802: PetscErrorCode ierr;
803: PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
806: MatGetLocalSize(A,&s2,&s3);
807: if (ll) {
808: VecGetLocalSize(ll,&s2a);
809: if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D.", s2a, s2);
810: VecGetArrayRead(ll,&l);
811: for (i=0; i<m; i++) {
812: x = l[i];
813: v = mat->v + i;
814: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
815: }
816: VecRestoreArrayRead(ll,&l);
817: PetscLogFlops(n*m);
818: }
819: if (rr) {
820: VecGetLocalSize(rr,&s3a);
821: if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %D != %D.", s3a, s3);
822: VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
823: VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
824: VecGetArrayRead(mdn->lvec,&r);
825: for (i=0; i<n; i++) {
826: x = r[i];
827: v = mat->v + i*m;
828: for (j=0; j<m; j++) (*v++) *= x;
829: }
830: VecRestoreArrayRead(mdn->lvec,&r);
831: PetscLogFlops(n*m);
832: }
833: return(0);
834: }
836: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
837: {
838: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
839: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
841: PetscInt i,j;
842: PetscReal sum = 0.0;
843: PetscScalar *v = mat->v;
846: if (mdn->size == 1) {
847: MatNorm(mdn->A,type,nrm);
848: } else {
849: if (type == NORM_FROBENIUS) {
850: for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
851: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
852: }
853: MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
854: *nrm = PetscSqrtReal(*nrm);
855: PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
856: } else if (type == NORM_1) {
857: PetscReal *tmp,*tmp2;
858: PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);
859: *nrm = 0.0;
860: v = mat->v;
861: for (j=0; j<mdn->A->cmap->n; j++) {
862: for (i=0; i<mdn->A->rmap->n; i++) {
863: tmp[j] += PetscAbsScalar(*v); v++;
864: }
865: }
866: MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
867: for (j=0; j<A->cmap->N; j++) {
868: if (tmp2[j] > *nrm) *nrm = tmp2[j];
869: }
870: PetscFree2(tmp,tmp2);
871: PetscLogFlops(A->cmap->n*A->rmap->n);
872: } else if (type == NORM_INFINITY) { /* max row norm */
873: PetscReal ntemp;
874: MatNorm(mdn->A,type,&ntemp);
875: MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
876: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
877: }
878: return(0);
879: }
881: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
882: {
883: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
884: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
885: Mat B;
886: PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
888: PetscInt j,i;
889: PetscScalar *v;
892: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
893: MatCreate(PetscObjectComm((PetscObject)A),&B);
894: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
895: MatSetType(B,((PetscObject)A)->type_name);
896: MatMPIDenseSetPreallocation(B,NULL);
897: } else {
898: B = *matout;
899: }
901: m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
902: PetscMalloc1(m,&rwork);
903: for (i=0; i<m; i++) rwork[i] = rstart + i;
904: for (j=0; j<n; j++) {
905: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
906: v += m;
907: }
908: PetscFree(rwork);
909: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
910: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
911: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
912: *matout = B;
913: } else {
914: MatHeaderMerge(A,&B);
915: }
916: return(0);
917: }
919: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
920: PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
922: PetscErrorCode MatSetUp_MPIDense(Mat A)
923: {
927: PetscLayoutSetUp(A->rmap);
928: PetscLayoutSetUp(A->cmap);
929: if (!A->preallocated) {
930: MatMPIDenseSetPreallocation(A,0);
931: }
932: return(0);
933: }
935: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
936: {
938: Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
941: MatAXPY(A->A,alpha,B->A,str);
942: PetscObjectStateIncrease((PetscObject)Y);
943: return(0);
944: }
946: PetscErrorCode MatConjugate_MPIDense(Mat mat)
947: {
948: Mat_MPIDense *a = (Mat_MPIDense*)mat->data;
952: MatConjugate(a->A);
953: return(0);
954: }
956: PetscErrorCode MatRealPart_MPIDense(Mat A)
957: {
958: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
962: MatRealPart(a->A);
963: return(0);
964: }
966: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
967: {
968: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
972: MatImaginaryPart(a->A);
973: return(0);
974: }
976: static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
977: {
978: PetscErrorCode ierr;
979: PetscScalar *x;
980: const PetscScalar *a;
981: PetscInt lda;
984: if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
985: MatDenseGetLDA(A,&lda);
986: MatDenseGetArrayRead(A,&a);
987: VecGetArray(v,&x);
988: PetscArraycpy(x,a+col*lda,A->rmap->n);
989: VecRestoreArray(v,&x);
990: MatDenseRestoreArrayRead(A,&a);
991: return(0);
992: }
994: PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
996: PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
997: {
999: PetscInt i,n;
1000: Mat_MPIDense *a = (Mat_MPIDense*) A->data;
1001: PetscReal *work;
1004: MatGetSize(A,NULL,&n);
1005: PetscMalloc1(n,&work);
1006: MatGetColumnNorms_SeqDense(a->A,type,work);
1007: if (type == NORM_2) {
1008: for (i=0; i<n; i++) work[i] *= work[i];
1009: }
1010: if (type == NORM_INFINITY) {
1011: MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
1012: } else {
1013: MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
1014: }
1015: PetscFree(work);
1016: if (type == NORM_2) {
1017: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1018: }
1019: return(0);
1020: }
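/*
   Note (illustrative, not part of this file): each rank owns a block of rows, so the
   global column norms combine per-rank contributions as

     NORM_2:         ||A(:,j)||_2   = sqrt( sum over ranks of ||A_p(:,j)||_2^2 )
     NORM_1:         ||A(:,j)||_1   = sum over ranks of ||A_p(:,j)||_1
     NORM_INFINITY:  ||A(:,j)||_inf = max over ranks of ||A_p(:,j)||_inf

   which is the square / MPI sum (or max) / square-root sequence implemented above.
*/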
1022: static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1023: {
1024: Mat_MPIDense *d = (Mat_MPIDense*)x->data;
1026: PetscScalar *a;
1027: PetscInt m,n,i;
1030: MatGetSize(d->A,&m,&n);
1031: MatDenseGetArray(d->A,&a);
1032: for (i=0; i<m*n; i++) {
1033: PetscRandomGetValue(rctx,a+i);
1034: }
1035: MatDenseRestoreArray(d->A,&a);
1036: return(0);
1037: }
1039: PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1041: static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
1042: {
1044: *missing = PETSC_FALSE;
1045: return(0);
1046: }
1048: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1049: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1051: /* -------------------------------------------------------------------*/
1052: static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1053: MatGetRow_MPIDense,
1054: MatRestoreRow_MPIDense,
1055: MatMult_MPIDense,
1056: /* 4*/ MatMultAdd_MPIDense,
1057: MatMultTranspose_MPIDense,
1058: MatMultTransposeAdd_MPIDense,
1059: 0,
1060: 0,
1061: 0,
1062: /* 10*/ 0,
1063: 0,
1064: 0,
1065: 0,
1066: MatTranspose_MPIDense,
1067: /* 15*/ MatGetInfo_MPIDense,
1068: MatEqual_MPIDense,
1069: MatGetDiagonal_MPIDense,
1070: MatDiagonalScale_MPIDense,
1071: MatNorm_MPIDense,
1072: /* 20*/ MatAssemblyBegin_MPIDense,
1073: MatAssemblyEnd_MPIDense,
1074: MatSetOption_MPIDense,
1075: MatZeroEntries_MPIDense,
1076: /* 24*/ MatZeroRows_MPIDense,
1077: 0,
1078: 0,
1079: 0,
1080: 0,
1081: /* 29*/ MatSetUp_MPIDense,
1082: 0,
1083: 0,
1084: MatGetDiagonalBlock_MPIDense,
1085: 0,
1086: /* 34*/ MatDuplicate_MPIDense,
1087: 0,
1088: 0,
1089: 0,
1090: 0,
1091: /* 39*/ MatAXPY_MPIDense,
1092: MatCreateSubMatrices_MPIDense,
1093: 0,
1094: MatGetValues_MPIDense,
1095: 0,
1096: /* 44*/ 0,
1097: MatScale_MPIDense,
1098: MatShift_Basic,
1099: 0,
1100: 0,
1101: /* 49*/ MatSetRandom_MPIDense,
1102: 0,
1103: 0,
1104: 0,
1105: 0,
1106: /* 54*/ 0,
1107: 0,
1108: 0,
1109: 0,
1110: 0,
1111: /* 59*/ MatCreateSubMatrix_MPIDense,
1112: MatDestroy_MPIDense,
1113: MatView_MPIDense,
1114: 0,
1115: 0,
1116: /* 64*/ 0,
1117: 0,
1118: 0,
1119: 0,
1120: 0,
1121: /* 69*/ 0,
1122: 0,
1123: 0,
1124: 0,
1125: 0,
1126: /* 74*/ 0,
1127: 0,
1128: 0,
1129: 0,
1130: 0,
1131: /* 79*/ 0,
1132: 0,
1133: 0,
1134: 0,
1135: /* 83*/ MatLoad_MPIDense,
1136: 0,
1137: 0,
1138: 0,
1139: 0,
1140: 0,
1141: #if defined(PETSC_HAVE_ELEMENTAL)
1142: /* 89*/ 0,
1143: 0,
1144: #else
1145: /* 89*/ 0,
1146: 0,
1147: #endif
1148: MatMatMultNumeric_MPIDense,
1149: 0,
1150: 0,
1151: /* 94*/ 0,
1152: 0,
1153: 0,
1154: MatMatTransposeMultNumeric_MPIDense_MPIDense,
1155: 0,
1156: /* 99*/ MatProductSetFromOptions_MPIDense,
1157: 0,
1158: 0,
1159: MatConjugate_MPIDense,
1160: 0,
1161: /*104*/ 0,
1162: MatRealPart_MPIDense,
1163: MatImaginaryPart_MPIDense,
1164: 0,
1165: 0,
1166: /*109*/ 0,
1167: 0,
1168: 0,
1169: MatGetColumnVector_MPIDense,
1170: MatMissingDiagonal_MPIDense,
1171: /*114*/ 0,
1172: 0,
1173: 0,
1174: 0,
1175: 0,
1176: /*119*/ 0,
1177: 0,
1178: 0,
1179: 0,
1180: 0,
1181: /*124*/ 0,
1182: MatGetColumnNorms_MPIDense,
1183: 0,
1184: 0,
1185: 0,
1186: /*129*/ 0,
1187: 0,
1188: 0,
1189: MatTransposeMatMultNumeric_MPIDense_MPIDense,
1190: 0,
1191: /*134*/ 0,
1192: 0,
1193: 0,
1194: 0,
1195: 0,
1196: /*139*/ 0,
1197: 0,
1198: 0,
1199: 0,
1200: 0,
1201: MatCreateMPIMatConcatenateSeqMat_MPIDense,
1202: /*145*/ 0,
1203: 0,
1204: 0
1205: };
1207: PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1208: {
1209: Mat_MPIDense *a;
1213: if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
1214: mat->preallocated = PETSC_TRUE;
1215: /* Note: For now, when a data pointer is provided, this assumes the user has correctly
1216: allocated the local dense storage space. We should add error checking. */
1218: a = (Mat_MPIDense*)mat->data;
1219: PetscLayoutSetUp(mat->rmap);
1220: PetscLayoutSetUp(mat->cmap);
1221: a->nvec = mat->cmap->n;
1223: MatCreate(PETSC_COMM_SELF,&a->A);
1224: MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1225: MatSetType(a->A,MATSEQDENSE);
1226: MatSeqDenseSetPreallocation(a->A,data);
1227: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1228: return(0);
1229: }
1231: #if defined(PETSC_HAVE_ELEMENTAL)
1232: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1233: {
1234: Mat mat_elemental;
1236: PetscScalar *v;
1237: PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1240: if (reuse == MAT_REUSE_MATRIX) {
1241: mat_elemental = *newmat;
1242: MatZeroEntries(*newmat);
1243: } else {
1244: MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
1245: MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);
1246: MatSetType(mat_elemental,MATELEMENTAL);
1247: MatSetUp(mat_elemental);
1248: MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);
1249: }
1251: PetscMalloc2(m,&rows,N,&cols);
1252: for (i=0; i<N; i++) cols[i] = i;
1253: for (i=0; i<m; i++) rows[i] = rstart + i;
1255: /* The PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
1256: MatDenseGetArray(A,&v);
1257: MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);
1258: MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
1259: MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
1260: MatDenseRestoreArray(A,&v);
1261: PetscFree2(rows,cols);
1263: if (reuse == MAT_INPLACE_MATRIX) {
1264: MatHeaderReplace(A,&mat_elemental);
1265: } else {
1266: *newmat = mat_elemental;
1267: }
1268: return(0);
1269: }
1270: #endif
1272: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1273: {
1274: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
1278: MatDenseGetColumn(mat->A,col,vals);
1279: return(0);
1280: }
1282: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1283: {
1284: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
1288: MatDenseRestoreColumn(mat->A,vals);
1289: return(0);
1290: }
1292: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1293: {
1295: Mat_MPIDense *mat;
1296: PetscInt m,nloc,N;
1299: MatGetSize(inmat,&m,&N);
1300: MatGetLocalSize(inmat,NULL,&nloc);
1301: if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1302: PetscInt sum;
1304: if (n == PETSC_DECIDE) {
1305: PetscSplitOwnership(comm,&n,&N);
1306: }
1307: /* Check sum(n) = N */
1308: MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
1309: if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1311: MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);
1312: }
1314: /* numeric phase */
1315: mat = (Mat_MPIDense*)(*outmat)->data;
1316: MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);
1317: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
1318: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
1319: return(0);
1320: }
1322: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1323: {
1324: Mat_MPIDense *a;
1328: PetscNewLog(mat,&a);
1329: mat->data = (void*)a;
1330: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1332: mat->insertmode = NOT_SET_VALUES;
1333: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);
1334: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);
1336: /* build cache for off array entries formed */
1337: a->donotstash = PETSC_FALSE;
1339: MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);
1341: /* stuff used for matrix vector multiply */
1342: a->lvec = 0;
1343: a->Mvctx = 0;
1344: a->roworiented = PETSC_TRUE;
1346: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);
1347: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);
1348: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);
1349: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);
1350: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);
1351: PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);
1352: PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);
1353: #if defined(PETSC_HAVE_ELEMENTAL)
1354: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);
1355: #endif
1356: PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);
1357: PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);
1358: PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);
1359: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);
1360: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);
1361: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);
1362: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);
1364: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);
1365: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);
1366: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);
1367: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);
1368: PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1369: return(0);
1370: }
1372: /*MC
1373: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1375: This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1376: and MATMPIDENSE otherwise.
1378: Options Database Keys:
1379: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1381: Level: beginner
1384: .seealso: MATSEQDENSE,MATMPIDENSE
1385: M*/
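/*
   Usage sketch (illustrative, not part of this file): selecting MATDENSE through the
   options database; run the program with -mat_type dense (or seqdense/mpidense).

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetFromOptions(A);
     MatSetUp(A);
*/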
1387: /*@C
1388: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1390: Collective
1392: Input Parameters:
1393: + B - the matrix
1394: - data - optional location of matrix data. Set data=NULL for PETSc
1395: to control all matrix memory allocation.
1397: Notes:
1398: The dense format is fully compatible with standard Fortran 77
1399: storage by columns.
1401: The data input variable is intended primarily for Fortran programmers
1402: who wish to allocate their own matrix memory space. Most users should
1403: set data=NULL.
1405: Level: intermediate
1407: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1408: @*/
1409: PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1410: {
1414: PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
1415: return(0);
1416: }
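/*
   Usage sketch (illustrative, not part of this file): preallocating a MATMPIDENSE
   matrix and letting PETSc manage the local storage (data = NULL).

     Mat B;
     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,100,50);
     MatSetType(B,MATMPIDENSE);
     MatMPIDenseSetPreallocation(B,NULL);
*/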
1418: /*@
1419: MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1420: array provided by the user. This is useful to avoid copying an array
1421: into a matrix.
1423: Not Collective
1425: Input Parameters:
1426: + mat - the matrix
1427: - array - the array in column major order
1429: Notes:
1430: You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1431: freed when the matrix is destroyed.
1433: Level: developer
1435: .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1437: @*/
1438: PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[])
1439: {
1442: PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
1443: PetscObjectStateIncrease((PetscObject)mat);
1444: return(0);
1445: }
1447: /*@
1448: MatDenseResetArray - Resets the matrix array to the array it held before the call to MatDensePlaceArray()
1450: Not Collective
1452: Input Parameters:
1453: . mat - the matrix
1455: Notes:
1456: You can only call this after a call to MatDensePlaceArray()
1458: Level: developer
1460: .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1462: @*/
1463: PetscErrorCode MatDenseResetArray(Mat mat)
1464: {
1467: PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
1468: PetscObjectStateIncrease((PetscObject)mat);
1469: return(0);
1470: }
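/*
   Usage sketch (illustrative, not part of this file): pairing MatDensePlaceArray() with
   MatDenseResetArray(). Here "uservals" is a hypothetical user-owned, column-major buffer
   large enough for the local block (at least lda*n entries); it is never freed by PETSc.

     MatDensePlaceArray(mat,uservals);
     MatMult(mat,x,y);
     MatDenseResetArray(mat);
*/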
1472: /*@C
1473: MatCreateDense - Creates a parallel matrix in dense format.
1475: Collective
1477: Input Parameters:
1478: + comm - MPI communicator
1479: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1480: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1481: . M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1482: . N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1483: - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1484: to control all matrix memory allocation.
1486: Output Parameter:
1487: . A - the matrix
1489: Notes:
1490: The dense format is fully compatible with standard Fortran 77
1491: storage by columns.
1493: The data input variable is intended primarily for Fortran programmers
1494: who wish to allocate their own matrix memory space. Most users should
1495: set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1497: The user MUST specify either the local or global matrix dimensions
1498: (possibly both).
1500: Level: intermediate
1502: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1503: @*/
1504: PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1505: {
1507: PetscMPIInt size;
1510: MatCreate(comm,A);
1511: MatSetSizes(*A,m,n,M,N);
1512: MPI_Comm_size(comm,&size);
1513: if (size > 1) {
1514: PetscBool havedata = (PetscBool)!!data;
1516: MatSetType(*A,MATMPIDENSE);
1517: MatMPIDenseSetPreallocation(*A,data);
1518: MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);
1519: if (havedata) { /* user provided data array, so no need to assemble */
1520: MatSetUpMultiply_MPIDense(*A);
1521: (*A)->assembled = PETSC_TRUE;
1522: }
1523: } else {
1524: MatSetType(*A,MATSEQDENSE);
1525: MatSeqDenseSetPreallocation(*A,data);
1526: }
1527: return(0);
1528: }
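/*
   Usage sketch (illustrative, not part of this file): creating a distributed dense matrix
   with PETSc-managed storage and setting one locally owned entry.

     Mat      A;
     PetscInt rstart,rend;
     MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,NULL,&A);
     MatGetOwnershipRange(A,&rstart,&rend);
     MatSetValue(A,rstart,0,2.0,INSERT_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDestroy(&A);
*/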
1530: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1531: {
1532: Mat mat;
1533: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1537: *newmat = 0;
1538: MatCreate(PetscObjectComm((PetscObject)A),&mat);
1539: MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1540: MatSetType(mat,((PetscObject)A)->type_name);
1541: a = (Mat_MPIDense*)mat->data;
1543: mat->factortype = A->factortype;
1544: mat->assembled = PETSC_TRUE;
1545: mat->preallocated = PETSC_TRUE;
1547: a->size = oldmat->size;
1548: a->rank = oldmat->rank;
1549: mat->insertmode = NOT_SET_VALUES;
1550: a->nvec = oldmat->nvec;
1551: a->donotstash = oldmat->donotstash;
1553: PetscLayoutReference(A->rmap,&mat->rmap);
1554: PetscLayoutReference(A->cmap,&mat->cmap);
1556: MatSetUpMultiply_MPIDense(mat);
1557: MatDuplicate(oldmat->A,cpvalues,&a->A);
1558: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1560: *newmat = mat;
1561: return(0);
1562: }
1564: PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1565: {
1567: PetscBool isbinary;
1568: #if defined(PETSC_HAVE_HDF5)
1569: PetscBool ishdf5;
1570: #endif
1575: /* force binary viewer to load .info file if it has not yet done so */
1576: PetscViewerSetUp(viewer);
1577: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1578: #if defined(PETSC_HAVE_HDF5)
1579: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);
1580: #endif
1581: if (isbinary) {
1582: MatLoad_Dense_Binary(newMat,viewer);
1583: #if defined(PETSC_HAVE_HDF5)
1584: } else if (ishdf5) {
1585: MatLoad_Dense_HDF5(newMat,viewer);
1586: #endif
1587: } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
1588: return(0);
1589: }
1591: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
1592: {
1593: Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1594: Mat a,b;
1595: PetscBool flg;
1599: a = matA->A;
1600: b = matB->A;
1601: MatEqual(a,b,&flg);
1602: MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1603: return(0);
1604: }
1606: PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1607: {
1608: PetscErrorCode ierr;
1609: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1610: Mat_TransMatMultDense *atb = a->atbdense;
1613: PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);
1614: (atb->destroy)(A);
1615: PetscFree(atb);
1616: return(0);
1617: }
1619: PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1620: {
1621: PetscErrorCode ierr;
1622: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1623: Mat_MatTransMultDense *abt = a->abtdense;
1626: PetscFree2(abt->buf[0],abt->buf[1]);
1627: PetscFree2(abt->recvcounts,abt->recvdispls);
1628: (abt->destroy)(A);
1629: PetscFree(abt);
1630: return(0);
1631: }
1633: PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1634: {
1635: Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1636: Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1637: Mat_TransMatMultDense *atb = c->atbdense;
1639: MPI_Comm comm;
1640: PetscMPIInt rank,size,*recvcounts=atb->recvcounts;
1641: PetscScalar *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1642: PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1643: PetscScalar _DOne=1.0,_DZero=0.0;
1644: PetscBLASInt am,an,bn,aN;
1645: const PetscInt *ranges;
1648: PetscObjectGetComm((PetscObject)A,&comm);
1649: MPI_Comm_rank(comm,&rank);
1650: MPI_Comm_size(comm,&size);
1652: /* compute atbarray = aseq^T * bseq */
1653: PetscBLASIntCast(a->A->cmap->n,&an);
1654: PetscBLASIntCast(b->A->cmap->n,&bn);
1655: PetscBLASIntCast(a->A->rmap->n,&am);
1656: PetscBLASIntCast(A->cmap->N,&aN);
1657: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1659: MatGetOwnershipRanges(C,&ranges);
1660: for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1662: /* arrange atbarray into sendbuf */
1663: k = 0;
1664: for (proc=0; proc<size; proc++) {
1665: for (j=0; j<cN; j++) {
1666: for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1667: }
1668: }
1669: /* sum all atbarray to local values of C */
1670: MatDenseGetArray(c->A,&carray);
1671: MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);
1672: MatDenseRestoreArray(c->A,&carray);
1673: return(0);
1674: }
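/*
   Note (illustrative, not part of this file): because A and B share the same row
   distribution, the product decomposes as

     C = A^T B = sum over ranks p of A_p^T B_p

   where A_p and B_p are the local row blocks. Each rank forms its full
   (global rows of C) x (global columns of C) contribution with one BLAS gemm
   (atbarray above), and MPI_Reduce_scatter sums those contributions while handing
   each rank just the rows of C it owns.
*/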
1676: PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
1677: {
1678: PetscErrorCode ierr;
1679: MPI_Comm comm;
1680: PetscMPIInt size;
1681: PetscInt cm=A->cmap->n,cM,cN=B->cmap->N;
1682: Mat_MPIDense *c;
1683: Mat_TransMatMultDense *atb;
1686: PetscObjectGetComm((PetscObject)A,&comm);
1687: if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1688: SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1689: }
1691: /* create matrix product C */
1692: MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
1693: MatSetType(C,MATMPIDENSE);
1694: MatSetUp(C);
1695: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1696: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1698: /* create data structure for reuse C */
1699: MPI_Comm_size(comm,&size);
1700: PetscNew(&atb);
1701: cM = C->rmap->N;
1702: PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);
1704: c = (Mat_MPIDense*)C->data;
1705: c->atbdense = atb;
1706: atb->destroy = C->ops->destroy;
1707: C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1708: return(0);
1709: }
1711: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
1712: {
1713: PetscErrorCode ierr;
1714: MPI_Comm comm;
1715: PetscMPIInt i, size;
1716: PetscInt maxRows, bufsiz;
1717: Mat_MPIDense *c;
1718: PetscMPIInt tag;
1719: PetscInt alg;
1720: Mat_MatTransMultDense *abt;
1721: Mat_Product *product = C->product;
1722: PetscBool flg;
1725: /* check local size of A and B */
1726: if (A->cmap->n != B->cmap->n) {
1727: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);
1728: }
1730: PetscStrcmp(product->alg,"allgatherv",&flg);
1731: if (flg) {
1732: alg = 0;
1733: } else alg = 1;
1735: /* setup matrix product C */
1736: MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);
1737: MatSetType(C,MATMPIDENSE);
1738: MatSetUp(C);
1739: PetscObjectGetNewTag((PetscObject)C, &tag);
1740: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1741: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1743: /* create data structure for reuse C */
1744: PetscObjectGetComm((PetscObject)C,&comm);
1745: MPI_Comm_size(comm,&size);
1746: PetscNew(&abt);
1747: abt->tag = tag;
1748: abt->alg = alg;
1749: switch (alg) {
1750: case 1: /* alg: "cyclic" */
1751: for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
1752: bufsiz = A->cmap->N * maxRows;
1753: PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));
1754: break;
1755: default: /* alg: "allgatherv" */
1756: PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));
1757: PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));
1758: for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
1759: for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
1760: break;
1761: }
1763: c = (Mat_MPIDense*)C->data;
1764: c->abtdense = abt;
1765: abt->destroy = C->ops->destroy;
1766: C->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
1767: C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
1768: return(0);
1769: }
1771: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
1772: {
1773: Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1774: Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1775: Mat_SeqDense *cseq=(Mat_SeqDense*)(c->A)->data;
1776: Mat_MatTransMultDense *abt = c->abtdense;
1778: MPI_Comm comm;
1779: PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
1780: PetscScalar *sendbuf, *recvbuf=0, *carray;
1781: PetscInt i,cK=A->cmap->N,k,j,bn;
1782: PetscScalar _DOne=1.0,_DZero=0.0;
1783: PetscBLASInt cm, cn, ck;
1784: MPI_Request reqs[2];
1785: const PetscInt *ranges;
1788: PetscObjectGetComm((PetscObject)A,&comm);
1789: MPI_Comm_rank(comm,&rank);
1790: MPI_Comm_size(comm,&size);
1792: MatGetOwnershipRanges(B,&ranges);
1793: bn = B->rmap->n;
1794: if (bseq->lda == bn) {
1795: sendbuf = bseq->v;
1796: } else {
1797: sendbuf = abt->buf[0];
1798: for (k = 0, i = 0; i < cK; i++) {
1799: for (j = 0; j < bn; j++, k++) {
1800: sendbuf[k] = bseq->v[i * bseq->lda + j];
1801: }
1802: }
1803: }
1804: if (size > 1) {
1805: sendto = (rank + size - 1) % size;
1806: recvfrom = (rank + size + 1) % size;
1807: } else {
1808: sendto = recvfrom = 0;
1809: }
1810: PetscBLASIntCast(cK,&ck);
1811: PetscBLASIntCast(c->A->rmap->n,&cm);
1812: recvisfrom = rank;
1813: for (i = 0; i < size; i++) {
1814: /* we have finished the previous receive and send, so the buffers can be read/modified */
1815: PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
1816: PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
1818: if (nextrecvisfrom != rank) {
1819: /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
1820: sendsiz = cK * bn;
1821: recvsiz = cK * nextbn;
1822: recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
1823: MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);
1824: MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);
1825: }
1827: /* local aseq * sendbuf^T */
1828: PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);
1829: carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
1830: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));
1832: if (nextrecvisfrom != rank) {
1833: /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
1834: MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
1835: }
1836: bn = nextbn;
1837: recvisfrom = nextrecvisfrom;
1838: sendbuf = recvbuf;
1839: }
1840: return(0);
1841: }
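/*
   The cyclic ("ring") algorithm above in summary: the locally owned rows of B,
   packed column-major with leading dimension bn, circulate around the ranks.
   At each of the 'size' steps a rank multiplies its local rows of A with the
   transpose of the block of B rows it currently holds (originating on rank
   recvisfrom), filling the column block C(:,ranges[recvisfrom]..ranges[recvisfrom+1]-1)
   of its local part of C with one GEMM, while the posted MPI_Isend/MPI_Irecv
   pair moves the next block so that communication overlaps the computation.
*/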
1843: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
1844: {
1845: Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1846: Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1847: Mat_SeqDense *cseq=(Mat_SeqDense*)(c->A)->data;
1848: Mat_MatTransMultDense *abt = c->abtdense;
1850: MPI_Comm comm;
1851: PetscMPIInt rank,size;
1852: PetscScalar *sendbuf, *recvbuf;
1853: PetscInt i,cK=A->cmap->N,k,j,bn;
1854: PetscScalar _DOne=1.0,_DZero=0.0;
1855: PetscBLASInt cm, cn, ck;
1858: PetscObjectGetComm((PetscObject)A,&comm);
1859: MPI_Comm_rank(comm,&rank);
1860: MPI_Comm_size(comm,&size);
1862: /* copy the transpose of the local rows of B into buf[0] */
1863: bn = B->rmap->n;
1864: sendbuf = abt->buf[0];
1865: recvbuf = abt->buf[1];
1866: for (k = 0, j = 0; j < bn; j++) {
1867: for (i = 0; i < cK; i++, k++) {
1868: sendbuf[k] = bseq->v[i * bseq->lda + j];
1869: }
1870: }
1871: MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);
1872: PetscBLASIntCast(cK,&ck);
1873: PetscBLASIntCast(c->A->rmap->n,&cm);
1874: PetscBLASIntCast(c->A->cmap->n,&cn);
1875: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
1876: return(0);
1877: }
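/*
   The allgatherv algorithm above in summary: each rank packs the transpose of
   its local rows of B into buf[0] (K x bn, column-major, leading dimension K),
   MPI_Allgatherv() assembles the complete K x B->rmap->N matrix B^T in buf[1]
   on every rank, and a single GEMM then forms the local rows of C = A_loc * B^T.
   This is simpler than the cyclic variant but replicates all of B^T on every rank.
*/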
1879: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
1880: {
1881: Mat_MPIDense *c=(Mat_MPIDense*)C->data;
1882: Mat_MatTransMultDense *abt = c->abtdense;
1883: PetscErrorCode ierr;
1886: switch (abt->alg) {
1887: case 1:
1888: MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);
1889: break;
1890: default:
1891: MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);
1892: break;
1893: }
1894: return(0);
1895: }
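/*
   A minimal usage sketch (illustrative, not part of mpidense.c; matrix sizes
   and names are made up): forming C = A*B^T for two MPIDENSE matrices.  The
   dispatch above selects between the allgatherv (default) and cyclic kernels;
   the choice can be made at run time with, e.g.,
   -matmattransmult_mpidense_mpidense_via cyclic.
*/
#include <petscmat.h>

int main(int argc,char **argv)
{
  Mat            A,B,C;
  PetscInt       M = 20,K = 8;   /* A and B are M x K, so C = A*B^T is M x M */
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
  ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,K,NULL,&A);CHKERRQ(ierr);
  ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,K,NULL,&B);CHKERRQ(ierr);
  ierr = MatSetRandom(A,NULL);CHKERRQ(ierr);
  ierr = MatSetRandom(B,NULL);CHKERRQ(ierr);

  /* symbolic + numeric product; pass MAT_REUSE_MATRIX to recompute into the same C */
  ierr = MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

  ierr = MatDestroy(&C);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}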
1897: PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1898: {
1899: PetscErrorCode ierr;
1900: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1901: Mat_MatMultDense *ab = a->abdense;
1904: MatDestroy(&ab->Ce);
1905: MatDestroy(&ab->Ae);
1906: MatDestroy(&ab->Be);
1908: (ab->destroy)(A);
1909: PetscFree(ab);
1910: return(0);
1911: }
1913: #if defined(PETSC_HAVE_ELEMENTAL)
1914: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1915: {
1916: PetscErrorCode ierr;
1917: Mat_MPIDense *c=(Mat_MPIDense*)C->data;
1918: Mat_MatMultDense *ab=c->abdense;
1921: MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);
1922: MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);
1923: MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);
1924: MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);
1925: return(0);
1926: }
1928: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
1929: {
1930: PetscErrorCode ierr;
1931: Mat Ae,Be,Ce;
1932: Mat_MPIDense *c;
1933: Mat_MatMultDense *ab;
1936: /* check local size of A and B */
1937: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1938: SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
1939: }
1941: /* create Elemental matrices Ae and Be */
1942: MatCreate(PetscObjectComm((PetscObject)A), &Ae);
1943: MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);
1944: MatSetType(Ae,MATELEMENTAL);
1945: MatSetUp(Ae);
1946: MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);
1948: MatCreate(PetscObjectComm((PetscObject)B), &Be);
1949: MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);
1950: MatSetType(Be,MATELEMENTAL);
1951: MatSetUp(Be);
1952: MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);
1954: /* compute symbolic Ce = Ae*Be */
1955: MatCreate(PetscObjectComm((PetscObject)C),&Ce);
1956: MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);
1958: /* set up C */
1959: MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
1960: MatSetType(C,MATDENSE);
1961: MatSetUp(C);
1963: /* create the data structure kept in C so the Elemental matrices can be reused in the numeric phase */
1964: PetscNew(&ab);
1965: c = (Mat_MPIDense*)C->data;
1966: c->abdense = ab;
1968: ab->Ae = Ae;
1969: ab->Be = Be;
1970: ab->Ce = Ce;
1971: ab->destroy = C->ops->destroy;
1972: C->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
1973: C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1974: C->ops->productnumeric = MatProductNumeric_AB;
1975: return(0);
1976: }
1977: #endif
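/*
   The MATPRODUCT_AB (C = A*B) path above is compiled only when PETSc was
   configured with Elemental (PETSC_HAVE_ELEMENTAL): A and B are converted to
   MATELEMENTAL, the product is computed there, and the result is converted
   back into the MPIDENSE matrix C.  The converted operands Ae, Be and the
   Elemental product Ce are cached in Mat_MatMultDense so that repeated numeric
   products reuse them.
*/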
1978: /* ----------------------------------------------- */
1979: #if defined(PETSC_HAVE_ELEMENTAL)
1980: static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
1981: {
1983: C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
1984: C->ops->productsymbolic = MatProductSymbolic_AB;
1985: C->ops->productnumeric = MatProductNumeric_AB;
1986: return(0);
1987: }
1988: #endif
1990: static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C)
1991: {
1993: Mat_Product *product = C->product;
1996: MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);
1997: C->ops->productnumeric = MatProductNumeric_AtB;
1998: C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense;
1999: return(0);
2000: }
2002: static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2003: {
2004: Mat_Product *product = C->product;
2005: Mat A = product->A,B=product->B;
2008: if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2009: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2011: C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense;
2012: return(0);
2013: }
2015: static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2016: {
2018: Mat_Product *product = C->product;
2019: const char *algTypes[2] = {"allgatherv","cyclic"};
2020: PetscInt alg,nalg = 2;
2021: PetscBool flg = PETSC_FALSE;
2024: /* Set default algorithm */
2025: alg = 0; /* default is allgatherv */
2026: PetscStrcmp(product->alg,"default",&flg);
2027: if (flg) {
2028: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2029: }
2031: /* Get runtime option */
2032: if (product->api_user) {
2033: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
2034: PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);
2035: PetscOptionsEnd();
2036: } else {
2037: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
2038: PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);
2039: PetscOptionsEnd();
2040: }
2041: if (flg) {
2042: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2043: }
2045: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2046: C->ops->productsymbolic = MatProductSymbolic_ABt;
2047: C->ops->productnumeric = MatProductNumeric_ABt;
2048: return(0);
2049: }
2051: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2052: {
2054: Mat_Product *product = C->product;
2057: switch (product->type) {
2058: #if defined(PETSC_HAVE_ELEMENTAL)
2059: case MATPRODUCT_AB:
2060: MatProductSetFromOptions_MPIDense_AB(C);
2061: break;
2062: #endif
2063: case MATPRODUCT_AtB:
2064: MatProductSetFromOptions_MPIDense_AtB(C);
2065: break;
2066: case MATPRODUCT_ABt:
2067: MatProductSetFromOptions_MPIDense_ABt(C);
2068: break;
2069: default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]);
2070: }
2071: return(0);
2072: }
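/*
   A minimal sketch (illustrative, not part of mpidense.c; the helper name is
   made up) of the MatProduct interface that the dispatch routine above serves:
   C = A^T*B for two MPIDENSE matrices, with the symbolic and numeric phases
   split so the product can be recomputed after A or B changes.
*/
#include <petscmat.h>

static PetscErrorCode DenseAtB(Mat A,Mat B,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatProductCreate(A,B,NULL,C);CHKERRQ(ierr);
  ierr = MatProductSetType(*C,MATPRODUCT_AtB);CHKERRQ(ierr);
  ierr = MatProductSetFromOptions(*C);CHKERRQ(ierr);  /* lands in MatProductSetFromOptions_MPIDense() */
  ierr = MatProductSymbolic(*C);CHKERRQ(ierr);
  ierr = MatProductNumeric(*C);CHKERRQ(ierr);         /* call again after modifying A or B */
  PetscFunctionReturn(0);
}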