Actual source code: mpidense.c
petsc-3.3-p7 2013-05-11
2: /*
3: Basic functions for parallel dense matrices.
4: */
6:
7: #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
8: #if defined(PETSC_HAVE_PLAPACK)
9: static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
10: static MPI_Comm Plapack_comm_2d;
11: EXTERN_C_BEGIN
12: #include <PLA.h>
13: EXTERN_C_END
15: typedef struct {
16: PLA_Obj A,pivots;
17: PLA_Template templ;
18: MPI_Datatype datatype;
19: PetscInt nb,rstart;
20: VecScatter ctx;
21: IS is_pla,is_petsc;
22: PetscBool pla_solved;
23: MatStructure mstruct;
24: } Mat_Plapack;
25: #endif
29: /*@
31: MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
32: matrix that stores the process-local part of the operator. For a sequential dense matrix it returns the matrix itself.
34: Input Parameter:
35: . A - the Seq or MPI dense matrix
37: Output Parameter:
38: . B - the inner matrix
40: Level: intermediate
42: @*/
43: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
44: {
45: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
47: PetscBool flg;
50: PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
51: if (flg) {
52: *B = mat->A;
53: } else {
54: *B = A;
55: }
56: return(0);
57: }
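/* Illustrative sketch (not part of the original source): a minimal use of
   MatDenseGetLocalMatrix(), assuming X is an already assembled MATMPIDENSE or
   MATSEQDENSE matrix; the helper name ExampleInspectLocalMatrix is made up. */
#if 0
static PetscErrorCode ExampleInspectLocalMatrix(Mat X)
{
  Mat            Xloc;
  PetscInt       m,n;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLocalMatrix(X,&Xloc);CHKERRQ(ierr); /* per-process MATSEQDENSE block */
  ierr = MatGetSize(Xloc,&m,&n);CHKERRQ(ierr);          /* locally owned rows by global columns */
  ierr = PetscPrintf(PETSC_COMM_SELF,"local block is %D by %D\n",m,n);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif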
61: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
62: {
63: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
65: PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
68: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
69: lrow = row - rstart;
70: MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);
71: return(0);
72: }
76: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
77: {
81: if (idx) {PetscFree(*idx);}
82: if (v) {PetscFree(*v);}
83: return(0);
84: }
86: EXTERN_C_BEGIN
89: PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
90: {
91: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
93: PetscInt m = A->rmap->n,rstart = A->rmap->rstart;
94: PetscScalar *array;
95: MPI_Comm comm;
96: Mat B;
99: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
101: PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
102: if (!B) {
103: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
104: MatCreate(comm,&B);
105: MatSetSizes(B,m,m,m,m);
106: MatSetType(B,((PetscObject)mdn->A)->type_name);
107: MatGetArray(mdn->A,&array);
108: MatSeqDenseSetPreallocation(B,array+m*rstart);
109: MatRestoreArray(mdn->A,&array);
110: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
111: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
112: PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
113: *a = B;
114: MatDestroy(&B);
115: } else {
116: *a = B;
117: }
118: return(0);
119: }
120: EXTERN_C_END
124: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
125: {
126: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
128: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
129: PetscBool roworiented = A->roworiented;
133: for (i=0; i<m; i++) {
134: if (idxm[i] < 0) continue;
135: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
136: if (idxm[i] >= rstart && idxm[i] < rend) {
137: row = idxm[i] - rstart;
138: if (roworiented) {
139: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
140: } else {
141: for (j=0; j<n; j++) {
142: if (idxn[j] < 0) continue;
143: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
144: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
145: }
146: }
147: } else {
148: if (!A->donotstash) {
149: mat->assembled = PETSC_FALSE;
150: if (roworiented) {
151: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
152: } else {
153: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
154: }
155: }
156: }
157: }
158: return(0);
159: }
163: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
164: {
165: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
167: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
170: for (i=0; i<m; i++) {
171: if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
172: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
173: if (idxm[i] >= rstart && idxm[i] < rend) {
174: row = idxm[i] - rstart;
175: for (j=0; j<n; j++) {
176: if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
177: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
178: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
179: }
180: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
181: }
182: return(0);
183: }
187: PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
188: {
189: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
193: MatGetArray(a->A,array);
194: return(0);
195: }
199: static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
200: {
201: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
202: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
204: PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
205: const PetscInt *irow,*icol;
206: PetscScalar *av,*bv,*v = lmat->v;
207: Mat newmat;
208: IS iscol_local;
211: ISAllGather(iscol,&iscol_local);
212: ISGetIndices(isrow,&irow);
213: ISGetIndices(iscol_local,&icol);
214: ISGetLocalSize(isrow,&nrows);
215: ISGetLocalSize(iscol,&ncols);
216: ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */
218: /* No parallel redistribution is currently supported! We should really check each index set
219: to confirm that it is OK. ... Currently only a submatrix with the same row partitioning as the
220: original matrix is supported! */
222: MatGetLocalSize(A,&nlrows,&nlcols);
223: MatGetOwnershipRange(A,&rstart,&rend);
224:
225: /* Check submatrix call */
226: if (scall == MAT_REUSE_MATRIX) {
227: /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
228: /* Really need to test rows and column sizes! */
229: newmat = *B;
230: } else {
231: /* Create and fill new matrix */
232: MatCreate(((PetscObject)A)->comm,&newmat);
233: MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
234: MatSetType(newmat,((PetscObject)A)->type_name);
235: MatMPIDenseSetPreallocation(newmat,PETSC_NULL);
236: }
238: /* Now extract the data pointers and do the copy, column at a time */
239: newmatd = (Mat_MPIDense*)newmat->data;
240: bv = ((Mat_SeqDense *)newmatd->A->data)->v;
241:
242: for (i=0; i<Ncols; i++) {
243: av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
244: for (j=0; j<nrows; j++) {
245: *bv++ = av[irow[j] - rstart];
246: }
247: }
249: /* Assemble the matrices so that the correct flags are set */
250: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
251: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
253: /* Free work space */
254: ISRestoreIndices(isrow,&irow);
255: ISRestoreIndices(iscol_local,&icol);
256: ISDestroy(&iscol_local);
257: *B = newmat;
258: return(0);
259: }
263: PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
264: {
266: return(0);
267: }
271: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
272: {
273: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
274: MPI_Comm comm = ((PetscObject)mat)->comm;
276: PetscInt nstash,reallocs;
277: InsertMode addv;
280: /* make sure all processors are either in INSERTMODE or ADDMODE */
281: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
282: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
283: mat->insertmode = addv; /* in case this processor had no cache */
285: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
286: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
287: PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
288: return(0);
289: }
293: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
294: {
295: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
296: PetscErrorCode ierr;
297: PetscInt i,*row,*col,flg,j,rstart,ncols;
298: PetscMPIInt n;
299: PetscScalar *val;
300: InsertMode addv=mat->insertmode;
303: /* wait on receives */
304: while (1) {
305: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
306: if (!flg) break;
307:
308: for (i=0; i<n;) {
309: /* Now identify the consecutive vals belonging to the same row */
310: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
311: if (j < n) ncols = j-i;
312: else ncols = n-i;
313: /* Now assemble all these values with a single function call */
314: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
315: i = j;
316: }
317: }
318: MatStashScatterEnd_Private(&mat->stash);
319:
320: MatAssemblyBegin(mdn->A,mode);
321: MatAssemblyEnd(mdn->A,mode);
323: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
324: MatSetUpMultiply_MPIDense(mat);
325: }
326: return(0);
327: }
331: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
332: {
334: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
337: MatZeroEntries(l->A);
338: return(0);
339: }
341: /* the code does not handle the diagonal entries correctly unless the
342: matrix is square and the column and row ownerships are identical.
343: This is a BUG. The only way to fix it seems to be to access
344: mdn->A and mdn->B directly and not through the MatZeroRows()
345: routine.
346: */
349: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
350: {
351: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
352: PetscErrorCode ierr;
353: PetscInt i,*owners = A->rmap->range;
354: PetscInt *nprocs,j,idx,nsends;
355: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
356: PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
357: PetscInt *lens,*lrows,*values;
358: PetscMPIInt n,imdex,rank = l->rank,size = l->size;
359: MPI_Comm comm = ((PetscObject)A)->comm;
360: MPI_Request *send_waits,*recv_waits;
361: MPI_Status recv_status,*send_status;
362: PetscBool found;
363: const PetscScalar *xx;
364: PetscScalar *bb;
367: if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Only handles square matrices");
368: if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
369: /* first count number of contributors to each processor */
370: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
371: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
372: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
373: for (i=0; i<N; i++) {
374: idx = rows[i];
375: found = PETSC_FALSE;
376: for (j=0; j<size; j++) {
377: if (idx >= owners[j] && idx < owners[j+1]) {
378: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
379: }
380: }
381: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
382: }
383: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
385: /* inform other processors of number of messages and max length*/
386: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
388: /* post receives: */
389: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
390: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
391: for (i=0; i<nrecvs; i++) {
392: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
393: }
395: /* do sends:
396: 1) starts[i] gives the starting index in svalues for stuff going to
397: the ith processor
398: */
399: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
400: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
401: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
402: starts[0] = 0;
403: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
404: for (i=0; i<N; i++) {
405: svalues[starts[owner[i]]++] = rows[i];
406: }
408: starts[0] = 0;
409: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
410: count = 0;
411: for (i=0; i<size; i++) {
412: if (nprocs[2*i+1]) {
413: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
414: }
415: }
416: PetscFree(starts);
418: base = owners[rank];
420: /* wait on receives */
421: PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
422: count = nrecvs;
423: slen = 0;
424: while (count) {
425: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
426: /* unpack receives into our local space */
427: MPI_Get_count(&recv_status,MPIU_INT,&n);
428: source[imdex] = recv_status.MPI_SOURCE;
429: lens[imdex] = n;
430: slen += n;
431: count--;
432: }
433: PetscFree(recv_waits);
434:
435: /* move the data into the send scatter */
436: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
437: count = 0;
438: for (i=0; i<nrecvs; i++) {
439: values = rvalues + i*nmax;
440: for (j=0; j<lens[i]; j++) {
441: lrows[count++] = values[j] - base;
442: }
443: }
444: PetscFree(rvalues);
445: PetscFree2(lens,source);
446: PetscFree(owner);
447: PetscFree(nprocs);
448:
449: /* fix right hand side if needed */
450: if (x && b) {
451: VecGetArrayRead(x,&xx);
452: VecGetArray(b,&bb);
453: for (i=0; i<slen; i++) {
454: bb[lrows[i]] = diag*xx[lrows[i]];
455: }
456: VecRestoreArrayRead(x,&xx);
457: VecRestoreArray(b,&bb);
458: }
460: /* actually zap the local rows */
461: MatZeroRows(l->A,slen,lrows,0.0,0,0);
462: if (diag != 0.0) {
463: Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
464: PetscInt m = ll->lda, i;
465:
466: for (i=0; i<slen; i++) {
467: ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
468: }
469: }
470: PetscFree(lrows);
472: /* wait on sends */
473: if (nsends) {
474: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
475: MPI_Waitall(nsends,send_waits,send_status);
476: PetscFree(send_status);
477: }
478: PetscFree(send_waits);
479: PetscFree(svalues);
481: return(0);
482: }
486: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
487: {
488: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
492: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
493: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
494: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
495: return(0);
496: }
500: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
501: {
502: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
506: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
507: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
508: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
509: return(0);
510: }
514: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
515: {
516: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
518: PetscScalar zero = 0.0;
521: VecSet(yy,zero);
522: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
523: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
524: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
525: return(0);
526: }
530: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
531: {
532: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
536: VecCopy(yy,zz);
537: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
538: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
539: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
540: return(0);
541: }
545: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
546: {
547: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
548: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
550: PetscInt len,i,n,m = A->rmap->n,radd;
551: PetscScalar *x,zero = 0.0;
552:
554: VecSet(v,zero);
555: VecGetArray(v,&x);
556: VecGetSize(v,&n);
557: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
558: len = PetscMin(a->A->rmap->n,a->A->cmap->n);
559: radd = A->rmap->rstart*m;
560: for (i=0; i<len; i++) {
561: x[i] = aloc->v[radd + i*m + i];
562: }
563: VecRestoreArray(v,&x);
564: return(0);
565: }
569: PetscErrorCode MatDestroy_MPIDense(Mat mat)
570: {
571: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
573: #if defined(PETSC_HAVE_PLAPACK)
574: Mat_Plapack *lu=(Mat_Plapack*)mat->spptr;
575: #endif
579: #if defined(PETSC_USE_LOG)
580: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
581: #endif
582: MatStashDestroy_Private(&mat->stash);
583: MatDestroy(&mdn->A);
584: VecDestroy(&mdn->lvec);
585: VecScatterDestroy(&mdn->Mvctx);
586: #if defined(PETSC_HAVE_PLAPACK)
587: if (lu) {
588: PLA_Obj_free(&lu->A);
589: PLA_Obj_free (&lu->pivots);
590: PLA_Temp_free(&lu->templ);
591: ISDestroy(&lu->is_pla);
592: ISDestroy(&lu->is_petsc);
593: VecScatterDestroy(&lu->ctx);
594: }
595: PetscFree(mat->spptr);
596: #endif
598: PetscFree(mat->data);
599: PetscObjectChangeTypeName((PetscObject)mat,0);
600: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
601: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);
602: PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);
603: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);
604: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);
605: return(0);
606: }
610: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
611: {
612: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
613: PetscErrorCode ierr;
614: PetscViewerFormat format;
615: int fd;
616: PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k;
617: PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size;
618: PetscScalar *work,*v,*vv;
619: Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data;
622: if (mdn->size == 1) {
623: MatView(mdn->A,viewer);
624: } else {
625: PetscViewerBinaryGetDescriptor(viewer,&fd);
626: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
627: MPI_Comm_size(((PetscObject)mat)->comm,&size);
629: PetscViewerGetFormat(viewer,&format);
630: if (format == PETSC_VIEWER_NATIVE) {
632: if (!rank) {
633: /* store the matrix as a dense matrix */
634: header[0] = MAT_FILE_CLASSID;
635: header[1] = mat->rmap->N;
636: header[2] = N;
637: header[3] = MATRIX_BINARY_FORMAT_DENSE;
638: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
640: /* get largest work array needed for transposing array */
641: mmax = mat->rmap->n;
642: for (i=1; i<size; i++) {
643: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
644: }
645: PetscMalloc(mmax*N*sizeof(PetscScalar),&work);
647: /* write out local array, by rows */
648: m = mat->rmap->n;
649: v = a->v;
650: for (j=0; j<N; j++) {
651: for (i=0; i<m; i++) {
652: work[j + i*N] = *v++;
653: }
654: }
655: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
656: /* get largest work array to receive messages from other processes, excludes process zero */
657: mmax = 0;
658: for (i=1; i<size; i++) {
659: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
660: }
661: PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);
662: for(k = 1; k < size; k++) {
663: v = vv;
664: m = mat->rmap->range[k+1] - mat->rmap->range[k];
665: MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm);
667: for(j = 0; j < N; j++) {
668: for(i = 0; i < m; i++) {
669: work[j + i*N] = *v++;
670: }
671: }
672: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
673: }
674: PetscFree(work);
675: PetscFree(vv);
676: } else {
677: MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);
678: }
679: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
680: }
681: return(0);
682: }
686: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
687: {
688: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
689: PetscErrorCode ierr;
690: PetscMPIInt size = mdn->size,rank = mdn->rank;
691: const PetscViewerType vtype;
692: PetscBool iascii,isdraw;
693: PetscViewer sviewer;
694: PetscViewerFormat format;
695: #if defined(PETSC_HAVE_PLAPACK)
696: Mat_Plapack *lu;
697: #endif
700: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
701: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
702: if (iascii) {
703: PetscViewerGetType(viewer,&vtype);
704: PetscViewerGetFormat(viewer,&format);
705: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
706: MatInfo info;
707: MatGetInfo(mat,MAT_LOCAL,&info);
708: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
709: PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
710: PetscViewerFlush(viewer);
711: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
712: #if defined(PETSC_HAVE_PLAPACK)
713: PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");
714: PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);
715: PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);
716: PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);
717: if (mat->factortype){
718: lu=(Mat_Plapack*)(mat->spptr);
719: PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);
720: }
721: #else
722: VecScatterView(mdn->Mvctx,viewer);
723: #endif
724: return(0);
725: } else if (format == PETSC_VIEWER_ASCII_INFO) {
726: return(0);
727: }
728: } else if (isdraw) {
729: PetscDraw draw;
730: PetscBool isnull;
732: PetscViewerDrawGetDraw(viewer,0,&draw);
733: PetscDrawIsNull(draw,&isnull);
734: if (isnull) return(0);
735: }
737: if (size == 1) {
738: MatView(mdn->A,viewer);
739: } else {
740: /* assemble the entire matrix onto first processor. */
741: Mat A;
742: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
743: PetscInt *cols;
744: PetscScalar *vals;
746: MatCreate(((PetscObject)mat)->comm,&A);
747: if (!rank) {
748: MatSetSizes(A,M,N,M,N);
749: } else {
750: MatSetSizes(A,0,0,M,N);
751: }
752: /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
753: MatSetType(A,MATMPIDENSE);
754: MatMPIDenseSetPreallocation(A,PETSC_NULL);
755: PetscLogObjectParent(mat,A);
757: /* Copy the matrix ... This isn't the most efficient means,
758: but it's quick for now */
759: A->insertmode = INSERT_VALUES;
760: row = mat->rmap->rstart; m = mdn->A->rmap->n;
761: for (i=0; i<m; i++) {
762: MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
763: MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
764: MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
765: row++;
766: }
768: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
769: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
770: PetscViewerGetSingleton(viewer,&sviewer);
771: if (!rank) {
772: PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);
773: /* Set the type name to MATMPIDENSE so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII() */
774: PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);
775: MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
776: }
777: PetscViewerRestoreSingleton(viewer,&sviewer);
778: PetscViewerFlush(viewer);
779: MatDestroy(&A);
780: }
781: return(0);
782: }
786: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
787: {
789: PetscBool iascii,isbinary,isdraw,issocket;
790:
792:
793: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
794: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
795: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
796: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
798: if (iascii || issocket || isdraw) {
799: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
800: } else if (isbinary) {
801: MatView_MPIDense_Binary(mat,viewer);
802: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
803: return(0);
804: }
808: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
809: {
810: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
811: Mat mdn = mat->A;
813: PetscReal isend[5],irecv[5];
816: info->block_size = 1.0;
817: MatGetInfo(mdn,MAT_LOCAL,info);
818: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
819: isend[3] = info->memory; isend[4] = info->mallocs;
820: if (flag == MAT_LOCAL) {
821: info->nz_used = isend[0];
822: info->nz_allocated = isend[1];
823: info->nz_unneeded = isend[2];
824: info->memory = isend[3];
825: info->mallocs = isend[4];
826: } else if (flag == MAT_GLOBAL_MAX) {
827: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);
828: info->nz_used = irecv[0];
829: info->nz_allocated = irecv[1];
830: info->nz_unneeded = irecv[2];
831: info->memory = irecv[3];
832: info->mallocs = irecv[4];
833: } else if (flag == MAT_GLOBAL_SUM) {
834: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);
835: info->nz_used = irecv[0];
836: info->nz_allocated = irecv[1];
837: info->nz_unneeded = irecv[2];
838: info->memory = irecv[3];
839: info->mallocs = irecv[4];
840: }
841: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
842: info->fill_ratio_needed = 0;
843: info->factor_mallocs = 0;
844: return(0);
845: }
849: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
850: {
851: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
855: switch (op) {
856: case MAT_NEW_NONZERO_LOCATIONS:
857: case MAT_NEW_NONZERO_LOCATION_ERR:
858: case MAT_NEW_NONZERO_ALLOCATION_ERR:
859: MatSetOption(a->A,op,flg);
860: break;
861: case MAT_ROW_ORIENTED:
862: a->roworiented = flg;
863: MatSetOption(a->A,op,flg);
864: break;
865: case MAT_NEW_DIAGONALS:
866: case MAT_KEEP_NONZERO_PATTERN:
867: case MAT_USE_HASH_TABLE:
868: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
869: break;
870: case MAT_IGNORE_OFF_PROC_ENTRIES:
871: a->donotstash = flg;
872: break;
873: case MAT_SYMMETRIC:
874: case MAT_STRUCTURALLY_SYMMETRIC:
875: case MAT_HERMITIAN:
876: case MAT_SYMMETRY_ETERNAL:
877: case MAT_IGNORE_LOWER_TRIANGULAR:
878: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
879: break;
880: default:
881: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
882: }
883: return(0);
884: }
889: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
890: {
891: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
892: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
893: PetscScalar *l,*r,x,*v;
895: PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
898: MatGetLocalSize(A,&s2,&s3);
899: if (ll) {
900: VecGetLocalSize(ll,&s2a);
901: if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
902: VecGetArray(ll,&l);
903: for (i=0; i<m; i++) {
904: x = l[i];
905: v = mat->v + i;
906: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
907: }
908: VecRestoreArray(ll,&l);
909: PetscLogFlops(n*m);
910: }
911: if (rr) {
912: VecGetLocalSize(rr,&s3a);
913: if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
914: VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
915: VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
916: VecGetArray(mdn->lvec,&r);
917: for (i=0; i<n; i++) {
918: x = r[i];
919: v = mat->v + i*m;
920: for (j=0; j<m; j++) { (*v++) *= x;}
921: }
922: VecRestoreArray(mdn->lvec,&r);
923: PetscLogFlops(n*m);
924: }
925: return(0);
926: }
930: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
931: {
932: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
933: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
935: PetscInt i,j;
936: PetscReal sum = 0.0;
937: PetscScalar *v = mat->v;
940: if (mdn->size == 1) {
941: MatNorm(mdn->A,type,nrm);
942: } else {
943: if (type == NORM_FROBENIUS) {
944: for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
945: #if defined(PETSC_USE_COMPLEX)
946: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
947: #else
948: sum += (*v)*(*v); v++;
949: #endif
950: }
951: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);
952: *nrm = PetscSqrtReal(*nrm);
953: PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
954: } else if (type == NORM_1) {
955: PetscReal *tmp,*tmp2;
956: PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);
957: PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));
958: PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));
959: *nrm = 0.0;
960: v = mat->v;
961: for (j=0; j<mdn->A->cmap->n; j++) {
962: for (i=0; i<mdn->A->rmap->n; i++) {
963: tmp[j] += PetscAbsScalar(*v); v++;
964: }
965: }
966: MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);
967: for (j=0; j<A->cmap->N; j++) {
968: if (tmp2[j] > *nrm) *nrm = tmp2[j];
969: }
970: PetscFree2(tmp,tmp2);
971: PetscLogFlops(A->cmap->n*A->rmap->n);
972: } else if (type == NORM_INFINITY) { /* max row norm */
973: PetscReal ntemp;
974: MatNorm(mdn->A,type,&ntemp);
975: MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);
976: } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
977: }
978: return(0);
979: }
983: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
984: {
985: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
986: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
987: Mat B;
988: PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
990: PetscInt j,i;
991: PetscScalar *v;
994: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
995: if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
996: MatCreate(((PetscObject)A)->comm,&B);
997: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
998: MatSetType(B,((PetscObject)A)->type_name);
999: MatMPIDenseSetPreallocation(B,PETSC_NULL);
1000: } else {
1001: B = *matout;
1002: }
1004: m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
1005: PetscMalloc(m*sizeof(PetscInt),&rwork);
1006: for (i=0; i<m; i++) rwork[i] = rstart + i;
1007: for (j=0; j<n; j++) {
1008: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
1009: v += m;
1010: }
1011: PetscFree(rwork);
1012: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1013: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1014: if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1015: *matout = B;
1016: } else {
1017: MatHeaderMerge(A,B);
1018: }
1019: return(0);
1020: }
1023: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1024: extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
1028: PetscErrorCode MatSetUp_MPIDense(Mat A)
1029: {
1033: MatMPIDenseSetPreallocation(A,0);
1034: return(0);
1035: }
1037: #if defined(PETSC_HAVE_PLAPACK)
1041: PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1042: {
1043: Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr;
1045: PetscInt M=A->cmap->N,m=A->rmap->n,rstart;
1046: PetscScalar *array;
1047: PetscReal one = 1.0;
1050: /* Copy A into F->lu->A */
1051: PLA_Obj_set_to_zero(lu->A);
1052: PLA_API_begin();
1053: PLA_Obj_API_open(lu->A);
1054: MatGetOwnershipRange(A,&rstart,PETSC_NULL);
1055: MatGetArray(A,&array);
1056: PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1057: MatRestoreArray(A,&array);
1058: PLA_Obj_API_close(lu->A);
1059: PLA_API_end();
1060: lu->rstart = rstart;
1061: return(0);
1062: }
1066: PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1067: {
1068: Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr;
1070: PetscInt M=A->cmap->N,m=A->rmap->n,rstart;
1071: PetscScalar *array;
1072: PetscReal one = 1.0;
1075: /* Copy F into A->lu->A */
1076: MatZeroEntries(A);
1077: PLA_API_begin();
1078: PLA_Obj_API_open(lu->A);
1079: MatGetOwnershipRange(A,&rstart,PETSC_NULL);
1080: MatGetArray(A,&array);
1081: PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);
1082: MatRestoreArray(A,&array);
1083: PLA_Obj_API_close(lu->A);
1084: PLA_API_end();
1085: lu->rstart = rstart;
1086: return(0);
1087: }
1091: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1092: {
1094: Mat_Plapack *luA = (Mat_Plapack*)A->spptr;
1095: Mat_Plapack *luB = (Mat_Plapack*)B->spptr;
1096: Mat_Plapack *luC = (Mat_Plapack*)C->spptr;
1097: PLA_Obj alpha = NULL,beta = NULL;
1100: MatMPIDenseCopyToPlapack(A,A);
1101: MatMPIDenseCopyToPlapack(B,B);
1103: /*
1104: PLA_Global_show("A = ",luA->A,"%g ","");
1105: PLA_Global_show("B = ",luB->A,"%g ","");
1106: */
1108: /* do the multiply in PLA */
1109: PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);
1110: PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);
1111: CHKMEMQ;
1113: PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* */
1114: CHKMEMQ;
1115: PLA_Obj_free(&alpha);
1116: PLA_Obj_free(&beta);
1118: /*
1119: PLA_Global_show("C = ",luC->A,"%g ","");
1120: */
1121: MatMPIDenseCopyFromPlapack(C,C);
1122: return(0);
1123: }
1125: extern PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C);
1129: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1130: {
1132: PetscInt m=A->rmap->n,n=B->cmap->n;
1133: Mat Cmat;
1136: if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
1137: SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported");
1138: MatCreate(((PetscObject)B)->comm,&Cmat);
1139: MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);
1140: MatSetType(Cmat,MATMPIDENSE);
1141: MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);
1142: MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);
1143: Cmat->ops->matmult = MatMatMult_MPIDense_MPIDense;
1144: *C = Cmat;
1145: return(0);
1146: }
1150: PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1151: {
1155: if (scall == MAT_INITIAL_MATRIX){
1156: MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
1157: }
1158: MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);
1159: return(0);
1160: }
1164: PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1165: {
1166: MPI_Comm comm = ((PetscObject)A)->comm;
1167: Mat_Plapack *lu = (Mat_Plapack*)A->spptr;
1169: PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1170: PetscScalar *array;
1171: PetscReal one = 1.0;
1172: PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;
1173: PLA_Obj v_pla = NULL;
1174: PetscScalar *loc_buf;
1175: Vec loc_x;
1176:
1178: MPI_Comm_size(comm,&size);
1179: MPI_Comm_rank(comm,&rank);
1181: /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1182: PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1183: PLA_Obj_set_to_zero(v_pla);
1185: /* Copy b into rhs_pla */
1186: PLA_API_begin();
1187: PLA_Obj_API_open(v_pla);
1188: VecGetArray(b,&array);
1189: PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1190: VecRestoreArray(b,&array);
1191: PLA_Obj_API_close(v_pla);
1192: PLA_API_end();
1194: if (A->factortype == MAT_FACTOR_LU){
1195: /* Apply the permutations to the right hand sides */
1196: PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1198: /* Solve L y = b, overwriting b with y */
1199: PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1201: /* Solve U x = y (=b), overwriting b with x */
1202: PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1203: } else { /* MAT_FACTOR_CHOLESKY */
1204: PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1205: PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1206: PLA_NONUNIT_DIAG,lu->A,v_pla);
1207: }
1209: /* Copy PLAPACK x into Petsc vector x */
1210: PLA_Obj_local_length(v_pla, &loc_m);
1211: PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1212: PLA_Obj_local_stride(v_pla, &loc_stride);
1213: /*
1214: PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1215: */
1216: VecCreateSeqWithArray(PETSC_COMM_SELF,1,loc_m*loc_stride,loc_buf,&loc_x);
1217: if (!lu->pla_solved){
1218:
1219: PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1220: PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1222: /* Create the index sets and ctx for the VecScatter */
1223: PLA_Obj_local_length(v_pla, &loc_m);
1224: PLA_Obj_local_stride(v_pla, &loc_stride);
1225: PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);
1227: rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1228: for (i=0; i<loc_m; i+=lu->nb){
1229: j = 0;
1230: while (j < lu->nb && i+j < loc_m){
1231: idx_petsc[i+j] = rstart + j; j++;
1232: }
1233: rstart += size*lu->nb;
1234: }
1236: for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1238: ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,PETSC_COPY_VALUES,&lu->is_pla);
1239: ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,PETSC_COPY_VALUES,&lu->is_petsc);
1240: PetscFree2(idx_pla,idx_petsc);
1241: VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);
1242: }
1243: VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);
1244: VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);
1245:
1246: /* Free data */
1247: VecDestroy(&loc_x);
1248: PLA_Obj_free(&v_pla);
1250: lu->pla_solved = PETSC_TRUE;
1251: return(0);
1252: }
1256: PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1257: {
1258: Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr;
1260: PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend;
1261: PetscInt info_pla=0;
1262: PetscScalar *array,one = 1.0;
1265: if (lu->mstruct == SAME_NONZERO_PATTERN){
1266: PLA_Obj_free(&lu->A);
1267: PLA_Obj_free (&lu->pivots);
1268: }
1269: /* Create PLAPACK matrix object */
1270: lu->A = NULL; lu->pivots = NULL;
1271: PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1272: PLA_Obj_set_to_zero(lu->A);
1273: PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1275: /* Copy A into lu->A */
1276: PLA_API_begin();
1277: PLA_Obj_API_open(lu->A);
1278: MatGetOwnershipRange(A,&rstart,&rend);
1279: MatGetArray(A,&array);
1280: PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1281: MatRestoreArray(A,&array);
1282: PLA_Obj_API_close(lu->A);
1283: PLA_API_end();
1285: /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
1286: info_pla = PLA_LU(lu->A,lu->pivots);
1287: if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1289: lu->rstart = rstart;
1290: lu->mstruct = SAME_NONZERO_PATTERN;
1291: F->ops->solve = MatSolve_MPIDense;
1292: F->assembled = PETSC_TRUE; /* required by -ksp_view */
1293: return(0);
1294: }
1298: PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1299: {
1300: Mat_Plapack *lu = (Mat_Plapack*)F->spptr;
1302: PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend;
1303: PetscInt info_pla=0;
1304: PetscScalar *array,one = 1.0;
1307: if (lu->mstruct == SAME_NONZERO_PATTERN){
1308: PLA_Obj_free(&lu->A);
1309: }
1310: /* Create PLAPACK matrix object */
1311: lu->A = NULL;
1312: lu->pivots = NULL;
1313: PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1315: /* Copy A into lu->A */
1316: PLA_API_begin();
1317: PLA_Obj_API_open(lu->A);
1318: MatGetOwnershipRange(A,&rstart,&rend);
1319: MatGetArray(A,&array);
1320: PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1321: MatRestoreArray(A,&array);
1322: PLA_Obj_API_close(lu->A);
1323: PLA_API_end();
1325: /* Factor P A -> Chol */
1326: info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1327: if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1329: lu->rstart = rstart;
1330: lu->mstruct = SAME_NONZERO_PATTERN;
1331: F->ops->solve = MatSolve_MPIDense;
1332: F->assembled = PETSC_TRUE; /* required by -ksp_view */
1333: return(0);
1334: }
1336: /* Note the Petsc perm permutation is ignored */
1339: PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1340: {
1342: PetscBool issymmetric,set;
1345: MatIsSymmetricKnown(A,&set,&issymmetric);
1346: if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1347: F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense;
1348: F->preallocated = PETSC_TRUE;
1349: return(0);
1350: }
1352: /* Note the Petsc r and c permutations are ignored */
1355: PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1356: {
1358: PetscInt M = A->rmap->N;
1359: Mat_Plapack *lu;
1362: lu = (Mat_Plapack*)F->spptr;
1363: PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1364: F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense;
1365: F->preallocated = PETSC_TRUE;
1366: return(0);
1367: }
1369: EXTERN_C_BEGIN
1372: PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
1373: {
1375: *type = MATSOLVERPLAPACK;
1376: return(0);
1377: }
1378: EXTERN_C_END
1380: EXTERN_C_BEGIN
1383: PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1384: {
1386: Mat_Plapack *lu;
1387: PetscMPIInt size;
1388: PetscInt M=A->rmap->N;
1391: /* Create the factorization matrix */
1392: MatCreate(((PetscObject)A)->comm,F);
1393: MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1394: MatSetType(*F,((PetscObject)A)->type_name);
1395: MatSetUp(*F);
1396: PetscNewLog(*F,Mat_Plapack,&lu);
1397: (*F)->spptr = (void*)lu;
1399: /* Set default Plapack parameters */
1400: MPI_Comm_size(((PetscObject)A)->comm,&size);
1401: lu->nb = M/size;
1402: if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
1403:
1404: /* Set runtime options */
1405: PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");
1406: PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);
1407: PetscOptionsEnd();
1409: /* Create object distribution template */
1410: lu->templ = NULL;
1411: PLA_Temp_create(lu->nb, 0, &lu->templ);
1413: /* Set the datatype */
1414: #if defined(PETSC_USE_COMPLEX)
1415: lu->datatype = MPI_DOUBLE_COMPLEX;
1416: #else
1417: lu->datatype = MPI_DOUBLE;
1418: #endif
1420: PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1423: lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() has not been called yet */
1424: lu->mstruct = DIFFERENT_NONZERO_PATTERN;
1426: if (ftype == MAT_FACTOR_LU) {
1427: (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1428: } else if (ftype == MAT_FACTOR_CHOLESKY) {
1429: (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1430: } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1431: (*F)->factortype = ftype;
1432: PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);
1433: return(0);
1434: }
1435: EXTERN_C_END
1436: #endif
1440: PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
1441: {
1442: #if defined(PETSC_HAVE_PLAPACK)
1444: #endif
1447: #if defined(PETSC_HAVE_PLAPACK)
1448: MatGetFactor_mpidense_plapack(A,ftype,F);
1449: #else
1450: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name);
1451: #endif
1452: return(0);
1453: }
1457: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1458: {
1460: Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1463: MatAXPY(A->A,alpha,B->A,str);
1464: return(0);
1465: }
1469: PetscErrorCode MatConjugate_MPIDense(Mat mat)
1470: {
1471: Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1475: MatConjugate(a->A);
1476: return(0);
1477: }
1481: PetscErrorCode MatRealPart_MPIDense(Mat A)
1482: {
1483: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1487: MatRealPart(a->A);
1488: return(0);
1489: }
1493: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1494: {
1495: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1499: MatImaginaryPart(a->A);
1500: return(0);
1501: }
1503: extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1506: PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1507: {
1509: PetscInt i,n;
1510: Mat_MPIDense *a = (Mat_MPIDense*) A->data;
1511: PetscReal *work;
1514: MatGetSize(A,PETSC_NULL,&n);
1515: PetscMalloc(n*sizeof(PetscReal),&work);
1516: MatGetColumnNorms_SeqDense(a->A,type,work);
1517: if (type == NORM_2) {
1518: for (i=0; i<n; i++) work[i] *= work[i];
1519: }
1520: if (type == NORM_INFINITY) {
1521: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
1522: } else {
1523: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
1524: }
1525: PetscFree(work);
1526: if (type == NORM_2) {
1527: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1528: }
1529: return(0);
1530: }
1532: /* -------------------------------------------------------------------*/
1533: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1534: MatGetRow_MPIDense,
1535: MatRestoreRow_MPIDense,
1536: MatMult_MPIDense,
1537: /* 4*/ MatMultAdd_MPIDense,
1538: MatMultTranspose_MPIDense,
1539: MatMultTransposeAdd_MPIDense,
1540: 0,
1541: 0,
1542: 0,
1543: /*10*/ 0,
1544: 0,
1545: 0,
1546: 0,
1547: MatTranspose_MPIDense,
1548: /*15*/ MatGetInfo_MPIDense,
1549: MatEqual_MPIDense,
1550: MatGetDiagonal_MPIDense,
1551: MatDiagonalScale_MPIDense,
1552: MatNorm_MPIDense,
1553: /*20*/ MatAssemblyBegin_MPIDense,
1554: MatAssemblyEnd_MPIDense,
1555: MatSetOption_MPIDense,
1556: MatZeroEntries_MPIDense,
1557: /*24*/ MatZeroRows_MPIDense,
1558: 0,
1559: 0,
1560: 0,
1561: 0,
1562: /*29*/ MatSetUp_MPIDense,
1563: 0,
1564: 0,
1565: MatGetArray_MPIDense,
1566: MatRestoreArray_MPIDense,
1567: /*34*/ MatDuplicate_MPIDense,
1568: 0,
1569: 0,
1570: 0,
1571: 0,
1572: /*39*/ MatAXPY_MPIDense,
1573: MatGetSubMatrices_MPIDense,
1574: 0,
1575: MatGetValues_MPIDense,
1576: 0,
1577: /*44*/ 0,
1578: MatScale_MPIDense,
1579: 0,
1580: 0,
1581: 0,
1582: /*49*/ 0,
1583: 0,
1584: 0,
1585: 0,
1586: 0,
1587: /*54*/ 0,
1588: 0,
1589: 0,
1590: 0,
1591: 0,
1592: /*59*/ MatGetSubMatrix_MPIDense,
1593: MatDestroy_MPIDense,
1594: MatView_MPIDense,
1595: 0,
1596: 0,
1597: /*64*/ 0,
1598: 0,
1599: 0,
1600: 0,
1601: 0,
1602: /*69*/ 0,
1603: 0,
1604: 0,
1605: 0,
1606: 0,
1607: /*74*/ 0,
1608: 0,
1609: 0,
1610: 0,
1611: 0,
1612: /*79*/ 0,
1613: 0,
1614: 0,
1615: 0,
1616: /*83*/ MatLoad_MPIDense,
1617: 0,
1618: 0,
1619: 0,
1620: 0,
1621: 0,
1622: /*89*/
1623: #if defined(PETSC_HAVE_PLAPACK)
1624: MatMatMult_MPIDense_MPIDense,
1625: MatMatMultSymbolic_MPIDense_MPIDense,
1626: MatMatMultNumeric_MPIDense_MPIDense,
1627: #else
1628: 0,
1629: 0,
1630: 0,
1631: #endif
1632: 0,
1633: 0,
1634: /*94*/ 0,
1635: 0,
1636: 0,
1637: 0,
1638: 0,
1639: /*99*/ 0,
1640: 0,
1641: 0,
1642: MatConjugate_MPIDense,
1643: 0,
1644: /*104*/0,
1645: MatRealPart_MPIDense,
1646: MatImaginaryPart_MPIDense,
1647: 0,
1648: 0,
1649: /*109*/0,
1650: 0,
1651: 0,
1652: 0,
1653: 0,
1654: /*114*/0,
1655: 0,
1656: 0,
1657: 0,
1658: 0,
1659: /*119*/0,
1660: 0,
1661: 0,
1662: 0,
1663: 0,
1664: /*124*/0,
1665: MatGetColumnNorms_MPIDense
1666: };
1668: EXTERN_C_BEGIN
1671: PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1672: {
1673: Mat_MPIDense *a;
1677: mat->preallocated = PETSC_TRUE;
1678: /* Note: For now, when data is specified above, this assumes the user correctly
1679: allocates the local dense storage space. We should add error checking. */
1681: a = (Mat_MPIDense*)mat->data;
1682: PetscLayoutSetUp(mat->rmap);
1683: PetscLayoutSetUp(mat->cmap);
1684: a->nvec = mat->cmap->n;
1686: MatCreate(PETSC_COMM_SELF,&a->A);
1687: MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1688: MatSetType(a->A,MATSEQDENSE);
1689: MatSeqDenseSetPreallocation(a->A,data);
1690: PetscLogObjectParent(mat,a->A);
1691: return(0);
1692: }
1693: EXTERN_C_END
1695: /*MC
1696: MATSOLVERPLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices
1698: run ./configure with the option --download-plapack
1701: Options Database Keys:
1702: + -mat_plapack_nprows <n> - number of rows in processor partition
1703: . -mat_plapack_npcols <n> - number of columns in processor partition
1704: . -mat_plapack_nb <n> - block size of template vector
1705: . -mat_plapack_nb_alg <n> - algorithmic block size
1706: - -mat_plapack_ckerror <n> - error checking flag
1708: Level: intermediate
1710: .seealso: MatCreateDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage
1712: M*/
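/* Illustrative sketch (not part of the original source): selecting this solver from
   code through a KSP, assuming petscksp.h is included and A is an assembled
   MATMPIDENSE matrix; the same effect is obtained at run time with
   -ksp_type preonly -pc_type lu -pc_factor_mat_solver_package plapack. */
#if 0
static PetscErrorCode ExamplePlapackLUSolve(Mat A,Vec b,Vec x)
{
  KSP            ksp;
  PC             pc;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = KSPCreate(((PetscObject)A)->comm,&ksp);CHKERRQ(ierr);
  ierr = KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);              /* direct solve, no Krylov iterations */
  ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
  ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
  ierr = PCFactorSetMatSolverPackage(pc,MATSOLVERPLAPACK);CHKERRQ(ierr);
  ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
  ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
  ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif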
1714: EXTERN_C_BEGIN
1717: PetscErrorCode MatCreate_MPIDense(Mat mat)
1718: {
1719: Mat_MPIDense *a;
1723: PetscNewLog(mat,Mat_MPIDense,&a);
1724: mat->data = (void*)a;
1725: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1727: mat->insertmode = NOT_SET_VALUES;
1728: MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);
1729: MPI_Comm_size(((PetscObject)mat)->comm,&a->size);
1731: /* build cache for off-process entries formed */
1732: a->donotstash = PETSC_FALSE;
1733: MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);
1735: /* stuff used for matrix vector multiply */
1736: a->lvec = 0;
1737: a->Mvctx = 0;
1738: a->roworiented = PETSC_TRUE;
1740: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1741: "MatGetDiagonalBlock_MPIDense",
1742: MatGetDiagonalBlock_MPIDense);
1743: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1744: "MatMPIDenseSetPreallocation_MPIDense",
1745: MatMPIDenseSetPreallocation_MPIDense);
1746: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1747: "MatMatMult_MPIAIJ_MPIDense",
1748: MatMatMult_MPIAIJ_MPIDense);
1749: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1750: "MatMatMultSymbolic_MPIAIJ_MPIDense",
1751: MatMatMultSymbolic_MPIAIJ_MPIDense);
1752: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1753: "MatMatMultNumeric_MPIAIJ_MPIDense",
1754: MatMatMultNumeric_MPIAIJ_MPIDense);
1755: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C",
1756: "MatGetFactor_mpidense_petsc",
1757: MatGetFactor_mpidense_petsc);
1758: #if defined(PETSC_HAVE_PLAPACK)
1759: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C",
1760: "MatGetFactor_mpidense_plapack",
1761: MatGetFactor_mpidense_plapack);
1762: PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);
1763: #endif
1764: PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1766: return(0);
1767: }
1768: EXTERN_C_END
1770: /*MC
1771: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1773: This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1774: and MATMPIDENSE otherwise.
1776: Options Database Keys:
1777: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1779: Level: beginner
1782: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1783: M*/
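/* Illustrative sketch (not part of the original source): creating a dense matrix whose
   final type is chosen at run time (e.g. with -mat_type dense); M and N are assumed
   global sizes and the helper name is made up.  Values are then inserted with
   MatSetValues() followed by MatAssemblyBegin()/MatAssemblyEnd() as usual. */
#if 0
static PetscErrorCode ExampleCreateDenseFromOptions(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATDENSE);CHKERRQ(ierr);      /* resolves to MATSEQDENSE or MATMPIDENSE */
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);        /* -mat_type may override the type */
  ierr = MatSetUp(*A);CHKERRQ(ierr);                 /* for MATMPIDENSE this calls MatMPIDenseSetPreallocation(A,0) */
  PetscFunctionReturn(0);
}
#endif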
1787: /*@C
1788: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1790: Not collective
1792: Input Parameters:
1793: + A - the matrix
1794: - data - optional location of matrix data. Set data=PETSC_NULL for PETSc
1795: to control all matrix memory allocation.
1797: Notes:
1798: The dense format is fully compatible with standard Fortran 77
1799: storage by columns.
1801: The data input variable is intended primarily for Fortran programmers
1802: who wish to allocate their own matrix memory space. Most users should
1803: set data=PETSC_NULL.
1805: Level: intermediate
1807: .keywords: matrix,dense, parallel
1809: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1810: @*/
1811: PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1812: {
1816: PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));
1817: return(0);
1818: }
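/* Illustrative sketch (not part of the original source): preallocating with a user-supplied
   column-major array of size (local rows) x (global columns), the layout described in the
   Notes above; most C users would simply pass PETSC_NULL instead.  Here mloc and N are
   assumed local row and global column counts, and the caller retains ownership of data. */
#if 0
static PetscErrorCode ExamplePreallocateWithUserArray(MPI_Comm comm,PetscInt mloc,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mloc,PETSC_DECIDE,PETSC_DETERMINE,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); /* entries are stored directly in data */
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif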
1822: /*@C
1823: MatCreateDense - Creates a parallel matrix in dense format.
1825: Collective on MPI_Comm
1827: Input Parameters:
1828: + comm - MPI communicator
1829: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1830: . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1831: . M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1832: . N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1833: - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1834: to control all matrix memory allocation.
1836: Output Parameter:
1837: . A - the matrix
1839: Notes:
1840: The dense format is fully compatible with standard Fortran 77
1841: storage by columns.
1843: The data input variable is intended primarily for Fortran programmers
1844: who wish to allocate their own matrix memory space. Most users should
1845: set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1847: The user MUST specify either the local or global matrix dimensions
1848: (possibly both).
1850: Level: intermediate
1852: .keywords: matrix,dense, parallel
1854: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1855: @*/
1856: PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1857: {
1859: PetscMPIInt size;
1862: MatCreate(comm,A);
1863: MatSetSizes(*A,m,n,M,N);
1864: MPI_Comm_size(comm,&size);
1865: if (size > 1) {
1866: MatSetType(*A,MATMPIDENSE);
1867: MatMPIDenseSetPreallocation(*A,data);
1868: } else {
1869: MatSetType(*A,MATSEQDENSE);
1870: MatSeqDenseSetPreallocation(*A,data);
1871: }
1872: return(0);
1873: }
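/* Illustrative sketch (not part of the original source): creating a parallel dense matrix
   with MatCreateDense() and filling the locally owned rows; M and N are assumed global
   sizes and the entry formula is arbitrary. */
#if 0
static PetscErrorCode ExampleFillDense(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  PetscInt       i,j,rstart,rend;
  PetscScalar    v;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    for (j=0; j<N; j++) {
      v    = 1.0/(i+j+1); /* Hilbert-like entries, purely for illustration */
      ierr = MatSetValues(*A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif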
1877: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1878: {
1879: Mat mat;
1880: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1884: *newmat = 0;
1885: MatCreate(((PetscObject)A)->comm,&mat);
1886: MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1887: MatSetType(mat,((PetscObject)A)->type_name);
1888: a = (Mat_MPIDense*)mat->data;
1889: PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));
1891: mat->factortype = A->factortype;
1892: mat->assembled = PETSC_TRUE;
1893: mat->preallocated = PETSC_TRUE;
1895: a->size = oldmat->size;
1896: a->rank = oldmat->rank;
1897: mat->insertmode = NOT_SET_VALUES;
1898: a->nvec = oldmat->nvec;
1899: a->donotstash = oldmat->donotstash;
1901: PetscLayoutReference(A->rmap,&mat->rmap);
1902: PetscLayoutReference(A->cmap,&mat->cmap);
1904: MatSetUpMultiply_MPIDense(mat);
1905: MatDuplicate(oldmat->A,cpvalues,&a->A);
1906: PetscLogObjectParent(mat,a->A);
1908: *newmat = mat;
1909: return(0);
1910: }
1914: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1915: {
1917: PetscMPIInt rank,size;
1918: PetscInt *rowners,i,m,nz,j;
1919: PetscScalar *array,*vals,*vals_ptr;
1922: MPI_Comm_rank(comm,&rank);
1923: MPI_Comm_size(comm,&size);
1925: /* determine ownership of all rows */
1926: if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1927: else m = newmat->rmap->n;
1928: PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1929: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
1930: rowners[0] = 0;
1931: for (i=2; i<=size; i++) {
1932: rowners[i] += rowners[i-1];
1933: }
1935: if (!sizesset) {
1936: MatSetSizes(newmat,m,PETSC_DECIDE,M,N);
1937: }
1938: MatMPIDenseSetPreallocation(newmat,PETSC_NULL);
1939: MatGetArray(newmat,&array);
1941: if (!rank) {
1942: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1944: /* read in my part of the matrix numerical values */
1945: PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1946:
1947: /* insert into matrix by row (this is why we cannot read directly into the array) */
1948: vals_ptr = vals;
1949: for (i=0; i<m; i++) {
1950: for (j=0; j<N; j++) {
1951: array[i + j*m] = *vals_ptr++;
1952: }
1953: }
1955: /* read in the other processes' parts and ship them out */
1956: for (i=1; i<size; i++) {
1957: nz = (rowners[i+1] - rowners[i])*N;
1958: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1959: MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);
1960: }
1961: } else {
1962: /* receive numeric values */
1963: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1965: /* receive message of values */
1966: MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);
1968: /* insert into matrix by row (this is why the file cannot be read directly into array) */
1969: vals_ptr = vals;
1970: for (i=0; i<m; i++) {
1971: for (j=0; j<N; j++) {
1972: array[i + j*m] = *vals_ptr++;
1973: }
1974: }
1975: }
1976: PetscFree(rowners);
1977: PetscFree(vals);
1978: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1979: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1980: return(0);
1981: }
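
/*
   Editorial note (illustration, not part of the original file): the binary file stores
   each process's block row by row, while the local dense array is column-major with
   leading dimension m, which is why the code above copies through vals rather than
   reading straight into array.  For m = 2, N = 3 the file delivers

       a00 a01 a02 a10 a11 a12

   and the loop  array[i + j*m] = *vals_ptr++;  lays these out in memory as

       a00 a10 a01 a11 a02 a12
*/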
1985: PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1986: {
1987: PetscScalar *vals,*svals;
1988: MPI_Comm comm = ((PetscObject)viewer)->comm;
1989: MPI_Status status;
1990: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1991: PetscInt header[4],*rowlengths = 0,M,N,*cols;
1992: PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1993: PetscInt i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1994: int fd;
1998: MPI_Comm_size(comm,&size);
1999: MPI_Comm_rank(comm,&rank);
2000: if (!rank) {
2001: PetscViewerBinaryGetDescriptor(viewer,&fd);
2002: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
2003: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2004: }
2005: if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2007: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
2008: M = header[1]; N = header[2]; nz = header[3];
2010: /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
2011: if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2012: if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2013:
2014: /* If global sizes are set, check that they are consistent with those given in the file */
2015: if (sizesset) {
2016: MatGetSize(newmat,&grows,&gcols);
2017: }
2018: if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
2019: if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2021: /*
2022: Handle case where matrix is stored on disk as a dense matrix
2023: */
2024: if (nz == MATRIX_BINARY_FORMAT_DENSE) {
2025: MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);
2026: return(0);
2027: }
2029: /* determine ownership of all rows */
2030: if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank));
2031: else m = PetscMPIIntCast(newmat->rmap->n);
2032: PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);
2033: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
2034: rowners[0] = 0;
2035: for (i=2; i<=size; i++) {
2036: rowners[i] += rowners[i-1];
2037: }
2038: rstart = rowners[rank];
2039: rend = rowners[rank+1];
2041: /* distribute row lengths to all processors */
2042: PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);
2043: if (!rank) {
2044: PetscMalloc(M*sizeof(PetscInt),&rowlengths);
2045: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2046: PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);
2047: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2048: MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
2049: PetscFree(sndcounts);
2050: } else {
2051: MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
2052: }
2054: if (!rank) {
2055: /* calculate the number of nonzeros on each processor */
2056: PetscMalloc(size*sizeof(PetscInt),&procsnz);
2057: PetscMemzero(procsnz,size*sizeof(PetscInt));
2058: for (i=0; i<size; i++) {
2059: for (j=rowners[i]; j< rowners[i+1]; j++) {
2060: procsnz[i] += rowlengths[j];
2061: }
2062: }
2063: PetscFree(rowlengths);
2065: /* determine max buffer needed and allocate it */
2066: maxnz = 0;
2067: for (i=0; i<size; i++) {
2068: maxnz = PetscMax(maxnz,procsnz[i]);
2069: }
2070: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
2072: /* read in my part of the matrix column indices */
2073: nz = procsnz[0];
2074: PetscMalloc(nz*sizeof(PetscInt),&mycols);
2075: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
2077: /* read in everyone else's part and ship it off */
2078: for (i=1; i<size; i++) {
2079: nz = procsnz[i];
2080: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2081: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
2082: }
2083: PetscFree(cols);
2084: } else {
2085: /* determine buffer space needed for message */
2086: nz = 0;
2087: for (i=0; i<m; i++) {
2088: nz += ourlens[i];
2089: }
2090: PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);
2092: /* receive message of column indices */
2093: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
2094: MPI_Get_count(&status,MPIU_INT,&maxnz);
2095: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"received column-index count does not match expected count; file may be corrupt");
2096: }
2098: /* loop over local rows, determining number of off-diagonal entries */
2099: PetscMemzero(offlens,m*sizeof(PetscInt));
2100: jj = 0;
2101: for (i=0; i<m; i++) {
2102: for (j=0; j<ourlens[i]; j++) {
2103: if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2104: jj++;
2105: }
2106: }
2108: /* create our matrix */
2109: for (i=0; i<m; i++) {
2110: ourlens[i] -= offlens[i];
2111: }
2113: if (!sizesset) {
2114: MatSetSizes(newmat,m,PETSC_DECIDE,M,N);
2115: }
2116: MatMPIDenseSetPreallocation(newmat,PETSC_NULL);
2117: for (i=0; i<m; i++) {
2118: ourlens[i] += offlens[i];
2119: }
2121: if (!rank) {
2122: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
2124: /* read in my part of the matrix numerical values */
2125: nz = procsnz[0];
2126: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2127:
2128: /* insert into matrix */
2129: jj = rstart;
2130: smycols = mycols;
2131: svals = vals;
2132: for (i=0; i<m; i++) {
2133: MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2134: smycols += ourlens[i];
2135: svals += ourlens[i];
2136: jj++;
2137: }
2139: /* read in the other processes' parts and ship them out */
2140: for (i=1; i<size; i++) {
2141: nz = procsnz[i];
2142: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2143: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
2144: }
2145: PetscFree(procsnz);
2146: } else {
2147: /* receive numeric values */
2148: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
2150: /* receive message of values */
2151: MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
2152: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2153: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"received value count does not match expected count; file may be corrupt");
2155: /* insert into matrix */
2156: jj = rstart;
2157: smycols = mycols;
2158: svals = vals;
2159: for (i=0; i<m; i++) {
2160: MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2161: smycols += ourlens[i];
2162: svals += ourlens[i];
2163: jj++;
2164: }
2165: }
2166: PetscFree2(ourlens,offlens);
2167: PetscFree(vals);
2168: PetscFree(mycols);
2169: PetscFree(rowners);
2171: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
2172: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
2173: return(0);
2174: }
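
/*
   Editorial sketch (not part of the original file): MatLoad_MPIDense() above is
   dispatched from the generic MatLoad() when the matrix type is MATMPIDENSE.  The
   file name "matrix.dat" and the function name exampleLoadDense() are illustrative
   assumptions; error checking is omitted to match this listing.
*/
static PetscErrorCode exampleLoadDense(MPI_Comm comm,Mat *A)
{
  PetscViewer viewer;

  PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);
  MatCreate(comm,A);
  MatSetType(*A,MATMPIDENSE);
  MatLoad(*A,viewer);                 /* reads sizes and entries from the binary file */
  PetscViewerDestroy(&viewer);
  return(0);
}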
2178: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
2179: {
2180: Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2181: Mat a,b;
2182: PetscBool flg;
2186: a = matA->A;
2187: b = matB->A;
2188: MatEqual(a,b,&flg);
2189: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);
2190: return(0);
2191: }
2193: #if defined(PETSC_HAVE_PLAPACK)
2197: /*@C
2198: PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
2199: Level: developer
2201: .keywords: Petsc, destroy, package, PLAPACK
2202: .seealso: PetscFinalize()
2203: @*/
2204: PetscErrorCode PetscPLAPACKFinalizePackage(void)
2205: {
2209: PLA_Finalize();
2210: return(0);
2211: }
2215: /*@C
2216: PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
2217: called from MatCreate_MPIDense() the first time an MPI dense matrix is created.
2219: Input Parameter:
2220: . comm - the communicator the matrix lives on
2222: Level: developer
2224: Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the
2225: same communicator, because there is some global state that is initialized once and then used for all matrices. In addition,
2226: if the matrices that trigger PLAPACK initialization live on subcommunicators of MPI_COMM_WORLD, those
2227: subcommunicators cannot overlap.
2229: .keywords: Petsc, initialize, package, PLAPACK
2230: .seealso: PetscSysInitializePackage(), PetscInitialize()
2231: @*/
2232: PetscErrorCode PetscPLAPACKInitializePackage(MPI_Comm comm)
2233: {
2234: PetscMPIInt size;
2238: if (!PLA_Initialized(PETSC_NULL)) {
2240: MPI_Comm_size(comm,&size);
2241: Plapack_nprows = 1;
2242: Plapack_npcols = size;
2243:
2244: PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");
2245: PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);
2246: PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);
2247: #if defined(PETSC_USE_DEBUG)
2248: Plapack_ierror = 3;
2249: #else
2250: Plapack_ierror = 0;
2251: #endif
2252: PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);
2253: if (Plapack_ierror){
2254: PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );
2255: } else {
2256: PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );
2257: }
2258:
2259: Plapack_nb_alg = 0;
2260: PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);
2261: if (Plapack_nb_alg) {
2262: pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);
2263: }
2264: PetscOptionsEnd();
2266: PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);
2267: PLA_Init(Plapack_comm_2d);
2268: PetscRegisterFinalize(PetscPLAPACKFinalizePackage);
2269: }
2270: return(0);
2271: }
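
/*
   Editorial note (illustration, not part of the original file): the options registered
   in PetscPLAPACKInitializePackage() are ordinary runtime options.  For example, a run
   such as

       mpiexec -n 4 ./app -mat_plapack_nprows 2 -mat_plapack_npcols 2 -mat_plapack_nb_alg 64

   would select a 2 x 2 process mesh and an algorithmic block size of 64; the executable
   name ./app is hypothetical.
*/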
2273: #endif