Actual source code: matscalapack.c
1: #include <petsc/private/petscscalapack.h>
3: const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4: " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5: " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6: " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7: " TITLE = {Sca{LAPACK} Users' Guide},\n"
8: " PUBLISHER = {SIAM},\n"
9: " ADDRESS = {Philadelphia, PA},\n"
10: " YEAR = 1997\n"
11: "}\n";
12: static PetscBool ScaLAPACKCite = PETSC_FALSE;
14: #define DEFAULT_BLOCKSIZE 64
16: /*
17: The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18: is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
19: */
20: static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
22: static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23: {
27: PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");
28: MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);
29: return(0);
30: }
32: static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
33: {
34: PetscErrorCode ierr;
35: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
36: PetscBool iascii;
37: PetscViewerFormat format;
38: Mat Adense;
41: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
42: if (iascii) {
43: PetscViewerGetFormat(viewer,&format);
44: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
45: PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);
46: PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);
47: PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);
48: PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);
49: return(0);
50: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
51: return(0);
52: }
53: }
54: /* convert to dense format and call MatView() */
55: MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);
56: MatView(Adense,viewer);
57: MatDestroy(&Adense);
58: return(0);
59: }
61: static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
62: {
64: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
65: PetscLogDouble isend[2],irecv[2];
68: info->block_size = 1.0;
70: isend[0] = a->lld*a->locc; /* locally allocated */
71: isend[1] = a->locr*a->locc; /* used submatrix */
72: if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
73: info->nz_allocated = isend[0];
74: info->nz_used = isend[1];
75: } else if (flag == MAT_GLOBAL_MAX) {
76: MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));
77: info->nz_allocated = irecv[0];
78: info->nz_used = irecv[1];
79: } else if (flag == MAT_GLOBAL_SUM) {
80: MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));
81: info->nz_allocated = irecv[0];
82: info->nz_used = irecv[1];
83: }
85: info->nz_unneeded = 0;
86: info->assemblies = A->num_ass;
87: info->mallocs = 0;
88: info->memory = ((PetscObject)A)->mem;
89: info->fill_ratio_given = 0;
90: info->fill_ratio_needed = 0;
91: info->factor_mallocs = 0;
92: return(0);
93: }
95: PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
96: {
98: switch (op) {
99: case MAT_NEW_NONZERO_LOCATIONS:
100: case MAT_NEW_NONZERO_LOCATION_ERR:
101: case MAT_NEW_NONZERO_ALLOCATION_ERR:
102: case MAT_SYMMETRIC:
103: case MAT_SORTED_FULL:
104: case MAT_HERMITIAN:
105: break;
106: default:
107: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
108: }
109: return(0);
110: }
112: static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
113: {
114: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
116: PetscInt i,j;
117: PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc;
120: for (i=0;i<nr;i++) {
121: if (rows[i] < 0) continue;
122: PetscBLASIntCast(rows[i]+1,&gridx);
123: for (j=0;j<nc;j++) {
124: if (cols[j] < 0) continue;
125: PetscBLASIntCast(cols[j]+1,&gcidx);
126: PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
127: if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
128: switch (imode) {
129: case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
130: case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
131: default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
132: }
133: } else {
134: if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
135: A->assembled = PETSC_FALSE;
136: MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));
137: }
138: }
139: }
140: return(0);
141: }
143: static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
144: {
146: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
147: PetscScalar *x2d,*y2d,alpha=1.0;
148: const PetscInt *ranges;
149: PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;
152: if (transpose) {
154: /* create ScaLAPACK descriptors for vectors (1d block distribution) */
155: PetscLayoutGetRanges(A->rmap,&ranges);
156: PetscBLASIntCast(ranges[1],&mb); /* x block size */
157: xlld = PetscMax(1,A->rmap->n);
158: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
160: PetscLayoutGetRanges(A->cmap,&ranges);
161: PetscBLASIntCast(ranges[1],&nb); /* y block size */
162: ylld = 1;
163: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
166: /* allocate 2d vectors */
167: lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
168: lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
169: PetscMalloc2(lszx,&x2d,lszy,&y2d);
170: xlld = PetscMax(1,lszx);
172: /* create ScaLAPACK descriptors for vectors (2d block distribution) */
173: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
175: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
178: /* redistribute x as a column of a 2d matrix */
179: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
181: /* redistribute y as a row of a 2d matrix */
182: if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));
184: /* call PBLAS subroutine */
185: PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
187: /* redistribute y from a row of a 2d matrix */
188: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));
190: } else { /* non-transpose */
192: /* create ScaLAPACK descriptors for vectors (1d block distribution) */
193: PetscLayoutGetRanges(A->cmap,&ranges);
194: PetscBLASIntCast(ranges[1],&nb); /* x block size */
195: xlld = 1;
196: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
198: PetscLayoutGetRanges(A->rmap,&ranges);
199: PetscBLASIntCast(ranges[1],&mb); /* y block size */
200: ylld = PetscMax(1,A->rmap->n);
201: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
204: /* allocate 2d vectors */
205: lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
206: lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
207: PetscMalloc2(lszx,&x2d,lszy,&y2d);
208: ylld = PetscMax(1,lszy);
210: /* create ScaLAPACK descriptors for vectors (2d block distribution) */
211: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
213: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
216: /* redistribute x as a row of a 2d matrix */
217: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));
219: /* redistribute y as a column of a 2d matrix */
220: if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));
222: /* call PBLAS subroutine */
223: PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
225: /* redistribute y from a column of a 2d matrix */
226: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));
228: }
229: PetscFree2(x2d,y2d);
230: return(0);
231: }
233: static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
234: {
235: PetscErrorCode ierr;
236: const PetscScalar *xarray;
237: PetscScalar *yarray;
240: VecGetArrayRead(x,&xarray);
241: VecGetArray(y,&yarray);
242: MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);
243: VecRestoreArrayRead(x,&xarray);
244: VecRestoreArray(y,&yarray);
245: return(0);
246: }
248: static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
249: {
250: PetscErrorCode ierr;
251: const PetscScalar *xarray;
252: PetscScalar *yarray;
255: VecGetArrayRead(x,&xarray);
256: VecGetArray(y,&yarray);
257: MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);
258: VecRestoreArrayRead(x,&xarray);
259: VecRestoreArray(y,&yarray);
260: return(0);
261: }
263: static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
264: {
265: PetscErrorCode ierr;
266: const PetscScalar *xarray;
267: PetscScalar *zarray;
270: if (y != z) { VecCopy(y,z); }
271: VecGetArrayRead(x,&xarray);
272: VecGetArray(z,&zarray);
273: MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);
274: VecRestoreArrayRead(x,&xarray);
275: VecRestoreArray(z,&zarray);
276: return(0);
277: }
279: static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
280: {
281: PetscErrorCode ierr;
282: const PetscScalar *xarray;
283: PetscScalar *zarray;
286: if (y != z) { VecCopy(y,z); }
287: VecGetArrayRead(x,&xarray);
288: VecGetArray(z,&zarray);
289: MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);
290: VecRestoreArrayRead(x,&xarray);
291: VecRestoreArray(z,&zarray);
292: return(0);
293: }
295: PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
296: {
297: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
298: Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
299: Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
300: PetscScalar sone=1.0,zero=0.0;
301: PetscBLASInt one=1;
304: PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
305: C->assembled = PETSC_TRUE;
306: return(0);
307: }
309: PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
310: {
314: MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
315: MatSetType(C,MATSCALAPACK);
316: MatSetUp(C);
317: C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
318: return(0);
319: }
321: static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
322: {
323: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
324: Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
325: Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
326: PetscScalar sone=1.0,zero=0.0;
327: PetscBLASInt one=1;
330: PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
331: C->assembled = PETSC_TRUE;
332: return(0);
333: }
335: static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
336: {
340: MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
341: MatSetType(C,MATSCALAPACK);
342: MatSetUp(C);
343: return(0);
344: }
346: /* --------------------------------------- */
347: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
348: {
350: C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
351: C->ops->productsymbolic = MatProductSymbolic_AB;
352: return(0);
353: }
355: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
356: {
358: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
359: C->ops->productsymbolic = MatProductSymbolic_ABt;
360: return(0);
361: }
363: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
364: {
366: Mat_Product *product = C->product;
369: switch (product->type) {
370: case MATPRODUCT_AB:
371: MatProductSetFromOptions_ScaLAPACK_AB(C);
372: break;
373: case MATPRODUCT_ABt:
374: MatProductSetFromOptions_ScaLAPACK_ABt(C);
375: break;
376: default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
377: }
378: return(0);
379: }
380: /* --------------------------------------- */
382: static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
383: {
384: PetscErrorCode ierr;
385: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
386: PetscScalar *darray,*d2d,v;
387: const PetscInt *ranges;
388: PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
391: VecGetArray(D,&darray);
393: if (A->rmap->N<=A->cmap->N) { /* row version */
395: /* create ScaLAPACK descriptor for vector (1d block distribution) */
396: PetscLayoutGetRanges(A->rmap,&ranges);
397: PetscBLASIntCast(ranges[1],&mb); /* D block size */
398: dlld = PetscMax(1,A->rmap->n);
399: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
402: /* allocate 2d vector */
403: lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
404: PetscCalloc1(lszd,&d2d);
405: dlld = PetscMax(1,lszd);
407: /* create ScaLAPACK descriptor for vector (2d block distribution) */
408: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
411: /* collect diagonal */
412: for (j=1;j<=a->M;j++) {
413: PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
414: PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
415: }
417: /* redistribute d from a column of a 2d matrix */
418: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
419: PetscFree(d2d);
421: } else { /* column version */
423: /* create ScaLAPACK descriptor for vector (1d block distribution) */
424: PetscLayoutGetRanges(A->cmap,&ranges);
425: PetscBLASIntCast(ranges[1],&nb); /* D block size */
426: dlld = 1;
427: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
430: /* allocate 2d vector */
431: lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
432: PetscCalloc1(lszd,&d2d);
434: /* create ScaLAPACK descriptor for vector (2d block distribution) */
435: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
438: /* collect diagonal */
439: for (j=1;j<=a->N;j++) {
440: PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
441: PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
442: }
444: /* redistribute d from a row of a 2d matrix */
445: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
446: PetscFree(d2d);
447: }
449: VecRestoreArray(D,&darray);
450: VecAssemblyBegin(D);
451: VecAssemblyEnd(D);
452: return(0);
453: }
455: static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
456: {
457: PetscErrorCode ierr;
458: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
459: const PetscScalar *d;
460: const PetscInt *ranges;
461: PetscScalar *d2d;
462: PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
465: if (R) {
466: VecGetArrayRead(R,(const PetscScalar **)&d);
467: /* create ScaLAPACK descriptor for vector (1d block distribution) */
468: PetscLayoutGetRanges(A->cmap,&ranges);
469: PetscBLASIntCast(ranges[1],&nb); /* D block size */
470: dlld = 1;
471: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
474: /* allocate 2d vector */
475: lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
476: PetscCalloc1(lszd,&d2d);
478: /* create ScaLAPACK descriptor for vector (2d block distribution) */
479: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
482: /* redistribute d to a row of a 2d matrix */
483: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));
485: /* broadcast along process columns */
486: if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
487: else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);
489: /* local scaling */
490: for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];
492: PetscFree(d2d);
493: VecRestoreArrayRead(R,(const PetscScalar **)&d);
494: }
495: if (L) {
496: VecGetArrayRead(L,(const PetscScalar **)&d);
497: /* create ScaLAPACK descriptor for vector (1d block distribution) */
498: PetscLayoutGetRanges(A->rmap,&ranges);
499: PetscBLASIntCast(ranges[1],&mb); /* D block size */
500: dlld = PetscMax(1,A->rmap->n);
501: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
504: /* allocate 2d vector */
505: lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
506: PetscCalloc1(lszd,&d2d);
507: dlld = PetscMax(1,lszd);
509: /* create ScaLAPACK descriptor for vector (2d block distribution) */
510: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
513: /* redistribute d to a column of a 2d matrix */
514: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));
516: /* broadcast along process rows */
517: if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
518: else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);
520: /* local scaling */
521: for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];
523: PetscFree(d2d);
524: VecRestoreArrayRead(L,(const PetscScalar **)&d);
525: }
526: return(0);
527: }
529: static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
530: {
532: *missing = PETSC_FALSE;
533: return(0);
534: }
536: static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
537: {
538: Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
539: PetscBLASInt n,one=1;
542: n = x->lld*x->locc;
543: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
544: return(0);
545: }
547: static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
548: {
549: Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
550: PetscBLASInt i,n;
551: PetscScalar v;
554: n = PetscMin(x->M,x->N);
555: for (i=1;i<=n;i++) {
556: PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
557: v += alpha;
558: PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
559: }
560: return(0);
561: }
563: static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
564: {
566: Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
567: Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data;
568: PetscBLASInt one=1;
569: PetscScalar beta=1.0;
572: MatScaLAPACKCheckDistribution(Y,1,X,3);
573: PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
574: PetscObjectStateIncrease((PetscObject)Y);
575: return(0);
576: }
578: static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
579: {
581: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
582: Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
585: PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
586: PetscObjectStateIncrease((PetscObject)B);
587: return(0);
588: }
590: static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
591: {
592: Mat Bs;
593: MPI_Comm comm;
594: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b;
598: PetscObjectGetComm((PetscObject)A,&comm);
599: MatCreate(comm,&Bs);
600: MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
601: MatSetType(Bs,MATSCALAPACK);
602: b = (Mat_ScaLAPACK*)Bs->data;
603: b->M = a->M;
604: b->N = a->N;
605: b->mb = a->mb;
606: b->nb = a->nb;
607: b->rsrc = a->rsrc;
608: b->csrc = a->csrc;
609: MatSetUp(Bs);
610: *B = Bs;
611: if (op == MAT_COPY_VALUES) {
612: PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
613: }
614: Bs->assembled = PETSC_TRUE;
615: return(0);
616: }
618: static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
619: {
621: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b;
622: Mat Bs = *B;
623: PetscBLASInt one=1;
624: PetscScalar sone=1.0,zero=0.0;
625: #if defined(PETSC_USE_COMPLEX)
626: PetscInt i,j;
627: #endif
630: if (reuse == MAT_INITIAL_MATRIX) {
631: MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);
632: *B = Bs;
633: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
634: b = (Mat_ScaLAPACK*)Bs->data;
635: PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
636: #if defined(PETSC_USE_COMPLEX)
637: /* undo conjugation */
638: for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
639: #endif
640: Bs->assembled = PETSC_TRUE;
641: return(0);
642: }
644: static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
645: {
646: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
647: PetscInt i,j;
650: for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
651: return(0);
652: }
654: static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
655: {
657: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b;
658: Mat Bs = *B;
659: PetscBLASInt one=1;
660: PetscScalar sone=1.0,zero=0.0;
663: if (reuse == MAT_INITIAL_MATRIX) {
664: MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);
665: *B = Bs;
666: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
667: b = (Mat_ScaLAPACK*)Bs->data;
668: PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
669: Bs->assembled = PETSC_TRUE;
670: return(0);
671: }
673: static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
674: {
676: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
677: PetscScalar *x,*x2d;
678: const PetscInt *ranges;
679: PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;
682: VecCopy(B,X);
683: VecGetArray(X,&x);
685: /* create ScaLAPACK descriptor for a vector (1d block distribution) */
686: PetscLayoutGetRanges(A->rmap,&ranges);
687: PetscBLASIntCast(ranges[1],&mb); /* x block size */
688: xlld = PetscMax(1,A->rmap->n);
689: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
692: /* allocate 2d vector */
693: lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
694: PetscMalloc1(lszx,&x2d);
695: xlld = PetscMax(1,lszx);
697: /* create ScaLAPACK descriptor for a vector (2d block distribution) */
698: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
701: /* redistribute x as a column of a 2d matrix */
702: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
704: /* call ScaLAPACK subroutine */
705: switch (A->factortype) {
706: case MAT_FACTOR_LU:
707: PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
709: break;
710: case MAT_FACTOR_CHOLESKY:
711: PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
713: break;
714: default:
715: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
716: }
718: /* redistribute x from a column of a 2d matrix */
719: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));
721: PetscFree(x2d);
722: VecRestoreArray(X,&x);
723: return(0);
724: }
726: static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
727: {
731: MatSolve_ScaLAPACK(A,B,X);
732: VecAXPY(X,1,Y);
733: return(0);
734: }
736: static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
737: {
739: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
740: PetscBool flg1,flg2;
741: PetscBLASInt one=1,info;
744: PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);
745: PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);
746: if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
747: MatScaLAPACKCheckDistribution(B,1,X,2);
748: b = (Mat_ScaLAPACK*)B->data;
749: x = (Mat_ScaLAPACK*)X->data;
750: PetscArraycpy(x->loc,b->loc,b->lld*b->locc);
752: switch (A->factortype) {
753: case MAT_FACTOR_LU:
754: PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
756: break;
757: case MAT_FACTOR_CHOLESKY:
758: PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
760: break;
761: default:
762: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
763: }
764: return(0);
765: }
767: static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
768: {
770: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
771: PetscBLASInt one=1,info;
774: if (!a->pivots) {
775: PetscMalloc1(a->locr+a->mb,&a->pivots);
776: PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));
777: }
778: PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
780: A->factortype = MAT_FACTOR_LU;
781: A->assembled = PETSC_TRUE;
783: PetscFree(A->solvertype);
784: PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
785: return(0);
786: }
788: static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
789: {
793: MatCopy(A,F,SAME_NONZERO_PATTERN);
794: MatLUFactor_ScaLAPACK(F,0,0,info);
795: return(0);
796: }
798: static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
799: {
801: /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
802: return(0);
803: }
805: static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
806: {
808: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
809: PetscBLASInt one=1,info;
812: PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
814: A->factortype = MAT_FACTOR_CHOLESKY;
815: A->assembled = PETSC_TRUE;
817: PetscFree(A->solvertype);
818: PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
819: return(0);
820: }
822: static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
823: {
827: MatCopy(A,F,SAME_NONZERO_PATTERN);
828: MatCholeskyFactor_ScaLAPACK(F,0,info);
829: return(0);
830: }
832: static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
833: {
835: /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
836: return(0);
837: }
839: PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
840: {
842: *type = MATSOLVERSCALAPACK;
843: return(0);
844: }
846: static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
847: {
848: Mat B;
849: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
853: /* Create the factorization matrix */
854: MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B);
855: B->trivialsymbolic = PETSC_TRUE;
856: B->factortype = ftype;
857: PetscFree(B->solvertype);
858: PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);
860: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);
861: *F = B;
862: return(0);
863: }
865: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
866: {
870: MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);
871: MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);
872: return(0);
873: }
875: static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
876: {
878: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
879: PetscBLASInt one=1,lwork=0;
880: const char *ntype;
881: PetscScalar *work=NULL,dummy;
884: switch (type) {
885: case NORM_1:
886: ntype = "1";
887: lwork = PetscMax(a->locr,a->locc);
888: break;
889: case NORM_FROBENIUS:
890: ntype = "F";
891: work = &dummy;
892: break;
893: case NORM_INFINITY:
894: ntype = "I";
895: lwork = PetscMax(a->locr,a->locc);
896: break;
897: default:
898: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
899: }
900: if (lwork) { PetscMalloc1(lwork,&work); }
901: *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
902: if (lwork) { PetscFree(work); }
903: return(0);
904: }
906: static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
907: {
908: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
912: PetscArrayzero(a->loc,a->lld*a->locc);
913: return(0);
914: }
916: static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
917: {
918: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
920: PetscInt i,n,nb,isrc,nproc,iproc,*idx;
923: if (rows) {
924: n = a->locr;
925: nb = a->mb;
926: isrc = a->rsrc;
927: nproc = a->grid->nprow;
928: iproc = a->grid->myrow;
929: PetscMalloc1(n,&idx);
930: for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
931: ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);
932: }
933: if (cols) {
934: n = a->locc;
935: nb = a->nb;
936: isrc = a->csrc;
937: nproc = a->grid->npcol;
938: iproc = a->grid->mycol;
939: PetscMalloc1(n,&idx);
940: for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
941: ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);
942: }
943: return(0);
944: }
946: static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
947: {
949: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
950: Mat Bmpi;
951: MPI_Comm comm;
952: PetscInt i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
953: const PetscInt *ranges,*branges,*cwork;
954: const PetscScalar *vwork;
955: PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info;
956: PetscScalar *barray;
957: PetscBool differ=PETSC_FALSE;
958: PetscMPIInt size;
961: PetscObjectGetComm((PetscObject)A,&comm);
962: PetscLayoutGetRanges(A->rmap,&ranges);
964: if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
965: MPI_Comm_size(comm,&size);
966: PetscLayoutGetRanges((*B)->rmap,&branges);
967: for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
968: }
970: if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
971: MatCreate(comm,&Bmpi);
972: m = PETSC_DECIDE;
973: PetscSplitOwnershipEqual(comm,&m,&M);
974: n = PETSC_DECIDE;
975: PetscSplitOwnershipEqual(comm,&n,&N);
976: MatSetSizes(Bmpi,m,n,M,N);
977: MatSetType(Bmpi,MATDENSE);
978: MatSetUp(Bmpi);
980: /* create ScaLAPACK descriptor for B (1d block distribution) */
981: PetscBLASIntCast(ranges[1],&bmb); /* row block size */
982: lld = PetscMax(A->rmap->n,1); /* local leading dimension */
983: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
986: /* redistribute matrix */
987: MatDenseGetArray(Bmpi,&barray);
988: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
989: MatDenseRestoreArray(Bmpi,&barray);
990: MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
991: MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
993: /* transfer rows of auxiliary matrix to the final matrix B */
994: MatGetOwnershipRange(Bmpi,&rstart,&rend);
995: for (i=rstart;i<rend;i++) {
996: MatGetRow(Bmpi,i,&nz,&cwork,&vwork);
997: MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);
998: MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);
999: }
1000: MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
1001: MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
1002: MatDestroy(&Bmpi);
1004: } else { /* normal cases */
1006: if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1007: else {
1008: MatCreate(comm,&Bmpi);
1009: m = PETSC_DECIDE;
1010: PetscSplitOwnershipEqual(comm,&m,&M);
1011: n = PETSC_DECIDE;
1012: PetscSplitOwnershipEqual(comm,&n,&N);
1013: MatSetSizes(Bmpi,m,n,M,N);
1014: MatSetType(Bmpi,MATDENSE);
1015: MatSetUp(Bmpi);
1016: }
1018: /* create ScaLAPACK descriptor for B (1d block distribution) */
1019: PetscBLASIntCast(ranges[1],&bmb); /* row block size */
1020: lld = PetscMax(A->rmap->n,1); /* local leading dimension */
1021: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
1024: /* redistribute matrix */
1025: MatDenseGetArray(Bmpi,&barray);
1026: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
1027: MatDenseRestoreArray(Bmpi,&barray);
1029: MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
1030: MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
1031: if (reuse == MAT_INPLACE_MATRIX) {
1032: MatHeaderReplace(A,&Bmpi);
1033: } else *B = Bmpi;
1034: }
1035: return(0);
1036: }
1038: PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
1039: {
1041: Mat_ScaLAPACK *b;
1042: Mat Bmpi;
1043: MPI_Comm comm;
1044: PetscInt M=A->rmap->N,N=A->cmap->N,m,n;
1045: const PetscInt *ranges;
1046: PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info;
1047: PetscScalar *aarray;
1048: PetscInt lda;
1051: PetscObjectGetComm((PetscObject)A,&comm);
1053: if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1054: else {
1055: MatCreate(comm,&Bmpi);
1056: m = PETSC_DECIDE;
1057: PetscSplitOwnershipEqual(comm,&m,&M);
1058: n = PETSC_DECIDE;
1059: PetscSplitOwnershipEqual(comm,&n,&N);
1060: MatSetSizes(Bmpi,m,n,M,N);
1061: MatSetType(Bmpi,MATSCALAPACK);
1062: MatSetUp(Bmpi);
1063: }
1064: b = (Mat_ScaLAPACK*)Bmpi->data;
1066: /* create ScaLAPACK descriptor for A (1d block distribution) */
1067: PetscLayoutGetRanges(A->rmap,&ranges);
1068: PetscBLASIntCast(ranges[1],&amb); /* row block size */
1069: MatDenseGetLDA(A,&lda);
1070: lld = PetscMax(lda,1); /* local leading dimension */
1071: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
1074: /* redistribute matrix */
1075: MatDenseGetArray(A,&aarray);
1076: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
1077: MatDenseRestoreArray(A,&aarray);
1079: MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
1080: MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
1081: if (reuse == MAT_INPLACE_MATRIX) {
1082: MatHeaderReplace(A,&Bmpi);
1083: } else *B = Bmpi;
1084: return(0);
1085: }
1087: PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1088: {
1089: Mat mat_scal;
1090: PetscErrorCode ierr;
1091: PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1092: const PetscInt *cols;
1093: const PetscScalar *vals;
1096: if (reuse == MAT_REUSE_MATRIX) {
1097: mat_scal = *newmat;
1098: MatZeroEntries(mat_scal);
1099: } else {
1100: MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1101: m = PETSC_DECIDE;
1102: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1103: n = PETSC_DECIDE;
1104: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1105: MatSetSizes(mat_scal,m,n,M,N);
1106: MatSetType(mat_scal,MATSCALAPACK);
1107: MatSetUp(mat_scal);
1108: }
1109: for (row=rstart;row<rend;row++) {
1110: MatGetRow(A,row,&ncols,&cols,&vals);
1111: MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);
1112: MatRestoreRow(A,row,&ncols,&cols,&vals);
1113: }
1114: MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1115: MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);
1117: if (reuse == MAT_INPLACE_MATRIX) { MatHeaderReplace(A,&mat_scal); }
1118: else *newmat = mat_scal;
1119: return(0);
1120: }
1122: PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1123: {
1124: Mat mat_scal;
1125: PetscErrorCode ierr;
1126: PetscInt M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1127: const PetscInt *cols;
1128: const PetscScalar *vals;
1129: PetscScalar v;
1132: if (reuse == MAT_REUSE_MATRIX) {
1133: mat_scal = *newmat;
1134: MatZeroEntries(mat_scal);
1135: } else {
1136: MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1137: m = PETSC_DECIDE;
1138: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1139: n = PETSC_DECIDE;
1140: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1141: MatSetSizes(mat_scal,m,n,M,N);
1142: MatSetType(mat_scal,MATSCALAPACK);
1143: MatSetUp(mat_scal);
1144: }
1145: MatGetRowUpperTriangular(A);
1146: for (row=rstart;row<rend;row++) {
1147: MatGetRow(A,row,&ncols,&cols,&vals);
1148: MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);
1149: for (j=0;j<ncols;j++) { /* lower triangular part */
1150: if (cols[j] == row) continue;
1151: v = A->hermitian ? PetscConj(vals[j]) : vals[j];
1152: MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);
1153: }
1154: MatRestoreRow(A,row,&ncols,&cols,&vals);
1155: }
1156: MatRestoreRowUpperTriangular(A);
1157: MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1158: MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);
1160: if (reuse == MAT_INPLACE_MATRIX) { MatHeaderReplace(A,&mat_scal); }
1161: else *newmat = mat_scal;
1162: return(0);
1163: }
1165: static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1166: {
1167: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1169: PetscInt sz=0;
1172: PetscLayoutSetUp(A->rmap);
1173: PetscLayoutSetUp(A->cmap);
1174: if (!a->lld) a->lld = a->locr;
1176: PetscFree(a->loc);
1177: PetscIntMultError(a->lld,a->locc,&sz);
1178: PetscCalloc1(sz,&a->loc);
1179: PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));
1181: A->preallocated = PETSC_TRUE;
1182: return(0);
1183: }
1185: static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1186: {
1187: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1188: PetscErrorCode ierr;
1189: Mat_ScaLAPACK_Grid *grid;
1190: PetscBool flg;
1191: MPI_Comm icomm;
1194: MatStashDestroy_Private(&A->stash);
1195: PetscFree(a->loc);
1196: PetscFree(a->pivots);
1197: PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1198: MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1199: if (--grid->grid_refct == 0) {
1200: Cblacs_gridexit(grid->ictxt);
1201: Cblacs_gridexit(grid->ictxrow);
1202: Cblacs_gridexit(grid->ictxcol);
1203: PetscFree(grid);
1204: MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);
1205: }
1206: PetscCommDestroy(&icomm);
1207: PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);
1208: PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
1209: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);
1210: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);
1211: PetscFree(A->data);
1212: return(0);
1213: }
1215: PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
1216: {
1218: const PetscInt *ranges;
1219: PetscMPIInt size;
1220: PetscInt i,n;
1223: MPI_Comm_size(map->comm,&size);
1224: if (size>2) {
1225: PetscLayoutGetRanges(map,&ranges);
1226: n = ranges[1]-ranges[0];
1227: for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
1228: if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1229: }
1230: return(0);
1231: }
1233: PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1234: {
1235: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1237: PetscBLASInt info=0;
1240: PetscLayoutSetUp(A->rmap);
1241: PetscLayoutSetUp(A->cmap);
1243: /* check that the layout is as enforced by MatCreateScaLAPACK */
1244: MatScaLAPACKCheckLayout(A->rmap);
1245: MatScaLAPACKCheckLayout(A->cmap);
1247: /* compute local sizes */
1248: PetscBLASIntCast(A->rmap->N,&a->M);
1249: PetscBLASIntCast(A->cmap->N,&a->N);
1250: a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1251: a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1252: a->lld = PetscMax(1,a->locr);
1254: /* allocate local array */
1255: MatScaLAPACKSetPreallocation(A);
1257: /* set up ScaLAPACK descriptor */
1258: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1260: return(0);
1261: }
1263: PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1264: {
1266: PetscInt nstash,reallocs;
1269: if (A->nooffprocentries) return(0);
1270: MatStashScatterBegin_Private(A,&A->stash,NULL);
1271: MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);
1272: PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
1273: return(0);
1274: }
1276: PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1277: {
1279: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1280: PetscMPIInt n;
1281: PetscInt i,flg,*row,*col;
1282: PetscScalar *val;
1283: PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc;
1286: if (A->nooffprocentries) return(0);
1287: while (1) {
1288: MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);
1289: if (!flg) break;
1290: for (i=0;i<n;i++) {
1291: PetscBLASIntCast(row[i]+1,&gridx);
1292: PetscBLASIntCast(col[i]+1,&gcidx);
1293: PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1294: if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process");
1295: switch (A->insertmode) {
1296: case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1297: case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1298: default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1299: }
1300: }
1301: }
1302: MatStashScatterEnd_Private(&A->stash);
1303: return(0);
1304: }
1306: PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1307: {
1309: Mat Adense,As;
1310: MPI_Comm comm;
1313: PetscObjectGetComm((PetscObject)newMat,&comm);
1314: MatCreate(comm,&Adense);
1315: MatSetType(Adense,MATDENSE);
1316: MatLoad(Adense,viewer);
1317: MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);
1318: MatDestroy(&Adense);
1319: MatHeaderReplace(newMat,&As);
1320: return(0);
1321: }
1323: /* -------------------------------------------------------------------*/
1324: static struct _MatOps MatOps_Values = {
1325: MatSetValues_ScaLAPACK,
1326: 0,
1327: 0,
1328: MatMult_ScaLAPACK,
1329: /* 4*/ MatMultAdd_ScaLAPACK,
1330: MatMultTranspose_ScaLAPACK,
1331: MatMultTransposeAdd_ScaLAPACK,
1332: MatSolve_ScaLAPACK,
1333: MatSolveAdd_ScaLAPACK,
1334: 0,
1335: /*10*/ 0,
1336: MatLUFactor_ScaLAPACK,
1337: MatCholeskyFactor_ScaLAPACK,
1338: 0,
1339: MatTranspose_ScaLAPACK,
1340: /*15*/ MatGetInfo_ScaLAPACK,
1341: 0,
1342: MatGetDiagonal_ScaLAPACK,
1343: MatDiagonalScale_ScaLAPACK,
1344: MatNorm_ScaLAPACK,
1345: /*20*/ MatAssemblyBegin_ScaLAPACK,
1346: MatAssemblyEnd_ScaLAPACK,
1347: MatSetOption_ScaLAPACK,
1348: MatZeroEntries_ScaLAPACK,
1349: /*24*/ 0,
1350: MatLUFactorSymbolic_ScaLAPACK,
1351: MatLUFactorNumeric_ScaLAPACK,
1352: MatCholeskyFactorSymbolic_ScaLAPACK,
1353: MatCholeskyFactorNumeric_ScaLAPACK,
1354: /*29*/ MatSetUp_ScaLAPACK,
1355: 0,
1356: 0,
1357: 0,
1358: 0,
1359: /*34*/ MatDuplicate_ScaLAPACK,
1360: 0,
1361: 0,
1362: 0,
1363: 0,
1364: /*39*/ MatAXPY_ScaLAPACK,
1365: 0,
1366: 0,
1367: 0,
1368: MatCopy_ScaLAPACK,
1369: /*44*/ 0,
1370: MatScale_ScaLAPACK,
1371: MatShift_ScaLAPACK,
1372: 0,
1373: 0,
1374: /*49*/ 0,
1375: 0,
1376: 0,
1377: 0,
1378: 0,
1379: /*54*/ 0,
1380: 0,
1381: 0,
1382: 0,
1383: 0,
1384: /*59*/ 0,
1385: MatDestroy_ScaLAPACK,
1386: MatView_ScaLAPACK,
1387: 0,
1388: 0,
1389: /*64*/ 0,
1390: 0,
1391: 0,
1392: 0,
1393: 0,
1394: /*69*/ 0,
1395: 0,
1396: MatConvert_ScaLAPACK_Dense,
1397: 0,
1398: 0,
1399: /*74*/ 0,
1400: 0,
1401: 0,
1402: 0,
1403: 0,
1404: /*79*/ 0,
1405: 0,
1406: 0,
1407: 0,
1408: MatLoad_ScaLAPACK,
1409: /*84*/ 0,
1410: 0,
1411: 0,
1412: 0,
1413: 0,
1414: /*89*/ 0,
1415: 0,
1416: MatMatMultNumeric_ScaLAPACK,
1417: 0,
1418: 0,
1419: /*94*/ 0,
1420: 0,
1421: 0,
1422: MatMatTransposeMultNumeric_ScaLAPACK,
1423: 0,
1424: /*99*/ MatProductSetFromOptions_ScaLAPACK,
1425: 0,
1426: 0,
1427: MatConjugate_ScaLAPACK,
1428: 0,
1429: /*104*/0,
1430: 0,
1431: 0,
1432: 0,
1433: 0,
1434: /*109*/MatMatSolve_ScaLAPACK,
1435: 0,
1436: 0,
1437: 0,
1438: MatMissingDiagonal_ScaLAPACK,
1439: /*114*/0,
1440: 0,
1441: 0,
1442: 0,
1443: 0,
1444: /*119*/0,
1445: MatHermitianTranspose_ScaLAPACK,
1446: 0,
1447: 0,
1448: 0,
1449: /*124*/0,
1450: 0,
1451: 0,
1452: 0,
1453: 0,
1454: /*129*/0,
1455: 0,
1456: 0,
1457: 0,
1458: 0,
1459: /*134*/0,
1460: 0,
1461: 0,
1462: 0,
1463: 0,
1464: 0,
1465: /*140*/0,
1466: 0,
1467: 0,
1468: 0,
1469: 0,
1470: /*145*/0,
1471: 0,
1472: 0
1473: };
1475: static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1476: {
1477: PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1478: PetscInt size=stash->size,nsends;
1479: PetscErrorCode ierr;
1480: PetscInt count,*sindices,**rindices,i,j,l;
1481: PetscScalar **rvalues,*svalues;
1482: MPI_Comm comm = stash->comm;
1483: MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1484: PetscMPIInt *sizes,*nlengths,nreceives;
1485: PetscInt *sp_idx,*sp_idy;
1486: PetscScalar *sp_val;
1487: PetscMatStashSpace space,space_next;
1488: PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc;
1489: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data;
1492: { /* make sure all processors are either in INSERTMODE or ADDMODE */
1493: InsertMode addv;
1494: MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
1495: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1496: mat->insertmode = addv; /* in case this processor had no cache */
1497: }
1499: bs2 = stash->bs*stash->bs;
1501: /* first count number of contributors to each processor */
1502: PetscCalloc1(size,&nlengths);
1503: PetscMalloc1(stash->n+1,&owner);
1505: i = j = 0;
1506: space = stash->space_head;
1507: while (space) {
1508: space_next = space->next;
1509: for (l=0; l<space->local_used; l++) {
1510: PetscBLASIntCast(space->idx[l]+1,&gridx);
1511: PetscBLASIntCast(space->idy[l]+1,&gcidx);
1512: PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1513: j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1514: nlengths[j]++; owner[i] = j;
1515: i++;
1516: }
1517: space = space_next;
1518: }
1520: /* Now check what procs get messages - and compute nsends. */
1521: PetscCalloc1(size,&sizes);
1522: for (i=0, nsends=0; i<size; i++) {
1523: if (nlengths[i]) {
1524: sizes[i] = 1; nsends++;
1525: }
1526: }
1528: {PetscMPIInt *onodes,*olengths;
1529: /* Determine the number of messages to expect, their lengths, from from-ids */
1530: PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);
1531: PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
1532: /* since clubbing row,col - lengths are multiplied by 2 */
1533: for (i=0; i<nreceives; i++) olengths[i] *=2;
1534: PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
1535: /* values are size 'bs2' lengths (and remove earlier factor 2 */
1536: for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1537: PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
1538: PetscFree(onodes);
1539: PetscFree(olengths);}
1541: /* do sends:
1542: 1) starts[i] gives the starting index in svalues for stuff going to
1543: the ith processor
1544: */
1545: PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);
1546: PetscMalloc1(2*nsends,&send_waits);
1547: PetscMalloc2(size,&startv,size,&starti);
1548: /* use 2 sends the first with all_a, the next with all_i and all_j */
1549: startv[0] = 0; starti[0] = 0;
1550: for (i=1; i<size; i++) {
1551: startv[i] = startv[i-1] + nlengths[i-1];
1552: starti[i] = starti[i-1] + 2*nlengths[i-1];
1553: }
1555: i = 0;
1556: space = stash->space_head;
1557: while (space) {
1558: space_next = space->next;
1559: sp_idx = space->idx;
1560: sp_idy = space->idy;
1561: sp_val = space->val;
1562: for (l=0; l<space->local_used; l++) {
1563: j = owner[i];
1564: if (bs2 == 1) {
1565: svalues[startv[j]] = sp_val[l];
1566: } else {
1567: PetscInt k;
1568: PetscScalar *buf1,*buf2;
1569: buf1 = svalues+bs2*startv[j];
1570: buf2 = space->val + bs2*l;
1571: for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1572: }
1573: sindices[starti[j]] = sp_idx[l];
1574: sindices[starti[j]+nlengths[j]] = sp_idy[l];
1575: startv[j]++;
1576: starti[j]++;
1577: i++;
1578: }
1579: space = space_next;
1580: }
1581: startv[0] = 0;
1582: for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1584: for (i=0,count=0; i<size; i++) {
1585: if (sizes[i]) {
1586: MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
1587: MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
1588: }
1589: }
1590: #if defined(PETSC_USE_INFO)
1591: PetscInfo1(NULL,"No of messages: %d \n",nsends);
1592: for (i=0; i<size; i++) {
1593: if (sizes[i]) {
1594: PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));
1595: }
1596: }
1597: #endif
1598: PetscFree(nlengths);
1599: PetscFree(owner);
1600: PetscFree2(startv,starti);
1601: PetscFree(sizes);
1603: /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1604: PetscMalloc1(2*nreceives,&recv_waits);
1606: for (i=0; i<nreceives; i++) {
1607: recv_waits[2*i] = recv_waits1[i];
1608: recv_waits[2*i+1] = recv_waits2[i];
1609: }
1610: stash->recv_waits = recv_waits;
1612: PetscFree(recv_waits1);
1613: PetscFree(recv_waits2);
1615: stash->svalues = svalues;
1616: stash->sindices = sindices;
1617: stash->rvalues = rvalues;
1618: stash->rindices = rindices;
1619: stash->send_waits = send_waits;
1620: stash->nsends = nsends;
1621: stash->nrecvs = nreceives;
1622: stash->reproduce_count = 0;
1623: return(0);
1624: }
1626: static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1627: {
1629: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1632: if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1633: if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1634: if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1635: PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1636: PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1637: return(0);
1638: }
1640: /*@
1641: MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of
1642: the ScaLAPACK matrix
1644: Logically Collective on A
1646: Input Parameters:
1647: + A - a MATSCALAPACK matrix
1648: . mb - the row block size
1649: - nb - the column block size
1651: Level: intermediate
1653: .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1654: @*/
1655: PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1656: {
1663: PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));
1664: return(0);
1665: }
1667: static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1668: {
1669: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1672: if (mb) *mb = a->mb;
1673: if (nb) *nb = a->nb;
1674: return(0);
1675: }
1677: /*@
1678: MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of
1679: the ScaLAPACK matrix
1681: Not collective
1683: Input Parameter:
1684: . A - a MATSCALAPACK matrix
1686: Output Parameters:
1687: + mb - the row block size
1688: - nb - the column block size
1690: Level: intermediate
1692: .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1693: @*/
1694: PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1695: {
1700: PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));
1701: return(0);
1702: }
1704: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1705: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1707: /*MC
1708: MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1710: Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1712: Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1714: Options Database Keys:
1715: + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1716: . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1717: - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1719: Level: beginner
1721: .seealso: MATDENSE, MATELEMENTAL
1722: M*/
1724: PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1725: {
1726: Mat_ScaLAPACK *a;
1727: PetscErrorCode ierr;
1728: PetscBool flg,flg1;
1729: Mat_ScaLAPACK_Grid *grid;
1730: MPI_Comm icomm;
1731: PetscBLASInt nprow,npcol,myrow,mycol;
1732: PetscInt optv1,k=2,array[2]={0,0};
1733: PetscMPIInt size;
1736: PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1737: A->insertmode = NOT_SET_VALUES;
1739: MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);
1740: A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK;
1741: A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1742: A->stash.ScatterEnd = MatStashScatterEnd_Ref;
1743: A->stash.ScatterDestroy = NULL;
1745: PetscNewLog(A,&a);
1746: A->data = (void*)a;
1748: /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1749: if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1750: MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);
1751: PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);
1752: PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite);
1753: }
1754: PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1755: MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1756: if (!flg) {
1757: PetscNewLog(A,&grid);
1759: MPI_Comm_size(icomm,&size);
1760: grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1762: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");
1763: PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);
1764: if (flg1) {
1765: if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
1766: grid->nprow = optv1;
1767: }
1768: PetscOptionsEnd();
1770: if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1771: grid->npcol = size/grid->nprow;
1772: PetscBLASIntCast(grid->nprow,&nprow);
1773: PetscBLASIntCast(grid->npcol,&npcol);
1774: grid->ictxt = Csys2blacs_handle(icomm);
1775: Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1776: Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1777: grid->grid_refct = 1;
1778: grid->nprow = nprow;
1779: grid->npcol = npcol;
1780: grid->myrow = myrow;
1781: grid->mycol = mycol;
1782: /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1783: grid->ictxrow = Csys2blacs_handle(icomm);
1784: Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1785: grid->ictxcol = Csys2blacs_handle(icomm);
1786: Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1787: MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);
1789: } else grid->grid_refct++;
1790: PetscCommDestroy(&icomm);
1791: a->grid = grid;
1792: a->mb = DEFAULT_BLOCKSIZE;
1793: a->nb = DEFAULT_BLOCKSIZE;
1795: PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");
1796: PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);
1797: if (flg) {
1798: a->mb = array[0];
1799: a->nb = (k>1)? array[1]: a->mb;
1800: }
1801: PetscOptionsEnd();
1803: PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);
1804: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);
1805: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);
1806: PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);
1807: return(0);
1808: }
1810: /*@C
1811: MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1812: (2D block cyclic distribution).
1814: Collective
1816: Input Parameters:
1817: + comm - MPI communicator
1818: . mb - row block size (or PETSC_DECIDE to have it set)
1819: . nb - column block size (or PETSC_DECIDE to have it set)
1820: . M - number of global rows
1821: . N - number of global columns
1822: . rsrc - coordinate of process that owns the first row of the distributed matrix
1823: - csrc - coordinate of process that owns the first column of the distributed matrix
1825: Output Parameter:
1826: . A - the matrix
1828: Options Database Keys:
1829: . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1831: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1832: MatXXXXSetPreallocation() paradigm instead of this routine directly.
1833: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1835: Notes:
1836: If PETSC_DECIDE is used for the block sizes, then an appropriate value
1837: is chosen.
1839: Storage Information:
1840: Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1841: configured with ScaLAPACK. In particular, PETSc's local sizes lose
1842: significance and are thus ignored. The block sizes refer to the values
1843: used for the distributed matrix, not the same meaning as in BAIJ.
1845: Level: intermediate
1847: .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1848: @*/
1849: PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1850: {
1852: Mat_ScaLAPACK *a;
1853: PetscInt m,n;
1856: MatCreate(comm,A);
1857: MatSetType(*A,MATSCALAPACK);
1858: if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1859: /* rows and columns are NOT distributed according to PetscSplitOwnership */
1860: m = PETSC_DECIDE;
1861: PetscSplitOwnershipEqual(comm,&m,&M);
1862: n = PETSC_DECIDE;
1863: PetscSplitOwnershipEqual(comm,&n,&N);
1864: MatSetSizes(*A,m,n,M,N);
1865: a = (Mat_ScaLAPACK*)(*A)->data;
1866: PetscBLASIntCast(M,&a->M);
1867: PetscBLASIntCast(N,&a->N);
1868: PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1869: PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1870: PetscBLASIntCast(rsrc,&a->rsrc);
1871: PetscBLASIntCast(csrc,&a->csrc);
1872: MatSetUp(*A);
1873: return(0);
1874: }