Actual source code: matscalapack.c
1: #include <petsc/private/petscscalapack.h>
3: const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4: " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5: " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6: " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7: " TITLE = {Sca{LAPACK} Users' Guide},\n"
8: " PUBLISHER = {SIAM},\n"
9: " ADDRESS = {Philadelphia, PA},\n"
10: " YEAR = 1997\n"
11: "}\n";
12: static PetscBool ScaLAPACKCite = PETSC_FALSE;
14: #define DEFAULT_BLOCKSIZE 64
16: /*
17: The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18: is attached to a communicator; in this case the attribute is a Mat_ScaLAPACK_Grid
19: */
20: static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
22: static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23: {
24: PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");
25: MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);
26: return 0;
27: }
29: static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
30: {
31: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
32: PetscBool iascii;
33: PetscViewerFormat format;
34: Mat Adense;
36: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
37: if (iascii) {
38: PetscViewerGetFormat(viewer,&format);
39: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
40: PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);
41: PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);
42: PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);
43: PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);
44: return 0;
45: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
46: return 0;
47: }
48: }
49: /* convert to dense format and call MatView() */
50: MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);
51: MatView(Adense,viewer);
52: MatDestroy(&Adense);
53: return 0;
54: }
56: static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
57: {
58: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
59: PetscLogDouble isend[2],irecv[2];
61: info->block_size = 1.0;
63: isend[0] = a->lld*a->locc; /* locally allocated */
64: isend[1] = a->locr*a->locc; /* used submatrix */
65: if (flag == MAT_LOCAL) {
66: info->nz_allocated = isend[0];
67: info->nz_used = isend[1];
68: } else if (flag == MAT_GLOBAL_MAX) {
69: MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));
70: info->nz_allocated = irecv[0];
71: info->nz_used = irecv[1];
72: } else if (flag == MAT_GLOBAL_SUM) {
73: MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));
74: info->nz_allocated = irecv[0];
75: info->nz_used = irecv[1];
76: }
78: info->nz_unneeded = 0;
79: info->assemblies = A->num_ass;
80: info->mallocs = 0;
81: info->memory = ((PetscObject)A)->mem;
82: info->fill_ratio_given = 0;
83: info->fill_ratio_needed = 0;
84: info->factor_mallocs = 0;
85: return 0;
86: }
88: PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
89: {
90: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
92: switch (op) {
93: case MAT_NEW_NONZERO_LOCATIONS:
94: case MAT_NEW_NONZERO_LOCATION_ERR:
95: case MAT_NEW_NONZERO_ALLOCATION_ERR:
96: case MAT_SYMMETRIC:
97: case MAT_SORTED_FULL:
98: case MAT_HERMITIAN:
99: break;
100: case MAT_ROW_ORIENTED:
101: a->roworiented = flg;
102: break;
103: default:
104: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
105: }
106: return 0;
107: }
109: static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
110: {
111: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
112: PetscInt i,j;
113: PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc;
114: PetscBool roworiented = a->roworiented;
117: for (i=0;i<nr;i++) {
118: if (rows[i] < 0) continue;
119: PetscBLASIntCast(rows[i]+1,&gridx);
120: for (j=0;j<nc;j++) {
121: if (cols[j] < 0) continue;
122: PetscBLASIntCast(cols[j]+1,&gcidx);
123: PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
124: if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
125: if (roworiented) {
126: switch (imode) {
127: case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
128: default: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
129: }
130: } else {
131: switch (imode) {
132: case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i+j*nr]; break;
133: default: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i+j*nr]; break;
134: }
135: }
136: } else {
138: A->assembled = PETSC_FALSE;
139: MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,roworiented ? vals+i*nc+j : vals+i+j*nr,(PetscBool)(imode==ADD_VALUES));
140: }
141: }
142: }
143: return 0;
144: }
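/*
   MatMultXXXYYY_ScaLAPACK - computes y = op(A)*x + beta*y, with op(A) = A or A^T.
   The 1d block-distributed PETSc vectors are redistributed to the 2d block-cyclic
   layout with p?gemr2d, the product is formed with PBLAS p?gemv, and the result is
   redistributed back to the 1d layout.
*/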
146: static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
147: {
148: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
149: PetscScalar *x2d,*y2d,alpha=1.0;
150: const PetscInt *ranges;
151: PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;
153: if (transpose) {
155: /* create ScaLAPACK descriptors for vectors (1d block distribution) */
156: PetscLayoutGetRanges(A->rmap,&ranges);
157: PetscBLASIntCast(ranges[1],&mb); /* x block size */
158: xlld = PetscMax(1,A->rmap->n);
159: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
161: PetscLayoutGetRanges(A->cmap,&ranges);
162: PetscBLASIntCast(ranges[1],&nb); /* y block size */
163: ylld = 1;
164: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
167: /* allocate 2d vectors */
168: lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
169: lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
170: PetscMalloc2(lszx,&x2d,lszy,&y2d);
171: xlld = PetscMax(1,lszx);
173: /* create ScaLAPACK descriptors for vectors (2d block distribution) */
174: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
176: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
179: /* redistribute x as a column of a 2d matrix */
180: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
182: /* redistribute y as a row of a 2d matrix */
183: if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));
185: /* call PBLAS subroutine */
186: PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
188: /* redistribute y from a row of a 2d matrix */
189: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));
191: } else { /* non-transpose */
193: /* create ScaLAPACK descriptors for vectors (1d block distribution) */
194: PetscLayoutGetRanges(A->cmap,&ranges);
195: PetscBLASIntCast(ranges[1],&nb); /* x block size */
196: xlld = 1;
197: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
199: PetscLayoutGetRanges(A->rmap,&ranges);
200: PetscBLASIntCast(ranges[1],&mb); /* y block size */
201: ylld = PetscMax(1,A->rmap->n);
202: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
205: /* allocate 2d vectors */
206: lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
207: lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
208: PetscMalloc2(lszx,&x2d,lszy,&y2d);
209: ylld = PetscMax(1,lszy);
211: /* create ScaLAPACK descriptors for vectors (2d block distribution) */
212: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
214: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
217: /* redistribute x as a row of a 2d matrix */
218: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));
220: /* redistribute y as a column of a 2d matrix */
221: if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));
223: /* call PBLAS subroutine */
224: PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
226: /* redistribute y from a column of a 2d matrix */
227: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));
229: }
230: PetscFree2(x2d,y2d);
231: return 0;
232: }
234: static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
235: {
236: const PetscScalar *xarray;
237: PetscScalar *yarray;
239: VecGetArrayRead(x,&xarray);
240: VecGetArray(y,&yarray);
241: MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);
242: VecRestoreArrayRead(x,&xarray);
243: VecRestoreArray(y,&yarray);
244: return 0;
245: }
247: static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
248: {
249: const PetscScalar *xarray;
250: PetscScalar *yarray;
252: VecGetArrayRead(x,&xarray);
253: VecGetArray(y,&yarray);
254: MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);
255: VecRestoreArrayRead(x,&xarray);
256: VecRestoreArray(y,&yarray);
257: return 0;
258: }
260: static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
261: {
262: const PetscScalar *xarray;
263: PetscScalar *zarray;
265: if (y != z) VecCopy(y,z);
266: VecGetArrayRead(x,&xarray);
267: VecGetArray(z,&zarray);
268: MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);
269: VecRestoreArrayRead(x,&xarray);
270: VecRestoreArray(z,&zarray);
271: return 0;
272: }
274: static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
275: {
276: const PetscScalar *xarray;
277: PetscScalar *zarray;
279: if (y != z) VecCopy(y,z);
280: VecGetArrayRead(x,&xarray);
281: VecGetArray(z,&zarray);
282: MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);
283: VecRestoreArrayRead(x,&xarray);
284: VecRestoreArray(z,&zarray);
285: return 0;
286: }
288: PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
289: {
290: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
291: Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
292: Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
293: PetscScalar sone=1.0,zero=0.0;
294: PetscBLASInt one=1;
296: PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
297: C->assembled = PETSC_TRUE;
298: return 0;
299: }
301: PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
302: {
303: MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
304: MatSetType(C,MATSCALAPACK);
305: MatSetUp(C);
306: C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
307: return 0;
308: }
310: static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
311: {
312: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
313: Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
314: Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
315: PetscScalar sone=1.0,zero=0.0;
316: PetscBLASInt one=1;
318: PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
319: C->assembled = PETSC_TRUE;
320: return 0;
321: }
323: static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
324: {
325: MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
326: MatSetType(C,MATSCALAPACK);
327: MatSetUp(C);
328: return 0;
329: }
331: /* --------------------------------------- */
332: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
333: {
334: C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
335: C->ops->productsymbolic = MatProductSymbolic_AB;
336: return 0;
337: }
339: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
340: {
341: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
342: C->ops->productsymbolic = MatProductSymbolic_ABt;
343: return 0;
344: }
346: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
347: {
348: Mat_Product *product = C->product;
350: switch (product->type) {
351: case MATPRODUCT_AB:
352: MatProductSetFromOptions_ScaLAPACK_AB(C);
353: break;
354: case MATPRODUCT_ABt:
355: MatProductSetFromOptions_ScaLAPACK_ABt(C);
356: break;
357: default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
358: }
359: return 0;
360: }
361: /* --------------------------------------- */
363: static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
364: {
365: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
366: PetscScalar *darray,*d2d,v;
367: const PetscInt *ranges;
368: PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
370: VecGetArray(D,&darray);
372: if (A->rmap->N<=A->cmap->N) { /* row version */
374: /* create ScaLAPACK descriptor for vector (1d block distribution) */
375: PetscLayoutGetRanges(A->rmap,&ranges);
376: PetscBLASIntCast(ranges[1],&mb); /* D block size */
377: dlld = PetscMax(1,A->rmap->n);
378: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
381: /* allocate 2d vector */
382: lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
383: PetscCalloc1(lszd,&d2d);
384: dlld = PetscMax(1,lszd);
386: /* create ScaLAPACK descriptor for vector (2d block distribution) */
387: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
390: /* collect diagonal */
391: for (j=1;j<=a->M;j++) {
392: PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
393: PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
394: }
396: /* redistribute d from a column of a 2d matrix */
397: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
398: PetscFree(d2d);
400: } else { /* column version */
402: /* create ScaLAPACK descriptor for vector (1d block distribution) */
403: PetscLayoutGetRanges(A->cmap,&ranges);
404: PetscBLASIntCast(ranges[1],&nb); /* D block size */
405: dlld = 1;
406: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
409: /* allocate 2d vector */
410: lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
411: PetscCalloc1(lszd,&d2d);
413: /* create ScaLAPACK descriptor for vector (2d block distribution) */
414: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
417: /* collect diagonal */
418: for (j=1;j<=a->N;j++) {
419: PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
420: PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
421: }
423: /* redistribute d from a row of a 2d matrix */
424: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
425: PetscFree(d2d);
426: }
428: VecRestoreArray(D,&darray);
429: VecAssemblyBegin(D);
430: VecAssemblyEnd(D);
431: return 0;
432: }
434: static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
435: {
436: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
437: const PetscScalar *d;
438: const PetscInt *ranges;
439: PetscScalar *d2d;
440: PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
442: if (R) {
443: VecGetArrayRead(R,(const PetscScalar **)&d);
444: /* create ScaLAPACK descriptor for vector (1d block distribution) */
445: PetscLayoutGetRanges(A->cmap,&ranges);
446: PetscBLASIntCast(ranges[1],&nb); /* D block size */
447: dlld = 1;
448: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
451: /* allocate 2d vector */
452: lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
453: PetscCalloc1(lszd,&d2d);
455: /* create ScaLAPACK descriptor for vector (2d block distribution) */
456: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
459: /* redistribute d to a row of a 2d matrix */
460: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));
462: /* broadcast along process columns */
463: if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
464: else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);
466: /* local scaling */
467: for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];
469: PetscFree(d2d);
470: VecRestoreArrayRead(R,(const PetscScalar **)&d);
471: }
472: if (L) {
473: VecGetArrayRead(L,(const PetscScalar **)&d);
474: /* create ScaLAPACK descriptor for vector (1d block distribution) */
475: PetscLayoutGetRanges(A->rmap,&ranges);
476: PetscBLASIntCast(ranges[1],&mb); /* D block size */
477: dlld = PetscMax(1,A->rmap->n);
478: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
481: /* allocate 2d vector */
482: lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
483: PetscCalloc1(lszd,&d2d);
484: dlld = PetscMax(1,lszd);
486: /* create ScaLAPACK descriptor for vector (2d block distribution) */
487: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
490: /* redistribute d to a column of a 2d matrix */
491: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));
493: /* broadcast along process rows */
494: if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
495: else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);
497: /* local scaling */
498: for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];
500: PetscFree(d2d);
501: VecRestoreArrayRead(L,(const PetscScalar **)&d);
502: }
503: return 0;
504: }
506: static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
507: {
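/* a dense ScaLAPACK matrix stores every diagonal entry explicitly, so none can be missing */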
508: *missing = PETSC_FALSE;
509: return 0;
510: }
512: static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
513: {
514: Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
515: PetscBLASInt n,one=1;
517: n = x->lld*x->locc;
518: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
519: return 0;
520: }
522: static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
523: {
524: Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
525: PetscBLASInt i,n;
526: PetscScalar v;
528: n = PetscMin(x->M,x->N);
529: for (i=1;i<=n;i++) {
530: PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
531: v += alpha;
532: PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
533: }
534: return 0;
535: }
537: static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
538: {
539: Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
540: Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data;
541: PetscBLASInt one=1;
542: PetscScalar beta=1.0;
544: MatScaLAPACKCheckDistribution(Y,1,X,3);
545: PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
546: PetscObjectStateIncrease((PetscObject)Y);
547: return 0;
548: }
550: static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
551: {
552: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
553: Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
555: PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
556: PetscObjectStateIncrease((PetscObject)B);
557: return 0;
558: }
560: static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
561: {
562: Mat Bs;
563: MPI_Comm comm;
564: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b;
566: PetscObjectGetComm((PetscObject)A,&comm);
567: MatCreate(comm,&Bs);
568: MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
569: MatSetType(Bs,MATSCALAPACK);
570: b = (Mat_ScaLAPACK*)Bs->data;
571: b->M = a->M;
572: b->N = a->N;
573: b->mb = a->mb;
574: b->nb = a->nb;
575: b->rsrc = a->rsrc;
576: b->csrc = a->csrc;
577: MatSetUp(Bs);
578: *B = Bs;
579: if (op == MAT_COPY_VALUES) {
580: PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
581: }
582: Bs->assembled = PETSC_TRUE;
583: return 0;
584: }
586: static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
587: {
588: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b;
589: Mat Bs = *B;
590: PetscBLASInt one=1;
591: PetscScalar sone=1.0,zero=0.0;
592: #if defined(PETSC_USE_COMPLEX)
593: PetscInt i,j;
594: #endif
596: if (reuse == MAT_INITIAL_MATRIX) {
597: MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);
598: *B = Bs;
599: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
600: b = (Mat_ScaLAPACK*)Bs->data;
601: PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
602: #if defined(PETSC_USE_COMPLEX)
603: /* undo conjugation */
604: for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
605: #endif
606: Bs->assembled = PETSC_TRUE;
607: return 0;
608: }
610: static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
611: {
612: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
613: PetscInt i,j;
615: for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
616: return 0;
617: }
619: static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
620: {
621: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b;
622: Mat Bs = *B;
623: PetscBLASInt one=1;
624: PetscScalar sone=1.0,zero=0.0;
626: if (reuse == MAT_INITIAL_MATRIX) {
627: MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);
628: *B = Bs;
629: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
630: b = (Mat_ScaLAPACK*)Bs->data;
631: PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
632: Bs->assembled = PETSC_TRUE;
633: return 0;
634: }
636: static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
637: {
638: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
639: PetscScalar *x,*x2d;
640: const PetscInt *ranges;
641: PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;
643: VecCopy(B,X);
644: VecGetArray(X,&x);
646: /* create ScaLAPACK descriptor for a vector (1d block distribution) */
647: PetscLayoutGetRanges(A->rmap,&ranges);
648: PetscBLASIntCast(ranges[1],&mb); /* x block size */
649: xlld = PetscMax(1,A->rmap->n);
650: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
653: /* allocate 2d vector */
654: lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
655: PetscMalloc1(lszx,&x2d);
656: xlld = PetscMax(1,lszx);
658: /* create ScaLAPACK descriptor for a vector (2d block distribution) */
659: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
662: /* redistribute x as a column of a 2d matrix */
663: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
665: /* call ScaLAPACK subroutine */
666: switch (A->factortype) {
667: case MAT_FACTOR_LU:
668: PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
670: break;
671: case MAT_FACTOR_CHOLESKY:
672: PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
674: break;
675: default:
676: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
677: }
679: /* redistribute x from a column of a 2d matrix */
680: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));
682: PetscFree(x2d);
683: VecRestoreArray(X,&x);
684: return 0;
685: }
687: static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
688: {
689: MatSolve_ScaLAPACK(A,B,X);
690: VecAXPY(X,1,Y);
691: return 0;
692: }
694: static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
695: {
696: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
697: PetscBool flg1,flg2;
698: PetscBLASInt one=1,info;
700: PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);
701: PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);
702: if (!flg1 || !flg2) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
703: MatScaLAPACKCheckDistribution(B,1,X,2);
704: b = (Mat_ScaLAPACK*)B->data;
705: x = (Mat_ScaLAPACK*)X->data;
706: PetscArraycpy(x->loc,b->loc,b->lld*b->locc);
708: switch (A->factortype) {
709: case MAT_FACTOR_LU:
710: PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
712: break;
713: case MAT_FACTOR_CHOLESKY:
714: PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
716: break;
717: default:
718: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
719: }
720: return 0;
721: }
723: static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
724: {
725: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
726: PetscBLASInt one=1,info;
728: if (!a->pivots) {
729: PetscMalloc1(a->locr+a->mb,&a->pivots);
730: PetscLogObjectMemory((PetscObject)A,(a->locr+a->mb)*sizeof(PetscBLASInt));
731: }
732: PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
734: A->factortype = MAT_FACTOR_LU;
735: A->assembled = PETSC_TRUE;
737: PetscFree(A->solvertype);
738: PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
739: return 0;
740: }
742: static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
743: {
744: MatCopy(A,F,SAME_NONZERO_PATTERN);
745: MatLUFactor_ScaLAPACK(F,0,0,info);
746: return 0;
747: }
749: static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
750: {
751: /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
752: return 0;
753: }
755: static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
756: {
757: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
758: PetscBLASInt one=1,info;
760: PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
762: A->factortype = MAT_FACTOR_CHOLESKY;
763: A->assembled = PETSC_TRUE;
765: PetscFree(A->solvertype);
766: PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
767: return 0;
768: }
770: static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
771: {
772: MatCopy(A,F,SAME_NONZERO_PATTERN);
773: MatCholeskyFactor_ScaLAPACK(F,0,info);
774: return 0;
775: }
777: static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
778: {
779: /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
780: return 0;
781: }
783: PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
784: {
785: *type = MATSOLVERSCALAPACK;
786: return 0;
787: }
789: static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
790: {
791: Mat B;
792: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
794: /* Create the factorization matrix */
795: MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B);
796: B->trivialsymbolic = PETSC_TRUE;
797: B->factortype = ftype;
798: PetscFree(B->solvertype);
799: PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);
801: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);
802: *F = B;
803: return 0;
804: }
806: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
807: {
808: MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);
809: MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);
810: return 0;
811: }
813: static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
814: {
815: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
816: PetscBLASInt one=1,lwork=0;
817: const char *ntype;
818: PetscScalar *work=NULL,dummy;
820: switch (type) {
821: case NORM_1:
822: ntype = "1";
823: lwork = PetscMax(a->locr,a->locc);
824: break;
825: case NORM_FROBENIUS:
826: ntype = "F";
827: work = &dummy;
828: break;
829: case NORM_INFINITY:
830: ntype = "I";
831: lwork = PetscMax(a->locr,a->locc);
832: break;
833: default:
834: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
835: }
836: if (lwork) PetscMalloc1(lwork,&work);
837: *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
838: if (lwork) PetscFree(work);
839: return 0;
840: }
842: static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
843: {
844: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
846: PetscArrayzero(a->loc,a->lld*a->locc);
847: return 0;
848: }
850: static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
851: {
852: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
853: PetscInt i,n,nb,isrc,nproc,iproc,*idx;
855: if (rows) {
856: n = a->locr;
857: nb = a->mb;
858: isrc = a->rsrc;
859: nproc = a->grid->nprow;
860: iproc = a->grid->myrow;
861: PetscMalloc1(n,&idx);
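/* map local index i to its global index under the 2d block-cyclic distribution:
   block i/nb starts at global position nproc*nb*(i/nb); this process' blocks are
   offset by ((nproc+iproc-isrc)%nproc)*nb; i%nb is the position within the block
   (the same formula is used for the columns below) */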
862: for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
863: ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);
864: }
865: if (cols) {
866: n = a->locc;
867: nb = a->nb;
868: isrc = a->csrc;
869: nproc = a->grid->npcol;
870: iproc = a->grid->mycol;
871: PetscMalloc1(n,&idx);
872: for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
873: ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);
874: }
875: return 0;
876: }
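/*
   MatConvert_ScaLAPACK_Dense - redistributes to a 1d block row distribution with
   p?gemr2d. If a reused B has row ranges different from the ScaLAPACK-compatible
   ones, the redistribution goes through an auxiliary dense matrix whose rows are
   then transferred to B with MatSetValues().
*/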
878: static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
879: {
880: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
881: Mat Bmpi;
882: MPI_Comm comm;
883: PetscInt i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
884: const PetscInt *ranges,*branges,*cwork;
885: const PetscScalar *vwork;
886: PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info;
887: PetscScalar *barray;
888: PetscBool differ=PETSC_FALSE;
889: PetscMPIInt size;
891: PetscObjectGetComm((PetscObject)A,&comm);
892: PetscLayoutGetRanges(A->rmap,&ranges);
894: if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
895: MPI_Comm_size(comm,&size);
896: PetscLayoutGetRanges((*B)->rmap,&branges);
897: for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
898: }
900: if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
901: MatCreate(comm,&Bmpi);
902: m = PETSC_DECIDE;
903: PetscSplitOwnershipEqual(comm,&m,&M);
904: n = PETSC_DECIDE;
905: PetscSplitOwnershipEqual(comm,&n,&N);
906: MatSetSizes(Bmpi,m,n,M,N);
907: MatSetType(Bmpi,MATDENSE);
908: MatSetUp(Bmpi);
910: /* create ScaLAPACK descriptor for B (1d block distribution) */
911: PetscBLASIntCast(ranges[1],&bmb); /* row block size */
912: lld = PetscMax(A->rmap->n,1); /* local leading dimension */
913: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
916: /* redistribute matrix */
917: MatDenseGetArray(Bmpi,&barray);
918: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
919: MatDenseRestoreArray(Bmpi,&barray);
920: MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
921: MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
923: /* transfer rows of auxiliary matrix to the final matrix B */
924: MatGetOwnershipRange(Bmpi,&rstart,&rend);
925: for (i=rstart;i<rend;i++) {
926: MatGetRow(Bmpi,i,&nz,&cwork,&vwork);
927: MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);
928: MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);
929: }
930: MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
931: MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
932: MatDestroy(&Bmpi);
934: } else { /* normal cases */
936: if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
937: else {
938: MatCreate(comm,&Bmpi);
939: m = PETSC_DECIDE;
940: PetscSplitOwnershipEqual(comm,&m,&M);
941: n = PETSC_DECIDE;
942: PetscSplitOwnershipEqual(comm,&n,&N);
943: MatSetSizes(Bmpi,m,n,M,N);
944: MatSetType(Bmpi,MATDENSE);
945: MatSetUp(Bmpi);
946: }
948: /* create ScaLAPACK descriptor for B (1d block distribution) */
949: PetscBLASIntCast(ranges[1],&bmb); /* row block size */
950: lld = PetscMax(A->rmap->n,1); /* local leading dimension */
951: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
954: /* redistribute matrix */
955: MatDenseGetArray(Bmpi,&barray);
956: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
957: MatDenseRestoreArray(Bmpi,&barray);
959: MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
960: MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
961: if (reuse == MAT_INPLACE_MATRIX) {
962: MatHeaderReplace(A,&Bmpi);
963: } else *B = Bmpi;
964: }
965: return 0;
966: }
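/*
   MatScaLAPACKCheckLayout - a PetscLayout is ScaLAPACK-compatible if every process
   owns the same number of rows (columns), except possibly the last process, which
   may own fewer
*/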
968: static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map,PetscBool *correct)
969: {
970: const PetscInt *ranges;
971: PetscMPIInt size;
972: PetscInt i,n;
974: *correct = PETSC_TRUE;
975: MPI_Comm_size(map->comm,&size);
976: if (size>1) {
977: PetscLayoutGetRanges(map,&ranges);
978: n = ranges[1]-ranges[0];
979: for (i=1;i<size;i++) if (ranges[i+1]-ranges[i]!=n) break;
980: *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i+1]-ranges[i] <= n));
981: }
982: return 0;
983: }
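/*
   MatConvert_Dense_ScaLAPACK - if the input layout is ScaLAPACK-compatible, the
   redistribution is done directly with p?gemr2d; otherwise the local values are
   inserted with MatSetValues() using the index sets from MatGetOwnershipIS()
*/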
985: PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
986: {
987: Mat_ScaLAPACK *b;
988: Mat Bmpi;
989: MPI_Comm comm;
990: PetscInt M=A->rmap->N,N=A->cmap->N,m,n;
991: const PetscInt *ranges,*rows,*cols;
992: PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info;
993: PetscScalar *aarray;
994: IS ir,ic;
995: PetscInt lda;
996: PetscBool flg;
998: PetscObjectGetComm((PetscObject)A,&comm);
1000: if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1001: else {
1002: MatCreate(comm,&Bmpi);
1003: m = PETSC_DECIDE;
1004: PetscSplitOwnershipEqual(comm,&m,&M);
1005: n = PETSC_DECIDE;
1006: PetscSplitOwnershipEqual(comm,&n,&N);
1007: MatSetSizes(Bmpi,m,n,M,N);
1008: MatSetType(Bmpi,MATSCALAPACK);
1009: MatSetUp(Bmpi);
1010: }
1011: b = (Mat_ScaLAPACK*)Bmpi->data;
1013: MatDenseGetLDA(A,&lda);
1014: MatDenseGetArray(A,&aarray);
1015: MatScaLAPACKCheckLayout(A->rmap,&flg);
1016: if (flg) MatScaLAPACKCheckLayout(A->cmap,&flg);
1017: if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1018: /* create ScaLAPACK descriptor for A (1d block distribution) */
1019: PetscLayoutGetRanges(A->rmap,&ranges);
1020: PetscBLASIntCast(ranges[1],&amb); /* row block size */
1021: lld = PetscMax(lda,1); /* local leading dimension */
1022: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
1025: /* redistribute matrix */
1026: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
1027: Bmpi->nooffprocentries = PETSC_TRUE;
1028: } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1030: b->roworiented = PETSC_FALSE;
1031: MatGetOwnershipIS(A,&ir,&ic);
1032: ISGetIndices(ir,&rows);
1033: ISGetIndices(ic,&cols);
1034: MatSetValues(Bmpi,A->rmap->n,rows,A->cmap->N,cols,aarray,INSERT_VALUES);
1035: ISRestoreIndices(ir,&rows);
1036: ISRestoreIndices(ic,&cols);
1037: ISDestroy(&ic);
1038: ISDestroy(&ir);
1039: }
1040: MatDenseRestoreArray(A,&aarray);
1041: MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
1042: MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
1043: if (reuse == MAT_INPLACE_MATRIX) {
1044: MatHeaderReplace(A,&Bmpi);
1045: } else *B = Bmpi;
1046: return 0;
1047: }
1049: PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1050: {
1051: Mat mat_scal;
1052: PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1053: const PetscInt *cols;
1054: const PetscScalar *vals;
1056: if (reuse == MAT_REUSE_MATRIX) {
1057: mat_scal = *newmat;
1058: MatZeroEntries(mat_scal);
1059: } else {
1060: MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1061: m = PETSC_DECIDE;
1062: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1063: n = PETSC_DECIDE;
1064: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1065: MatSetSizes(mat_scal,m,n,M,N);
1066: MatSetType(mat_scal,MATSCALAPACK);
1067: MatSetUp(mat_scal);
1068: }
1069: for (row=rstart;row<rend;row++) {
1070: MatGetRow(A,row,&ncols,&cols,&vals);
1071: MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);
1072: MatRestoreRow(A,row,&ncols,&cols,&vals);
1073: }
1074: MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1075: MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);
1077: if (reuse == MAT_INPLACE_MATRIX) MatHeaderReplace(A,&mat_scal);
1078: else *newmat = mat_scal;
1079: return 0;
1080: }
1082: PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1083: {
1084: Mat mat_scal;
1085: PetscInt M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1086: const PetscInt *cols;
1087: const PetscScalar *vals;
1088: PetscScalar v;
1090: if (reuse == MAT_REUSE_MATRIX) {
1091: mat_scal = *newmat;
1092: MatZeroEntries(mat_scal);
1093: } else {
1094: MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1095: m = PETSC_DECIDE;
1096: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1097: n = PETSC_DECIDE;
1098: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1099: MatSetSizes(mat_scal,m,n,M,N);
1100: MatSetType(mat_scal,MATSCALAPACK);
1101: MatSetUp(mat_scal);
1102: }
1103: MatGetRowUpperTriangular(A);
1104: for (row=rstart;row<rend;row++) {
1105: MatGetRow(A,row,&ncols,&cols,&vals);
1106: MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);
1107: for (j=0;j<ncols;j++) { /* lower triangular part */
1108: if (cols[j] == row) continue;
1109: v = A->hermitian ? PetscConj(vals[j]) : vals[j];
1110: MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);
1111: }
1112: MatRestoreRow(A,row,&ncols,&cols,&vals);
1113: }
1114: MatRestoreRowUpperTriangular(A);
1115: MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1116: MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);
1118: if (reuse == MAT_INPLACE_MATRIX) MatHeaderReplace(A,&mat_scal);
1119: else *newmat = mat_scal;
1120: return 0;
1121: }
1123: static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1124: {
1125: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1126: PetscInt sz=0;
1128: PetscLayoutSetUp(A->rmap);
1129: PetscLayoutSetUp(A->cmap);
1130: if (!a->lld) a->lld = a->locr;
1132: PetscFree(a->loc);
1133: PetscIntMultError(a->lld,a->locc,&sz);
1134: PetscCalloc1(sz,&a->loc);
1135: PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));
1137: A->preallocated = PETSC_TRUE;
1138: return 0;
1139: }
1141: static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1142: {
1143: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1144: Mat_ScaLAPACK_Grid *grid;
1145: PetscBool flg;
1146: MPI_Comm icomm;
1148: MatStashDestroy_Private(&A->stash);
1149: PetscFree(a->loc);
1150: PetscFree(a->pivots);
1151: PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1152: MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1153: if (--grid->grid_refct == 0) {
1154: Cblacs_gridexit(grid->ictxt);
1155: Cblacs_gridexit(grid->ictxrow);
1156: Cblacs_gridexit(grid->ictxcol);
1157: PetscFree(grid);
1158: MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);
1159: }
1160: PetscCommDestroy(&icomm);
1161: PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);
1162: PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
1163: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);
1164: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);
1165: PetscFree(A->data);
1166: return 0;
1167: }
1169: PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1170: {
1171: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1172: PetscBLASInt info=0;
1173: PetscBool flg;
1175: PetscLayoutSetUp(A->rmap);
1176: PetscLayoutSetUp(A->cmap);
1178: /* check that the layout is as enforced by MatCreateScaLAPACK() */
1179: MatScaLAPACKCheckLayout(A->rmap,&flg);
1180: if (!flg) SETERRQ(A->rmap->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1181: MatScaLAPACKCheckLayout(A->cmap,&flg);
1182: if (!flg) SETERRQ(A->cmap->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1184: /* compute local sizes */
1185: PetscBLASIntCast(A->rmap->N,&a->M);
1186: PetscBLASIntCast(A->cmap->N,&a->N);
1187: a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1188: a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1189: a->lld = PetscMax(1,a->locr);
1191: /* allocate local array */
1192: MatScaLAPACKSetPreallocation(A);
1194: /* set up ScaLAPACK descriptor */
1195: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1197: return 0;
1198: }
1200: PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1201: {
1202: PetscInt nstash,reallocs;
1204: if (A->nooffprocentries) return 0;
1205: MatStashScatterBegin_Private(A,&A->stash,NULL);
1206: MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);
1207: PetscInfo(A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);
1208: return 0;
1209: }
1211: PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1212: {
1213: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1214: PetscMPIInt n;
1215: PetscInt i,flg,*row,*col;
1216: PetscScalar *val;
1217: PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc;
1219: if (A->nooffprocentries) return 0;
1220: while (1) {
1221: MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);
1222: if (!flg) break;
1223: for (i=0;i<n;i++) {
1224: PetscBLASIntCast(row[i]+1,&gridx);
1225: PetscBLASIntCast(col[i]+1,&gcidx);
1226: PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1228: switch (A->insertmode) {
1229: case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1230: case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1231: default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1232: }
1233: }
1234: }
1235: MatStashScatterEnd_Private(&A->stash);
1236: return 0;
1237: }
1239: PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1240: {
1241: Mat Adense,As;
1242: MPI_Comm comm;
1244: PetscObjectGetComm((PetscObject)newMat,&comm);
1245: MatCreate(comm,&Adense);
1246: MatSetType(Adense,MATDENSE);
1247: MatLoad(Adense,viewer);
1248: MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);
1249: MatDestroy(&Adense);
1250: MatHeaderReplace(newMat,&As);
1251: return 0;
1252: }
1254: /* -------------------------------------------------------------------*/
1255: static struct _MatOps MatOps_Values = {
1256: MatSetValues_ScaLAPACK,
1257: 0,
1258: 0,
1259: MatMult_ScaLAPACK,
1260: /* 4*/ MatMultAdd_ScaLAPACK,
1261: MatMultTranspose_ScaLAPACK,
1262: MatMultTransposeAdd_ScaLAPACK,
1263: MatSolve_ScaLAPACK,
1264: MatSolveAdd_ScaLAPACK,
1265: 0,
1266: /*10*/ 0,
1267: MatLUFactor_ScaLAPACK,
1268: MatCholeskyFactor_ScaLAPACK,
1269: 0,
1270: MatTranspose_ScaLAPACK,
1271: /*15*/ MatGetInfo_ScaLAPACK,
1272: 0,
1273: MatGetDiagonal_ScaLAPACK,
1274: MatDiagonalScale_ScaLAPACK,
1275: MatNorm_ScaLAPACK,
1276: /*20*/ MatAssemblyBegin_ScaLAPACK,
1277: MatAssemblyEnd_ScaLAPACK,
1278: MatSetOption_ScaLAPACK,
1279: MatZeroEntries_ScaLAPACK,
1280: /*24*/ 0,
1281: MatLUFactorSymbolic_ScaLAPACK,
1282: MatLUFactorNumeric_ScaLAPACK,
1283: MatCholeskyFactorSymbolic_ScaLAPACK,
1284: MatCholeskyFactorNumeric_ScaLAPACK,
1285: /*29*/ MatSetUp_ScaLAPACK,
1286: 0,
1287: 0,
1288: 0,
1289: 0,
1290: /*34*/ MatDuplicate_ScaLAPACK,
1291: 0,
1292: 0,
1293: 0,
1294: 0,
1295: /*39*/ MatAXPY_ScaLAPACK,
1296: 0,
1297: 0,
1298: 0,
1299: MatCopy_ScaLAPACK,
1300: /*44*/ 0,
1301: MatScale_ScaLAPACK,
1302: MatShift_ScaLAPACK,
1303: 0,
1304: 0,
1305: /*49*/ 0,
1306: 0,
1307: 0,
1308: 0,
1309: 0,
1310: /*54*/ 0,
1311: 0,
1312: 0,
1313: 0,
1314: 0,
1315: /*59*/ 0,
1316: MatDestroy_ScaLAPACK,
1317: MatView_ScaLAPACK,
1318: 0,
1319: 0,
1320: /*64*/ 0,
1321: 0,
1322: 0,
1323: 0,
1324: 0,
1325: /*69*/ 0,
1326: 0,
1327: MatConvert_ScaLAPACK_Dense,
1328: 0,
1329: 0,
1330: /*74*/ 0,
1331: 0,
1332: 0,
1333: 0,
1334: 0,
1335: /*79*/ 0,
1336: 0,
1337: 0,
1338: 0,
1339: MatLoad_ScaLAPACK,
1340: /*84*/ 0,
1341: 0,
1342: 0,
1343: 0,
1344: 0,
1345: /*89*/ 0,
1346: 0,
1347: MatMatMultNumeric_ScaLAPACK,
1348: 0,
1349: 0,
1350: /*94*/ 0,
1351: 0,
1352: 0,
1353: MatMatTransposeMultNumeric_ScaLAPACK,
1354: 0,
1355: /*99*/ MatProductSetFromOptions_ScaLAPACK,
1356: 0,
1357: 0,
1358: MatConjugate_ScaLAPACK,
1359: 0,
1360: /*104*/0,
1361: 0,
1362: 0,
1363: 0,
1364: 0,
1365: /*109*/MatMatSolve_ScaLAPACK,
1366: 0,
1367: 0,
1368: 0,
1369: MatMissingDiagonal_ScaLAPACK,
1370: /*114*/0,
1371: 0,
1372: 0,
1373: 0,
1374: 0,
1375: /*119*/0,
1376: MatHermitianTranspose_ScaLAPACK,
1377: 0,
1378: 0,
1379: 0,
1380: /*124*/0,
1381: 0,
1382: 0,
1383: 0,
1384: 0,
1385: /*129*/0,
1386: 0,
1387: 0,
1388: 0,
1389: 0,
1390: /*134*/0,
1391: 0,
1392: 0,
1393: 0,
1394: 0,
1395: 0,
1396: /*140*/0,
1397: 0,
1398: 0,
1399: 0,
1400: 0,
1401: /*145*/0,
1402: 0,
1403: 0
1404: };
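/*
   MatStashScatterBegin_ScaLAPACK - variant of MatStashScatterBegin_Private() in
   which the owner of a stashed (row,col) entry is determined by the 2d block-cyclic
   map (via INFOG2L and Cblacs_pnum()) rather than by contiguous row ownership ranges
*/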
1406: static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1407: {
1408: PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1409: PetscInt size=stash->size,nsends;
1410: PetscInt count,*sindices,**rindices,i,j,l;
1411: PetscScalar **rvalues,*svalues;
1412: MPI_Comm comm = stash->comm;
1413: MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1414: PetscMPIInt *sizes,*nlengths,nreceives;
1415: PetscInt *sp_idx,*sp_idy;
1416: PetscScalar *sp_val;
1417: PetscMatStashSpace space,space_next;
1418: PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc;
1419: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data;
1421: { /* make sure all processes use the same insert mode (INSERT_VALUES or ADD_VALUES) */
1422: InsertMode addv;
1423: MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
1425: mat->insertmode = addv; /* in case this processor had no cache */
1426: }
1428: bs2 = stash->bs*stash->bs;
1430: /* first count number of contributors to each processor */
1431: PetscCalloc1(size,&nlengths);
1432: PetscMalloc1(stash->n+1,&owner);
1434: i = j = 0;
1435: space = stash->space_head;
1436: while (space) {
1437: space_next = space->next;
1438: for (l=0; l<space->local_used; l++) {
1439: PetscBLASIntCast(space->idx[l]+1,&gridx);
1440: PetscBLASIntCast(space->idy[l]+1,&gcidx);
1441: PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1442: j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1443: nlengths[j]++; owner[i] = j;
1444: i++;
1445: }
1446: space = space_next;
1447: }
1449: /* Now check what procs get messages - and compute nsends. */
1450: PetscCalloc1(size,&sizes);
1451: for (i=0, nsends=0; i<size; i++) {
1452: if (nlengths[i]) {
1453: sizes[i] = 1; nsends++;
1454: }
1455: }
1457: {PetscMPIInt *onodes,*olengths;
1458: /* Determine the number of messages to expect, their lengths, and their from-ids */
1459: PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);
1460: PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
1461: /* row and column indices are sent together, so lengths are multiplied by 2 */
1462: for (i=0; i<nreceives; i++) olengths[i] *=2;
1463: PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
1464: /* values have bs2 entries each (remove the earlier factor of 2) */
1465: for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1466: PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
1467: PetscFree(onodes);
1468: PetscFree(olengths);}
1470: /* do sends:
1471: startv[i]/starti[i] give the starting index in svalues/sindices for
1472: entries going to the ith processor
1473: */
1474: PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);
1475: PetscMalloc1(2*nsends,&send_waits);
1476: PetscMalloc2(size,&startv,size,&starti);
1477: /* use 2 sends per destination: one with the indices (row indices first, then column indices), one with the values */
1478: startv[0] = 0; starti[0] = 0;
1479: for (i=1; i<size; i++) {
1480: startv[i] = startv[i-1] + nlengths[i-1];
1481: starti[i] = starti[i-1] + 2*nlengths[i-1];
1482: }
1484: i = 0;
1485: space = stash->space_head;
1486: while (space) {
1487: space_next = space->next;
1488: sp_idx = space->idx;
1489: sp_idy = space->idy;
1490: sp_val = space->val;
1491: for (l=0; l<space->local_used; l++) {
1492: j = owner[i];
1493: if (bs2 == 1) {
1494: svalues[startv[j]] = sp_val[l];
1495: } else {
1496: PetscInt k;
1497: PetscScalar *buf1,*buf2;
1498: buf1 = svalues+bs2*startv[j];
1499: buf2 = space->val + bs2*l;
1500: for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1501: }
1502: sindices[starti[j]] = sp_idx[l];
1503: sindices[starti[j]+nlengths[j]] = sp_idy[l];
1504: startv[j]++;
1505: starti[j]++;
1506: i++;
1507: }
1508: space = space_next;
1509: }
1510: startv[0] = 0;
1511: for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1513: for (i=0,count=0; i<size; i++) {
1514: if (sizes[i]) {
1515: MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
1516: MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
1517: }
1518: }
1519: #if defined(PETSC_USE_INFO)
1520: PetscInfo(NULL,"No of messages: %" PetscInt_FMT "\n",nsends);
1521: for (i=0; i<size; i++) {
1522: if (sizes[i]) {
1523: PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt))));
1524: }
1525: }
1526: #endif
1527: PetscFree(nlengths);
1528: PetscFree(owner);
1529: PetscFree2(startv,starti);
1530: PetscFree(sizes);
1532: /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1533: PetscMalloc1(2*nreceives,&recv_waits);
1535: for (i=0; i<nreceives; i++) {
1536: recv_waits[2*i] = recv_waits1[i];
1537: recv_waits[2*i+1] = recv_waits2[i];
1538: }
1539: stash->recv_waits = recv_waits;
1541: PetscFree(recv_waits1);
1542: PetscFree(recv_waits2);
1544: stash->svalues = svalues;
1545: stash->sindices = sindices;
1546: stash->rvalues = rvalues;
1547: stash->rindices = rindices;
1548: stash->send_waits = send_waits;
1549: stash->nsends = nsends;
1550: stash->nrecvs = nreceives;
1551: stash->reproduce_count = 0;
1552: return 0;
1553: }
1555: static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1556: {
1557: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1562: PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1563: PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1564: return 0;
1565: }
1567: /*@
1568: MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1569: the ScaLAPACK matrix
1571: Logically Collective on A
1573: Input Parameters:
1574: + A - a MATSCALAPACK matrix
1575: . mb - the row block size
1576: - nb - the column block size
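1577: Example:
1578: A minimal sketch, assuming A already has type MATSCALAPACK and has not yet been
1579: set up (the block sizes are used when the local storage is laid out in MatSetUp()):
.vb
   MatScaLAPACKSetBlockSizes(A,32,32);
   MatSetUp(A);
.ve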
1578: Level: intermediate
1580: .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1581: @*/
1582: PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1583: {
1587: PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));
1588: return 0;
1589: }
1591: static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1592: {
1593: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1595: if (mb) *mb = a->mb;
1596: if (nb) *nb = a->nb;
1597: return 0;
1598: }
1600: /*@
1601: MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1602: the ScaLAPACK matrix
1604: Not collective
1606: Input Parameter:
1607: . A - a MATSCALAPACK matrix
1609: Output Parameters:
1610: + mb - the row block size
1611: - nb - the column block size
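1612: Example:
1613: A minimal sketch that queries the block sizes:
.vb
   PetscInt mb,nb;
   MatScaLAPACKGetBlockSizes(A,&mb,&nb);
.ve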
1613: Level: intermediate
1615: .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1616: @*/
1617: PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1618: {
1620: PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));
1621: return 0;
1622: }
1624: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1625: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1627: /*MC
1628: MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1630: Use ./configure --download-scalapack to build PETSc with ScaLAPACK support
1632: Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1634: Options Database Keys:
1635: + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1636: . -mat_scalapack_grid_height - sets the grid height (number of process rows) of the 2D block-cyclic distribution
1637: - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
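1638: Example:
1639: A minimal sketch, assuming comm is an MPI communicator; the block sizes and the
1640: process grid take their default values:
.vb
   Mat A;
   MatCreateScaLAPACK(comm,PETSC_DECIDE,PETSC_DECIDE,1000,1000,0,0,&A);
.ve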
1639: Level: beginner
1641: .seealso: MATDENSE, MATELEMENTAL
1642: M*/
1644: PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1645: {
1646: Mat_ScaLAPACK *a;
1648: PetscBool flg,flg1;
1649: Mat_ScaLAPACK_Grid *grid;
1650: MPI_Comm icomm;
1651: PetscBLASInt nprow,npcol,myrow,mycol;
1652: PetscInt optv1,k=2,array[2]={0,0};
1653: PetscMPIInt size;
1655: PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1656: A->insertmode = NOT_SET_VALUES;
1658: MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);
1659: A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK;
1660: A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1661: A->stash.ScatterEnd = MatStashScatterEnd_Ref;
1662: A->stash.ScatterDestroy = NULL;
1664: PetscNewLog(A,&a);
1665: A->data = (void*)a;
1667: /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1668: if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1669: MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);
1670: PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);
1671: PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite);
1672: }
1673: PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1674: MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1675: if (!flg) {
1676: PetscNewLog(A,&grid);
1678: MPI_Comm_size(icomm,&size);
1679: grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1681: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");
1682: PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);
1683: if (flg1) {
1685: grid->nprow = optv1;
1686: }
1687: PetscOptionsEnd();
1689: if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1690: grid->npcol = size/grid->nprow;
1691: PetscBLASIntCast(grid->nprow,&nprow);
1692: PetscBLASIntCast(grid->npcol,&npcol);
1693: grid->ictxt = Csys2blacs_handle(icomm);
1694: Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1695: Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1696: grid->grid_refct = 1;
1697: grid->nprow = nprow;
1698: grid->npcol = npcol;
1699: grid->myrow = myrow;
1700: grid->mycol = mycol;
1701: /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1702: grid->ictxrow = Csys2blacs_handle(icomm);
1703: Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1704: grid->ictxcol = Csys2blacs_handle(icomm);
1705: Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1706: MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);
1708: } else grid->grid_refct++;
1709: PetscCommDestroy(&icomm);
1710: a->grid = grid;
1711: a->mb = DEFAULT_BLOCKSIZE;
1712: a->nb = DEFAULT_BLOCKSIZE;
1714: PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");
1715: PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);
1716: if (flg) {
1717: a->mb = array[0];
1718: a->nb = (k>1)? array[1]: a->mb;
1719: }
1720: PetscOptionsEnd();
1722: a->roworiented = PETSC_TRUE;
1723: PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);
1724: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);
1725: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);
1726: PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);
1727: return 0;
1728: }
1730: /*@C
1731: MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1732: (2D block cyclic distribution).
1734: Collective
1736: Input Parameters:
1737: + comm - MPI communicator
1738: . mb - row block size (or PETSC_DECIDE to have it set)
1739: . nb - column block size (or PETSC_DECIDE to have it set)
1740: . M - number of global rows
1741: . N - number of global columns
1742: . rsrc - coordinate of process that owns the first row of the distributed matrix
1743: - csrc - coordinate of process that owns the first column of the distributed matrix
1745: Output Parameter:
1746: . A - the matrix
1748: Options Database Keys:
1749: . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1751: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1752: MatXXXXSetPreallocation() paradigm instead of this routine directly.
1753: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1755: Notes:
1756: If PETSC_DECIDE is used for the block sizes, then appropriate values
1757: are chosen.
1759: Storage Information:
1760: Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1761: configured with ScaLAPACK. In particular, PETSc's local sizes lose
1762: significance and are thus ignored. The block sizes refer to the values
1763: used for the distributed matrix, not the same meaning as in BAIJ.
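1764: Example:
1765: A minimal sketch that creates an M x N matrix with 64 x 64 blocks, with the first
1766: block owned by process (0,0) of the grid:
.vb
   Mat A;
   MatCreateScaLAPACK(PETSC_COMM_WORLD,64,64,M,N,0,0,&A);
   /* fill with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() */
   MatDestroy(&A);
.ve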
1765: Level: intermediate
1767: .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1768: @*/
1769: PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1770: {
1771: Mat_ScaLAPACK *a;
1772: PetscInt m,n;
1774: MatCreate(comm,A);
1775: MatSetType(*A,MATSCALAPACK);
1777: /* rows and columns are NOT distributed according to PetscSplitOwnership */
1778: m = PETSC_DECIDE;
1779: PetscSplitOwnershipEqual(comm,&m,&M);
1780: n = PETSC_DECIDE;
1781: PetscSplitOwnershipEqual(comm,&n,&N);
1782: MatSetSizes(*A,m,n,M,N);
1783: a = (Mat_ScaLAPACK*)(*A)->data;
1784: PetscBLASIntCast(M,&a->M);
1785: PetscBLASIntCast(N,&a->N);
1786: PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1787: PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1788: PetscBLASIntCast(rsrc,&a->rsrc);
1789: PetscBLASIntCast(csrc,&a->csrc);
1790: MatSetUp(*A);
1791: return 0;
1792: }