Actual source code: matscalapack.c

#include <petsc/private/petscscalapack.h>

const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
"       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
"                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
"                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
"       TITLE = {Sca{LAPACK} Users' Guide},\n"
"       PUBLISHER = {SIAM},\n"
"       ADDRESS = {Philadelphia, PA},\n"
"       YEAR = 1997\n"
"}\n";
static PetscBool ScaLAPACKCite = PETSC_FALSE;

#define DEFAULT_BLOCKSIZE 64

/*
    Petsc_ScaLAPACK_keyval identifies an MPI attribute attached to a
  communicator; the value of the attribute is a Mat_ScaLAPACK_Grid
*/
static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;

static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
{
  PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");
  MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);
  return 0;
}
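
/*
   Minimal sketch (not code from this file) of the keyval pattern used here:
   the keyval is created once per process, a Mat_ScaLAPACK_Grid is attached to
   a communicator, and later lookups retrieve it with MPI_Comm_get_attr().
   The exact creation/attachment logic lives elsewhere in this file; the
   control flow below is only illustrative.

     Mat_ScaLAPACK_Grid *grid;
     PetscBool          flg;
     if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
       MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,NULL);
       PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);
     }
     MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
     if (!flg) {                                   no grid attached yet: create one
       PetscNew(&grid);
       MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);
     }
*/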

static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat               Adense;

  PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
  if (iascii) {
    PetscViewerGetFormat(viewer,&format);
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);
      PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);
      PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);
      PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);
      return 0;
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      return 0;
    }
  }
  /* convert to dense format and call MatView() */
  MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);
  MatView(Adense,viewer);
  MatDestroy(&Adense);
  return 0;
}

static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscLogDouble isend[2],irecv[2];

  info->block_size = 1.0;

  isend[0] = a->lld*a->locc;     /* locally allocated */
  isend[1] = a->locr*a->locc;    /* used submatrix */
  if (flag == MAT_LOCAL) {
    info->nz_allocated   = isend[0];
    info->nz_used        = isend[1];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));
    info->nz_allocated   = irecv[0];
    info->nz_used        = irecv[1];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));
    info->nz_allocated   = irecv[0];
    info->nz_used        = irecv[1];
  }

  info->nz_unneeded       = 0;
  info->assemblies        = A->num_ass;
  info->mallocs           = 0;
  info->memory            = ((PetscObject)A)->mem;
  info->fill_ratio_given  = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  return 0;
}

PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;

  switch (op) {
    case MAT_NEW_NONZERO_LOCATIONS:
    case MAT_NEW_NONZERO_LOCATION_ERR:
    case MAT_NEW_NONZERO_ALLOCATION_ERR:
    case MAT_SYMMETRIC:
    case MAT_SORTED_FULL:
    case MAT_HERMITIAN:
      break;
    case MAT_ROW_ORIENTED:
      a->roworiented = flg;
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
  }
  return 0;
}

static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscInt       i,j;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
  PetscBool      roworiented = a->roworiented;

  for (i=0;i<nr;i++) {
    if (rows[i] < 0) continue;
    PetscBLASIntCast(rows[i]+1,&gridx);
    for (j=0;j<nc;j++) {
      if (cols[j] < 0) continue;
      PetscBLASIntCast(cols[j]+1,&gcidx);
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
        if (roworiented) {
          switch (imode) {
            case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
            default: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
          }
        } else {
          switch (imode) {
            case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i+j*nr]; break;
            default: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i+j*nr]; break;
          }
        }
      } else {
        A->assembled = PETSC_FALSE;
        MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,roworiented ? vals+i*nc+j : vals+i+j*nr,(PetscBool)(imode==ADD_VALUES));
      }
    }
  }
  return 0;
}
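
/*
   Usage sketch: from user code the routine above is reached through the
   standard PETSc assembly idiom; entries owned by another process are stashed
   and shipped during assembly. Indices and values below are illustrative.

     PetscInt    i = 0, j = 5;
     PetscScalar v = 1.0;
     MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/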

static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x2d,*y2d,alpha=1.0;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;
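
  /* Note on ScaLAPACK array descriptors: each desc[9] built below stores, in
     order, the descriptor type (1 for dense), the BLACS context handle, the
     global dimensions M and N, the blocking factors MB and NB, the coordinates
     RSRC/CSRC of the process owning the first entry, and the local leading
     dimension LLD. descinit() fills these fields, and gemr2d() copies data
     between any two such distributions, which is what the 1d<->2d
     redistributions in this routine rely on. */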

  if (transpose) {

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscLayoutGetRanges(A->rmap,&ranges);
    PetscBLASIntCast(ranges[1],&mb);  /* x block size */
    xlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
    PetscLayoutGetRanges(A->cmap,&ranges);
    PetscBLASIntCast(ranges[1],&nb);  /* y block size */
    ylld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));

    /* allocate 2d vectors */
    lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscMalloc2(lszx,&x2d,lszy,&y2d);
    xlld = PetscMax(1,lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));

    /* redistribute x as a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

    /* call PBLAS subroutine */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

  } else {   /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    PetscLayoutGetRanges(A->cmap,&ranges);
    PetscBLASIntCast(ranges[1],&nb);  /* x block size */
    xlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
    PetscLayoutGetRanges(A->rmap,&ranges);
    PetscBLASIntCast(ranges[1],&mb);  /* y block size */
    ylld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscMalloc2(lszx,&x2d,lszy,&y2d);
    ylld = PetscMax(1,lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));

    /* redistribute x as a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

    /* call PBLAS subroutine */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

  }
  PetscFree2(x2d,y2d);
  return 0;
}

static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
{
  const PetscScalar *xarray;
  PetscScalar       *yarray;

  VecGetArrayRead(x,&xarray);
  VecGetArray(y,&yarray);
  MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);
  VecRestoreArrayRead(x,&xarray);
  VecRestoreArray(y,&yarray);
  return 0;
}

static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
{
  const PetscScalar *xarray;
  PetscScalar       *yarray;

  VecGetArrayRead(x,&xarray);
  VecGetArray(y,&yarray);
  MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);
  VecRestoreArrayRead(x,&xarray);
  VecRestoreArray(y,&yarray);
  return 0;
}

static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
{
  const PetscScalar *xarray;
  PetscScalar       *zarray;

  if (y != z) VecCopy(y,z);
  VecGetArrayRead(x,&xarray);
  VecGetArray(z,&zarray);
  MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);
  VecRestoreArrayRead(x,&xarray);
  VecRestoreArray(z,&zarray);
  return 0;
}

static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
{
  const PetscScalar *xarray;
  PetscScalar       *zarray;

  if (y != z) VecCopy(y,z);
  VecGetArrayRead(x,&xarray);
  VecGetArray(z,&zarray);
  MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);
  VecRestoreArrayRead(x,&xarray);
  VecRestoreArray(z,&zarray);
  return 0;
}

PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;
  return 0;
}

PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
{
  MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
  MatSetType(C,MATSCALAPACK);
  MatSetUp(C);
  C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
  return 0;
}

static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;
  return 0;
}

static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
{
  MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
  MatSetType(C,MATSCALAPACK);
  MatSetUp(C);
  return 0;
}

/* --------------------------------------- */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
{
  C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  return 0;
}

static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
{
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  return 0;
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
{
  Mat_Product    *product = C->product;

  switch (product->type) {
    case MATPRODUCT_AB:
      MatProductSetFromOptions_ScaLAPACK_AB(C);
      break;
    case MATPRODUCT_ABt:
      MatProductSetFromOptions_ScaLAPACK_ABt(C);
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
  }
  return 0;
}
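
/*
   Usage sketch for the MatProduct interface with MATSCALAPACK operands
   (standard PETSc calls; the product matrix C is created by the interface):

     Mat C;
     MatProductCreate(A,B,NULL,&C);
     MatProductSetType(C,MATPRODUCT_AB);        or MATPRODUCT_ABt
     MatProductSetFromOptions(C);
     MatProductSymbolic(C);
     MatProductNumeric(C);
*/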
/* --------------------------------------- */

static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar       *darray,*d2d,v;
  const PetscInt    *ranges;
  PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  VecGetArray(D,&darray);

  if (A->rmap->N<=A->cmap->N) {   /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscLayoutGetRanges(A->rmap,&ranges);
    PetscBLASIntCast(ranges[1],&mb);  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    PetscCalloc1(lszd,&d2d);
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));

    /* collect diagonal */
    for (j=1;j<=a->M;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
    PetscFree(d2d);

  } else {   /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscLayoutGetRanges(A->cmap,&ranges);
    PetscBLASIntCast(ranges[1],&nb);  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCalloc1(lszd,&d2d);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));

    /* collect diagonal */
    for (j=1;j<=a->N;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
    PetscFree(d2d);
  }

  VecRestoreArray(D,&darray);
  VecAssemblyBegin(D);
  VecAssemblyEnd(D);
  return 0;
}

static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
{
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  if (R) {
    VecGetArrayRead(R,(const PetscScalar **)&d);
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscLayoutGetRanges(A->cmap,&ranges);
    PetscBLASIntCast(ranges[1],&nb);  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    PetscCalloc1(lszd,&d2d);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));

    /* redistribute d to a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));

    /* broadcast along process columns */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);

    /* local scaling */
    for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];

    PetscFree(d2d);
    VecRestoreArrayRead(R,(const PetscScalar **)&d);
  }
  if (L) {
    VecGetArrayRead(L,(const PetscScalar **)&d);
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    PetscLayoutGetRanges(A->rmap,&ranges);
    PetscBLASIntCast(ranges[1],&mb);  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    PetscCalloc1(lszd,&d2d);
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));

    /* redistribute d to a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

    /* broadcast along process rows */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

    /* local scaling */
    for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

    PetscFree(d2d);
    VecRestoreArrayRead(L,(const PetscScalar **)&d);
  }
  return 0;
}

static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
{
  *missing = PETSC_FALSE;
  return 0;
}

static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
  PetscBLASInt  n,one=1;

  n = x->lld*x->locc;
  PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
  return 0;
}

static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
{
  Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
  PetscBLASInt  i,n;
  PetscScalar   v;

  n = PetscMin(x->M,x->N);
  for (i=1;i<=n;i++) {
    PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
    v += alpha;
    PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
  }
  return 0;
}

static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
  Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
  PetscBLASInt   one=1;
  PetscScalar    beta=1.0;

  MatScaLAPACKCheckDistribution(Y,1,X,3);
  PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
  PetscObjectStateIncrease((PetscObject)Y);
  return 0;
}

static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;

  PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
  PetscObjectStateIncrease((PetscObject)B);
  return 0;
}

static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
{
  Mat            Bs;
  MPI_Comm       comm;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;

  PetscObjectGetComm((PetscObject)A,&comm);
  MatCreate(comm,&Bs);
  MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
  MatSetType(Bs,MATSCALAPACK);
  b = (Mat_ScaLAPACK*)Bs->data;
  b->M    = a->M;
  b->N    = a->N;
  b->mb   = a->mb;
  b->nb   = a->nb;
  b->rsrc = a->rsrc;
  b->csrc = a->csrc;
  MatSetUp(Bs);
  *B = Bs;
  if (op == MAT_COPY_VALUES) {
    PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
  }
  Bs->assembled = PETSC_TRUE;
  return 0;
}

static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt       i,j;
#endif

  if (reuse == MAT_INITIAL_MATRIX) {
    MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  return 0;
}

static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  PetscInt      i,j;

  for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
  return 0;
}

static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
  Bs->assembled = PETSC_TRUE;
  return 0;
}

static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x,*x2d;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

  VecCopy(B,X);
  VecGetArray(X,&x);

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  PetscLayoutGetRanges(A->rmap,&ranges);
  PetscBLASIntCast(ranges[1],&mb);  /* x block size */
  xlld = PetscMax(1,A->rmap->n);
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  PetscMalloc1(lszx,&x2d);
  xlld = PetscMax(1,lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));

  /* redistribute x as a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

  /* call ScaLAPACK subroutine */
  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }

  /* redistribute x from a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

  PetscFree(x2d);
  VecRestoreArray(X,&x);
  return 0;
}

static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
{
  MatSolve_ScaLAPACK(A,B,X);
  VecAXPY(X,1,Y);
  return 0;
}

static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
  PetscBool      flg1,flg2;
  PetscBLASInt   one=1,info;

  PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);
  PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);
  if (!flg1 || !flg2) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Both B and X must be of type MATSCALAPACK");
  MatScaLAPACKCheckDistribution(B,1,X,2);
  b = (Mat_ScaLAPACK*)B->data;
  x = (Mat_ScaLAPACK*)X->data;
  PetscArraycpy(x->loc,b->loc,b->lld*b->locc);

  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
  }
  return 0;
}

static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   one=1,info;

  if (!a->pivots) {
    PetscMalloc1(a->locr+a->mb,&a->pivots);
    PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));
  }
  PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
  A->factortype = MAT_FACTOR_LU;
  A->assembled  = PETSC_TRUE;

  PetscFree(A->solvertype);
  PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
  return 0;
}

static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
{
  MatCopy(A,F,SAME_NONZERO_PATTERN);
  MatLUFactor_ScaLAPACK(F,0,0,info);
  return 0;
}

static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  return 0;
}

static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   one=1,info;

  PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  PetscFree(A->solvertype);
  PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
  return 0;
}

static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
{
  MatCopy(A,F,SAME_NONZERO_PATTERN);
  MatCholeskyFactor_ScaLAPACK(F,0,info);
  return 0;
}

static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  return 0;
}

PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
{
  *type = MATSOLVERSCALAPACK;
  return 0;
}

static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

  /* Create the factorization matrix */
  MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B);
  B->trivialsymbolic = PETSC_TRUE;
  B->factortype = ftype;
  PetscFree(B->solvertype);
  PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);

  PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);
  *F = B;
  return 0;
}

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
{
  MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);
  MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);
  return 0;
}
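
/*
   Usage sketch: once registered, the ScaLAPACK factorization can be selected
   from the options database (-pc_type lu -pc_factor_mat_solver_type scalapack)
   or requested directly; the calls below are standard PETSc API, the variable
   names are illustrative.

     Mat           F;
     MatFactorInfo info;
     MatFactorInfoInitialize(&info);
     MatGetFactor(A,MATSOLVERSCALAPACK,MAT_FACTOR_LU,&F);
     MatLUFactorSymbolic(F,A,NULL,NULL,&info);
     MatLUFactorNumeric(F,A,&info);
     MatSolve(F,b,x);
     MatDestroy(&F);
*/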

static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   one=1,lwork=0;
  const char     *ntype;
  PetscScalar    *work=NULL,dummy;

  switch (type) {
    case NORM_1:
      ntype = "1";
      lwork = PetscMax(a->locr,a->locc);
      break;
    case NORM_FROBENIUS:
      ntype = "F";
      work  = &dummy;
      break;
    case NORM_INFINITY:
      ntype = "I";
      lwork = PetscMax(a->locr,a->locc);
      break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
  }
  if (lwork) PetscMalloc1(lwork,&work);
  *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
  if (lwork) PetscFree(work);
  return 0;
}

static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

  PetscArrayzero(a->loc,a->lld*a->locc);
  return 0;
}

static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscInt       i,n,nb,isrc,nproc,iproc,*idx;

  if (rows) {
    n     = a->locr;
    nb    = a->mb;
    isrc  = a->rsrc;
    nproc = a->grid->nprow;
    iproc = a->grid->myrow;
    PetscMalloc1(n,&idx);
    for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
    ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);
  }
  if (cols) {
    n     = a->locc;
    nb    = a->nb;
    isrc  = a->csrc;
    nproc = a->grid->npcol;
    iproc = a->grid->mycol;
    PetscMalloc1(n,&idx);
    for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
    ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);
  }
  return 0;
}
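
/*
   Worked example of the block-cyclic index formula above (numbers are
   illustrative): with nb=2, nproc=3, isrc=0 and iproc=1, local entries
   i=0,1,2,3 map to global indices 2,3,8,9. Local block i/nb is the
   (i/nb)-th block owned by this process, and global blocks are dealt out
   cyclically, giving

     global index = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb
*/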

static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
  const PetscInt *ranges,*branges,*cwork;
  const PetscScalar *vwork;
  PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
  PetscScalar    *barray;
  PetscBool      differ=PETSC_FALSE;
  PetscMPIInt    size;

  PetscObjectGetComm((PetscObject)A,&comm);
  PetscLayoutGetRanges(A->rmap,&ranges);

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    MPI_Comm_size(comm,&size);
    PetscLayoutGetRanges((*B)->rmap,&branges);
    for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    MatCreate(comm,&Bmpi);
    m = PETSC_DECIDE;
    PetscSplitOwnershipEqual(comm,&m,&M);
    n = PETSC_DECIDE;
    PetscSplitOwnershipEqual(comm,&n,&N);
    MatSetSizes(Bmpi,m,n,M,N);
    MatSetType(Bmpi,MATDENSE);
    MatSetUp(Bmpi);

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscBLASIntCast(ranges[1],&bmb);  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));

    /* redistribute matrix */
    MatDenseGetArray(Bmpi,&barray);
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    MatDenseRestoreArray(Bmpi,&barray);
    MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);

    /* transfer rows of auxiliary matrix to the final matrix B */
    MatGetOwnershipRange(Bmpi,&rstart,&rend);
    for (i=rstart;i<rend;i++) {
      MatGetRow(Bmpi,i,&nz,&cwork,&vwork);
      MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);
      MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);
    }
    MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
    MatDestroy(&Bmpi);

  } else {  /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      MatCreate(comm,&Bmpi);
      m = PETSC_DECIDE;
      PetscSplitOwnershipEqual(comm,&m,&M);
      n = PETSC_DECIDE;
      PetscSplitOwnershipEqual(comm,&n,&N);
      MatSetSizes(Bmpi,m,n,M,N);
      MatSetType(Bmpi,MATDENSE);
      MatSetUp(Bmpi);
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    PetscBLASIntCast(ranges[1],&bmb);  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));

    /* redistribute matrix */
    MatDenseGetArray(Bmpi,&barray);
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    MatDenseRestoreArray(Bmpi,&barray);

    MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
    if (reuse == MAT_INPLACE_MATRIX) {
      MatHeaderReplace(A,&Bmpi);
    } else *B = Bmpi;
  }
  return 0;
}
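
/*
   A layout counts as ScaLAPACK-compatible when every process owns the same
   number of rows (or columns), except possibly the last process, which may
   own fewer; the helper below verifies exactly this.
*/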

static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map,PetscBool *correct)
{
  const PetscInt *ranges;
  PetscMPIInt    size;
  PetscInt       i,n;

  *correct = PETSC_TRUE;
  MPI_Comm_size(map->comm,&size);
  if (size>1) {
    PetscLayoutGetRanges(map,&ranges);
    n = ranges[1]-ranges[0];
    for (i=1;i<size;i++) if (ranges[i+1]-ranges[i]!=n) break;
    *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i+1]-ranges[i] <= n));
  }
  return 0;
}

PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  Mat_ScaLAPACK  *b;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
  const PetscInt *ranges,*rows,*cols;
  PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
  PetscScalar    *aarray;
  IS             ir,ic;
  PetscInt       lda;
  PetscBool      flg;

  PetscObjectGetComm((PetscObject)A,&comm);

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    MatCreate(comm,&Bmpi);
    m = PETSC_DECIDE;
    PetscSplitOwnershipEqual(comm,&m,&M);
    n = PETSC_DECIDE;
    PetscSplitOwnershipEqual(comm,&n,&N);
    MatSetSizes(Bmpi,m,n,M,N);
    MatSetType(Bmpi,MATSCALAPACK);
    MatSetUp(Bmpi);
  }
  b = (Mat_ScaLAPACK*)Bmpi->data;

  MatDenseGetLDA(A,&lda);
  MatDenseGetArray(A,&aarray);
  MatScaLAPACKCheckLayout(A->rmap,&flg);
  if (flg) MatScaLAPACKCheckLayout(A->cmap,&flg);
  if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
    /* create ScaLAPACK descriptor for A (1d block distribution) */
    PetscLayoutGetRanges(A->rmap,&ranges);
    PetscBLASIntCast(ranges[1],&amb);  /* row block size */
    lld = PetscMax(lda,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));

    /* redistribute matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
    Bmpi->nooffprocentries = PETSC_TRUE;
  } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
    b->roworiented = PETSC_FALSE;
    MatGetOwnershipIS(A,&ir,&ic);
    ISGetIndices(ir,&rows);
    ISGetIndices(ic,&cols);
    MatSetValues(Bmpi,A->rmap->n,rows,A->cmap->N,cols,aarray,INSERT_VALUES);
    ISRestoreIndices(ir,&rows);
    ISRestoreIndices(ic,&cols);
    ISDestroy(&ic);
    ISDestroy(&ir);
  }
  MatDenseRestoreArray(A,&aarray);
  MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
  if (reuse == MAT_INPLACE_MATRIX) {
    MatHeaderReplace(A,&Bmpi);
  } else *B = Bmpi;
  return 0;
}

PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_scal;
  PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    MatZeroEntries(mat_scal);
  } else {
    MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
    m = PETSC_DECIDE;
    PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
    n = PETSC_DECIDE;
    PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
    MatSetSizes(mat_scal,m,n,M,N);
    MatSetType(mat_scal,MATSCALAPACK);
    MatSetUp(mat_scal);
  }
  for (row=rstart;row<rend;row++) {
    MatGetRow(A,row,&ncols,&cols,&vals);
    MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);
    MatRestoreRow(A,row,&ncols,&cols,&vals);
  }
  MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);

  if (reuse == MAT_INPLACE_MATRIX) MatHeaderReplace(A,&mat_scal);
  else *newmat = mat_scal;
  return 0;
}
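
/*
   Usage sketch: from user code the conversions above are reached through
   MatConvert(); e.g. turning an assembled AIJ matrix into a MATSCALAPACK
   copy (variable names are illustrative):

     Mat As;
     MatConvert(A,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);
     ... use As with the ScaLAPACK-backed operations ...
     MatDestroy(&As);
*/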

PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_scal;
  PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       v;

  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    MatZeroEntries(mat_scal);
  } else {
    MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
    m = PETSC_DECIDE;
    PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
    n = PETSC_DECIDE;
    PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
    MatSetSizes(mat_scal,m,n,M,N);
    MatSetType(mat_scal,MATSCALAPACK);
    MatSetUp(mat_scal);
  }
  MatGetRowUpperTriangular(A);
  for (row=rstart;row<rend;row++) {
    MatGetRow(A,row,&ncols,&cols,&vals);
    MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);
    for (j=0;j<ncols;j++) { /* lower triangular part */
      if (cols[j] == row) continue;
      v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
      MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);
    }
    MatRestoreRow(A,row,&ncols,&cols,&vals);
  }
  MatRestoreRowUpperTriangular(A);
  MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);

  if (reuse == MAT_INPLACE_MATRIX) MatHeaderReplace(A,&mat_scal);
  else *newmat = mat_scal;
  return 0;
}

static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscInt       sz=0;

  PetscLayoutSetUp(A->rmap);
  PetscLayoutSetUp(A->cmap);
  if (!a->lld) a->lld = a->locr;

  PetscFree(a->loc);
  PetscIntMultError(a->lld,a->locc,&sz);
  PetscCalloc1(sz,&a->loc);
  PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));

  A->preallocated = PETSC_TRUE;
  return 0;
}

static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool          flg;
  MPI_Comm           icomm;

  MatStashDestroy_Private(&A->stash);
  PetscFree(a->loc);
  PetscFree(a->pivots);
  PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
  MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
  if (--grid->grid_refct == 0) {
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    PetscFree(grid);
    MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);
  }
  PetscCommDestroy(&icomm);
  PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);
  PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
  PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);
  PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);
  PetscFree(A->data);
  return 0;
}

PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   info=0;
  PetscBool      flg;

  PetscLayoutSetUp(A->rmap);
  PetscLayoutSetUp(A->cmap);

  /* check that the layout is as enforced by MatCreateScaLAPACK() */
  MatScaLAPACKCheckLayout(A->rmap,&flg);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATSCALAPACK requires equal local row sizes on all processes (except possibly the last); consider using MatCreateScaLAPACK()");
  MatScaLAPACKCheckLayout(A->cmap,&flg);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATSCALAPACK requires equal local column sizes on all processes (except possibly the last); consider using MatCreateScaLAPACK()");

  /* compute local sizes */
  PetscBLASIntCast(A->rmap->N,&a->M);
  PetscBLASIntCast(A->cmap->N,&a->N);
  a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
  a->lld  = PetscMax(1,a->locr);

  /* allocate local array */
  MatScaLAPACKSetPreallocation(A);

  /* set up ScaLAPACK descriptor */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
  return 0;
}

PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
{
  PetscInt       nstash,reallocs;

  if (A->nooffprocentries) return 0;
  MatStashScatterBegin_Private(A,&A->stash,NULL);
  MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);
  PetscInfo(A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);
  return 0;
}

PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscMPIInt    n;
  PetscInt       i,flg,*row,*col;
  PetscScalar    *val;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  if (A->nooffprocentries) return 0;
  while (1) {
    MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);
    if (!flg) break;
    for (i=0;i<n;i++) {
      PetscBLASIntCast(row[i]+1,&gridx);
      PetscBLASIntCast(col[i]+1,&gcidx);
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      switch (A->insertmode) {
        case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
        case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
        default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
      }
    }
  }
  MatStashScatterEnd_Private(&A->stash);
  return 0;
}

PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
{
  Mat            Adense,As;
  MPI_Comm       comm;

  PetscObjectGetComm((PetscObject)newMat,&comm);
  MatCreate(comm,&Adense);
  MatSetType(Adense,MATDENSE);
  MatLoad(Adense,viewer);
  MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);
  MatDestroy(&Adense);
  MatHeaderReplace(newMat,&As);
  return 0;
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {
       MatSetValues_ScaLAPACK,
       0,
       0,
       MatMult_ScaLAPACK,
/* 4*/ MatMultAdd_ScaLAPACK,
       MatMultTranspose_ScaLAPACK,
       MatMultTransposeAdd_ScaLAPACK,
       MatSolve_ScaLAPACK,
       MatSolveAdd_ScaLAPACK,
       0,
/*10*/ 0,
       MatLUFactor_ScaLAPACK,
       MatCholeskyFactor_ScaLAPACK,
       0,
       MatTranspose_ScaLAPACK,
/*15*/ MatGetInfo_ScaLAPACK,
       0,
       MatGetDiagonal_ScaLAPACK,
       MatDiagonalScale_ScaLAPACK,
       MatNorm_ScaLAPACK,
/*20*/ MatAssemblyBegin_ScaLAPACK,
       MatAssemblyEnd_ScaLAPACK,
       MatSetOption_ScaLAPACK,
       MatZeroEntries_ScaLAPACK,
/*24*/ 0,
       MatLUFactorSymbolic_ScaLAPACK,
       MatLUFactorNumeric_ScaLAPACK,
       MatCholeskyFactorSymbolic_ScaLAPACK,
       MatCholeskyFactorNumeric_ScaLAPACK,
/*29*/ MatSetUp_ScaLAPACK,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_ScaLAPACK,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_ScaLAPACK,
       0,
       0,
       0,
       MatCopy_ScaLAPACK,
/*44*/ 0,
       MatScale_ScaLAPACK,
       MatShift_ScaLAPACK,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       0,
       0,
       0,
/*59*/ 0,
       MatDestroy_ScaLAPACK,
       MatView_ScaLAPACK,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ 0,
       0,
       MatConvert_ScaLAPACK_Dense,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_ScaLAPACK,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       MatMatMultNumeric_ScaLAPACK,
       0,
       0,
/*94*/ 0,
       0,
       0,
       MatMatTransposeMultNumeric_ScaLAPACK,
       0,
/*99*/ MatProductSetFromOptions_ScaLAPACK,
       0,
       0,
       MatConjugate_ScaLAPACK,
       0,
/*104*/0,
       0,
       0,
       0,
       0,
/*109*/MatMatSolve_ScaLAPACK,
       0,
       0,
       0,
       MatMissingDiagonal_ScaLAPACK,
/*114*/0,
       0,
       0,
       0,
       0,
/*119*/0,
       MatHermitianTranspose_ScaLAPACK,
       0,
       0,
       0,
/*124*/0,
       0,
       0,
       0,
       0,
/*129*/0,
       0,
       0,
       0,
       0,
/*134*/0,
       0,
       0,
       0,
       0,
       0,
/*140*/0,
       0,
       0,
       0,
       0,
/*145*/0,
       0,
       0
};

static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscInt           count,*sindices,**rindices,i,j,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;
  PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;

  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /*  first count number of contributors to each processor */
  PetscCalloc1(size,&nlengths);
  PetscMalloc1(stash->n+1,&owner);

  i     = j    = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l=0; l<space->local_used; l++) {
      PetscBLASIntCast(space->idx[l]+1,&gridx);
      PetscBLASIntCast(space->idy[l]+1,&gcidx);
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
      nlengths[j]++; owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  PetscCalloc1(size,&sizes);
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* determine the number of messages to expect, their lengths, and their source ranks */
   PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);
   PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
   /* row and column indices travel together, so the lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *=2;
   PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
   /* values come in chunks of size bs2 (and the earlier factor of 2 is removed) */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
   PetscFree(onodes);
   PetscFree(olengths);}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);
  PetscMalloc1(2*nsends,&send_waits);
  PetscMalloc2(size,&startv,size,&starti);
  /* use 2 sends per target process: one with the indices, one with the values */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
      MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
    }
  }
#if defined(PETSC_USE_INFO)
  PetscInfo(NULL,"No of messages: %" PetscInt_FMT "\n",nsends);
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt))));
    }
  }
#endif
  PetscFree(nlengths);
  PetscFree(owner);
  PetscFree2(startv,starti);
  PetscFree(sizes);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscMalloc1(2*nreceives,&recv_waits);

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscFree(recv_waits1);
  PetscFree(recv_waits2);

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  return 0;
}

1555: static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1556: {
1557:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

1562:   PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1563:   PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1564:   return 0;
1565: }

1567: /*@
1568:    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1569:    the ScaLAPACK matrix

1571:    Logically Collective on A

1573:    Input Parameters:
1574: +  A  - a MATSCALAPACK matrix
1575: .  mb - the row block size
1576: -  nb - the column block size

1578:    Level: intermediate

1580: .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1581: @*/
1582: PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1583: {
1587:   PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));
1588:   return 0;
1589: }
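
/*
   Usage sketch for MatScaLAPACKSetBlockSizes(), with error checking elided and
   global sizes M,N assumed to be defined; the block sizes are set before
   MatSetUp() so that they take effect when the descriptors are built:

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATSCALAPACK);
     MatScaLAPACKSetBlockSizes(A,32,32);
     MatSetUp(A);
*/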

1591: static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1592: {
1593:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;

1595:   if (mb) *mb = a->mb;
1596:   if (nb) *nb = a->nb;
1597:   return 0;
1598: }

1600: /*@
1601:    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1602:    the ScaLAPACK matrix

1604:    Not collective

1606:    Input Parameter:
1607: .  A  - a MATSCALAPACK matrix

1609:    Output Parameters:
1610: +  mb - the row block size
1611: -  nb - the column block size

1613:    Level: intermediate

1615: .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1616: @*/
1617: PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1618: {
1620:   PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));
1621:   return 0;
1622: }
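
/*
   Usage sketch for MatScaLAPACKGetBlockSizes(); either output pointer may be
   passed as NULL if that value is not needed:

     PetscInt mb,nb;
     MatScaLAPACKGetBlockSizes(A,&mb,&nb);
     PetscPrintf(PETSC_COMM_WORLD,"blocks: %" PetscInt_FMT " x %" PetscInt_FMT "\n",mb,nb);
*/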

1624: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1625: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);

1627: /*MC
1628:    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package

1630:    Use ./configure --download-scalapack to install PETSc with ScaLAPACK support

1632:    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver

1634:    Options Database Keys:
1635: +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1636: .  -mat_scalapack_grid_height - sets the grid height for the 2D block-cyclic distribution of the internal matrix
1637: -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by a comma)

1639:    Level: beginner

1641: .seealso: MATDENSE, MATELEMENTAL
1642: M*/
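
/*
   Example: a code that calls MatSetFromOptions() can switch to this type, and
   tune it, entirely from the command line. With a hypothetical executable
   ./app that creates its matrix as

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetFromOptions(A);
     MatSetUp(A);

   a run such as

     mpiexec -n 4 ./app -mat_type scalapack -mat_scalapack_block_sizes 16,32 \
                        -pc_type lu -pc_factor_mat_solver_type scalapack

   selects MATSCALAPACK with 16x32 blocks and uses ScaLAPACK's LU as the direct solver.
*/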

1644: PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1645: {
1646:   Mat_ScaLAPACK      *a;
1647:   PetscErrorCode     ierr;
1648:   PetscBool          flg,flg1;
1649:   Mat_ScaLAPACK_Grid *grid;
1650:   MPI_Comm           icomm;
1651:   PetscBLASInt       nprow,npcol,myrow,mycol;
1652:   PetscInt           optv1,k=2,array[2]={0,0};
1653:   PetscMPIInt        size;

1655:   PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1656:   A->insertmode = NOT_SET_VALUES;

1658:   MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);
1659:   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1660:   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1661:   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1662:   A->stash.ScatterDestroy = NULL;

1664:   PetscNewLog(A,&a);
1665:   A->data = (void*)a;

1667:   /* Grid needs to be shared between multiple Mats on the same communicator, implemented via attribute caching on the MPI_Comm */
1668:   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1669:     MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);
1670:     PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);
1671:     PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite);
1672:   }
1673:   PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1674:   MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1675:   if (!flg) {
1676:     PetscNewLog(A,&grid);

1678:     MPI_Comm_size(icomm,&size);
1679:     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);  /* start from a near-square grid */

1681:     PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");
1682:     PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);
1683:     if (flg1) {
1685:       grid->nprow = optv1;
1686:     }
1687:     PetscOptionsEnd();

1689:     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1690:     grid->npcol = size/grid->nprow;
1691:     PetscBLASIntCast(grid->nprow,&nprow);
1692:     PetscBLASIntCast(grid->npcol,&npcol);
1693:     grid->ictxt = Csys2blacs_handle(icomm);
1694:     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1695:     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1696:     grid->grid_refct = 1;
1697:     grid->nprow      = nprow;
1698:     grid->npcol      = npcol;
1699:     grid->myrow      = myrow;
1700:     grid->mycol      = mycol;
1701:     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1702:     grid->ictxrow = Csys2blacs_handle(icomm);
1703:     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1704:     grid->ictxcol = Csys2blacs_handle(icomm);
1705:     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1706:     MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);

1708:   } else grid->grid_refct++;
1709:   PetscCommDestroy(&icomm);
1710:   a->grid = grid;
1711:   a->mb   = DEFAULT_BLOCKSIZE;
1712:   a->nb   = DEFAULT_BLOCKSIZE;

1714:   PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");
1715:   PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);
1716:   if (flg) {
1717:     a->mb = array[0];
1718:     a->nb = (k>1)? array[1]: a->mb;
1719:   }
1720:   PetscOptionsEnd();

1722:   a->roworiented = PETSC_TRUE;
1723:   PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);
1724:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);
1725:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);
1726:   PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);
1727:   return 0;
1728: }
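
/*
   Aside: the grid sharing above is the standard MPI attribute-caching pattern.
   A minimal sketch of the pattern in isolation, where CreateObjectForComm() is
   a hypothetical constructor and reference counting and cleanup are omitted:
*/
extern void *CreateObjectForComm(MPI_Comm);   /* hypothetical constructor */

static int Shared_keyval = MPI_KEYVAL_INVALID;

static void *GetSharedObject(MPI_Comm comm)
{
  void *obj;
  int  found;
  if (Shared_keyval == MPI_KEYVAL_INVALID) {  /* create the key once per process */
    MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Shared_keyval,NULL);
  }
  MPI_Comm_get_attr(comm,Shared_keyval,&obj,&found);
  if (!found) {                               /* first object on this communicator */
    obj = CreateObjectForComm(comm);
    MPI_Comm_set_attr(comm,Shared_keyval,obj);
  }
  return obj;
}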

1730: /*@C
1731:    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1732:    (2D block cyclic distribution).

1734:    Collective

1736:    Input Parameters:
1737: +  comm - MPI communicator
1738: .  mb   - row block size (or PETSC_DECIDE to use a default block size)
1739: .  nb   - column block size (or PETSC_DECIDE to use a default block size)
1740: .  M    - number of global rows
1741: .  N    - number of global columns
1742: .  rsrc - coordinate of process that owns the first row of the distributed matrix
1743: -  csrc - coordinate of process that owns the first column of the distributed matrix

1745:    Output Parameter:
1746: .  A - the matrix

1748:    Options Database Keys:
1749: .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by a comma)

1751:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1752:    MatXXXXSetPreallocation() paradigm instead of calling this routine directly.
1753:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

1755:    Notes:
1756:    If PETSC_DECIDE is used for the block sizes, then the default block size
1757:    (DEFAULT_BLOCKSIZE, currently 64) is used.

1759:    Storage Information:
1760:    Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1761:    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1762:    significance and are thus ignored. The block sizes refer to the values
1763:    used for the distributed matrix; they do not have the same meaning as in BAIJ.

1765:    Level: intermediate

1767: .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1768: @*/
1769: PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1770: {
1771:   Mat_ScaLAPACK  *a;
1772:   PetscInt       m,n;

1774:   MatCreate(comm,A);
1775:   MatSetType(*A,MATSCALAPACK);
1777:   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1778:   m = PETSC_DECIDE;
1779:   PetscSplitOwnershipEqual(comm,&m,&M);
1780:   n = PETSC_DECIDE;
1781:   PetscSplitOwnershipEqual(comm,&n,&N);
1782:   MatSetSizes(*A,m,n,M,N);
1783:   a = (Mat_ScaLAPACK*)(*A)->data;
1784:   PetscBLASIntCast(M,&a->M);
1785:   PetscBLASIntCast(N,&a->N);
1786:   PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1787:   PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1788:   PetscBLASIntCast(rsrc,&a->rsrc);
1789:   PetscBLASIntCast(csrc,&a->csrc);
1790:   MatSetUp(*A);
1791:   return 0;
1792: }
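
/*
   Usage sketch for MatCreateScaLAPACK(), with error checking elided: create a
   100x100 matrix with default block sizes, fill it, and assemble (any process
   may set any entry; off-process values travel through the stash shown above):

     Mat A;
     MatCreateScaLAPACK(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,0,0,&A);
     MatSetValue(A,0,0,1.0,INSERT_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDestroy(&A);
*/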