Actual source code: matscalapack.c

  1: #include <petsc/private/petscscalapack.h>

  3: const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
  4: "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
  5: "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
  6: "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
  7: "       TITLE = {Sca{LAPACK} Users' Guide},\n"
  8: "       PUBLISHER = {SIAM},\n"
  9: "       ADDRESS = {Philadelphia, PA},\n"
 10: "       YEAR = 1997\n"
 11: "}\n";
 12: static PetscBool ScaLAPACKCite = PETSC_FALSE;

 14: #define DEFAULT_BLOCKSIZE 64

 16: /*
 17:     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
 18:   is attached to a communicator; in this case the attribute is a Mat_ScaLAPACK_Grid
 19: */
 20: static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;

 22: static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
 23: {

 27:   PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");
 28:   MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);
 29:   return(0);
 30: }
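
/*
   A minimal sketch (not part of this file) of the attribute-caching pattern used
   here: create the keyval once, then look up or attach a grid structure on a given
   communicator.  The names MyGrid and get_or_create_grid are hypothetical; the real
   code stores a Mat_ScaLAPACK_Grid and reference-counts it via grid_refct.

     #include <mpi.h>
     #include <stdlib.h>

     static int keyval = MPI_KEYVAL_INVALID;
     typedef struct { int refct; } MyGrid;

     static int get_or_create_grid(MPI_Comm comm,MyGrid **grid)
     {
       int flag;
       if (keyval == MPI_KEYVAL_INVALID) {
         MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&keyval,NULL);
       }
       MPI_Comm_get_attr(comm,keyval,grid,&flag);
       if (!flag) {
         *grid = (MyGrid*)calloc(1,sizeof(MyGrid));
         (*grid)->refct = 1;
         MPI_Comm_set_attr(comm,keyval,*grid);
       } else (*grid)->refct++;
       return 0;
     }
*/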

 32: static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
 33: {
 34:   PetscErrorCode    ierr;
 35:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
 36:   PetscBool         iascii;
 37:   PetscViewerFormat format;
 38:   Mat               Adense;

 41:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 42:   if (iascii) {
 43:     PetscViewerGetFormat(viewer,&format);
 44:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
 45:       PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);
 46:       PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);
 47:       PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);
 48:       PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);
 49:       return(0);
 50:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
 51:       return(0);
 52:     }
 53:   }
 54:   /* convert to dense format and call MatView() */
 55:   MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);
 56:   MatView(Adense,viewer);
 57:   MatDestroy(&Adense);
 58:   return(0);
 59: }
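
/*
   Usage sketch: with the INFO formats the distribution summary above is printed;
   any other format goes through the dense conversion path.  Assumes A is an
   assembled MATSCALAPACK matrix.

     PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);
     MatView(A,PETSC_VIEWER_STDOUT_WORLD);
     PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);
*/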

 61: static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
 62: {
 64:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
 65:   PetscLogDouble isend[2],irecv[2];

 68:   info->block_size = 1.0;

 70:   isend[0] = a->lld*a->locc;     /* locally allocated */
 71:   isend[1] = a->locr*a->locc;    /* used submatrix */
 72:   if (flag == MAT_LOCAL) {
 73:     info->nz_allocated   = isend[0];
 74:     info->nz_used        = isend[1];
 75:   } else if (flag == MAT_GLOBAL_MAX) {
 76:     MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));
 77:     info->nz_allocated   = irecv[0];
 78:     info->nz_used        = irecv[1];
 79:   } else if (flag == MAT_GLOBAL_SUM) {
 80:     MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));
 81:     info->nz_allocated   = irecv[0];
 82:     info->nz_used        = irecv[1];
 83:   }

 85:   info->nz_unneeded       = 0;
 86:   info->assemblies        = A->num_ass;
 87:   info->mallocs           = 0;
 88:   info->memory            = ((PetscObject)A)->mem;
 89:   info->fill_ratio_given  = 0;
 90:   info->fill_ratio_needed = 0;
 91:   info->factor_mallocs    = 0;
 92:   return(0);
 93: }

 95: PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
 96: {
 98:   switch (op) {
 99:     case MAT_NEW_NONZERO_LOCATIONS:
100:     case MAT_NEW_NONZERO_LOCATION_ERR:
101:     case MAT_NEW_NONZERO_ALLOCATION_ERR:
102:     case MAT_SYMMETRIC:
103:     case MAT_SORTED_FULL:
104:     case MAT_HERMITIAN:
105:       break;
106:     default:
107:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
108:   }
109:   return(0);
110: }

112: static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
113: {
114:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
116:   PetscInt       i,j;
117:   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

120:   for (i=0;i<nr;i++) {
121:     if (rows[i] < 0) continue;
122:     PetscBLASIntCast(rows[i]+1,&gridx);
123:     for (j=0;j<nc;j++) {
124:       if (cols[j] < 0) continue;
125:       PetscBLASIntCast(cols[j]+1,&gcidx);
126:       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
127:       if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
128:         switch (imode) {
129:           case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
130:           case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
131:           default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
132:         }
133:       } else {
134:         if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
135:         A->assembled = PETSC_FALSE;
136:         MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));
137:       }
138:     }
139:   }
140:   return(0);
141: }
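
/*
   Caller-side sketch: entries whose target lies on another process are stashed
   above and exchanged during assembly, so the usual assembly calls must follow
   MatSetValues(), as with any PETSc matrix type.

     PetscInt    i = 0,j = 0;
     PetscScalar v = 1.0;
     MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/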

143: static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
144: {
146:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
147:   PetscScalar    *x2d,*y2d,alpha=1.0;
148:   const PetscInt *ranges;
149:   PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;

152:   if (transpose) {

154:     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
155:     PetscLayoutGetRanges(A->rmap,&ranges);
156:     PetscBLASIntCast(ranges[1],&mb);  /* x block size */
157:     xlld = PetscMax(1,A->rmap->n);
158:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
160:     PetscLayoutGetRanges(A->cmap,&ranges);
161:     PetscBLASIntCast(ranges[1],&nb);  /* y block size */
162:     ylld = 1;
163:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));

166:     /* allocate 2d vectors */
167:     lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
168:     lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
169:     PetscMalloc2(lszx,&x2d,lszy,&y2d);
170:     xlld = PetscMax(1,lszx);

172:     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
173:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
175:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));

178:     /* redistribute x as a column of a 2d matrix */
179:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

181:     /* redistribute y as a row of a 2d matrix */
182:     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

184:     /* call PBLAS subroutine */
185:     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

187:     /* redistribute y from a row of a 2d matrix */
188:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

190:   } else {   /* non-transpose */

192:     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
193:     PetscLayoutGetRanges(A->cmap,&ranges);
194:     PetscBLASIntCast(ranges[1],&nb);  /* x block size */
195:     xlld = 1;
196:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
198:     PetscLayoutGetRanges(A->rmap,&ranges);
199:     PetscBLASIntCast(ranges[1],&mb);  /* y block size */
200:     ylld = PetscMax(1,A->rmap->n);
201:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));

204:     /* allocate 2d vectors */
205:     lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
206:     lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
207:     PetscMalloc2(lszx,&x2d,lszy,&y2d);
208:     ylld = PetscMax(1,lszy);

210:     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
211:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
213:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));

216:     /* redistribute x as a row of a 2d matrix */
217:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

219:     /* redistribute y as a column of a 2d matrix */
220:     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

222:     /* call PBLAS subroutine */
223:     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

225:     /* redistribute y from a column of a 2d matrix */
226:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

228:   }
229:   PetscFree2(x2d,y2d);
230:   return(0);
231: }

233: static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
234: {
235:   PetscErrorCode    ierr;
236:   const PetscScalar *xarray;
237:   PetscScalar       *yarray;

240:   VecGetArrayRead(x,&xarray);
241:   VecGetArray(y,&yarray);
242:   MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);
243:   VecRestoreArrayRead(x,&xarray);
244:   VecRestoreArray(y,&yarray);
245:   return(0);
246: }
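
/*
   Usage sketch: MatCreateVecs() yields vectors whose 1d layouts match the matrix
   row and column maps, which is what the vector descriptors above assume.

     Vec x,y;
     MatCreateVecs(A,&x,&y);
     VecSet(x,1.0);
     MatMult(A,x,y);
*/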

248: static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
249: {
250:   PetscErrorCode    ierr;
251:   const PetscScalar *xarray;
252:   PetscScalar       *yarray;

255:   VecGetArrayRead(x,&xarray);
256:   VecGetArray(y,&yarray);
257:   MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);
258:   VecRestoreArrayRead(x,&xarray);
259:   VecRestoreArray(y,&yarray);
260:   return(0);
261: }

263: static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
264: {
265:   PetscErrorCode    ierr;
266:   const PetscScalar *xarray;
267:   PetscScalar       *zarray;

270:   if (y != z) { VecCopy(y,z); }
271:   VecGetArrayRead(x,&xarray);
272:   VecGetArray(z,&zarray);
273:   MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);
274:   VecRestoreArrayRead(x,&xarray);
275:   VecRestoreArray(z,&zarray);
276:   return(0);
277: }

279: static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
280: {
281:   PetscErrorCode    ierr;
282:   const PetscScalar *xarray;
283:   PetscScalar       *zarray;

286:   if (y != z) { VecCopy(y,z); }
287:   VecGetArrayRead(x,&xarray);
288:   VecGetArray(z,&zarray);
289:   MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);
290:   VecRestoreArrayRead(x,&xarray);
291:   VecRestoreArray(z,&zarray);
292:   return(0);
293: }

295: PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
296: {
297:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
298:   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
299:   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
300:   PetscScalar   sone=1.0,zero=0.0;
301:   PetscBLASInt  one=1;

304:   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
305:   C->assembled = PETSC_TRUE;
306:   return(0);
307: }

309: PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
310: {

314:   MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
315:   MatSetType(C,MATSCALAPACK);
316:   MatSetUp(C);
317:   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
318:   return(0);
319: }

321: static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
322: {
323:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
324:   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
325:   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
326:   PetscScalar   sone=1.0,zero=0.0;
327:   PetscBLASInt  one=1;

330:   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
331:   C->assembled = PETSC_TRUE;
332:   return(0);
333: }

335: static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
336: {

340:   MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
341:   MatSetType(C,MATSCALAPACK);
342:   MatSetUp(C);
343:   return(0);
344: }

346: /* --------------------------------------- */
347: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
348: {
350:   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
351:   C->ops->productsymbolic = MatProductSymbolic_AB;
352:   return(0);
353: }

355: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
356: {
358:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
359:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
360:   return(0);
361: }

363: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
364: {
366:   Mat_Product    *product = C->product;

369:   switch (product->type) {
370:     case MATPRODUCT_AB:
371:       MatProductSetFromOptions_ScaLAPACK_AB(C);
372:       break;
373:     case MATPRODUCT_ABt:
374:       MatProductSetFromOptions_ScaLAPACK_ABt(C);
375:       break;
376:     default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
377:   }
378:   return(0);
379: }
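
/*
   Usage sketch of the MatProduct interface that reaches the routines above
   (C = A*B with both operands of type MATSCALAPACK):

     Mat C;
     MatProductCreate(A,B,NULL,&C);
     MatProductSetType(C,MATPRODUCT_AB);
     MatProductSetFromOptions(C);
     MatProductSymbolic(C);
     MatProductNumeric(C);
*/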
380: /* --------------------------------------- */

382: static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
383: {
384:   PetscErrorCode    ierr;
385:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
386:   PetscScalar       *darray,*d2d,v;
387:   const PetscInt    *ranges;
388:   PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

391:   VecGetArray(D,&darray);

393:   if (A->rmap->N<=A->cmap->N) {   /* row version */

395:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
396:     PetscLayoutGetRanges(A->rmap,&ranges);
397:     PetscBLASIntCast(ranges[1],&mb);  /* D block size */
398:     dlld = PetscMax(1,A->rmap->n);
399:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));

402:     /* allocate 2d vector */
403:     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
404:     PetscCalloc1(lszd,&d2d);
405:     dlld = PetscMax(1,lszd);

407:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
408:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));

411:     /* collect diagonal */
412:     for (j=1;j<=a->M;j++) {
413:       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
414:       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
415:     }

417:     /* redistribute d from a column of a 2d matrix */
418:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
419:     PetscFree(d2d);

421:   } else {   /* column version */

423:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
424:     PetscLayoutGetRanges(A->cmap,&ranges);
425:     PetscBLASIntCast(ranges[1],&nb);  /* D block size */
426:     dlld = 1;
427:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));

430:     /* allocate 2d vector */
431:     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
432:     PetscCalloc1(lszd,&d2d);

434:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
435:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));

438:     /* collect diagonal */
439:     for (j=1;j<=a->N;j++) {
440:       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
441:       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
442:     }

444:     /* redistribute d from a row of a 2d matrix */
445:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
446:     PetscFree(d2d);
447:   }

449:   VecRestoreArray(D,&darray);
450:   VecAssemblyBegin(D);
451:   VecAssemblyEnd(D);
452:   return(0);
453: }

455: static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
456: {
457:   PetscErrorCode    ierr;
458:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
459:   const PetscScalar *d;
460:   const PetscInt    *ranges;
461:   PetscScalar       *d2d;
462:   PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

465:   if (R) {
466:     VecGetArrayRead(R,(const PetscScalar **)&d);
467:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
468:     PetscLayoutGetRanges(A->cmap,&ranges);
469:     PetscBLASIntCast(ranges[1],&nb);  /* D block size */
470:     dlld = 1;
471:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));

474:     /* allocate 2d vector */
475:     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
476:     PetscCalloc1(lszd,&d2d);

478:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
479:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));

482:     /* redistribute d to a row of a 2d matrix */
483:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));

485:     /* broadcast along process columns */
486:     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
487:     else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);

489:     /* local scaling */
490:     for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];

492:     PetscFree(d2d);
493:     VecRestoreArrayRead(R,(const PetscScalar **)&d);
494:   }
495:   if (L) {
496:     VecGetArrayRead(L,(const PetscScalar **)&d);
497:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
498:     PetscLayoutGetRanges(A->rmap,&ranges);
499:     PetscBLASIntCast(ranges[1],&mb);  /* D block size */
500:     dlld = PetscMax(1,A->rmap->n);
501:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));

504:     /* allocate 2d vector */
505:     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
506:     PetscCalloc1(lszd,&d2d);
507:     dlld = PetscMax(1,lszd);

509:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
510:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));

513:     /* redistribute d to a column of a 2d matrix */
514:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

516:     /* broadcast along process rows */
517:     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
518:     else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

520:     /* local scaling */
521:     for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

523:     PetscFree(d2d);
524:     VecRestoreArrayRead(L,(const PetscScalar **)&d);
525:   }
526:   return(0);
527: }

529: static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
530: {
532:   *missing = PETSC_FALSE;
533:   return(0);
534: }

536: static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
537: {
538:   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
539:   PetscBLASInt  n,one=1;

542:   n = x->lld*x->locc;
543:   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
544:   return(0);
545: }

547: static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
548: {
549:   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
550:   PetscBLASInt  i,n;
551:   PetscScalar   v;

554:   n = PetscMin(x->M,x->N);
555:   for (i=1;i<=n;i++) {
556:     PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
557:     v += alpha;
558:     PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
559:   }
560:   return(0);
561: }

563: static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
564: {
566:   Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
567:   Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
568:   PetscBLASInt   one=1;
569:   PetscScalar    beta=1.0;

572:   MatScaLAPACKCheckDistribution(Y,1,X,3);
573:   PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
574:   PetscObjectStateIncrease((PetscObject)Y);
575:   return(0);
576: }

578: static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
579: {
581:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
582:   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;

585:   PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
586:   PetscObjectStateIncrease((PetscObject)B);
587:   return(0);
588: }

590: static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
591: {
592:   Mat            Bs;
593:   MPI_Comm       comm;
594:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;

598:   PetscObjectGetComm((PetscObject)A,&comm);
599:   MatCreate(comm,&Bs);
600:   MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
601:   MatSetType(Bs,MATSCALAPACK);
602:   b = (Mat_ScaLAPACK*)Bs->data;
603:   b->M    = a->M;
604:   b->N    = a->N;
605:   b->mb   = a->mb;
606:   b->nb   = a->nb;
607:   b->rsrc = a->rsrc;
608:   b->csrc = a->csrc;
609:   MatSetUp(Bs);
610:   *B = Bs;
611:   if (op == MAT_COPY_VALUES) {
612:     PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
613:   }
614:   Bs->assembled = PETSC_TRUE;
615:   return(0);
616: }

618: static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
619: {
621:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
622:   Mat            Bs = *B;
623:   PetscBLASInt   one=1;
624:   PetscScalar    sone=1.0,zero=0.0;
625: #if defined(PETSC_USE_COMPLEX)
626:   PetscInt       i,j;
627: #endif

630:   if (reuse == MAT_INITIAL_MATRIX) {
631:     MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);
632:     *B = Bs;
633:   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
634:   b = (Mat_ScaLAPACK*)Bs->data;
635:   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
636: #if defined(PETSC_USE_COMPLEX)
637:   /* undo conjugation */
638:   for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
639: #endif
640:   Bs->assembled = PETSC_TRUE;
641:   return(0);
642: }

644: static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
645: {
646:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
647:   PetscInt      i,j;

650:   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
651:   return(0);
652: }

654: static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
655: {
657:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
658:   Mat            Bs = *B;
659:   PetscBLASInt   one=1;
660:   PetscScalar    sone=1.0,zero=0.0;

663:   if (reuse == MAT_INITIAL_MATRIX) {
664:     MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);
665:     *B = Bs;
666:   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
667:   b = (Mat_ScaLAPACK*)Bs->data;
668:   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
669:   Bs->assembled = PETSC_TRUE;
670:   return(0);
671: }

673: static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
674: {
676:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
677:   PetscScalar    *x,*x2d;
678:   const PetscInt *ranges;
679:   PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

682:   VecCopy(B,X);
683:   VecGetArray(X,&x);

685:   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
686:   PetscLayoutGetRanges(A->rmap,&ranges);
687:   PetscBLASIntCast(ranges[1],&mb);  /* x block size */
688:   xlld = PetscMax(1,A->rmap->n);
689:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));

692:   /* allocate 2d vector */
693:   lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
694:   PetscMalloc1(lszx,&x2d);
695:   xlld = PetscMax(1,lszx);

697:   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
698:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));

701:   /* redistribute x as a column of a 2d matrix */
702:   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

704:   /* call ScaLAPACK subroutine */
705:   switch (A->factortype) {
706:     case MAT_FACTOR_LU:
707:       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
709:       break;
710:     case MAT_FACTOR_CHOLESKY:
711:       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
713:       break;
714:     default:
715:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
716:   }

718:   /* redistribute x from a column of a 2d matrix */
719:   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

721:   PetscFree(x2d);
722:   VecRestoreArray(X,&x);
723:   return(0);
724: }

726: static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
727: {

731:   MatSolve_ScaLAPACK(A,B,X);
732:   VecAXPY(X,1,Y);
733:   return(0);
734: }

736: static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
737: {
739:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
740:   PetscBool      flg1,flg2;
741:   PetscBLASInt   one=1,info;

744:   PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);
745:   PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);
746:   if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
747:   MatScaLAPACKCheckDistribution(B,1,X,2);
748:   b = (Mat_ScaLAPACK*)B->data;
749:   x = (Mat_ScaLAPACK*)X->data;
750:   PetscArraycpy(x->loc,b->loc,b->lld*b->locc);

752:   switch (A->factortype) {
753:     case MAT_FACTOR_LU:
754:       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
756:       break;
757:     case MAT_FACTOR_CHOLESKY:
758:       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
760:       break;
761:     default:
762:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
763:   }
764:   return(0);
765: }

767: static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
768: {
770:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
771:   PetscBLASInt   one=1,info;

774:   if (!a->pivots) {
775:     PetscMalloc1(a->locr+a->mb,&a->pivots);
 776:     PetscLogObjectMemory((PetscObject)A,(a->locr+a->mb)*sizeof(PetscBLASInt));
777:   }
778:   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
780:   A->factortype = MAT_FACTOR_LU;
781:   A->assembled  = PETSC_TRUE;

783:   PetscFree(A->solvertype);
784:   PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
785:   return(0);
786: }

788: static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
789: {

793:   MatCopy(A,F,SAME_NONZERO_PATTERN);
794:   MatLUFactor_ScaLAPACK(F,0,0,info);
795:   return(0);
796: }

798: static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
799: {
801:   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
802:   return(0);
803: }

805: static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
806: {
808:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
809:   PetscBLASInt   one=1,info;

812:   PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
814:   A->factortype = MAT_FACTOR_CHOLESKY;
815:   A->assembled  = PETSC_TRUE;

817:   PetscFree(A->solvertype);
818:   PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
819:   return(0);
820: }

822: static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
823: {

827:   MatCopy(A,F,SAME_NONZERO_PATTERN);
828:   MatCholeskyFactor_ScaLAPACK(F,0,info);
829:   return(0);
830: }

832: static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
833: {
835:   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
836:   return(0);
837: }

839: PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
840: {
842:   *type = MATSOLVERSCALAPACK;
843:   return(0);
844: }

846: static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
847: {
848:   Mat            B;
849:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

853:   /* Create the factorization matrix */
854:   MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B);
855:   B->trivialsymbolic = PETSC_TRUE;
856:   B->factortype = ftype;
857:   PetscFree(B->solvertype);
858:   PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);

860:   PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);
861:   *F = B;
862:   return(0);
863: }

865: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
866: {

870:   MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);
871:   MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);
872:   return(0);
873: }
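
/*
   Usage sketch: once registered, the factorizations are reached through the
   generic factor interface (or with -pc_factor_mat_solver_type scalapack from
   the command line).  Assumes A is MATSCALAPACK and b,x are compatible vectors.

     Mat           F;
     MatFactorInfo info;
     MatFactorInfoInitialize(&info);
     MatGetFactor(A,MATSOLVERSCALAPACK,MAT_FACTOR_LU,&F);
     MatLUFactorSymbolic(F,A,NULL,NULL,&info);
     MatLUFactorNumeric(F,A,&info);
     MatSolve(F,b,x);
     MatDestroy(&F);
*/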

875: static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
876: {
878:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
879:   PetscBLASInt   one=1,lwork=0;
880:   const char     *ntype;
881:   PetscScalar    *work=NULL,dummy;

884:   switch (type) {
885:     case NORM_1:
886:       ntype = "1";
887:       lwork = PetscMax(a->locr,a->locc);
888:       break;
889:     case NORM_FROBENIUS:
890:       ntype = "F";
891:       work  = &dummy;
892:       break;
893:     case NORM_INFINITY:
894:       ntype = "I";
895:       lwork = PetscMax(a->locr,a->locc);
896:       break;
897:     default:
898:       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
899:   }
900:   if (lwork) { PetscMalloc1(lwork,&work); }
901:   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
902:   if (lwork) { PetscFree(work); }
903:   return(0);
904: }

906: static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
907: {
908:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

912:   PetscArrayzero(a->loc,a->lld*a->locc);
913:   return(0);
914: }

916: static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
917: {
918:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
920:   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;

923:   if (rows) {
924:     n     = a->locr;
925:     nb    = a->mb;
926:     isrc  = a->rsrc;
927:     nproc = a->grid->nprow;
928:     iproc = a->grid->myrow;
929:     PetscMalloc1(n,&idx);
930:     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
931:     ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);
932:   }
933:   if (cols) {
934:     n     = a->locc;
935:     nb    = a->nb;
936:     isrc  = a->csrc;
937:     nproc = a->grid->npcol;
938:     iproc = a->grid->mycol;
939:     PetscMalloc1(n,&idx);
940:     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
941:     ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);
942:   }
943:   return(0);
944: }
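
/*
   Worked instance of the local-to-global index formula above, assuming nb=2,
   nproc=3, isrc=0, iproc=1: local entries 0,1 are this process's first block
   and map to global indices 2,3 (offset ((nproc+iproc-isrc)%nproc)*nb = 2);
   local entries 2,3 start the next owned block at nproc*nb*(i/nb) = 6 and map
   to global indices 8,9.
*/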

946: static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
947: {
949:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
950:   Mat            Bmpi;
951:   MPI_Comm       comm;
952:   PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
953:   const PetscInt *ranges,*branges,*cwork;
954:   const PetscScalar *vwork;
955:   PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
956:   PetscScalar    *barray;
957:   PetscBool      differ=PETSC_FALSE;
958:   PetscMPIInt    size;

961:   PetscObjectGetComm((PetscObject)A,&comm);
962:   PetscLayoutGetRanges(A->rmap,&ranges);

964:   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
965:     MPI_Comm_size(comm,&size);
966:     PetscLayoutGetRanges((*B)->rmap,&branges);
967:     for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
968:   }

970:   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
971:     MatCreate(comm,&Bmpi);
972:     m = PETSC_DECIDE;
973:     PetscSplitOwnershipEqual(comm,&m,&M);
974:     n = PETSC_DECIDE;
975:     PetscSplitOwnershipEqual(comm,&n,&N);
976:     MatSetSizes(Bmpi,m,n,M,N);
977:     MatSetType(Bmpi,MATDENSE);
978:     MatSetUp(Bmpi);

980:     /* create ScaLAPACK descriptor for B (1d block distribution) */
981:     PetscBLASIntCast(ranges[1],&bmb);  /* row block size */
982:     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
983:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));

986:     /* redistribute matrix */
987:     MatDenseGetArray(Bmpi,&barray);
988:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
989:     MatDenseRestoreArray(Bmpi,&barray);
990:     MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
991:     MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);

993:     /* transfer rows of auxiliary matrix to the final matrix B */
994:     MatGetOwnershipRange(Bmpi,&rstart,&rend);
995:     for (i=rstart;i<rend;i++) {
996:       MatGetRow(Bmpi,i,&nz,&cwork,&vwork);
997:       MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);
998:       MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);
999:     }
1000:     MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
1001:     MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
1002:     MatDestroy(&Bmpi);

1004:   } else {  /* normal cases */

1006:     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1007:     else {
1008:       MatCreate(comm,&Bmpi);
1009:       m = PETSC_DECIDE;
1010:       PetscSplitOwnershipEqual(comm,&m,&M);
1011:       n = PETSC_DECIDE;
1012:       PetscSplitOwnershipEqual(comm,&n,&N);
1013:       MatSetSizes(Bmpi,m,n,M,N);
1014:       MatSetType(Bmpi,MATDENSE);
1015:       MatSetUp(Bmpi);
1016:     }

1018:     /* create ScaLAPACK descriptor for B (1d block distribution) */
1019:     PetscBLASIntCast(ranges[1],&bmb);  /* row block size */
1020:     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
1021:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));

1024:     /* redistribute matrix */
1025:     MatDenseGetArray(Bmpi,&barray);
1026:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
1027:     MatDenseRestoreArray(Bmpi,&barray);

1029:     MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
1030:     MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
1031:     if (reuse == MAT_INPLACE_MATRIX) {
1032:       MatHeaderReplace(A,&Bmpi);
1033:     } else *B = Bmpi;
1034:   }
1035:   return(0);
1036: }

1038: PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
1039: {
1041:   Mat_ScaLAPACK  *b;
1042:   Mat            Bmpi;
1043:   MPI_Comm       comm;
1044:   PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
1045:   const PetscInt *ranges;
1046:   PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
1047:   PetscScalar    *aarray;
1048:   PetscInt       lda;

1051:   PetscObjectGetComm((PetscObject)A,&comm);

1053:   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1054:   else {
1055:     MatCreate(comm,&Bmpi);
1056:     m = PETSC_DECIDE;
1057:     PetscSplitOwnershipEqual(comm,&m,&M);
1058:     n = PETSC_DECIDE;
1059:     PetscSplitOwnershipEqual(comm,&n,&N);
1060:     MatSetSizes(Bmpi,m,n,M,N);
1061:     MatSetType(Bmpi,MATSCALAPACK);
1062:     MatSetUp(Bmpi);
1063:   }
1064:   b = (Mat_ScaLAPACK*)Bmpi->data;

1066:   /* create ScaLAPACK descriptor for A (1d block distribution) */
1067:   PetscLayoutGetRanges(A->rmap,&ranges);
1068:   PetscBLASIntCast(ranges[1],&amb);  /* row block size */
1069:   MatDenseGetLDA(A,&lda);
1070:   lld = PetscMax(lda,1);  /* local leading dimension */
1071:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));

1074:   /* redistribute matrix */
1075:   MatDenseGetArray(A,&aarray);
1076:   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
1077:   MatDenseRestoreArray(A,&aarray);

1079:   MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
1080:   MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
1081:   if (reuse == MAT_INPLACE_MATRIX) {
1082:     MatHeaderReplace(A,&Bmpi);
1083:   } else *B = Bmpi;
1084:   return(0);
1085: }

1087: PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1088: {
1089:   Mat               mat_scal;
1090:   PetscErrorCode    ierr;
1091:   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1092:   const PetscInt    *cols;
1093:   const PetscScalar *vals;

1096:   if (reuse == MAT_REUSE_MATRIX) {
1097:     mat_scal = *newmat;
1098:     MatZeroEntries(mat_scal);
1099:   } else {
1100:     MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1101:     m = PETSC_DECIDE;
1102:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1103:     n = PETSC_DECIDE;
1104:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1105:     MatSetSizes(mat_scal,m,n,M,N);
1106:     MatSetType(mat_scal,MATSCALAPACK);
1107:     MatSetUp(mat_scal);
1108:   }
1109:   for (row=rstart;row<rend;row++) {
1110:     MatGetRow(A,row,&ncols,&cols,&vals);
1111:     MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);
1112:     MatRestoreRow(A,row,&ncols,&cols,&vals);
1113:   }
1114:   MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1115:   MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);

1117:   if (reuse == MAT_INPLACE_MATRIX) { MatHeaderReplace(A,&mat_scal); }
1118:   else *newmat = mat_scal;
1119:   return(0);
1120: }
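
/*
   Usage sketch: the generic conversion interface lands in the routine above.

     Mat As;
     MatConvert(A,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);
*/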

1122: PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1123: {
1124:   Mat               mat_scal;
1125:   PetscErrorCode    ierr;
1126:   PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1127:   const PetscInt    *cols;
1128:   const PetscScalar *vals;
1129:   PetscScalar       v;

1132:   if (reuse == MAT_REUSE_MATRIX) {
1133:     mat_scal = *newmat;
1134:     MatZeroEntries(mat_scal);
1135:   } else {
1136:     MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1137:     m = PETSC_DECIDE;
1138:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1139:     n = PETSC_DECIDE;
1140:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1141:     MatSetSizes(mat_scal,m,n,M,N);
1142:     MatSetType(mat_scal,MATSCALAPACK);
1143:     MatSetUp(mat_scal);
1144:   }
1145:   MatGetRowUpperTriangular(A);
1146:   for (row=rstart;row<rend;row++) {
1147:     MatGetRow(A,row,&ncols,&cols,&vals);
1148:     MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);
1149:     for (j=0;j<ncols;j++) { /* lower triangular part */
1150:       if (cols[j] == row) continue;
1151:       v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
1152:       MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);
1153:     }
1154:     MatRestoreRow(A,row,&ncols,&cols,&vals);
1155:   }
1156:   MatRestoreRowUpperTriangular(A);
1157:   MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1158:   MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);

1160:   if (reuse == MAT_INPLACE_MATRIX) { MatHeaderReplace(A,&mat_scal); }
1161:   else *newmat = mat_scal;
1162:   return(0);
1163: }

1165: static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1166: {
1167:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1169:   PetscInt       sz=0;

1172:   PetscLayoutSetUp(A->rmap);
1173:   PetscLayoutSetUp(A->cmap);
1174:   if (!a->lld) a->lld = a->locr;

1176:   PetscFree(a->loc);
1177:   PetscIntMultError(a->lld,a->locc,&sz);
1178:   PetscCalloc1(sz,&a->loc);
1179:   PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));

1181:   A->preallocated = PETSC_TRUE;
1182:   return(0);
1183: }

1185: static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1186: {
1187:   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1188:   PetscErrorCode     ierr;
1189:   Mat_ScaLAPACK_Grid *grid;
1190:   PetscBool          flg;
1191:   MPI_Comm           icomm;

1194:   MatStashDestroy_Private(&A->stash);
1195:   PetscFree(a->loc);
1196:   PetscFree(a->pivots);
1197:   PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1198:   MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1199:   if (--grid->grid_refct == 0) {
1200:     Cblacs_gridexit(grid->ictxt);
1201:     Cblacs_gridexit(grid->ictxrow);
1202:     Cblacs_gridexit(grid->ictxcol);
1203:     PetscFree(grid);
1204:     MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);
1205:   }
1206:   PetscCommDestroy(&icomm);
1207:   PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);
1208:   PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
1209:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);
1210:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);
1211:   PetscFree(A->data);
1212:   return(0);
1213: }

1215: PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
1216: {
1218:   const PetscInt *ranges;
1219:   PetscMPIInt    size;
1220:   PetscInt       i,n;

1223:   MPI_Comm_size(map->comm,&size);
1224:   if (size>2) {
1225:     PetscLayoutGetRanges(map,&ranges);
1226:     n = ranges[1]-ranges[0];
1227:     for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
1228:     if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1229:   }
1230:   return(0);
1231: }
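
/*
   Usage sketch: MatCreateScaLAPACK() always produces a layout that passes this
   check, so it is the recommended constructor.  Here mb,nb,M,N,rsrc,csrc are
   placeholders for the block sizes, global sizes and grid origin.

     Mat A;
     MatCreateScaLAPACK(PETSC_COMM_WORLD,mb,nb,M,N,rsrc,csrc,&A);
*/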

1233: PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1234: {
1235:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1237:   PetscBLASInt   info=0;

1240:   PetscLayoutSetUp(A->rmap);
1241:   PetscLayoutSetUp(A->cmap);

1243:   /* check that the layout is as enforced by MatCreateScaLAPACK */
1244:   MatScaLAPACKCheckLayout(A->rmap);
1245:   MatScaLAPACKCheckLayout(A->cmap);

1247:   /* compute local sizes */
1248:   PetscBLASIntCast(A->rmap->N,&a->M);
1249:   PetscBLASIntCast(A->cmap->N,&a->N);
1250:   a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1251:   a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1252:   a->lld  = PetscMax(1,a->locr);

1254:   /* allocate local array */
1255:   MatScaLAPACKSetPreallocation(A);

1257:   /* set up ScaLAPACK descriptor */
1258:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1260:   return(0);
1261: }

1263: PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1264: {
1266:   PetscInt       nstash,reallocs;

1269:   if (A->nooffprocentries) return(0);
1270:   MatStashScatterBegin_Private(A,&A->stash,NULL);
1271:   MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);
1272:   PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
1273:   return(0);
1274: }

1276: PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1277: {
1279:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1280:   PetscMPIInt    n;
1281:   PetscInt       i,flg,*row,*col;
1282:   PetscScalar    *val;
1283:   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

1286:   if (A->nooffprocentries) return(0);
1287:   while (1) {
1288:     MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);
1289:     if (!flg) break;
1290:     for (i=0;i<n;i++) {
1291:       PetscBLASIntCast(row[i]+1,&gridx);
1292:       PetscBLASIntCast(col[i]+1,&gcidx);
1293:       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1294:       if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_LIB,"Something went wrong, received value does not belong to this process");
1295:       switch (A->insertmode) {
1296:         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1297:         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1298:         default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1299:       }
1300:     }
1301:   }
1302:   MatStashScatterEnd_Private(&A->stash);
1303:   return(0);
1304: }

1306: PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1307: {
1309:   Mat            Adense,As;
1310:   MPI_Comm       comm;

1313:   PetscObjectGetComm((PetscObject)newMat,&comm);
1314:   MatCreate(comm,&Adense);
1315:   MatSetType(Adense,MATDENSE);
1316:   MatLoad(Adense,viewer);
1317:   MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);
1318:   MatDestroy(&Adense);
1319:   MatHeaderReplace(newMat,&As);
1320:   return(0);
1321: }
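
/*
   Usage sketch, with a hypothetical binary file matrix.dat: the matrix is read
   as MATDENSE and converted, as implemented above.

     Mat         A;
     PetscViewer viewer;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATSCALAPACK);
     MatLoad(A,viewer);
     PetscViewerDestroy(&viewer);
*/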

1323: /* -------------------------------------------------------------------*/
1324: static struct _MatOps MatOps_Values = {
1325:        MatSetValues_ScaLAPACK,
1326:        0,
1327:        0,
1328:        MatMult_ScaLAPACK,
1329: /* 4*/ MatMultAdd_ScaLAPACK,
1330:        MatMultTranspose_ScaLAPACK,
1331:        MatMultTransposeAdd_ScaLAPACK,
1332:        MatSolve_ScaLAPACK,
1333:        MatSolveAdd_ScaLAPACK,
1334:        0,
1335: /*10*/ 0,
1336:        MatLUFactor_ScaLAPACK,
1337:        MatCholeskyFactor_ScaLAPACK,
1338:        0,
1339:        MatTranspose_ScaLAPACK,
1340: /*15*/ MatGetInfo_ScaLAPACK,
1341:        0,
1342:        MatGetDiagonal_ScaLAPACK,
1343:        MatDiagonalScale_ScaLAPACK,
1344:        MatNorm_ScaLAPACK,
1345: /*20*/ MatAssemblyBegin_ScaLAPACK,
1346:        MatAssemblyEnd_ScaLAPACK,
1347:        MatSetOption_ScaLAPACK,
1348:        MatZeroEntries_ScaLAPACK,
1349: /*24*/ 0,
1350:        MatLUFactorSymbolic_ScaLAPACK,
1351:        MatLUFactorNumeric_ScaLAPACK,
1352:        MatCholeskyFactorSymbolic_ScaLAPACK,
1353:        MatCholeskyFactorNumeric_ScaLAPACK,
1354: /*29*/ MatSetUp_ScaLAPACK,
1355:        0,
1356:        0,
1357:        0,
1358:        0,
1359: /*34*/ MatDuplicate_ScaLAPACK,
1360:        0,
1361:        0,
1362:        0,
1363:        0,
1364: /*39*/ MatAXPY_ScaLAPACK,
1365:        0,
1366:        0,
1367:        0,
1368:        MatCopy_ScaLAPACK,
1369: /*44*/ 0,
1370:        MatScale_ScaLAPACK,
1371:        MatShift_ScaLAPACK,
1372:        0,
1373:        0,
1374: /*49*/ 0,
1375:        0,
1376:        0,
1377:        0,
1378:        0,
1379: /*54*/ 0,
1380:        0,
1381:        0,
1382:        0,
1383:        0,
1384: /*59*/ 0,
1385:        MatDestroy_ScaLAPACK,
1386:        MatView_ScaLAPACK,
1387:        0,
1388:        0,
1389: /*64*/ 0,
1390:        0,
1391:        0,
1392:        0,
1393:        0,
1394: /*69*/ 0,
1395:        0,
1396:        MatConvert_ScaLAPACK_Dense,
1397:        0,
1398:        0,
1399: /*74*/ 0,
1400:        0,
1401:        0,
1402:        0,
1403:        0,
1404: /*79*/ 0,
1405:        0,
1406:        0,
1407:        0,
1408:        MatLoad_ScaLAPACK,
1409: /*84*/ 0,
1410:        0,
1411:        0,
1412:        0,
1413:        0,
1414: /*89*/ 0,
1415:        0,
1416:        MatMatMultNumeric_ScaLAPACK,
1417:        0,
1418:        0,
1419: /*94*/ 0,
1420:        0,
1421:        0,
1422:        MatMatTransposeMultNumeric_ScaLAPACK,
1423:        0,
1424: /*99*/ MatProductSetFromOptions_ScaLAPACK,
1425:        0,
1426:        0,
1427:        MatConjugate_ScaLAPACK,
1428:        0,
1429: /*104*/0,
1430:        0,
1431:        0,
1432:        0,
1433:        0,
1434: /*109*/MatMatSolve_ScaLAPACK,
1435:        0,
1436:        0,
1437:        0,
1438:        MatMissingDiagonal_ScaLAPACK,
1439: /*114*/0,
1440:        0,
1441:        0,
1442:        0,
1443:        0,
1444: /*119*/0,
1445:        MatHermitianTranspose_ScaLAPACK,
1446:        0,
1447:        0,
1448:        0,
1449: /*124*/0,
1450:        0,
1451:        0,
1452:        0,
1453:        0,
1454: /*129*/0,
1455:        0,
1456:        0,
1457:        0,
1458:        0,
1459: /*134*/0,
1460:        0,
1461:        0,
1462:        0,
1463:        0,
1464:        0,
1465: /*140*/0,
1466:        0,
1467:        0,
1468:        0,
1469:        0,
1470: /*145*/0,
1471:        0,
1472:        0
1473: };

1475: static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1476: {
1477:   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1478:   PetscInt           size=stash->size,nsends;
1479:   PetscErrorCode     ierr;
1480:   PetscInt           count,*sindices,**rindices,i,j,l;
1481:   PetscScalar        **rvalues,*svalues;
1482:   MPI_Comm           comm = stash->comm;
1483:   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1484:   PetscMPIInt        *sizes,*nlengths,nreceives;
1485:   PetscInt           *sp_idx,*sp_idy;
1486:   PetscScalar        *sp_val;
1487:   PetscMatStashSpace space,space_next;
1488:   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1489:   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;

1492:   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1493:     InsertMode addv;
1494:     MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
1495:     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1496:     mat->insertmode = addv; /* in case this processor had no cache */
1497:   }

1499:   bs2 = stash->bs*stash->bs;

1501:   /*  first count number of contributors to each processor */
1502:   PetscCalloc1(size,&nlengths);
1503:   PetscMalloc1(stash->n+1,&owner);

1505:   i     = j    = 0;
1506:   space = stash->space_head;
1507:   while (space) {
1508:     space_next = space->next;
1509:     for (l=0; l<space->local_used; l++) {
1510:       PetscBLASIntCast(space->idx[l]+1,&gridx);
1511:       PetscBLASIntCast(space->idy[l]+1,&gcidx);
1512:       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1513:       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1514:       nlengths[j]++; owner[i] = j;
1515:       i++;
1516:     }
1517:     space = space_next;
1518:   }

1520:   /* Now check what procs get messages - and compute nsends. */
1521:   PetscCalloc1(size,&sizes);
1522:   for (i=0, nsends=0; i<size; i++) {
1523:     if (nlengths[i]) {
1524:       sizes[i] = 1; nsends++;
1525:     }
1526:   }

1528:   {PetscMPIInt *onodes,*olengths;
1529:    /* Determine the number of messages to expect, their lengths, and the sending processes */
1530:    PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);
1531:    PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
1532:    /* row and column indices are sent together ("clubbed"), so lengths are multiplied by 2 */
1533:    for (i=0; i<nreceives; i++) olengths[i] *=2;
1534:    PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
1535:    /* values have length bs2 per entry (remove the earlier factor of 2) */
1536:    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1537:    PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
1538:    PetscFree(onodes);
1539:    PetscFree(olengths);}

1541:   /* do sends:
1542:       1) starts[i] gives the starting index in svalues for stuff going to
1543:          the ith processor
1544:   */
1545:   PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);
1546:   PetscMalloc1(2*nsends,&send_waits);
1547:   PetscMalloc2(size,&startv,size,&starti);
1548:   /* use 2 sends the first with all_a, the next with all_i and all_j */
1549:   startv[0] = 0; starti[0] = 0;
1550:   for (i=1; i<size; i++) {
1551:     startv[i] = startv[i-1] + nlengths[i-1];
1552:     starti[i] = starti[i-1] + 2*nlengths[i-1];
1553:   }

1555:   i     = 0;
1556:   space = stash->space_head;
1557:   while (space) {
1558:     space_next = space->next;
1559:     sp_idx     = space->idx;
1560:     sp_idy     = space->idy;
1561:     sp_val     = space->val;
1562:     for (l=0; l<space->local_used; l++) {
1563:       j = owner[i];
1564:       if (bs2 == 1) {
1565:         svalues[startv[j]] = sp_val[l];
1566:       } else {
1567:         PetscInt    k;
1568:         PetscScalar *buf1,*buf2;
1569:         buf1 = svalues+bs2*startv[j];
1570:         buf2 = space->val + bs2*l;
1571:         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1572:       }
1573:       sindices[starti[j]]             = sp_idx[l];
1574:       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1575:       startv[j]++;
1576:       starti[j]++;
1577:       i++;
1578:     }
1579:     space = space_next;
1580:   }
1581:   startv[0] = 0;
1582:   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

1584:   for (i=0,count=0; i<size; i++) {
1585:     if (sizes[i]) {
1586:       MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
1587:       MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
1588:     }
1589:   }
1590: #if defined(PETSC_USE_INFO)
1591:   PetscInfo1(NULL,"Number of messages: %d\n",nsends);
1592:   for (i=0; i<size; i++) {
1593:     if (sizes[i]) {
1594:       PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,(int)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt))));
1595:     }
1596:   }
1597: #endif
1598:   PetscFree(nlengths);
1599:   PetscFree(owner);
1600:   PetscFree2(startv,starti);
1601:   PetscFree(sizes);

1603:   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1604:   PetscMalloc1(2*nreceives,&recv_waits);

1606:   for (i=0; i<nreceives; i++) {
1607:     recv_waits[2*i]   = recv_waits1[i];
1608:     recv_waits[2*i+1] = recv_waits2[i];
1609:   }
1610:   stash->recv_waits = recv_waits;

1612:   PetscFree(recv_waits1);
1613:   PetscFree(recv_waits2);

1615:   stash->svalues         = svalues;
1616:   stash->sindices        = sindices;
1617:   stash->rvalues         = rvalues;
1618:   stash->rindices        = rindices;
1619:   stash->send_waits      = send_waits;
1620:   stash->nsends          = nsends;
1621:   stash->nrecvs          = nreceives;
1622:   stash->reproduce_count = 0;
1623:   return(0);
1624: }
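/*
   Receiver-side sketch (added for exposition; not part of the original source).
   Each message posted above carries 2*n integers (the n global row indices
   followed by the n global column indices) and bs2*n scalars. Given the two
   received buffers of one message, indices and values, a hypothetical unpack
   loop, with StoreEntryLocally() standing in for the real insertion logic,
   could look like:

     for (k=0; k<n; k++) {
       PetscInt    row = indices[k];        global row index
       PetscInt    col = indices[n+k];      global column index
       PetscScalar *v  = values + bs2*k;    the bs2 values of this entry
       StoreEntryLocally(A,row,col,v);      hypothetical helper
     }

   In PETSc itself this unpacking is performed by MatStashScatterGetMesg_Private()
   during MatAssemblyEnd().
*/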

1626: static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1627: {
1629:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

1632:   if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1633:   if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1634:   if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1635:   PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1636:   PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1637:   return(0);
1638: }

1640: /*@
1641:    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1642:    the ScaLAPACK matrix

1644:    Logically Collective on A

1646:    Input Parameters:
1647: +  A  - a MATSCALAPACK matrix
1648: .  mb - the row block size
1649: -  nb - the column block size
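
   Example:
   An illustrative sketch (added; the value 32 is arbitrary); note that the call
   must precede MatSetUp():
.vb
   MatScaLAPACKSetBlockSizes(A,32,32);
   MatSetUp(A);
.ve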

1651:    Level: intermediate

1653: .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1654: @*/
1655: PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1656: {

1663:   PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));
1664:   return(0);
1665: }

1667: static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1668: {
1669:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;

1672:   if (mb) *mb = a->mb;
1673:   if (nb) *nb = a->nb;
1674:   return(0);
1675: }

1677: /*@
1678:    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1679:    the ScaLAPACK matrix

1681:    Not Collective

1683:    Input Parameter:
1684: .  A  - a MATSCALAPACK matrix

1686:    Output Parameters:
1687: +  mb - the row block size
1688: -  nb - the column block size
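
   Example:
   An illustrative sketch (added):
.vb
   PetscInt mb,nb;
   MatScaLAPACKGetBlockSizes(A,&mb,&nb);
.ve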

1690:    Level: intermediate

1692: .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1693: @*/
1694: PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1695: {

1700:   PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));
1701:   return(0);
1702: }

1704: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1705: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);

1707: /*MC
1708:    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package

1710:    Use ./configure --download-scalapack to build PETSc with ScaLAPACK support

1712:    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver

1714:    Options Database Keys:
1715: +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1716: .  -mat_scalapack_grid_height - sets the grid height for the 2D block-cyclic distribution of the matrix
1717: -  -mat_scalapack_block_sizes - size of the blocks to use (one or two comma-separated integers)
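
   Example:
   An illustrative command line (added; the program name ./ex1 is hypothetical):
.vb
   mpiexec -n 4 ./ex1 -mat_type scalapack -mat_scalapack_block_sizes 32,32
.ve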

1719:    Level: beginner

1721: .seealso: MATDENSE, MATELEMENTAL
1722: M*/

1724: PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1725: {
1726:   Mat_ScaLAPACK      *a;
1727:   PetscErrorCode     ierr;
1728:   PetscBool          flg,flg1;
1729:   Mat_ScaLAPACK_Grid *grid;
1730:   MPI_Comm           icomm;
1731:   PetscBLASInt       nprow,npcol,myrow,mycol;
1732:   PetscInt           optv1,k=2,array[2]={0,0};
1733:   PetscMPIInt        size;

1736:   PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1737:   A->insertmode = NOT_SET_VALUES;

1739:   MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);
1740:   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1741:   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1742:   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1743:   A->stash.ScatterDestroy = NULL;

1745:   PetscNewLog(A,&a);
1746:   A->data = (void*)a;

1748:   /* Grid needs to be shared between multiple Mats on the same communicator, implemented by attribute caching on the MPI_Comm */
1749:   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1750:     MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);
1751:     PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);
1752:     PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite);
1753:   }
1754:   PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1755:   MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1756:   if (!flg) {
1757:     PetscNewLog(A,&grid);

1759:     MPI_Comm_size(icomm,&size);
1760:     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);

1762:     PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");
1763:     PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);
1764:     if (flg1) {
1765:       if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %d",optv1,size);
1766:       grid->nprow = optv1;
1767:     }
1768:     PetscOptionsEnd();

1770:     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1771:     grid->npcol = size/grid->nprow;
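    /* Illustration (added for exposition): with size=12 the heuristic above gives
       nprow=3, npcol=4; with a prime size such as 7, 7%2 != 0 forces nprow=1,
       npcol=7, i.e. a 1d grid. */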
1772:     PetscBLASIntCast(grid->nprow,&nprow);
1773:     PetscBLASIntCast(grid->npcol,&npcol);
1774:     grid->ictxt = Csys2blacs_handle(icomm);
1775:     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1776:     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1777:     grid->grid_refct = 1;
1778:     grid->nprow      = nprow;
1779:     grid->npcol      = npcol;
1780:     grid->myrow      = myrow;
1781:     grid->mycol      = mycol;
1782:     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1783:     grid->ictxrow = Csys2blacs_handle(icomm);
1784:     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1785:     grid->ictxcol = Csys2blacs_handle(icomm);
1786:     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1787:     MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);

1789:   } else grid->grid_refct++;
1790:   PetscCommDestroy(&icomm);
1791:   a->grid = grid;
1792:   a->mb   = DEFAULT_BLOCKSIZE;
1793:   a->nb   = DEFAULT_BLOCKSIZE;

1795:   PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");
1796:   PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);
1797:   if (flg) {
1798:     a->mb = array[0];
1799:     a->nb = (k>1)? array[1]: a->mb;
1800:   }
1801:   PetscOptionsEnd();

1803:   PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);
1804:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);
1805:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);
1806:   PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);
1807:   return(0);
1808: }

1810: /*@C
1811:    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1812:    (2D block cyclic distribution).

1814:    Collective

1816:    Input Parameters:
1817: +  comm - MPI communicator
1818: .  mb   - row block size (or PETSC_DECIDE to have it chosen automatically)
1819: .  nb   - column block size (or PETSC_DECIDE to have it chosen automatically)
1820: .  M    - number of global rows
1821: .  N    - number of global columns
1822: .  rsrc - coordinate of process that owns the first row of the distributed matrix
1823: -  csrc - coordinate of process that owns the first column of the distributed matrix

1825:    Output Parameter:
1826: .  A - the matrix

1828:    Options Database Keys:
1829: .  -mat_scalapack_block_sizes - size of the blocks to use (one or two comma-separated integers)

1831:    Notes:
1832:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
1833:    paradigm instead of calling this routine directly.

1836:    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1837:    is chosen.

1839:    Storage Information:
1840:    Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1841:    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1842:    significance and are thus ignored. The block sizes refer to the 2D
1843:    block-cyclic distribution of the matrix and do not have the same meaning as in BAIJ.
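
   Example:
   A minimal sketch (added) creating a 100 x 100 matrix with default block sizes,
   with the first row and column owned by the process at grid coordinates (0,0);
   the dimensions are arbitrary:
.vb
   Mat A;
   MatCreateScaLAPACK(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,0,0,&A);
.ve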

1845:    Level: intermediate

1847: .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1848: @*/
1849: PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1850: {
1852:   Mat_ScaLAPACK  *a;
1853:   PetscInt       m,n;

1856:   MatCreate(comm,A);
1857:   MatSetType(*A,MATSCALAPACK);
1858:   if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1859:   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1860:   m = PETSC_DECIDE;
1861:   PetscSplitOwnershipEqual(comm,&m,&M);
1862:   n = PETSC_DECIDE;
1863:   PetscSplitOwnershipEqual(comm,&n,&N);
1864:   MatSetSizes(*A,m,n,M,N);
1865:   a = (Mat_ScaLAPACK*)(*A)->data;
1866:   PetscBLASIntCast(M,&a->M);
1867:   PetscBLASIntCast(N,&a->N);
1868:   PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1869:   PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1870:   PetscBLASIntCast(rsrc,&a->rsrc);
1871:   PetscBLASIntCast(csrc,&a->csrc);
1872:   MatSetUp(*A);
1873:   return(0);
1874: }