Actual source code: matscalapack.c

  1: #include <petsc/private/petscscalapack.h>

  3: const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
  4: "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
  5: "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
  6: "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
  7: "       TITLE = {Sca{LAPACK} Users' Guide},\n"
  8: "       PUBLISHER = {SIAM},\n"
  9: "       ADDRESS = {Philadelphia, PA},\n"
 10: "       YEAR = 1997\n"
 11: "}\n";
 12: static PetscBool ScaLAPACKCite = PETSC_FALSE;

 14: #define DEFAULT_BLOCKSIZE 64

 16: /*
 17:     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
 18:   is attached to a communicator; in this case the attribute is a Mat_ScaLAPACK_Grid
 19: */
 20: static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
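
/*
   A condensed sketch of the attribute-caching pattern, assuming the standard MPI
   attribute API; the actual grid creation lives in MatCreate_ScaLAPACK, outside
   this excerpt. All MATSCALAPACK matrices on the same communicator share one
   BLACS grid through this keyval.

     Mat_ScaLAPACK_Grid *grid;
     PetscBool          flg;
     if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
       MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,NULL);
     }
     MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
     if (!flg) {   /* first matrix on icomm: create the grid and cache it */
       PetscNew(&grid);
       /* ... Cblacs_gridinit(&grid->ictxt,...) and related setup ... */
       MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,grid);
     }
*/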

 22: static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
 23: {

 27:   PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");
 28:   MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);
 29:   return(0);
 30: }

 32: static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
 33: {
 34:   PetscErrorCode    ierr;
 35:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
 36:   PetscBool         iascii;
 37:   PetscViewerFormat format;
 38:   Mat               Adense;

 41:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 42:   if (iascii) {
 43:     PetscViewerGetFormat(viewer,&format);
 44:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
 45:       PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);
 46:       PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);
 47:       PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);
 48:       PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);
 49:       return(0);
 50:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
 51:       return(0);
 52:     }
 53:   }
 54:   /* convert to dense format and call MatView() */
 55:   MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);
 56:   MatView(Adense,viewer);
 57:   MatDestroy(&Adense);
 58:   return(0);
 59: }

 61: static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
 62: {
 64:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
 65:   PetscLogDouble isend[2],irecv[2];

 68:   info->block_size = 1.0;

 70:   isend[0] = a->lld*a->locc;     /* locally allocated */
 71:   isend[1] = a->locr*a->locc;    /* used submatrix */
 72:   if (flag == MAT_LOCAL) {
 73:     info->nz_allocated   = isend[0];
 74:     info->nz_used        = isend[1];
 75:   } else if (flag == MAT_GLOBAL_MAX) {
 76:     MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));
 77:     info->nz_allocated   = irecv[0];
 78:     info->nz_used        = irecv[1];
 79:   } else if (flag == MAT_GLOBAL_SUM) {
 80:     MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));
 81:     info->nz_allocated   = irecv[0];
 82:     info->nz_used        = irecv[1];
 83:   }

 85:   info->nz_unneeded       = 0;
 86:   info->assemblies        = A->num_ass;
 87:   info->mallocs           = 0;
 88:   info->memory            = ((PetscObject)A)->mem;
 89:   info->fill_ratio_given  = 0;
 90:   info->fill_ratio_needed = 0;
 91:   info->factor_mallocs    = 0;
 92:   return(0);
 93: }

 95: PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
 96: {
 98:   switch (op) {
 99:     case MAT_NEW_NONZERO_LOCATIONS:
100:     case MAT_NEW_NONZERO_LOCATION_ERR:
101:     case MAT_NEW_NONZERO_ALLOCATION_ERR:
102:     case MAT_SYMMETRIC:
103:     case MAT_SORTED_FULL:
104:     case MAT_HERMITIAN:
105:       break;
106:     default:
107:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
108:   }
109:   return(0);
110: }

112: static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
113: {
114:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
116:   PetscInt       i,j;
117:   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

120:   for (i=0;i<nr;i++) {
121:     if (rows[i] < 0) continue;
122:     PetscBLASIntCast(rows[i]+1,&gridx);
123:     for (j=0;j<nc;j++) {
124:       if (cols[j] < 0) continue;
125:       PetscBLASIntCast(cols[j]+1,&gcidx);
126:       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
127:       if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
128:         switch (imode) {
129:           case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
130:           case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
131:           default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
132:         }
133:       } else {
134:         if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
135:         A->assembled = PETSC_FALSE;
136:         MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));
137:       }
138:     }
139:   }
140:   return(0);
141: }
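
/*
   Caller-side sketch (standard PETSc calling sequence; the values are chosen
   for illustration): entries owned by another process are stashed above and
   communicated during assembly, so any process may set any entry.

     PetscScalar v = 1.0;
     PetscInt    i = 0, j = 0;
     MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/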

143: static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
144: {
146:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
147:   PetscScalar    *x2d,*y2d,alpha=1.0;
148:   const PetscInt *ranges;
149:   PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;

152:   if (transpose) {

154:     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
155:     PetscLayoutGetRanges(A->rmap,&ranges);
156:     PetscBLASIntCast(ranges[1],&mb);  /* x block size */
157:     xlld = PetscMax(1,A->rmap->n);
158:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
160:     PetscLayoutGetRanges(A->cmap,&ranges);
161:     PetscBLASIntCast(ranges[1],&nb);  /* y block size */
162:     ylld = 1;
163:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));

166:     /* allocate 2d vectors */
167:     lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
168:     lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
169:     PetscMalloc2(lszx,&x2d,lszy,&y2d);
170:     xlld = PetscMax(1,lszx);

172:     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
173:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
175:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));

178:     /* redistribute x as a column of a 2d matrix */
179:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

181:     /* redistribute y as a row of a 2d matrix */
182:     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

184:     /* call PBLAS subroutine */
185:     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

187:     /* redistribute y from a row of a 2d matrix */
188:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

190:   } else {   /* non-transpose */

192:     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
193:     PetscLayoutGetRanges(A->cmap,&ranges);
194:     PetscBLASIntCast(ranges[1],&nb);  /* x block size */
195:     xlld = 1;
196:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
198:     PetscLayoutGetRanges(A->rmap,&ranges);
199:     PetscBLASIntCast(ranges[1],&mb);  /* y block size */
200:     ylld = PetscMax(1,A->rmap->n);
201:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));

204:     /* allocate 2d vectors */
205:     lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
206:     lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
207:     PetscMalloc2(lszx,&x2d,lszy,&y2d);
208:     ylld = PetscMax(1,lszy);

210:     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
211:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
213:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));

216:     /* redistribute x as a row of a 2d matrix */
217:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

219:     /* redistribute y as a column of a 2d matrix */
220:     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

222:     /* call PBLAS subroutine */
223:     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

225:     /* redistribute y from a column of a 2d matrix */
226:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

228:   }
229:   PetscFree2(x2d,y2d);
230:   return(0);
231: }

233: static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
234: {
235:   PetscErrorCode    ierr;
236:   const PetscScalar *xarray;
237:   PetscScalar       *yarray;

240:   VecGetArrayRead(x,&xarray);
241:   VecGetArray(y,&yarray);
242:   MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);
243:   VecRestoreArrayRead(x,&xarray);
244:   VecRestoreArray(y,&yarray);
245:   return(0);
246: }

248: static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
249: {
250:   PetscErrorCode    ierr;
251:   const PetscScalar *xarray;
252:   PetscScalar       *yarray;

255:   VecGetArrayRead(x,&xarray);
256:   VecGetArray(y,&yarray);
257:   MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);
258:   VecRestoreArrayRead(x,&xarray);
259:   VecRestoreArray(y,&yarray);
260:   return(0);
261: }

263: static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
264: {
265:   PetscErrorCode    ierr;
266:   const PetscScalar *xarray;
267:   PetscScalar       *zarray;

270:   if (y != z) { VecCopy(y,z); }
271:   VecGetArrayRead(x,&xarray);
272:   VecGetArray(z,&zarray);
273:   MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);
274:   VecRestoreArrayRead(x,&xarray);
275:   VecRestoreArray(z,&zarray);
276:   return(0);
277: }

279: static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
280: {
281:   PetscErrorCode    ierr;
282:   const PetscScalar *xarray;
283:   PetscScalar       *zarray;

286:   if (y != z) { VecCopy(y,z); }
287:   VecGetArrayRead(x,&xarray);
288:   VecGetArray(z,&zarray);
289:   MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);
290:   VecRestoreArrayRead(x,&xarray);
291:   VecRestoreArray(z,&zarray);
292:   return(0);
293: }

295: PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
296: {
297:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
298:   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
299:   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
300:   PetscScalar   sone=1.0,zero=0.0;
301:   PetscBLASInt  one=1;

304:   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
305:   C->assembled = PETSC_TRUE;
306:   return(0);
307: }

309: PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
310: {

314:   MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
315:   MatSetType(C,MATSCALAPACK);
316:   MatSetUp(C);
317:   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
318:   return(0);
319: }

321: static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
322: {
323:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
324:   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
325:   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
326:   PetscScalar   sone=1.0,zero=0.0;
327:   PetscBLASInt  one=1;

330:   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
331:   C->assembled = PETSC_TRUE;
332:   return(0);
333: }

335: static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
336: {

340:   MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
341:   MatSetType(C,MATSCALAPACK);
342:   MatSetUp(C);
343:   return(0);
344: }

346: /* --------------------------------------- */
347: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
348: {
350:   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
351:   C->ops->productsymbolic = MatProductSymbolic_AB;
352:   return(0);
353: }

355: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
356: {
358:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
359:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
360:   return(0);
361: }

363: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
364: {
366:   Mat_Product    *product = C->product;

369:   switch (product->type) {
370:     case MATPRODUCT_AB:
371:       MatProductSetFromOptions_ScaLAPACK_AB(C);
372:       break;
373:     case MATPRODUCT_ABt:
374:       MatProductSetFromOptions_ScaLAPACK_ABt(C);
375:       break;
376:     default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
377:   }
378:   return(0);
379: }
380: /* --------------------------------------- */
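
/*
   Caller-side sketch of the MatProduct interface that dispatches to the
   handlers above (A and B are assumed assembled MATSCALAPACK matrices):

     Mat C;
     MatProductCreate(A,B,NULL,&C);
     MatProductSetType(C,MATPRODUCT_AB);   /* or MATPRODUCT_ABt */
     MatProductSetFromOptions(C);
     MatProductSymbolic(C);
     MatProductNumeric(C);
*/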

382: static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
383: {
384:   PetscErrorCode    ierr;
385:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
386:   PetscScalar       *darray,*d2d,v;
387:   const PetscInt    *ranges;
388:   PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

391:   VecGetArray(D,&darray);

393:   if (A->rmap->N<=A->cmap->N) {   /* row version */

395:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
396:     PetscLayoutGetRanges(A->rmap,&ranges);
397:     PetscBLASIntCast(ranges[1],&mb);  /* D block size */
398:     dlld = PetscMax(1,A->rmap->n);
399:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));

402:     /* allocate 2d vector */
403:     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
404:     PetscCalloc1(lszd,&d2d);
405:     dlld = PetscMax(1,lszd);

407:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
408:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));

411:     /* collect diagonal */
412:     for (j=1;j<=a->M;j++) {
413:       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
414:       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
415:     }

417:     /* redistribute d from a column of a 2d matrix */
418:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
419:     PetscFree(d2d);

421:   } else {   /* column version */

423:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
424:     PetscLayoutGetRanges(A->cmap,&ranges);
425:     PetscBLASIntCast(ranges[1],&nb);  /* D block size */
426:     dlld = 1;
427:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));

430:     /* allocate 2d vector */
431:     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
432:     PetscCalloc1(lszd,&d2d);

434:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
435:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));

438:     /* collect diagonal */
439:     for (j=1;j<=a->N;j++) {
440:       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
441:       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
442:     }

444:     /* redistribute d from a row of a 2d matrix */
445:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
446:     PetscFree(d2d);
447:   }

449:   VecRestoreArray(D,&darray);
450:   VecAssemblyBegin(D);
451:   VecAssemblyEnd(D);
452:   return(0);
453: }

455: static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
456: {
457:   PetscErrorCode    ierr;
458:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
459:   const PetscScalar *d;
460:   const PetscInt    *ranges;
461:   PetscScalar       *d2d;
462:   PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

465:   if (R) {
466:     VecGetArrayRead(R,(const PetscScalar **)&d);
467:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
468:     PetscLayoutGetRanges(A->cmap,&ranges);
469:     PetscBLASIntCast(ranges[1],&nb);  /* D block size */
470:     dlld = 1;
471:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));

474:     /* allocate 2d vector */
475:     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
476:     PetscCalloc1(lszd,&d2d);

478:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
479:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));

482:     /* redistribute d to a row of a 2d matrix */
483:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));

485:     /* broadcast along process columns */
486:     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
487:     else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);

489:     /* local scaling */
490:     for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];

492:     PetscFree(d2d);
493:     VecRestoreArrayRead(R,(const PetscScalar **)&d);
494:   }
495:   if (L) {
496:     VecGetArrayRead(L,(const PetscScalar **)&d);
497:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
498:     PetscLayoutGetRanges(A->rmap,&ranges);
499:     PetscBLASIntCast(ranges[1],&mb);  /* D block size */
500:     dlld = PetscMax(1,A->rmap->n);
501:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));

504:     /* allocate 2d vector */
505:     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
506:     PetscCalloc1(lszd,&d2d);
507:     dlld = PetscMax(1,lszd);

509:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
510:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));

513:     /* redistribute d to a column of a 2d matrix */
514:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

516:     /* broadcast along process rows */
517:     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
518:     else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

520:     /* local scaling */
521:     for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

523:     PetscFree(d2d);
524:     VecRestoreArrayRead(L,(const PetscScalar **)&d);
525:   }
526:   return(0);
527: }

529: static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
530: {
532:   *missing = PETSC_FALSE;
533:   return(0);
534: }

536: static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
537: {
538:   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
539:   PetscBLASInt  n,one=1;

542:   n = x->lld*x->locc;
543:   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
544:   return(0);
545: }

547: static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
548: {
549:   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
550:   PetscBLASInt  i,n;
551:   PetscScalar   v;

554:   n = PetscMin(x->M,x->N);
555:   for (i=1;i<=n;i++) {
556:     PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
557:     v += alpha;
558:     PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
559:   }
560:   return(0);
561: }

563: static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
564: {
566:   Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
567:   Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
568:   PetscBLASInt   one=1;
569:   PetscScalar    beta=1.0;

572:   MatScaLAPACKCheckDistribution(Y,1,X,3);
573:   PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
574:   PetscObjectStateIncrease((PetscObject)Y);
575:   return(0);
576: }

578: static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
579: {
581:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
582:   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;

585:   PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
586:   PetscObjectStateIncrease((PetscObject)B);
587:   return(0);
588: }

590: static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
591: {
592:   Mat            Bs;
593:   MPI_Comm       comm;
594:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;

598:   PetscObjectGetComm((PetscObject)A,&comm);
599:   MatCreate(comm,&Bs);
600:   MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
601:   MatSetType(Bs,MATSCALAPACK);
602:   b = (Mat_ScaLAPACK*)Bs->data;
603:   b->M    = a->M;
604:   b->N    = a->N;
605:   b->mb   = a->mb;
606:   b->nb   = a->nb;
607:   b->rsrc = a->rsrc;
608:   b->csrc = a->csrc;
609:   MatSetUp(Bs);
610:   *B = Bs;
611:   if (op == MAT_COPY_VALUES) {
612:     PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
613:   }
614:   Bs->assembled = PETSC_TRUE;
615:   return(0);
616: }

618: static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
619: {
621:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
622:   Mat            Bs = *B;
623:   PetscBLASInt   one=1;
624:   PetscScalar    sone=1.0,zero=0.0;
625: #if defined(PETSC_USE_COMPLEX)
626:   PetscInt       i,j;
627: #endif

630:   if (reuse == MAT_INITIAL_MATRIX) {
631:     MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);
632:     *B = Bs;
633:   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
634:   b = (Mat_ScaLAPACK*)Bs->data;
635:   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
636: #if defined(PETSC_USE_COMPLEX)
637:   /* undo conjugation */
638:   for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
639: #endif
640:   Bs->assembled = PETSC_TRUE;
641:   return(0);
642: }

644: static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
645: {
646:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
647:   PetscInt      i,j;

650:   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
651:   return(0);
652: }

654: static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
655: {
657:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
658:   Mat            Bs = *B;
659:   PetscBLASInt   one=1;
660:   PetscScalar    sone=1.0,zero=0.0;

663:   if (reuse == MAT_INITIAL_MATRIX) {
664:     MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);
665:     *B = Bs;
666:   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
667:   b = (Mat_ScaLAPACK*)Bs->data;
668:   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
669:   Bs->assembled = PETSC_TRUE;
670:   return(0);
671: }

673: static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
674: {
676:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
677:   PetscScalar    *x,*x2d;
678:   const PetscInt *ranges;
679:   PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

682:   VecCopy(B,X);
683:   VecGetArray(X,&x);

685:   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
686:   PetscLayoutGetRanges(A->rmap,&ranges);
687:   PetscBLASIntCast(ranges[1],&mb);  /* x block size */
688:   xlld = PetscMax(1,A->rmap->n);
689:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));

692:   /* allocate 2d vector */
693:   lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
694:   PetscMalloc1(lszx,&x2d);
695:   xlld = PetscMax(1,lszx);

697:   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
698:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));

701:   /* redistribute x as a column of a 2d matrix */
702:   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

704:   /* call ScaLAPACK subroutine */
705:   switch (A->factortype) {
706:     case MAT_FACTOR_LU:
707:       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
709:       break;
710:     case MAT_FACTOR_CHOLESKY:
711:       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
713:       break;
714:     default:
715:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
716:   }

718:   /* redistribute x from a column of a 2d matrix */
719:   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

721:   PetscFree(x2d);
722:   VecRestoreArray(X,&x);
723:   return(0);
724: }

726: static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
727: {

731:   MatSolve_ScaLAPACK(A,B,X);
732:   VecAXPY(X,1,Y);
733:   return(0);
734: }

736: static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
737: {
739:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
740:   PetscBool      flg1,flg2;
741:   PetscBLASInt   one=1,info;

744:   PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);
745:   PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);
746:   if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
747:   MatScaLAPACKCheckDistribution(B,1,X,2);
748:   b = (Mat_ScaLAPACK*)B->data;
749:   x = (Mat_ScaLAPACK*)X->data;
750:   PetscArraycpy(x->loc,b->loc,b->lld*b->locc);

752:   switch (A->factortype) {
753:     case MAT_FACTOR_LU:
754:       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
756:       break;
757:     case MAT_FACTOR_CHOLESKY:
758:       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
760:       break;
761:     default:
762:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
763:   }
764:   return(0);
765: }

767: static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
768: {
770:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
771:   PetscBLASInt   one=1,info;

774:   if (!a->pivots) {
775:     PetscMalloc1(a->locr+a->mb,&a->pivots);
 776:     PetscLogObjectMemory((PetscObject)A,(a->locr+a->mb)*sizeof(PetscBLASInt));
777:   }
778:   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
780:   A->factortype = MAT_FACTOR_LU;
781:   A->assembled  = PETSC_TRUE;

783:   PetscFree(A->solvertype);
784:   PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
785:   return(0);
786: }

788: static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
789: {

793:   MatCopy(A,F,SAME_NONZERO_PATTERN);
794:   MatLUFactor_ScaLAPACK(F,0,0,info);
795:   return(0);
796: }

798: static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
799: {
801:   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
802:   return(0);
803: }

805: static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
806: {
808:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
809:   PetscBLASInt   one=1,info;

812:   PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
814:   A->factortype = MAT_FACTOR_CHOLESKY;
815:   A->assembled  = PETSC_TRUE;

817:   PetscFree(A->solvertype);
818:   PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
819:   return(0);
820: }

822: static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
823: {

827:   MatCopy(A,F,SAME_NONZERO_PATTERN);
828:   MatCholeskyFactor_ScaLAPACK(F,0,info);
829:   return(0);
830: }

832: static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
833: {
835:   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
836:   return(0);
837: }

839: PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
840: {
842:   *type = MATSOLVERSCALAPACK;
843:   return(0);
844: }

846: static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
847: {
848:   Mat            B;
849:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

853:   /* Create the factorization matrix */
854:   MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B);
855:   B->factortype = ftype;
856:   PetscFree(B->solvertype);
857:   PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);

859:   PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);
860:   *F = B;
861:   return(0);
862: }

864: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
865: {

869:   MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);
870:   MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);
871:   return(0);
872: }
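
/*
   Caller-side sketch of selecting this solver (b and x are assumed vectors with
   layouts matching A, which has type MATSCALAPACK); the equivalent command-line
   option is -pc_factor_mat_solver_type scalapack.

     KSP ksp; PC pc;
     KSPCreate(PETSC_COMM_WORLD,&ksp);
     KSPSetOperators(ksp,A,A);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCLU);                             /* or PCCHOLESKY */
     PCFactorSetMatSolverType(pc,MATSOLVERSCALAPACK);
     KSPSetFromOptions(ksp);
     KSPSolve(ksp,b,x);
*/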

874: static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
875: {
877:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
878:   PetscBLASInt   one=1,lwork=0;
879:   const char     *ntype;
880:   PetscScalar    *work=NULL,dummy;

883:   switch (type){
884:     case NORM_1:
885:       ntype = "1";
886:       lwork = PetscMax(a->locr,a->locc);
887:       break;
888:     case NORM_FROBENIUS:
889:       ntype = "F";
890:       work  = &dummy;
891:       break;
892:     case NORM_INFINITY:
893:       ntype = "I";
894:       lwork = PetscMax(a->locr,a->locc);
895:       break;
896:     default:
897:       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
898:   }
899:   if (lwork) { PetscMalloc1(lwork,&work); }
900:   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
901:   if (lwork) { PetscFree(work); }
902:   return(0);
903: }

905: static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
906: {
907:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

911:   PetscArrayzero(a->loc,a->lld*a->locc);
912:   return(0);
913: }

915: static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
916: {
917:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
919:   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;

922:   if (rows) {
923:     n     = a->locr;
924:     nb    = a->mb;
925:     isrc  = a->rsrc;
926:     nproc = a->grid->nprow;
927:     iproc = a->grid->myrow;
928:     PetscMalloc1(n,&idx);
929:     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
930:     ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);
931:   }
932:   if (cols) {
933:     n     = a->locc;
934:     nb    = a->nb;
935:     isrc  = a->csrc;
936:     nproc = a->grid->npcol;
937:     iproc = a->grid->mycol;
938:     PetscMalloc1(n,&idx);
939:     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
940:     ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);
941:   }
942:   return(0);
943: }
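
/*
   Worked instance of the block-cyclic index formula above (illustrative values):
   with nb=2, nproc=3, isrc=0, the process at iproc=1 maps its local indices
   i=0..3 to global indices

     idx[0] = 3*2*(0/2) + 0%2 + ((3+1-0)%3)*2 = 0 + 0 + 2 = 2
     idx[1] = 3*2*(1/2) + 1%2 + 2             = 0 + 1 + 2 = 3
     idx[2] = 3*2*(2/2) + 0   + 2             = 6 + 0 + 2 = 8
     idx[3] = 3*2*(3/2) + 1   + 2             = 6 + 1 + 2 = 9

   i.e. it owns the blocks {2,3}, {8,9}, ..., repeating with period nproc*nb = 6.
*/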

945: static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
946: {
948:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
949:   Mat            Bmpi;
950:   MPI_Comm       comm;
951:   PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
952:   const PetscInt *ranges,*branges,*cwork;
953:   const PetscScalar *vwork;
954:   PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
955:   PetscScalar    *barray;
956:   PetscBool      differ=PETSC_FALSE;
957:   PetscMPIInt    size;

960:   PetscObjectGetComm((PetscObject)A,&comm);
961:   PetscLayoutGetRanges(A->rmap,&ranges);

963:   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
964:     MPI_Comm_size(comm,&size);
965:     PetscLayoutGetRanges((*B)->rmap,&branges);
966:     for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
967:   }

969:   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
970:     MatCreate(comm,&Bmpi);
971:     m = PETSC_DECIDE;
972:     PetscSplitOwnershipEqual(comm,&m,&M);
973:     n = PETSC_DECIDE;
974:     PetscSplitOwnershipEqual(comm,&n,&N);
975:     MatSetSizes(Bmpi,m,n,M,N);
976:     MatSetType(Bmpi,MATDENSE);
977:     MatSetUp(Bmpi);

979:     /* create ScaLAPACK descriptor for B (1d block distribution) */
980:     PetscBLASIntCast(ranges[1],&bmb);  /* row block size */
981:     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
982:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));

985:     /* redistribute matrix */
986:     MatDenseGetArray(Bmpi,&barray);
987:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
988:     MatDenseRestoreArray(Bmpi,&barray);
989:     MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
990:     MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);

992:     /* transfer rows of auxiliary matrix to the final matrix B */
993:     MatGetOwnershipRange(Bmpi,&rstart,&rend);
994:     for (i=rstart;i<rend;i++) {
995:       MatGetRow(Bmpi,i,&nz,&cwork,&vwork);
996:       MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);
997:       MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);
998:     }
999:     MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
1000:     MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
1001:     MatDestroy(&Bmpi);

1003:   } else {  /* normal cases */

1005:     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1006:     else {
1007:       MatCreate(comm,&Bmpi);
1008:       m = PETSC_DECIDE;
1009:       PetscSplitOwnershipEqual(comm,&m,&M);
1010:       n = PETSC_DECIDE;
1011:       PetscSplitOwnershipEqual(comm,&n,&N);
1012:       MatSetSizes(Bmpi,m,n,M,N);
1013:       MatSetType(Bmpi,MATDENSE);
1014:       MatSetUp(Bmpi);
1015:     }

1017:     /* create ScaLAPACK descriptor for B (1d block distribution) */
1018:     PetscBLASIntCast(ranges[1],&bmb);  /* row block size */
1019:     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
1020:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));

1023:     /* redistribute matrix */
1024:     MatDenseGetArray(Bmpi,&barray);
1025:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
1026:     MatDenseRestoreArray(Bmpi,&barray);

1028:     MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
1029:     MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
1030:     if (reuse == MAT_INPLACE_MATRIX) {
1031:       MatHeaderReplace(A,&Bmpi);
1032:     } else *B = Bmpi;
1033:   }
1034:   return(0);
1035: }

1037: PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
1038: {
1040:   Mat_ScaLAPACK  *b;
1041:   Mat            Bmpi;
1042:   MPI_Comm       comm;
1043:   PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
1044:   const PetscInt *ranges;
1045:   PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
1046:   PetscScalar    *aarray;
1047:   PetscInt       lda;

1050:   PetscObjectGetComm((PetscObject)A,&comm);

1052:   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1053:   else {
1054:     MatCreate(comm,&Bmpi);
1055:     m = PETSC_DECIDE;
1056:     PetscSplitOwnershipEqual(comm,&m,&M);
1057:     n = PETSC_DECIDE;
1058:     PetscSplitOwnershipEqual(comm,&n,&N);
1059:     MatSetSizes(Bmpi,m,n,M,N);
1060:     MatSetType(Bmpi,MATSCALAPACK);
1061:     MatSetUp(Bmpi);
1062:   }
1063:   b = (Mat_ScaLAPACK*)Bmpi->data;

1065:   /* create ScaLAPACK descriptor for A (1d block distribution) */
1066:   PetscLayoutGetRanges(A->rmap,&ranges);
1067:   PetscBLASIntCast(ranges[1],&amb);  /* row block size */
1068:   MatDenseGetLDA(A,&lda);
1069:   lld = PetscMax(lda,1);  /* local leading dimension */
1070:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));

1073:   /* redistribute matrix */
1074:   MatDenseGetArray(A,&aarray);
1075:   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
1076:   MatDenseRestoreArray(A,&aarray);

1078:   MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
1079:   MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
1080:   if (reuse == MAT_INPLACE_MATRIX) {
1081:     MatHeaderReplace(A,&Bmpi);
1082:   } else *B = Bmpi;
1083:   return(0);
1084: }

1086: PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1087: {
1088:   Mat               mat_scal;
1089:   PetscErrorCode    ierr;
1090:   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1091:   const PetscInt    *cols;
1092:   const PetscScalar *vals;

1095:   if (reuse == MAT_REUSE_MATRIX) {
1096:     mat_scal = *newmat;
1097:     MatZeroEntries(mat_scal);
1098:   } else {
1099:     MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1100:     m = PETSC_DECIDE;
1101:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1102:     n = PETSC_DECIDE;
1103:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1104:     MatSetSizes(mat_scal,m,n,M,N);
1105:     MatSetType(mat_scal,MATSCALAPACK);
1106:     MatSetUp(mat_scal);
1107:   }
1108:   for (row=rstart;row<rend;row++) {
1109:     MatGetRow(A,row,&ncols,&cols,&vals);
1110:     MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);
1111:     MatRestoreRow(A,row,&ncols,&cols,&vals);
1112:   }
1113:   MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1114:   MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);

1116:   if (reuse == MAT_INPLACE_MATRIX) { MatHeaderReplace(A,&mat_scal); }
1117:   else *newmat = mat_scal;
1118:   return(0);
1119: }
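
/*
   Caller-side sketch (standard MatConvert semantics): an assembled AIJ matrix
   is redistributed into the 2d block-cyclic layout, e.g. before factoring it
   with the ScaLAPACK solver.

     Mat As;
     MatConvert(A,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);
*/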

1121: PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1122: {
1123:   Mat               mat_scal;
1124:   PetscErrorCode    ierr;
1125:   PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1126:   const PetscInt    *cols;
1127:   const PetscScalar *vals;
1128:   PetscScalar       v;

1131:   if (reuse == MAT_REUSE_MATRIX) {
1132:     mat_scal = *newmat;
1133:     MatZeroEntries(mat_scal);
1134:   } else {
1135:     MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1136:     m = PETSC_DECIDE;
1137:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1138:     n = PETSC_DECIDE;
1139:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1140:     MatSetSizes(mat_scal,m,n,M,N);
1141:     MatSetType(mat_scal,MATSCALAPACK);
1142:     MatSetUp(mat_scal);
1143:   }
1144:   MatGetRowUpperTriangular(A);
1145:   for (row=rstart;row<rend;row++) {
1146:     MatGetRow(A,row,&ncols,&cols,&vals);
1147:     MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);
1148:     for (j=0;j<ncols;j++) { /* lower triangular part */
1149:       if (cols[j] == row) continue;
1150:       v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
1151:       MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);
1152:     }
1153:     MatRestoreRow(A,row,&ncols,&cols,&vals);
1154:   }
1155:   MatRestoreRowUpperTriangular(A);
1156:   MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1157:   MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);

1159:   if (reuse == MAT_INPLACE_MATRIX) { MatHeaderReplace(A,&mat_scal); }
1160:   else *newmat = mat_scal;
1161:   return(0);
1162: }

1164: static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1165: {
1166:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1168:   PetscInt       sz=0;

1171:   PetscLayoutSetUp(A->rmap);
1172:   PetscLayoutSetUp(A->cmap);
1173:   if (!a->lld) a->lld = a->locr;

1175:   PetscFree(a->loc);
1176:   PetscIntMultError(a->lld,a->locc,&sz);
1177:   PetscCalloc1(sz,&a->loc);
1178:   PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));

1180:   A->preallocated = PETSC_TRUE;
1181:   return(0);
1182: }

1184: static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1185: {
1186:   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1187:   PetscErrorCode     ierr;
1188:   Mat_ScaLAPACK_Grid *grid;
1189:   PetscBool          flg;
1190:   MPI_Comm           icomm;

1193:   MatStashDestroy_Private(&A->stash);
1194:   PetscFree(a->loc);
1195:   PetscFree(a->pivots);
1196:   PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1197:   MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1198:   if (--grid->grid_refct == 0) {
1199:     Cblacs_gridexit(grid->ictxt);
1200:     Cblacs_gridexit(grid->ictxrow);
1201:     Cblacs_gridexit(grid->ictxcol);
1202:     PetscFree(grid);
1203:     MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);
1204:   }
1205:   PetscCommDestroy(&icomm);
1206:   PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);
1207:   PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
1208:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);
1209:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);
1210:   PetscFree(A->data);
1211:   return(0);
1212: }

1214: PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
1215: {
1217:   const PetscInt *ranges;
1218:   PetscMPIInt    size;
1219:   PetscInt       i,n;

1222:   MPI_Comm_size(map->comm,&size);
1223:   if (size>2) {
1224:     PetscLayoutGetRanges(map,&ranges);
1225:     n = ranges[1]-ranges[0];
1226:     for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
1227:     if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1228:   }
1229:   return(0);
1230: }
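
/*
   Caller-side sketch satisfying this layout rule: MatCreateScaLAPACK() (defined
   later in this file) chooses local sizes with PetscSplitOwnershipEqual(), so
   user code normally writes

     Mat A;
     MatCreateScaLAPACK(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,0,0,&A);

   rather than picking local sizes by hand; M and N are assumed global sizes.
*/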

1232: PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1233: {
1234:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1236:   PetscBLASInt   info=0;

1239:   PetscLayoutSetUp(A->rmap);
1240:   PetscLayoutSetUp(A->cmap);

1242:   /* check that the layout is as enforced by MatCreateScaLAPACK */
1243:   MatScaLAPACKCheckLayout(A->rmap);
1244:   MatScaLAPACKCheckLayout(A->cmap);

1246:   /* compute local sizes */
1247:   PetscBLASIntCast(A->rmap->N,&a->M);
1248:   PetscBLASIntCast(A->cmap->N,&a->N);
1249:   a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1250:   a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1251:   a->lld  = PetscMax(1,a->locr);

1253:   /* allocate local array */
1254:   MatScaLAPACKSetPreallocation(A);

1256:   /* set up ScaLAPACK descriptor */
1257:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1259:   return(0);
1260: }

1262: PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1263: {
1265:   PetscInt       nstash,reallocs;

1268:   if (A->nooffprocentries) return(0);
1269:   MatStashScatterBegin_Private(A,&A->stash,NULL);
1270:   MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);
1271:   PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
1272:   return(0);
1273: }

1275: PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1276: {
1278:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1279:   PetscMPIInt    n;
1280:   PetscInt       i,flg,*row,*col;
1281:   PetscScalar    *val;
1282:   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

1285:   if (A->nooffprocentries) return(0);
1286:   while (1) {
1287:     MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);
1288:     if (!flg) break;
1289:     for (i=0;i<n;i++) {
1290:       PetscBLASIntCast(row[i]+1,&gridx);
1291:       PetscBLASIntCast(col[i]+1,&gcidx);
1292:       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
 1293:       if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Something went wrong, received value does not belong to this process");
1294:       switch (A->insertmode) {
1295:         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1296:         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1297:         default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1298:       }
1299:     }
1300:   }
1301:   MatStashScatterEnd_Private(&A->stash);
1302:   return(0);
1303: }

1305: PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1306: {
1308:   Mat            Adense,As;
1309:   MPI_Comm       comm;

1312:   PetscObjectGetComm((PetscObject)newMat,&comm);
1313:   MatCreate(comm,&Adense);
1314:   MatSetType(Adense,MATDENSE);
1315:   MatLoad(Adense,viewer);
1316:   MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);
1317:   MatDestroy(&Adense);
1318:   MatHeaderReplace(newMat,&As);
1319:   return(0);
1320: }
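
/*
   Caller-side sketch (the file name is illustrative): a binary viewer is read
   through an intermediate dense matrix as implemented above.

     Mat         A;
     PetscViewer viewer;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATSCALAPACK);
     MatLoad(A,viewer);
     PetscViewerDestroy(&viewer);
*/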

1322: /* -------------------------------------------------------------------*/
1323: static struct _MatOps MatOps_Values = {
1324:        MatSetValues_ScaLAPACK,
1325:        0,
1326:        0,
1327:        MatMult_ScaLAPACK,
1328: /* 4*/ MatMultAdd_ScaLAPACK,
1329:        MatMultTranspose_ScaLAPACK,
1330:        MatMultTransposeAdd_ScaLAPACK,
1331:        MatSolve_ScaLAPACK,
1332:        MatSolveAdd_ScaLAPACK,
1333:        0,
1334: /*10*/ 0,
1335:        MatLUFactor_ScaLAPACK,
1336:        MatCholeskyFactor_ScaLAPACK,
1337:        0,
1338:        MatTranspose_ScaLAPACK,
1339: /*15*/ MatGetInfo_ScaLAPACK,
1340:        0,
1341:        MatGetDiagonal_ScaLAPACK,
1342:        MatDiagonalScale_ScaLAPACK,
1343:        MatNorm_ScaLAPACK,
1344: /*20*/ MatAssemblyBegin_ScaLAPACK,
1345:        MatAssemblyEnd_ScaLAPACK,
1346:        MatSetOption_ScaLAPACK,
1347:        MatZeroEntries_ScaLAPACK,
1348: /*24*/ 0,
1349:        MatLUFactorSymbolic_ScaLAPACK,
1350:        MatLUFactorNumeric_ScaLAPACK,
1351:        MatCholeskyFactorSymbolic_ScaLAPACK,
1352:        MatCholeskyFactorNumeric_ScaLAPACK,
1353: /*29*/ MatSetUp_ScaLAPACK,
1354:        0,
1355:        0,
1356:        0,
1357:        0,
1358: /*34*/ MatDuplicate_ScaLAPACK,
1359:        0,
1360:        0,
1361:        0,
1362:        0,
1363: /*39*/ MatAXPY_ScaLAPACK,
1364:        0,
1365:        0,
1366:        0,
1367:        MatCopy_ScaLAPACK,
1368: /*44*/ 0,
1369:        MatScale_ScaLAPACK,
1370:        MatShift_ScaLAPACK,
1371:        0,
1372:        0,
1373: /*49*/ 0,
1374:        0,
1375:        0,
1376:        0,
1377:        0,
1378: /*54*/ 0,
1379:        0,
1380:        0,
1381:        0,
1382:        0,
1383: /*59*/ 0,
1384:        MatDestroy_ScaLAPACK,
1385:        MatView_ScaLAPACK,
1386:        0,
1387:        0,
1388: /*64*/ 0,
1389:        0,
1390:        0,
1391:        0,
1392:        0,
1393: /*69*/ 0,
1394:        0,
1395:        MatConvert_ScaLAPACK_Dense,
1396:        0,
1397:        0,
1398: /*74*/ 0,
1399:        0,
1400:        0,
1401:        0,
1402:        0,
1403: /*79*/ 0,
1404:        0,
1405:        0,
1406:        0,
1407:        MatLoad_ScaLAPACK,
1408: /*84*/ 0,
1409:        0,
1410:        0,
1411:        0,
1412:        0,
1413: /*89*/ 0,
1414:        0,
1415:        MatMatMultNumeric_ScaLAPACK,
1416:        0,
1417:        0,
1418: /*94*/ 0,
1419:        0,
1420:        0,
1421:        MatMatTransposeMultNumeric_ScaLAPACK,
1422:        0,
1423: /*99*/ MatProductSetFromOptions_ScaLAPACK,
1424:        0,
1425:        0,
1426:        MatConjugate_ScaLAPACK,
1427:        0,
1428: /*104*/0,
1429:        0,
1430:        0,
1431:        0,
1432:        0,
1433: /*109*/MatMatSolve_ScaLAPACK,
1434:        0,
1435:        0,
1436:        0,
1437:        MatMissingDiagonal_ScaLAPACK,
1438: /*114*/0,
1439:        0,
1440:        0,
1441:        0,
1442:        0,
1443: /*119*/0,
1444:        MatHermitianTranspose_ScaLAPACK,
1445:        0,
1446:        0,
1447:        0,
1448: /*124*/0,
1449:        0,
1450:        0,
1451:        0,
1452:        0,
1453: /*129*/0,
1454:        0,
1455:        0,
1456:        0,
1457:        0,
1458: /*134*/0,
1459:        0,
1460:        0,
1461:        0,
1462:        0,
1463:        0,
1464: /*140*/0,
1465:        0,
1466:        0,
1467:        0,
1468:        0,
1469: /*145*/0,
1470:        0,
1471:        0
1472: };

1474: static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1475: {
1476:   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1477:   PetscInt           size=stash->size,nsends;
1478:   PetscErrorCode     ierr;
1479:   PetscInt           count,*sindices,**rindices,i,j,l;
1480:   PetscScalar        **rvalues,*svalues;
1481:   MPI_Comm           comm = stash->comm;
1482:   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1483:   PetscMPIInt        *sizes,*nlengths,nreceives;
1484:   PetscInt           *sp_idx,*sp_idy;
1485:   PetscScalar        *sp_val;
1486:   PetscMatStashSpace space,space_next;
1487:   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1488:   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;

1491:   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1492:     InsertMode addv;
1493:     MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
1494:     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1495:     mat->insertmode = addv; /* in case this processor had no cache */
1496:   }

1498:   bs2 = stash->bs*stash->bs;

1500:   /*  first count number of contributors to each processor */
1501:   PetscCalloc1(size,&nlengths);
1502:   PetscMalloc1(stash->n+1,&owner);

1504:   i     = j    = 0;
1505:   space = stash->space_head;
1506:   while (space) {
1507:     space_next = space->next;
1508:     for (l=0; l<space->local_used; l++) {
1509:       PetscBLASIntCast(space->idx[l]+1,&gridx);
1510:       PetscBLASIntCast(space->idy[l]+1,&gcidx);
1511:       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1512:       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1513:       nlengths[j]++; owner[i] = j;
1514:       i++;
1515:     }
1516:     space = space_next;
1517:   }

1519:   /* Now check what procs get messages - and compute nsends. */
1520:   PetscCalloc1(size,&sizes);
1521:   for (i=0, nsends=0; i<size; i++) {
1522:     if (nlengths[i]) {
1523:       sizes[i] = 1; nsends++;
1524:     }
1525:   }

1527:   {PetscMPIInt *onodes,*olengths;
 1528:    /* Determine the number of messages to expect, their lengths, and the from-ids */
1529:    PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);
1530:    PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
 1531:    /* since row and column indices are sent together, lengths are multiplied by 2 */
1532:    for (i=0; i<nreceives; i++) olengths[i] *=2;
1533:    PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
 1534:    /* values have size 'bs2' per entry (and remove the earlier factor of 2) */
1535:    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1536:    PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
1537:    PetscFree(onodes);
1538:    PetscFree(olengths);}

1540:   /* do sends:
1541:       1) starts[i] gives the starting index in svalues for stuff going to
1542:          the ith processor
1543:   */
1544:   PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);
1545:   PetscMalloc1(2*nsends,&send_waits);
1546:   PetscMalloc2(size,&startv,size,&starti);
 1547:   /* use two sends per destination: one with the values (all_a), the other with the indices (all_i and all_j) */
1548:   startv[0] = 0; starti[0] = 0;
1549:   for (i=1; i<size; i++) {
1550:     startv[i] = startv[i-1] + nlengths[i-1];
1551:     starti[i] = starti[i-1] + 2*nlengths[i-1];
1552:   }

1554:   i     = 0;
1555:   space = stash->space_head;
1556:   while (space) {
1557:     space_next = space->next;
1558:     sp_idx     = space->idx;
1559:     sp_idy     = space->idy;
1560:     sp_val     = space->val;
1561:     for (l=0; l<space->local_used; l++) {
1562:       j = owner[i];
1563:       if (bs2 == 1) {
1564:         svalues[startv[j]] = sp_val[l];
1565:       } else {
1566:         PetscInt    k;
1567:         PetscScalar *buf1,*buf2;
1568:         buf1 = svalues+bs2*startv[j];
1569:         buf2 = space->val + bs2*l;
1570:         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1571:       }
1572:       sindices[starti[j]]             = sp_idx[l];
1573:       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1574:       startv[j]++;
1575:       starti[j]++;
1576:       i++;
1577:     }
1578:     space = space_next;
1579:   }
1580:   startv[0] = 0;
1581:   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

1583:   for (i=0,count=0; i<size; i++) {
1584:     if (sizes[i]) {
1585:       MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
1586:       MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
1587:     }
1588:   }
1589: #if defined(PETSC_USE_INFO)
1590:   PetscInfo1(NULL,"Number of messages: %d\n",nsends);
1591:   for (i=0; i<size; i++) {
1592:     if (sizes[i]) {
1593:       PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,(int)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt))));
1594:     }
1595:   }
1596: #endif
1597:   PetscFree(nlengths);
1598:   PetscFree(owner);
1599:   PetscFree2(startv,starti);
1600:   PetscFree(sizes);

1602:   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1603:   PetscMalloc1(2*nreceives,&recv_waits);

1605:   for (i=0; i<nreceives; i++) {
1606:     recv_waits[2*i]   = recv_waits1[i];
1607:     recv_waits[2*i+1] = recv_waits2[i];
1608:   }
1609:   stash->recv_waits = recv_waits;

1611:   PetscFree(recv_waits1);
1612:   PetscFree(recv_waits2);

1614:   stash->svalues         = svalues;
1615:   stash->sindices        = sindices;
1616:   stash->rvalues         = rvalues;
1617:   stash->rindices        = rindices;
1618:   stash->send_waits      = send_waits;
1619:   stash->nsends          = nsends;
1620:   stash->nrecvs          = nreceives;
1621:   stash->reproduce_count = 0;
1622:   return(0);
1623: }

1625: static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1626: {
1628:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

1631:   if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1632:   if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1633:   if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1634:   PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1635:   PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1636:   return(0);
1637: }

1639: /*@
1640:    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1641:    the ScaLAPACK matrix

1643:    Logically Collective on A

1645:    Input Parameters:
1646: +  A  - a MATSCALAPACK matrix
1647: .  mb - the row block size
1648: -  nb - the column block size

1650:    Level: intermediate

1652: .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1653: @*/
1654: PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1655: {

1662:   PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));
1663:   return(0);
1664: }
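/*
   Example usage (a minimal sketch; the matrix dimensions and block sizes below
   are illustrative assumptions):

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(A,MATSCALAPACK);
     MatScaLAPACKSetBlockSizes(A,16,32);   (must be called before MatSetUp())
     MatSetUp(A);
*/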

1666: static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1667: {
1668:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;

1671:   if (mb) *mb = a->mb;
1672:   if (nb) *nb = a->nb;
1673:   return(0);
1674: }

1676: /*@
1677:    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1678:    the ScaLAPACK matrix

1680:    Not collective

1682:    Input Parameter:
1683: .  A  - a MATSCALAPACK matrix

1685:    Output Parameters:
1686: +  mb - the row block size
1687: -  nb - the column block size

1689:    Level: intermediate

1691: .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1692: @*/
1693: PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1694: {

1699:   PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));
1700:   return(0);
1701: }
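/*
   Example usage (sketch): either output pointer may be passed as NULL if that
   value is not needed:

     PetscInt mb,nb;
     MatScaLAPACKGetBlockSizes(A,&mb,&nb);
     MatScaLAPACKGetBlockSizes(A,&mb,NULL);
*/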

1703: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1704: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);

1706: /*MC
1707:    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package

1709:    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK

1711:    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver

1713:    Options Database Keys:
1714: +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1715: .  -mat_scalapack_grid_height - sets the grid height for the 2D block-cyclic distribution of the internal matrix
1716: -  -mat_scalapack_block_sizes - size of the blocks to use (one or two comma-separated integers)

1718:    Level: beginner

1720: .seealso: MATDENSE, MATELEMENTAL
1721: M*/
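/*
   Example command line (illustrative; the executable name is a placeholder):

     mpiexec -n 4 ./app -mat_type scalapack -mat_scalapack_block_sizes 16,32 \
                        -mat_scalapack_grid_height 2 -pc_type lu -pc_factor_mat_solver_type scalapack
*/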

1723: PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1724: {
1725:   Mat_ScaLAPACK      *a;
1726:   PetscErrorCode     ierr;
1727:   PetscBool          flg,flg1;
1728:   Mat_ScaLAPACK_Grid *grid;
1729:   MPI_Comm           icomm;
1730:   PetscBLASInt       nprow,npcol,myrow,mycol;
1731:   PetscInt           optv1,k=2,array[2]={0,0};
1732:   PetscMPIInt        size;

1735:   PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1736:   A->insertmode = NOT_SET_VALUES;

1738:   MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);
1739:   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1740:   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1741:   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1742:   A->stash.ScatterDestroy = NULL;

1744:   PetscNewLog(A,&a);
1745:   A->data = (void*)a;

1747:   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1748:   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1749:     MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);
1750:     PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);
1751:     PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite);
1752:   }
1753:   PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1754:   MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1755:   if (!flg) {
1756:     PetscNewLog(A,&grid);

1758:     MPI_Comm_size(icomm,&size);
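    /* default to a near-square grid: nprow ~ sqrt(size); the +0.001 guards
       against the square root rounding just below an exact integer */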
1759:     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);

1761:     PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");
1762:     PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);
1763:     if (flg1) {
1764:       if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %d",optv1,size);
1765:       grid->nprow = optv1;
1766:     }
1767:     PetscOptionsEnd();

1769:     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1770:     grid->npcol = size/grid->nprow;
1771:     PetscBLASIntCast(grid->nprow,&nprow);
1772:     PetscBLASIntCast(grid->npcol,&npcol);
1773:     grid->ictxt = Csys2blacs_handle(icomm);
1774:     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1775:     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1776:     grid->grid_refct = 1;
1777:     grid->nprow      = nprow;
1778:     grid->npcol      = npcol;
1779:     grid->myrow      = myrow;
1780:     grid->mycol      = mycol;
1781:     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1782:     grid->ictxrow = Csys2blacs_handle(icomm);
1783:     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1784:     grid->ictxcol = Csys2blacs_handle(icomm);
1785:     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1786:     MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);

1788:   } else grid->grid_refct++;
1789:   PetscCommDestroy(&icomm);
1790:   a->grid = grid;
1791:   a->mb   = DEFAULT_BLOCKSIZE;
1792:   a->nb   = DEFAULT_BLOCKSIZE;

1794:   PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");
1795:   PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);
1796:   if (flg) {
1797:     a->mb = array[0];
1798:     a->nb = (k>1)? array[1]: a->mb;
1799:   }
1800:   PetscOptionsEnd();

1802:   PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);
1803:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);
1804:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);
1805:   PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);
1806:   return(0);
1807: }

1809: /*@C
1810:    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1811:    (2D block cyclic distribution).

1813:    Collective

1815:    Input Parameters:
1816: +  comm - MPI communicator
1817: .  mb   - row block size (or PETSC_DECIDE for the default)
1818: .  nb   - column block size (or PETSC_DECIDE for the default)
1819: .  M    - number of global rows
1820: .  N    - number of global columns
1821: .  rsrc - coordinate of process that owns the first row of the distributed matrix
1822: -  csrc - coordinate of process that owns the first column of the distributed matrix

1824:    Output Parameter:
1825: .  A - the matrix

1827:    Options Database Keys:
1828: .  -mat_scalapack_block_sizes - size of the blocks to use (one or two comma-separated integers)

1830:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1831:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1832:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

1834:    Notes:
1835:    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1836:    is chosen (DEFAULT_BLOCKSIZE, currently 64).

1838:    Storage Information:
1839:    Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1840:    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1841:    significance and are thus ignored. The block sizes refer to the 2D
1842:    block-cyclic distribution of the matrix and do not have the same meaning as in BAIJ.

1844:    Level: intermediate

1846: .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1847: @*/
1848: PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1849: {
1851:   Mat_ScaLAPACK  *a;
1852:   PetscInt       m,n;

1855:   MatCreate(comm,A);
1856:   MatSetType(*A,MATSCALAPACK);
1857:   if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1858:   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1859:   m = PETSC_DECIDE;
1860:   PetscSplitOwnershipEqual(comm,&m,&M);
1861:   n = PETSC_DECIDE;
1862:   PetscSplitOwnershipEqual(comm,&n,&N);
1863:   MatSetSizes(*A,m,n,M,N);
1864:   a = (Mat_ScaLAPACK*)(*A)->data;
1865:   PetscBLASIntCast(M,&a->M);
1866:   PetscBLASIntCast(N,&a->N);
1867:   PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1868:   PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1869:   PetscBLASIntCast(rsrc,&a->rsrc);
1870:   PetscBLASIntCast(csrc,&a->csrc);
1871:   MatSetUp(*A);
1872:   return(0);
1873: }
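/*
   Example usage (a minimal sketch; the dimensions and block sizes are
   illustrative assumptions):

     Mat A;
     MatCreateScaLAPACK(PETSC_COMM_WORLD,16,16,100,100,0,0,&A);
     MatSetValue(A,0,0,1.0,INSERT_VALUES);   (entries may be set from any process;
                                              they are communicated during assembly)
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDestroy(&A);
*/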