Actual source code: ispai.c

  1: /* 
  2:    3/99 Modified by Stephen Barnard to support SPAI version 3.0 
  3: */

  5: /*
  6:       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
  7:    Code written by Stephen Barnard.

  9:       Note: there is some BAD memory bleeding below!

 11:       This code needs work

 13:    1) get rid of all memory bleeding
 14:    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
 15:       rather than having the sp flag for PC_SPAI

 17: */

 19:  #include src/ksp/pc/pcimpl.h

 21: /*
 22:     These are the SPAI include files
 23: */
 25: #include "spai.h"
 26: #include "matrix.h"
 27: #include "read_mm_matrix.h"

 30: EXTERN PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
 31: EXTERN PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *);
 32: EXTERN PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
 33: EXTERN PetscErrorCode MM_to_PETSC(char *,char *,char *);

 35: typedef struct {

 37:   matrix *B;              /* matrix in SPAI format */
 38:   matrix *BT;             /* transpose of matrix in SPAI format */
 39:   matrix *M;              /* the approximate inverse in SPAI format */

 41:   Mat    PM;              /* the approximate inverse PETSc format */

 43:   double epsilon;         /* tolerance */
 44:   int    nbsteps;         /* max number of "improvement" steps per line */
 45:   int    max;             /* max dimensions of is_I, q, etc. */
 46:   int    maxnew;          /* max number of new entries per step */
 47:   int    block_size;      /* constant block size */
 48:   int    cache_size;      /* one of (1,2,3,4,5,6) indicting size of cache */
 49:   int    verbose;         /* SPAI prints timing and statistics */

 51:   int    sp;              /* symmetric nonzero pattern */
 52:   MPI_Comm comm_spai;     /* communicator to be used with spai */
 53: } PC_SPAI;

 55: /**********************************************************************/

 59: static PetscErrorCode PCSetUp_SPAI(PC pc)
 60: {
 61:   PC_SPAI *ispai = (PC_SPAI*)pc->data;
 63:   Mat      AT;


 67:   init_SPAI();

 69:   if (ispai->sp) {
 70:     ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);
 71:   } else {
 72:     /* Use the transpose to get the column nonzero structure. */
 73:     MatTranspose(pc->pmat,&AT);
 74:     ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);
 75:     MatDestroy(AT);
 76:   }

 78:   /* Destroy the transpose */
 79:   /* Don't know how to do it. PETSc developers? */
 80: 
 81:   /* construct SPAI preconditioner */
 82:   /* FILE *messages */     /* file for warning messages */
 83:   /* double epsilon */     /* tolerance */
 84:   /* int nbsteps */        /* max number of "improvement" steps per line */
 85:   /* int max */            /* max dimensions of is_I, q, etc. */
 86:   /* int maxnew */         /* max number of new entries per step */
 87:   /* int block_size */     /* block_size == 1 specifies scalar elments
 88:                               block_size == n specifies nxn constant-block elements
 89:                               block_size == 0 specifies variable-block elements */
 90:   /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache */
 91:                            /* cache_size == 0 indicates no caching */
 92:   /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
 93:                               verbose == 1 prints timing and matrix statistics */

 95:   bspai(ispai->B,&ispai->M,
 96:                    stdout,
 97:                    ispai->epsilon,
 98:                    ispai->nbsteps,
 99:                    ispai->max,
100:                    ispai->maxnew,
101:                    ispai->block_size,
102:                    ispai->cache_size,
103:                ispai->verbose);

105:   ConvertMatrixToMat(pc->comm,ispai->M,&ispai->PM);

107:   /* free the SPAI matrices */
108:   sp_free_matrix(ispai->B);
109:   sp_free_matrix(ispai->M);

111:   return(0);
112: }

114: /**********************************************************************/

118: static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
119: {
120:   PC_SPAI *ispai = (PC_SPAI*)pc->data;

124:   /* Now using PETSc's multiply */
125:   MatMult(ispai->PM,xx,y);
126:   return(0);
127: }

129: /**********************************************************************/

133: static PetscErrorCode PCDestroy_SPAI(PC pc)
134: {
136:   PC_SPAI *ispai = (PC_SPAI*)pc->data;

139:   if (ispai->PM) {MatDestroy(ispai->PM);}
140:   MPI_Comm_free(&(ispai->comm_spai));
141:   PetscFree(ispai);
142:   return(0);
143: }

145: /**********************************************************************/

149: static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
150: {
151:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
153:   PetscTruth iascii;

156:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
157:   if (iascii) {
158:     PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");
159:     PetscViewerASCIIPrintf(viewer,"    epsilon %g\n",   ispai->epsilon);
160:     PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);
161:     PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);
162:     PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);
163:     PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);
164:     PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);
165:     PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);
166:     PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);

168:   }
169:   return(0);
170: }

175: PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
176: {
177:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
179:   ispai->epsilon = epsilon1;
180:   return(0);
181: }
183: 
184: /**********************************************************************/

189: PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
190: {
191:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
193:   ispai->nbsteps = nbsteps1;
194:   return(0);
195: }

198: /**********************************************************************/

200: /* added 1/7/99 g.h. */
204: PetscErrorCode PCSPAISetMax_SPAI(PC pc,int max1)
205: {
206:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
208:   ispai->max = max1;
209:   return(0);
210: }

213: /**********************************************************************/

218: PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
219: {
220:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
222:   ispai->maxnew = maxnew1;
223:   return(0);
224: }

227: /**********************************************************************/

232: PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
233: {
234:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
236:   ispai->block_size = block_size1;
237:   return(0);
238: }

241: /**********************************************************************/

246: PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
247: {
248:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
250:   ispai->cache_size = cache_size;
251:   return(0);
252: }

255: /**********************************************************************/

260: PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,int verbose)
261: {
262:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
264:   ispai->verbose = verbose;
265:   return(0);
266: }

269: /**********************************************************************/

274: PetscErrorCode PCSPAISetSp_SPAI(PC pc,int sp)
275: {
276:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
278:   ispai->sp = sp;
279:   return(0);
280: }

283: /* -------------------------------------------------------------------*/

287: /*@
288:   PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner

290:   Input Parameters:
291: + pc - the preconditioner
292: - eps - epsilon (default .4)

294:   Notes:  Espilon must be between 0 and 1. It controls the
295:                  quality of the approximation of M to the inverse of
296:                  A. Higher values of epsilon lead to more work, more
297:                  fill, and usually better preconditioners. In many
298:                  cases the best choice of epsilon is the one that
299:                  divides the total solution time equally between the
300:                  preconditioner and the solver.
301:   
302:   Level: intermediate

304: .seealso: PCSPAI, PCSetType()
305:   @*/
306: PetscErrorCode PCSPAISetEpsilon(PC pc,double epsilon1)
307: {
308:   PetscErrorCode ierr,(*f)(PC,double);
310:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetEpsilon_C",(void (**)(void))&f);
311:   if (f) {
312:     (*f)(pc,epsilon1);
313:   }
314:   return(0);
315: }
316: 
317: /**********************************************************************/

321: /*@
322:   PCSPAISetNBSteps - set maximum number of improvement steps per row in 
323:         the SPAI preconditioner

325:   Input Parameters:
326: + pc - the preconditioner
327: - n - number of steps (default 5)

329:   Notes:  SPAI constructs to approximation to every column of
330:                  the exact inverse of A in a series of improvement
331:                  steps. The quality of the approximation is determined
332:                  by epsilon. If an approximation achieving an accuracy
333:                  of epsilon is not obtained after ns steps, SPAI simply
334:                  uses the best approximation constructed so far.

336:   Level: intermediate

338: .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
339: @*/
340: PetscErrorCode PCSPAISetNBSteps(PC pc,int nbsteps1)
341: {
342:   PetscErrorCode ierr,(*f)(PC,int);
344:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetNBSteps_C",(void (**)(void))&f);
345:   if (f) {
346:     (*f)(pc,nbsteps1);
347:   }
348:   return(0);
349: }

351: /**********************************************************************/

353: /* added 1/7/99 g.h. */
356: /*@
357:   PCSPAISetMax - set the size of various working buffers in 
358:         the SPAI preconditioner

360:   Input Parameters:
361: + pc - the preconditioner
362: - n - size (default is 5000)

364:   Level: intermediate

366: .seealso: PCSPAI, PCSetType()
367: @*/
368: PetscErrorCode PCSPAISetMax(PC pc,int max1)
369: {
370:   PetscErrorCode ierr,(*f)(PC,int);
372:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMax_C",(void (**)(void))&f);
373:   if (f) {
374:     (*f)(pc,max1);
375:   }
376:   return(0);
377: }

379: /**********************************************************************/

383: /*@
384:   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
385:    in SPAI preconditioner

387:   Input Parameters:
388: + pc - the preconditioner
389: - n - maximum number (default 5)

391:   Level: intermediate

393: .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
394: @*/
395: PetscErrorCode PCSPAISetMaxNew(PC pc,int maxnew1)
396: {
397:   PetscErrorCode ierr,(*f)(PC,int);
399:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMaxNew_C",(void (**)(void))&f);
400:   if (f) {
401:     (*f)(pc,maxnew1);
402:   }
403:   return(0);
404: }

406: /**********************************************************************/

410: /*@
411:   PCSPAISetBlockSize - set the block size for the SPAI preconditioner

413:   Input Parameters:
414: + pc - the preconditioner
415: - n - block size (default 1)

417:   Notes: A block
418:                  size of 1 treats A as a matrix of scalar elements. A
419:                  block size of s > 1 treats A as a matrix of sxs
420:                  blocks. A block size of 0 treats A as a matrix with
421:                  variable sized blocks, which are determined by
422:                  searching for dense square diagonal blocks in A.
423:                  This can be very effective for finite-element
424:                  matrices.

426:                  SPAI will convert A to block form, use a block
427:                  version of the preconditioner algorithm, and then
428:                  convert the result back to scalar form.

430:                  In many cases the a block-size parameter other than 1
431:                  can lead to very significant improvement in
432:                  performance.


435:   Level: intermediate

437: .seealso: PCSPAI, PCSetType()
438: @*/
439: PetscErrorCode PCSPAISetBlockSize(PC pc,int block_size1)
440: {
441:   PetscErrorCode ierr,(*f)(PC,int);
443:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetBlockSize_C",(void (**)(void))&f);
444:   if (f) {
445:     (*f)(pc,block_size1);
446:   }
447:   return(0);
448: }

450: /**********************************************************************/

454: /*@
455:   PCSPAISetCacheSize - specify cache size in the SPAI preconditioner

457:   Input Parameters:
458: + pc - the preconditioner
459: - n -  cache size {0,1,2,3,4,5} (default 5)

461:   Notes:    SPAI uses a hash table to cache messages and avoid
462:                  redundant communication. If suggest always using
463:                  5. This parameter is irrelevant in the serial
464:                  version.

466:   Level: intermediate

468: .seealso: PCSPAI, PCSetType()
469: @*/
470: PetscErrorCode PCSPAISetCacheSize(PC pc,int cache_size)
471: {
472:   PetscErrorCode ierr,(*f)(PC,int);
474:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetCacheSize_C",(void (**)(void))&f);
475:   if (f) {
476:     (*f)(pc,cache_size);
477:   }
478:   return(0);
479: }

481: /**********************************************************************/

485: /*@
486:   PCSPAISetVerbose - verbosity level for the SPAI preconditioner

488:   Input Parameters:
489: + pc - the preconditioner
490: - n - level (default 1)

492:   Notes: print parameters, timings and matrix statistics

494:   Level: intermediate

496: .seealso: PCSPAI, PCSetType()
497: @*/
498: PetscErrorCode PCSPAISetVerbose(PC pc,int verbose)
499: {
500:   PetscErrorCode ierr,(*f)(PC,int);
502:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetVerbose_C",(void (**)(void))&f);
503:   if (f) {
504:     (*f)(pc,verbose);
505:   }
506:   return(0);
507: }

509: /**********************************************************************/

513: /*@
514:   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner

516:   Input Parameters:
517: + pc - the preconditioner
518: - n - 0 or 1

520:   Notes: If A has a symmetric nonzero pattern use -sp 1 to
521:                  improve performance by eliminating some communication
522:                  in the parallel version. Even if A does not have a
523:                  symmetric nonzero pattern -sp 1 may well lead to good
524:                  results, but the code will not follow the published
525:                  SPAI algorithm exactly.


528:   Level: intermediate

530: .seealso: PCSPAI, PCSetType()
531: @*/
532: PetscErrorCode PCSPAISetSp(PC pc,int sp)
533: {
534:   PetscErrorCode ierr,(*f)(PC,int);
536:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetSp_C",(void (**)(void))&f);
537:   if (f) {
538:     (*f)(pc,sp);
539:   }
540:   return(0);
541: }

543: /**********************************************************************/

545: /**********************************************************************/

549: static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
550: {
551:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
553:   int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
554:   double     epsilon1;
555:   PetscTruth flg;

558:   PetscOptionsHead("SPAI options");
559:     PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);
560:     if (flg) {
561:       PCSPAISetEpsilon(pc,epsilon1);
562:     }
563:     PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);
564:     if (flg) {
565:       PCSPAISetNBSteps(pc,nbsteps1);
566:     }
567:     /* added 1/7/99 g.h. */
568:     PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);
569:     if (flg) {
570:       PCSPAISetMax(pc,max1);
571:     }
572:     PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);
573:     if (flg) {
574:       PCSPAISetMaxNew(pc,maxnew1);
575:     }
576:     PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);
577:     if (flg) {
578:       PCSPAISetBlockSize(pc,block_size1);
579:     }
580:     PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);
581:     if (flg) {
582:       PCSPAISetCacheSize(pc,cache_size);
583:     }
584:     PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);
585:     if (flg) {
586:       PCSPAISetVerbose(pc,verbose);
587:     }
588:     PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);
589:     if (flg) {
590:       PCSPAISetSp(pc,sp);
591:     }
592:   PetscOptionsTail();
593:   return(0);
594: }

596: /**********************************************************************/

598: /*MC
599:    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
600:      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)

602:    Options Database Keys:
603: +  -pc_spai_set_epsilon <eps> - set tolerance
604: .  -pc_spai_nbstep <n> - set nbsteps
605: .  -pc_spai_max <m> - set max
606: .  -pc_spai_max_new <m> - set maxnew
607: .  -pc_spai_block_size <n> - set block size
608: .  -pc_spai_cache_size <n> - set cache size
609: .  -pc_spai_sp <m> - set sp
610: -  -pc_spai_set_verbose <true,false> - verbose output

612:    Notes: This only works with AIJ matrices.

614:    Level: beginner

616:    Concepts: approximate inverse

618: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
619:     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
620:     PCSPAISetVerbose(), PCSPAISetSp()
621: M*/

626: PetscErrorCode PCCreate_SPAI(PC pc)
627: {
628:   PC_SPAI *ispai;

632:   PetscNew(PC_SPAI,&ispai);
633:   pc->data           = (void*)ispai;

635:   pc->ops->destroy         = PCDestroy_SPAI;
636:   pc->ops->apply           = PCApply_SPAI;
637:   pc->ops->applyrichardson = 0;
638:   pc->ops->setup           = PCSetUp_SPAI;
639:   pc->ops->view            = PCView_SPAI;
640:   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;

642:   pc->name          = 0;
643:   ispai->epsilon    = .4;
644:   ispai->nbsteps    = 5;
645:   ispai->max        = 5000;
646:   ispai->maxnew     = 5;
647:   ispai->block_size = 1;
648:   ispai->cache_size = 5;
649:   ispai->verbose    = 0;

651:   ispai->sp         = 1;
652:   MPI_Comm_dup(pc->comm,&(ispai->comm_spai));

654:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C",
655:                     "PCSPAISetEpsilon_SPAI",
656:                      PCSPAISetEpsilon_SPAI);
657:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C",
658:                     "PCSPAISetNBSteps_SPAI",
659:                      PCSPAISetNBSteps_SPAI);
660:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C",
661:                     "PCSPAISetMax_SPAI",
662:                      PCSPAISetMax_SPAI);
663:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC",
664:                     "PCSPAISetMaxNew_SPAI",
665:                      PCSPAISetMaxNew_SPAI);
666:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C",
667:                     "PCSPAISetBlockSize_SPAI",
668:                      PCSPAISetBlockSize_SPAI);
669:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C",
670:                     "PCSPAISetCacheSize_SPAI",
671:                      PCSPAISetCacheSize_SPAI);
672:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C",
673:                     "PCSPAISetVerbose_SPAI",
674:                      PCSPAISetVerbose_SPAI);
675:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C",
676:                     "PCSPAISetSp_SPAI",
677:                      PCSPAISetSp_SPAI);

679:   return(0);
680: }

683: /**********************************************************************/

685: /*
686:    Converts from a PETSc matrix to an SPAI matrix 
687: */
690: PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
691: {
692:   matrix                  *M;
693:   int                     i,j,col;
694:   int                     row_indx;
695:   int                     len,pe,local_indx,start_indx;
696:   int                     *mapping;
698:   const int               *cols;
699:   const double            *vals;
700:   int                     *num_ptr,n,mnl,nnl,rank,size,nz,rstart,rend;
701:   struct compressed_lines *rows;

704: 
705:   MPI_Comm_size(comm,&size);
706:   MPI_Comm_rank(comm,&rank);
707:   MatGetSize(A,&n,&n);
708:   MatGetLocalSize(A,&mnl,&nnl);

710:   /*
711:     not sure why a barrier is required. commenting out
712:   MPI_Barrier(comm);
713:   */

715:   M = new_matrix((void*)comm);
716: 
717:   M->n = n;
718:   M->bs = 1;
719:   M->max_block_size = 1;

721:   M->mnls          = (int*)malloc(sizeof(int)*size);
722:   M->start_indices = (int*)malloc(sizeof(int)*size);
723:   M->pe            = (int*)malloc(sizeof(int)*n);
724:   M->block_sizes   = (int*)malloc(sizeof(int)*n);
725:   for (i=0; i<n; i++) M->block_sizes[i] = 1;

727:   MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);

729:   M->start_indices[0] = 0;
730:   for (i=1; i<size; i++) {
731:     M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
732:   }

734:   M->mnl = M->mnls[M->myid];
735:   M->my_start_index = M->start_indices[M->myid];

737:   for (i=0; i<size; i++) {
738:     start_indx = M->start_indices[i];
739:     for (j=0; j<M->mnls[i]; j++)
740:       M->pe[start_indx+j] = i;
741:   }

743:   if (AT) {
744:     M->lines = new_compressed_lines(M->mnls[rank],1);
745:   } else {
746:     M->lines = new_compressed_lines(M->mnls[rank],0);
747:   }

749:   rows     = M->lines;

751:   /* Determine the mapping from global indices to pointers */
752:   PetscMalloc(M->n*sizeof(int),&mapping);
753:   pe         = 0;
754:   local_indx = 0;
755:   for (i=0; i<M->n; i++) {
756:     if (local_indx >= M->mnls[pe]) {
757:       pe++;
758:       local_indx = 0;
759:     }
760:     mapping[i] = local_indx + M->start_indices[pe];
761:     local_indx++;
762:   }


765:   PetscMalloc(mnl*sizeof(int),&num_ptr);

767:   /*********************************************************/
768:   /************** Set up the row structure *****************/
769:   /*********************************************************/

771:   /* count number of nonzeros in every row */
772:   MatGetOwnershipRange(A,&rstart,&rend);
773:   for (i=rstart; i<rend; i++) {
774:     MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
775:     MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
776:   }

778:   /* allocate buffers */
779:   len = 0;
780:   for (i=0; i<mnl; i++) {
781:     if (len < num_ptr[i]) len = num_ptr[i];
782:   }

784:   for (i=rstart; i<rend; i++) {
785:     row_indx             = i-rstart;
786:     len                  = num_ptr[row_indx];
787:     rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
788:     rows->A[row_indx]    = (double*)malloc(len*sizeof(double));
789:   }

791:   /* copy the matrix */
792:   for (i=rstart; i<rend; i++) {
793:     row_indx = i - rstart;
794:     MatGetRow(A,i,&nz,&cols,&vals);
795:     for (j=0; j<nz; j++) {
796:       col = cols[j];
797:       len = rows->len[row_indx]++;
798:       rows->ptrs[row_indx][len] = mapping[col];
799:       rows->A[row_indx][len]    = vals[j];
800:     }
801:     rows->slen[row_indx] = rows->len[row_indx];
802:     MatRestoreRow(A,i,&nz,&cols,&vals);
803:   }


806:   /************************************************************/
807:   /************** Set up the column structure *****************/
808:   /*********************************************************/

810:   if (AT) {

812:     /* count number of nonzeros in every column */
813:     for (i=rstart; i<rend; i++) {
814:       MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
815:       MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
816:     }

818:     /* allocate buffers */
819:     len = 0;
820:     for (i=0; i<mnl; i++) {
821:       if (len < num_ptr[i]) len = num_ptr[i];
822:     }

824:     for (i=rstart; i<rend; i++) {
825:       row_indx = i-rstart;
826:       len      = num_ptr[row_indx];
827:       rows->rptrs[row_indx]     = (int*)malloc(len*sizeof(int));
828:     }

830:     /* copy the matrix (i.e., the structure) */
831:     for (i=rstart; i<rend; i++) {
832:       row_indx = i - rstart;
833:       MatGetRow(AT,i,&nz,&cols,&vals);
834:       for (j=0; j<nz; j++) {
835:         col = cols[j];
836:         len = rows->rlen[row_indx]++;
837:         rows->rptrs[row_indx][len] = mapping[col];
838:       }
839:       MatRestoreRow(AT,i,&nz,&cols,&vals);
840:     }
841:   }

843:   PetscFree(num_ptr);
844:   PetscFree(mapping);

846:   order_pointers(M);
847:   M->maxnz = calc_maxnz(M);

849:   *B = M;

851:   return(0);
852: }

854: /**********************************************************************/

856: /*
857:    Converts from an SPAI matrix B  to a PETSc matrix PB.
858:    This assumes that the the SPAI matrix B is stored in
859:    COMPRESSED-ROW format.
860: */
863: PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
864: {
865:   int         size,rank;
867:   int         m,n,M,N;
868:   int         d_nz,o_nz;
869:   int         *d_nnz,*o_nnz;
870:   int         i,k,global_row,global_col,first_diag_col,last_diag_col;
871:   PetscScalar val;

874:   MPI_Comm_size(comm,&size);
875:   MPI_Comm_rank(comm,&rank);
876: 
877:   m = n = B->mnls[rank];
878:   d_nz = o_nz = 0;

880:   /* Determine preallocation for MatCreateMPIAIJ */
881:   PetscMalloc(m*sizeof(int),&d_nnz);
882:   PetscMalloc(m*sizeof(int),&o_nnz);
883:   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
884:   first_diag_col = B->start_indices[rank];
885:   last_diag_col = first_diag_col + B->mnls[rank];
886:   for (i=0; i<B->mnls[rank]; i++) {
887:     for (k=0; k<B->lines->len[i]; k++) {
888:       global_col = B->lines->ptrs[i][k];
889:       if ((global_col >= first_diag_col) && (global_col <= last_diag_col))
890:         d_nnz[i]++;
891:       else
892:         o_nnz[i]++;
893:     }
894:   }

896:   M = N = B->n;
897:   /* Here we only know how to create AIJ format */
898:   MatCreate(comm,m,n,M,N,PB);
899:   MatSetType(*PB,MATAIJ);
900:   MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);
901:   MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);

903:   for (i=0; i<B->mnls[rank]; i++) {
904:     global_row = B->start_indices[rank]+i;
905:     for (k=0; k<B->lines->len[i]; k++) {
906:       global_col = B->lines->ptrs[i][k];
907:       val = B->lines->A[i][k];
908:       MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);
909:     }
910:   }

912:   PetscFree(d_nnz);
913:   PetscFree(o_nnz);

915:   MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);
916:   MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);

918:   return(0);
919: }

921: /**********************************************************************/

923: /*
924:    Converts from an SPAI vector v  to a PETSc vec Pv.
925: */
928: PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
929: {
931:   int size,rank,m,M,i,*mnls,*start_indices,*global_indices;
932: 
934:   MPI_Comm_size(comm,&size);
935:   MPI_Comm_rank(comm,&rank);
936: 
937:   m = v->mnl;
938:   M = v->n;
939: 
940: 
941:   VecCreateMPI(comm,m,M,Pv);

943:   PetscMalloc(size*sizeof(int),&mnls);
944:   MPI_Allgather((void*)&v->mnl,1,MPI_INT,(void*)mnls,1,MPI_INT,comm);
945: 
946:   PetscMalloc(size*sizeof(int),&start_indices);
947:   start_indices[0] = 0;
948:   for (i=1; i<size; i++)
949:     start_indices[i] = start_indices[i-1] +mnls[i-1];
950: 
951:   PetscMalloc(v->mnl*sizeof(int),&global_indices);
952:   for (i=0; i<v->mnl; i++)
953:     global_indices[i] = start_indices[rank] + i;

955:   PetscFree(mnls);
956:   PetscFree(start_indices);
957: 
958:   VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);

960:   PetscFree(global_indices);

962:   VecAssemblyBegin(*Pv);
963:   VecAssemblyEnd(*Pv);
964: 
965:   return(0);
966: }

968: /**********************************************************************/

970: /*

972:   Reads a matrix and a RHS vector in Matrix Market format and writes them
973:   in PETSc binary format.

975:   !!!! Warning!!!!! PETSc supports only serial execution of this routine.
976:   
977:   f0 <input_file> : matrix in Matrix Market format
978:   f1 <input_file> : vector in Matrix Market array format
979:   f2 <output_file> : matrix and vector in PETSc format

981: */
984: PetscErrorCode MM_to_PETSC(char *f0,char *f1,char *f2)
985: {
986:   Mat         A_PETSC;          /* matrix */
987:   Vec         b_PETSC;          /* RHS */
988:   PetscViewer fd;               /* viewer */
990:   matrix      *A_spai;
991:   vector      *b_spai;


995:   A_spai = read_mm_matrix(f0,1,1,1,0,0,0,NULL);
996:   b_spai = read_rhs_for_matrix(f1,A_spai);

998:   ConvertMatrixToMat(PETSC_COMM_SELF,A_spai,&A_PETSC);
999:   ConvertVectorToVec(PETSC_COMM_SELF,b_spai,&b_PETSC);

1001:   PetscViewerBinaryOpen(PETSC_COMM_SELF,f1,PETSC_FILE_CREATE,&fd);
1002:   MatView(A_PETSC,fd);
1003:   VecView(b_PETSC,fd);

1005:   return(0);
1006: }