Actual source code: ispai.c
petsc-3.3-p7 2013-05-11
2: /*
3: 3/99 Modified by Stephen Barnard to support SPAI version 3.0
4: */
6: /*
7: Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
8: Code written by Stephen Barnard.
10: Note: there is some BAD memory bleeding below!
12: This code needs work
14: 1) get rid of all memory bleeding
15: 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
16: rather than having the sp flag for PC_SPAI
17: 3) fix to set the block size based on the matrix block size
19: */
21: #include <petsc-private/pcimpl.h> /*I "petscpc.h" I*/
22: #include petscspai.h
24: /*
25: These are the SPAI include files
26: */
27: EXTERN_C_BEGIN
28: #define MPI /* required for setting SPAI_Comm correctly in basics.h */
29: #include <spai.h>
30: #include <matrix.h>
31: EXTERN_C_END
33: extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
34: extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *);
35: extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
36: extern PetscErrorCode MM_to_PETSC(char *,char *,char *);
38: typedef struct {
40: matrix *B; /* matrix in SPAI format */
41: matrix *BT; /* transpose of matrix in SPAI format */
42: matrix *M; /* the approximate inverse in SPAI format */
44: Mat PM; /* the approximate inverse PETSc format */
46: double epsilon; /* tolerance */
47: int nbsteps; /* max number of "improvement" steps per line */
48: int max; /* max dimensions of is_I, q, etc. */
49: int maxnew; /* max number of new entries per step */
50: int block_size; /* constant block size */
51: int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */
52: int verbose; /* SPAI prints timing and statistics */
54: int sp; /* symmetric nonzero pattern */
55: MPI_Comm comm_spai; /* communicator to be used with spai */
56: } PC_SPAI;
58: /**********************************************************************/
62: static PetscErrorCode PCSetUp_SPAI(PC pc)
63: {
64: PC_SPAI *ispai = (PC_SPAI*)pc->data;
66: Mat AT;
70: init_SPAI();
72: if (ispai->sp) {
73: ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);
74: } else {
75: /* Use the transpose to get the column nonzero structure. */
76: MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);
77: ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);
78: MatDestroy(&AT);
79: }
81: /* Destroy the transpose */
82: /* Don't know how to do it. PETSc developers? */
83:
84: /* construct SPAI preconditioner */
85: /* FILE *messages */ /* file for warning messages */
86: /* double epsilon */ /* tolerance */
87: /* int nbsteps */ /* max number of "improvement" steps per line */
88: /* int max */ /* max dimensions of is_I, q, etc. */
89: /* int maxnew */ /* max number of new entries per step */
90: /* int block_size */ /* block_size == 1 specifies scalar elments
91: block_size == n specifies nxn constant-block elements
92: block_size == 0 specifies variable-block elements */
93: /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache */
94: /* cache_size == 0 indicates no caching */
95: /* int verbose */ /* verbose == 0 specifies that SPAI is silent
96: verbose == 1 prints timing and matrix statistics */
98: bspai(ispai->B,&ispai->M,
99: stdout,
100: ispai->epsilon,
101: ispai->nbsteps,
102: ispai->max,
103: ispai->maxnew,
104: ispai->block_size,
105: ispai->cache_size,
106: ispai->verbose);
108: ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);
110: /* free the SPAI matrices */
111: sp_free_matrix(ispai->B);
112: sp_free_matrix(ispai->M);
114: return(0);
115: }
117: /**********************************************************************/
121: static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
122: {
123: PC_SPAI *ispai = (PC_SPAI*)pc->data;
127: /* Now using PETSc's multiply */
128: MatMult(ispai->PM,xx,y);
129: return(0);
130: }
132: /**********************************************************************/
136: static PetscErrorCode PCDestroy_SPAI(PC pc)
137: {
139: PC_SPAI *ispai = (PC_SPAI*)pc->data;
142: MatDestroy(&ispai->PM);
143: MPI_Comm_free(&(ispai->comm_spai));
144: PetscFree(pc->data);
145: return(0);
146: }
148: /**********************************************************************/
152: static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
153: {
154: PC_SPAI *ispai = (PC_SPAI*)pc->data;
156: PetscBool iascii;
159: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
160: if (iascii) {
161: PetscViewerASCIIPrintf(viewer," SPAI preconditioner\n");
162: PetscViewerASCIIPrintf(viewer," epsilon %G\n", ispai->epsilon);
163: PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);
164: PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);
165: PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);
166: PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);
167: PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);
168: PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);
169: PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);
170: }
171: return(0);
172: }
174: EXTERN_C_BEGIN
177: PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
178: {
179: PC_SPAI *ispai = (PC_SPAI*)pc->data;
181: ispai->epsilon = epsilon1;
182: return(0);
183: }
184: EXTERN_C_END
185:
186: /**********************************************************************/
188: EXTERN_C_BEGIN
191: PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
192: {
193: PC_SPAI *ispai = (PC_SPAI*)pc->data;
195: ispai->nbsteps = nbsteps1;
196: return(0);
197: }
198: EXTERN_C_END
200: /**********************************************************************/
202: /* added 1/7/99 g.h. */
203: EXTERN_C_BEGIN
206: PetscErrorCode PCSPAISetMax_SPAI(PC pc,int max1)
207: {
208: PC_SPAI *ispai = (PC_SPAI*)pc->data;
210: ispai->max = max1;
211: return(0);
212: }
213: EXTERN_C_END
215: /**********************************************************************/
217: EXTERN_C_BEGIN
220: PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
221: {
222: PC_SPAI *ispai = (PC_SPAI*)pc->data;
224: ispai->maxnew = maxnew1;
225: return(0);
226: }
227: EXTERN_C_END
229: /**********************************************************************/
231: EXTERN_C_BEGIN
234: PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
235: {
236: PC_SPAI *ispai = (PC_SPAI*)pc->data;
238: ispai->block_size = block_size1;
239: return(0);
240: }
241: EXTERN_C_END
243: /**********************************************************************/
245: EXTERN_C_BEGIN
248: PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
249: {
250: PC_SPAI *ispai = (PC_SPAI*)pc->data;
252: ispai->cache_size = cache_size;
253: return(0);
254: }
255: EXTERN_C_END
257: /**********************************************************************/
259: EXTERN_C_BEGIN
262: PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,int verbose)
263: {
264: PC_SPAI *ispai = (PC_SPAI*)pc->data;
266: ispai->verbose = verbose;
267: return(0);
268: }
269: EXTERN_C_END
271: /**********************************************************************/
273: EXTERN_C_BEGIN
276: PetscErrorCode PCSPAISetSp_SPAI(PC pc,int sp)
277: {
278: PC_SPAI *ispai = (PC_SPAI*)pc->data;
280: ispai->sp = sp;
281: return(0);
282: }
283: EXTERN_C_END
285: /* -------------------------------------------------------------------*/
289: /*@
290: PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner
292: Input Parameters:
293: + pc - the preconditioner
294: - eps - epsilon (default .4)
296: Notes: Espilon must be between 0 and 1. It controls the
297: quality of the approximation of M to the inverse of
298: A. Higher values of epsilon lead to more work, more
299: fill, and usually better preconditioners. In many
300: cases the best choice of epsilon is the one that
301: divides the total solution time equally between the
302: preconditioner and the solver.
303:
304: Level: intermediate
306: .seealso: PCSPAI, PCSetType()
307: @*/
308: PetscErrorCode PCSPAISetEpsilon(PC pc,double epsilon1)
309: {
312: PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));
313: return(0);
314: }
315:
316: /**********************************************************************/
320: /*@
321: PCSPAISetNBSteps - set maximum number of improvement steps per row in
322: the SPAI preconditioner
324: Input Parameters:
325: + pc - the preconditioner
326: - n - number of steps (default 5)
328: Notes: SPAI constructs to approximation to every column of
329: the exact inverse of A in a series of improvement
330: steps. The quality of the approximation is determined
331: by epsilon. If an approximation achieving an accuracy
332: of epsilon is not obtained after ns steps, SPAI simply
333: uses the best approximation constructed so far.
335: Level: intermediate
337: .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
338: @*/
339: PetscErrorCode PCSPAISetNBSteps(PC pc,int nbsteps1)
340: {
343: PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));
344: return(0);
345: }
347: /**********************************************************************/
349: /* added 1/7/99 g.h. */
352: /*@
353: PCSPAISetMax - set the size of various working buffers in
354: the SPAI preconditioner
356: Input Parameters:
357: + pc - the preconditioner
358: - n - size (default is 5000)
360: Level: intermediate
362: .seealso: PCSPAI, PCSetType()
363: @*/
364: PetscErrorCode PCSPAISetMax(PC pc,int max1)
365: {
368: PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));
369: return(0);
370: }
372: /**********************************************************************/
376: /*@
377: PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
378: in SPAI preconditioner
380: Input Parameters:
381: + pc - the preconditioner
382: - n - maximum number (default 5)
384: Level: intermediate
386: .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
387: @*/
388: PetscErrorCode PCSPAISetMaxNew(PC pc,int maxnew1)
389: {
392: PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));
393: return(0);
394: }
396: /**********************************************************************/
400: /*@
401: PCSPAISetBlockSize - set the block size for the SPAI preconditioner
403: Input Parameters:
404: + pc - the preconditioner
405: - n - block size (default 1)
407: Notes: A block
408: size of 1 treats A as a matrix of scalar elements. A
409: block size of s > 1 treats A as a matrix of sxs
410: blocks. A block size of 0 treats A as a matrix with
411: variable sized blocks, which are determined by
412: searching for dense square diagonal blocks in A.
413: This can be very effective for finite-element
414: matrices.
416: SPAI will convert A to block form, use a block
417: version of the preconditioner algorithm, and then
418: convert the result back to scalar form.
420: In many cases the a block-size parameter other than 1
421: can lead to very significant improvement in
422: performance.
425: Level: intermediate
427: .seealso: PCSPAI, PCSetType()
428: @*/
429: PetscErrorCode PCSPAISetBlockSize(PC pc,int block_size1)
430: {
433: PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));
434: return(0);
435: }
437: /**********************************************************************/
441: /*@
442: PCSPAISetCacheSize - specify cache size in the SPAI preconditioner
444: Input Parameters:
445: + pc - the preconditioner
446: - n - cache size {0,1,2,3,4,5} (default 5)
448: Notes: SPAI uses a hash table to cache messages and avoid
449: redundant communication. If suggest always using
450: 5. This parameter is irrelevant in the serial
451: version.
453: Level: intermediate
455: .seealso: PCSPAI, PCSetType()
456: @*/
457: PetscErrorCode PCSPAISetCacheSize(PC pc,int cache_size)
458: {
461: PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));
462: return(0);
463: }
465: /**********************************************************************/
469: /*@
470: PCSPAISetVerbose - verbosity level for the SPAI preconditioner
472: Input Parameters:
473: + pc - the preconditioner
474: - n - level (default 1)
476: Notes: print parameters, timings and matrix statistics
478: Level: intermediate
480: .seealso: PCSPAI, PCSetType()
481: @*/
482: PetscErrorCode PCSPAISetVerbose(PC pc,int verbose)
483: {
486: PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));
487: return(0);
488: }
490: /**********************************************************************/
494: /*@
495: PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner
497: Input Parameters:
498: + pc - the preconditioner
499: - n - 0 or 1
501: Notes: If A has a symmetric nonzero pattern use -sp 1 to
502: improve performance by eliminating some communication
503: in the parallel version. Even if A does not have a
504: symmetric nonzero pattern -sp 1 may well lead to good
505: results, but the code will not follow the published
506: SPAI algorithm exactly.
509: Level: intermediate
511: .seealso: PCSPAI, PCSetType()
512: @*/
513: PetscErrorCode PCSPAISetSp(PC pc,int sp)
514: {
517: PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));
518: return(0);
519: }
521: /**********************************************************************/
523: /**********************************************************************/
527: static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
528: {
529: PC_SPAI *ispai = (PC_SPAI*)pc->data;
531: int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
532: double epsilon1;
533: PetscBool flg;
536: PetscOptionsHead("SPAI options");
537: PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);
538: if (flg) {
539: PCSPAISetEpsilon(pc,epsilon1);
540: }
541: PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);
542: if (flg) {
543: PCSPAISetNBSteps(pc,nbsteps1);
544: }
545: /* added 1/7/99 g.h. */
546: PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);
547: if (flg) {
548: PCSPAISetMax(pc,max1);
549: }
550: PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);
551: if (flg) {
552: PCSPAISetMaxNew(pc,maxnew1);
553: }
554: PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);
555: if (flg) {
556: PCSPAISetBlockSize(pc,block_size1);
557: }
558: PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);
559: if (flg) {
560: PCSPAISetCacheSize(pc,cache_size);
561: }
562: PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);
563: if (flg) {
564: PCSPAISetVerbose(pc,verbose);
565: }
566: PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);
567: if (flg) {
568: PCSPAISetSp(pc,sp);
569: }
570: PetscOptionsTail();
571: return(0);
572: }
574: /**********************************************************************/
576: /*MC
577: PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
578: as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
580: Options Database Keys:
581: + -pc_spai_epsilon <eps> - set tolerance
582: . -pc_spai_nbstep <n> - set nbsteps
583: . -pc_spai_max <m> - set max
584: . -pc_spai_max_new <m> - set maxnew
585: . -pc_spai_block_size <n> - set block size
586: . -pc_spai_cache_size <n> - set cache size
587: . -pc_spai_sp <m> - set sp
588: - -pc_spai_set_verbose <true,false> - verbose output
590: Notes: This only works with AIJ matrices.
592: Level: beginner
594: Concepts: approximate inverse
596: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
597: PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
598: PCSPAISetVerbose(), PCSPAISetSp()
599: M*/
601: EXTERN_C_BEGIN
604: PetscErrorCode PCCreate_SPAI(PC pc)
605: {
606: PC_SPAI *ispai;
610: PetscNewLog(pc,PC_SPAI,&ispai);
611: pc->data = ispai;
613: pc->ops->destroy = PCDestroy_SPAI;
614: pc->ops->apply = PCApply_SPAI;
615: pc->ops->applyrichardson = 0;
616: pc->ops->setup = PCSetUp_SPAI;
617: pc->ops->view = PCView_SPAI;
618: pc->ops->setfromoptions = PCSetFromOptions_SPAI;
620: ispai->epsilon = .4;
621: ispai->nbsteps = 5;
622: ispai->max = 5000;
623: ispai->maxnew = 5;
624: ispai->block_size = 1;
625: ispai->cache_size = 5;
626: ispai->verbose = 0;
628: ispai->sp = 1;
629: MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));
631: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C",
632: "PCSPAISetEpsilon_SPAI",
633: PCSPAISetEpsilon_SPAI);
634: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C",
635: "PCSPAISetNBSteps_SPAI",
636: PCSPAISetNBSteps_SPAI);
637: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C",
638: "PCSPAISetMax_SPAI",
639: PCSPAISetMax_SPAI);
640: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC",
641: "PCSPAISetMaxNew_SPAI",
642: PCSPAISetMaxNew_SPAI);
643: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C",
644: "PCSPAISetBlockSize_SPAI",
645: PCSPAISetBlockSize_SPAI);
646: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C",
647: "PCSPAISetCacheSize_SPAI",
648: PCSPAISetCacheSize_SPAI);
649: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C",
650: "PCSPAISetVerbose_SPAI",
651: PCSPAISetVerbose_SPAI);
652: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C",
653: "PCSPAISetSp_SPAI",
654: PCSPAISetSp_SPAI);
656: return(0);
657: }
658: EXTERN_C_END
660: /**********************************************************************/
662: /*
663: Converts from a PETSc matrix to an SPAI matrix
664: */
667: PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
668: {
669: matrix *M;
670: int i,j,col;
671: int row_indx;
672: int len,pe,local_indx,start_indx;
673: int *mapping;
674: PetscErrorCode ierr;
675: const int *cols;
676: const double *vals;
677: int *num_ptr,n,mnl,nnl,nz,rstart,rend;
678: PetscMPIInt size,rank;
679: struct compressed_lines *rows;
682:
683: MPI_Comm_size(comm,&size);
684: MPI_Comm_rank(comm,&rank);
685: MatGetSize(A,&n,&n);
686: MatGetLocalSize(A,&mnl,&nnl);
688: /*
689: not sure why a barrier is required. commenting out
690: MPI_Barrier(comm);
691: */
693: M = new_matrix((SPAI_Comm)comm);
694:
695: M->n = n;
696: M->bs = 1;
697: M->max_block_size = 1;
699: M->mnls = (int*)malloc(sizeof(int)*size);
700: M->start_indices = (int*)malloc(sizeof(int)*size);
701: M->pe = (int*)malloc(sizeof(int)*n);
702: M->block_sizes = (int*)malloc(sizeof(int)*n);
703: for (i=0; i<n; i++) M->block_sizes[i] = 1;
705: MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);
707: M->start_indices[0] = 0;
708: for (i=1; i<size; i++) {
709: M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
710: }
712: M->mnl = M->mnls[M->myid];
713: M->my_start_index = M->start_indices[M->myid];
715: for (i=0; i<size; i++) {
716: start_indx = M->start_indices[i];
717: for (j=0; j<M->mnls[i]; j++)
718: M->pe[start_indx+j] = i;
719: }
721: if (AT) {
722: M->lines = new_compressed_lines(M->mnls[rank],1);
723: } else {
724: M->lines = new_compressed_lines(M->mnls[rank],0);
725: }
727: rows = M->lines;
729: /* Determine the mapping from global indices to pointers */
730: PetscMalloc(M->n*sizeof(int),&mapping);
731: pe = 0;
732: local_indx = 0;
733: for (i=0; i<M->n; i++) {
734: if (local_indx >= M->mnls[pe]) {
735: pe++;
736: local_indx = 0;
737: }
738: mapping[i] = local_indx + M->start_indices[pe];
739: local_indx++;
740: }
743: PetscMalloc(mnl*sizeof(int),&num_ptr);
745: /*********************************************************/
746: /************** Set up the row structure *****************/
747: /*********************************************************/
749: /* count number of nonzeros in every row */
750: MatGetOwnershipRange(A,&rstart,&rend);
751: for (i=rstart; i<rend; i++) {
752: MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
753: MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
754: }
756: /* allocate buffers */
757: len = 0;
758: for (i=0; i<mnl; i++) {
759: if (len < num_ptr[i]) len = num_ptr[i];
760: }
762: for (i=rstart; i<rend; i++) {
763: row_indx = i-rstart;
764: len = num_ptr[row_indx];
765: rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
766: rows->A[row_indx] = (double*)malloc(len*sizeof(double));
767: }
769: /* copy the matrix */
770: for (i=rstart; i<rend; i++) {
771: row_indx = i - rstart;
772: MatGetRow(A,i,&nz,&cols,&vals);
773: for (j=0; j<nz; j++) {
774: col = cols[j];
775: len = rows->len[row_indx]++;
776: rows->ptrs[row_indx][len] = mapping[col];
777: rows->A[row_indx][len] = vals[j];
778: }
779: rows->slen[row_indx] = rows->len[row_indx];
780: MatRestoreRow(A,i,&nz,&cols,&vals);
781: }
784: /************************************************************/
785: /************** Set up the column structure *****************/
786: /*********************************************************/
788: if (AT) {
790: /* count number of nonzeros in every column */
791: for (i=rstart; i<rend; i++) {
792: MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
793: MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
794: }
796: /* allocate buffers */
797: len = 0;
798: for (i=0; i<mnl; i++) {
799: if (len < num_ptr[i]) len = num_ptr[i];
800: }
802: for (i=rstart; i<rend; i++) {
803: row_indx = i-rstart;
804: len = num_ptr[row_indx];
805: rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int));
806: }
808: /* copy the matrix (i.e., the structure) */
809: for (i=rstart; i<rend; i++) {
810: row_indx = i - rstart;
811: MatGetRow(AT,i,&nz,&cols,&vals);
812: for (j=0; j<nz; j++) {
813: col = cols[j];
814: len = rows->rlen[row_indx]++;
815: rows->rptrs[row_indx][len] = mapping[col];
816: }
817: MatRestoreRow(AT,i,&nz,&cols,&vals);
818: }
819: }
821: PetscFree(num_ptr);
822: PetscFree(mapping);
824: order_pointers(M);
825: M->maxnz = calc_maxnz(M);
827: *B = M;
829: return(0);
830: }
832: /**********************************************************************/
834: /*
835: Converts from an SPAI matrix B to a PETSc matrix PB.
836: This assumes that the the SPAI matrix B is stored in
837: COMPRESSED-ROW format.
838: */
841: PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
842: {
843: PetscMPIInt size,rank;
845: int m,n,M,N;
846: int d_nz,o_nz;
847: int *d_nnz,*o_nnz;
848: int i,k,global_row,global_col,first_diag_col,last_diag_col;
849: PetscScalar val;
852: MPI_Comm_size(comm,&size);
853: MPI_Comm_rank(comm,&rank);
854:
855: m = n = B->mnls[rank];
856: d_nz = o_nz = 0;
858: /* Determine preallocation for MatCreateMPIAIJ */
859: PetscMalloc(m*sizeof(PetscInt),&d_nnz);
860: PetscMalloc(m*sizeof(PetscInt),&o_nnz);
861: for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
862: first_diag_col = B->start_indices[rank];
863: last_diag_col = first_diag_col + B->mnls[rank];
864: for (i=0; i<B->mnls[rank]; i++) {
865: for (k=0; k<B->lines->len[i]; k++) {
866: global_col = B->lines->ptrs[i][k];
867: if ((global_col >= first_diag_col) && (global_col < last_diag_col))
868: d_nnz[i]++;
869: else
870: o_nnz[i]++;
871: }
872: }
874: M = N = B->n;
875: /* Here we only know how to create AIJ format */
876: MatCreate(comm,PB);
877: MatSetSizes(*PB,m,n,M,N);
878: MatSetType(*PB,MATAIJ);
879: MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);
880: MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);
882: for (i=0; i<B->mnls[rank]; i++) {
883: global_row = B->start_indices[rank]+i;
884: for (k=0; k<B->lines->len[i]; k++) {
885: global_col = B->lines->ptrs[i][k];
886: val = B->lines->A[i][k];
887: MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);
888: }
889: }
891: PetscFree(d_nnz);
892: PetscFree(o_nnz);
894: MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);
895: MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);
897: return(0);
898: }
900: /**********************************************************************/
902: /*
903: Converts from an SPAI vector v to a PETSc vec Pv.
904: */
907: PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
908: {
910: PetscMPIInt size,rank;
911: int m,M,i,*mnls,*start_indices,*global_indices;
912:
914: MPI_Comm_size(comm,&size);
915: MPI_Comm_rank(comm,&rank);
916:
917: m = v->mnl;
918: M = v->n;
919:
920:
921: VecCreateMPI(comm,m,M,Pv);
923: PetscMalloc(size*sizeof(int),&mnls);
924: MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);
925:
926: PetscMalloc(size*sizeof(int),&start_indices);
927: start_indices[0] = 0;
928: for (i=1; i<size; i++)
929: start_indices[i] = start_indices[i-1] +mnls[i-1];
930:
931: PetscMalloc(v->mnl*sizeof(int),&global_indices);
932: for (i=0; i<v->mnl; i++)
933: global_indices[i] = start_indices[rank] + i;
935: PetscFree(mnls);
936: PetscFree(start_indices);
937:
938: VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);
939: VecAssemblyBegin(*Pv);
940: VecAssemblyEnd(*Pv);
942: PetscFree(global_indices);
943: return(0);
944: }