Actual source code: mpiaij.c
petsc-3.5.4 2015-05-23
2: #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
3: #include <petsc-private/vecimpl.h>
4: #include <petscblaslapack.h>
5: #include <petscsf.h>
7: /*MC
8: MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10: This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11: and MATMPIAIJ otherwise. As a result, for single process communicators,
12: MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13: for communicators controlling multiple processes. It is recommended that you call both of
14: the above preallocation routines for simplicity.
16: Options Database Keys:
17: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19: Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the matrix also automatically switches over to using inodes when
20: enough of them exist.
22: Level: beginner
24: .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25: M*/
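/*
   Example (editor's sketch, not part of the original source): creating an AIJ matrix and
   calling both preallocation routines, as the manual page above recommends.
   MatSeqAIJSetPreallocation() takes effect on a one-process communicator and
   MatMPIAIJSetPreallocation() otherwise. All calls are standard PETSc API; the global
   size 100 and the per-row nonzero estimates are placeholders.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(A,MATAIJ);
     MatSetFromOptions(A);
     MatSeqAIJSetPreallocation(A,5,NULL);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
*/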
27: /*MC
28: MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30: This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31: and MATMPIAIJCRL otherwise. As a result, for single process communicators,
32: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33: for communicators controlling multiple processes. It is recommended that you call both of
34: the above preallocation routines for simplicity.
36: Options Database Keys:
37: . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39: Level: beginner
41: .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42: M*/
46: PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47: {
48: PetscErrorCode ierr;
49: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
50: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data;
51: Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data;
52: const PetscInt *ia,*ib;
53: const MatScalar *aa,*bb;
54: PetscInt na,nb,i,j,*rows,cnt=0,n0rows;
55: PetscInt m = M->rmap->n,rstart = M->rmap->rstart;
58: *keptrows = 0;
59: ia = a->i;
60: ib = b->i;
61: for (i=0; i<m; i++) {
62: na = ia[i+1] - ia[i];
63: nb = ib[i+1] - ib[i];
64: if (!na && !nb) {
65: cnt++;
66: goto ok1;
67: }
68: aa = a->a + ia[i];
69: for (j=0; j<na; j++) {
70: if (aa[j] != 0.0) goto ok1;
71: }
72: bb = b->a + ib[i];
73: for (j=0; j <nb; j++) {
74: if (bb[j] != 0.0) goto ok1;
75: }
76: cnt++;
77: ok1:;
78: }
79: MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));
80: if (!n0rows) return(0);
81: PetscMalloc1((M->rmap->n-cnt),&rows);
82: cnt = 0;
83: for (i=0; i<m; i++) {
84: na = ia[i+1] - ia[i];
85: nb = ib[i+1] - ib[i];
86: if (!na && !nb) continue;
87: aa = a->a + ia[i];
88: for (j=0; j<na;j++) {
89: if (aa[j] != 0.0) {
90: rows[cnt++] = rstart + i;
91: goto ok2;
92: }
93: }
94: bb = b->a + ib[i];
95: for (j=0; j<nb; j++) {
96: if (bb[j] != 0.0) {
97: rows[cnt++] = rstart + i;
98: goto ok2;
99: }
100: }
101: ok2:;
102: }
103: ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);
104: return(0);
105: }
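/*
   Example (editor's sketch): the routine above is reached through the public
   MatFindNonzeroRows() interface; keptrows is left NULL when no process has an
   all-zero row, so the result should be checked before use.

     IS keptrows;
     MatFindNonzeroRows(A,&keptrows);
     if (keptrows) {
       ISView(keptrows,PETSC_VIEWER_STDOUT_WORLD);
       ISDestroy(&keptrows);
     }
*/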
109: PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110: {
111: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data;
113: PetscInt i,rstart,nrows,*rows;
116: *zrows = NULL;
117: MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);
118: MatGetOwnershipRange(M,&rstart,NULL);
119: for (i=0; i<nrows; i++) rows[i] += rstart;
120: ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);
121: return(0);
122: }
126: PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127: {
129: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
130: PetscInt i,n,*garray = aij->garray;
131: Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data;
132: Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data;
133: PetscReal *work;
136: MatGetSize(A,NULL,&n);
137: PetscCalloc1(n,&work);
138: if (type == NORM_2) {
139: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141: }
142: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144: }
145: } else if (type == NORM_1) {
146: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148: }
149: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151: }
152: } else if (type == NORM_INFINITY) {
153: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154: work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155: }
156: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157: work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158: }
160: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161: if (type == NORM_INFINITY) {
162: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
163: } else {
164: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
165: }
166: PetscFree(work);
167: if (type == NORM_2) {
168: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169: }
170: return(0);
171: }
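/*
   Example (editor's sketch): MatGetColumnNorms() is the public entry point for the routine
   above; the norms array must have one entry per global column on every process.

     PetscInt  N;
     PetscReal *norms;
     MatGetSize(A,NULL,&N);
     PetscMalloc1(N,&norms);
     MatGetColumnNorms(A,NORM_2,norms);
     PetscFree(norms);
*/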
175: /*
176: Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177: MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179: Only for square matrices
181: Used by a preconditioner, hence PETSC_EXTERN
182: */
183: PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184: {
185: PetscMPIInt rank,size;
186: PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
188: Mat mat;
189: Mat_SeqAIJ *gmata;
190: PetscMPIInt tag;
191: MPI_Status status;
192: PetscBool aij;
193: MatScalar *gmataa,*ao,*ad,*gmataarestore=0;
196: MPI_Comm_rank(comm,&rank);
197: MPI_Comm_size(comm,&size);
198: if (!rank) {
199: PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);
200: if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201: }
202: if (reuse == MAT_INITIAL_MATRIX) {
203: MatCreate(comm,&mat);
204: MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
205: MatGetBlockSizes(gmat,&bses[0],&bses[1]);
206: MPI_Bcast(bses,2,MPIU_INT,0,comm);
207: MatSetBlockSizes(mat,bses[0],bses[1]);
208: MatSetType(mat,MATAIJ);
209: PetscMalloc1((size+1),&rowners);
210: PetscMalloc2(m,&dlens,m,&olens);
211: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
213: rowners[0] = 0;
214: for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
215: rstart = rowners[rank];
216: rend = rowners[rank+1];
217: PetscObjectGetNewTag((PetscObject)mat,&tag);
218: if (!rank) {
219: gmata = (Mat_SeqAIJ*) gmat->data;
220: /* send row lengths to all processors */
221: for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
222: for (i=1; i<size; i++) {
223: MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
224: }
225: /* determine the number of diagonal and off-diagonal nonzeros in each row */
226: PetscMemzero(olens,m*sizeof(PetscInt));
227: PetscCalloc1(m,&ld);
228: jj = 0;
229: for (i=0; i<m; i++) {
230: for (j=0; j<dlens[i]; j++) {
231: if (gmata->j[jj] < rstart) ld[i]++;
232: if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
233: jj++;
234: }
235: }
236: /* send column indices to other processes */
237: for (i=1; i<size; i++) {
238: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
239: MPI_Send(&nz,1,MPIU_INT,i,tag,comm);
240: MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);
241: }
243: /* send numerical values to other processes */
244: for (i=1; i<size; i++) {
245: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
246: MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
247: }
248: gmataa = gmata->a;
249: gmataj = gmata->j;
251: } else {
252: /* receive row lengths */
253: MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);
254: /* receive column indices */
255: MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);
256: PetscMalloc2(nz,&gmataa,nz,&gmataj);
257: MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);
258: /* determine the number of diagonal and off-diagonal nonzeros in each row */
259: PetscMemzero(olens,m*sizeof(PetscInt));
260: PetscCalloc1(m,&ld);
261: jj = 0;
262: for (i=0; i<m; i++) {
263: for (j=0; j<dlens[i]; j++) {
264: if (gmataj[jj] < rstart) ld[i]++;
265: if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
266: jj++;
267: }
268: }
269: /* receive numerical values */
270: PetscMemzero(gmataa,nz*sizeof(PetscScalar));
271: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
272: }
273: /* set preallocation */
274: for (i=0; i<m; i++) {
275: dlens[i] -= olens[i];
276: }
277: MatSeqAIJSetPreallocation(mat,0,dlens);
278: MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);
280: for (i=0; i<m; i++) {
281: dlens[i] += olens[i];
282: }
283: cnt = 0;
284: for (i=0; i<m; i++) {
285: row = rstart + i;
286: MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);
287: cnt += dlens[i];
288: }
289: if (rank) {
290: PetscFree2(gmataa,gmataj);
291: }
292: PetscFree2(dlens,olens);
293: PetscFree(rowners);
295: ((Mat_MPIAIJ*)(mat->data))->ld = ld;
297: *inmat = mat;
298: } else { /* column indices are already set; only need to move over numerical values from process 0 */
299: Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
300: Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
301: mat = *inmat;
302: PetscObjectGetNewTag((PetscObject)mat,&tag);
303: if (!rank) {
304: /* send numerical values to other processes */
305: gmata = (Mat_SeqAIJ*) gmat->data;
306: MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);
307: gmataa = gmata->a;
308: for (i=1; i<size; i++) {
309: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
310: MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
311: }
312: nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
313: } else {
314: /* receive numerical values from process 0*/
315: nz = Ad->nz + Ao->nz;
316: PetscMalloc1(nz,&gmataa); gmataarestore = gmataa;
317: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
318: }
319: /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
320: ld = ((Mat_MPIAIJ*)(mat->data))->ld;
321: ad = Ad->a;
322: ao = Ao->a;
323: if (mat->rmap->n) {
324: i = 0;
325: nz = ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
326: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
327: }
328: for (i=1; i<mat->rmap->n; i++) {
329: nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
330: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
331: }
332: i--;
333: if (mat->rmap->n) {
334: nz = Ao->i[i+1] - Ao->i[i] - ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));
335: }
336: if (rank) {
337: PetscFree(gmataarestore);
338: }
339: }
340: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
341: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
342: return(0);
343: }
345: /*
346: Local utility routine that creates a mapping from the global column
347: number to the local number in the off-diagonal part of the local
348: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
349: a slightly higher hash table cost; without it, it is not scalable (each processor
350: has an order N integer array, but access is fast).
351: */
354: PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
355: {
356: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
358: PetscInt n = aij->B->cmap->n,i;
361: if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
362: #if defined(PETSC_USE_CTABLE)
363: PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);
364: for (i=0; i<n; i++) {
365: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);
366: }
367: #else
368: PetscCalloc1((mat->cmap->N+1),&aij->colmap);
369: PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));
370: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
371: #endif
372: return(0);
373: }
375: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
376: { \
377: if (col <= lastcol1) low1 = 0; \
378: else high1 = nrow1; \
379: lastcol1 = col;\
380: while (high1-low1 > 5) { \
381: t = (low1+high1)/2; \
382: if (rp1[t] > col) high1 = t; \
383: else low1 = t; \
384: } \
385: for (_i=low1; _i<high1; _i++) { \
386: if (rp1[_i] > col) break; \
387: if (rp1[_i] == col) { \
388: if (addv == ADD_VALUES) ap1[_i] += value; \
389: else ap1[_i] = value; \
390: goto a_noinsert; \
391: } \
392: } \
393: if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
394: if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
395: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
396: MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
397: N = nrow1++ - 1; a->nz++; high1++; \
398: /* shift up all the later entries in this row */ \
399: for (ii=N; ii>=_i; ii--) { \
400: rp1[ii+1] = rp1[ii]; \
401: ap1[ii+1] = ap1[ii]; \
402: } \
403: rp1[_i] = col; \
404: ap1[_i] = value; \
405: A->nonzerostate++;\
406: a_noinsert: ; \
407: ailen[row] = nrow1; \
408: }
411: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
412: { \
413: if (col <= lastcol2) low2 = 0; \
414: else high2 = nrow2; \
415: lastcol2 = col; \
416: while (high2-low2 > 5) { \
417: t = (low2+high2)/2; \
418: if (rp2[t] > col) high2 = t; \
419: else low2 = t; \
420: } \
421: for (_i=low2; _i<high2; _i++) { \
422: if (rp2[_i] > col) break; \
423: if (rp2[_i] == col) { \
424: if (addv == ADD_VALUES) ap2[_i] += value; \
425: else ap2[_i] = value; \
426: goto b_noinsert; \
427: } \
428: } \
429: if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
430: if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
431: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
432: MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
433: N = nrow2++ - 1; b->nz++; high2++; \
434: /* shift up all the later entries in this row */ \
435: for (ii=N; ii>=_i; ii--) { \
436: rp2[ii+1] = rp2[ii]; \
437: ap2[ii+1] = ap2[ii]; \
438: } \
439: rp2[_i] = col; \
440: ap2[_i] = value; \
441: B->nonzerostate++; \
442: b_noinsert: ; \
443: bilen[row] = nrow2; \
444: }
448: PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449: {
450: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
451: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
453: PetscInt l,*garray = mat->garray,diag;
456: /* code only works for square matrices A */
458: /* find size of row to the left of the diagonal part */
459: MatGetOwnershipRange(A,&diag,0);
460: row = row - diag;
461: for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462: if (garray[b->j[b->i[row]+l]] > diag) break;
463: }
464: PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));
466: /* diagonal part */
467: PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));
469: /* right of diagonal part */
470: PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));
471: return(0);
472: }
476: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477: {
478: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
479: PetscScalar value;
481: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
482: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483: PetscBool roworiented = aij->roworiented;
485: /* Some Variables required in the macro */
486: Mat A = aij->A;
487: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
488: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489: MatScalar *aa = a->a;
490: PetscBool ignorezeroentries = a->ignorezeroentries;
491: Mat B = aij->B;
492: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
493: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494: MatScalar *ba = b->a;
496: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497: PetscInt nonew;
498: MatScalar *ap1,*ap2;
501: for (i=0; i<m; i++) {
502: if (im[i] < 0) continue;
503: #if defined(PETSC_USE_DEBUG)
504: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
505: #endif
506: if (im[i] >= rstart && im[i] < rend) {
507: row = im[i] - rstart;
508: lastcol1 = -1;
509: rp1 = aj + ai[row];
510: ap1 = aa + ai[row];
511: rmax1 = aimax[row];
512: nrow1 = ailen[row];
513: low1 = 0;
514: high1 = nrow1;
515: lastcol2 = -1;
516: rp2 = bj + bi[row];
517: ap2 = ba + bi[row];
518: rmax2 = bimax[row];
519: nrow2 = bilen[row];
520: low2 = 0;
521: high2 = nrow2;
523: for (j=0; j<n; j++) {
524: if (roworiented) value = v[i*n+j];
525: else value = v[i+j*m];
526: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527: if (in[j] >= cstart && in[j] < cend) {
528: col = in[j] - cstart;
529: nonew = a->nonew;
530: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531: } else if (in[j] < 0) continue;
532: #if defined(PETSC_USE_DEBUG)
533: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534: #endif
535: else {
536: if (mat->was_assembled) {
537: if (!aij->colmap) {
538: MatCreateColmap_MPIAIJ_Private(mat);
539: }
540: #if defined(PETSC_USE_CTABLE)
541: PetscTableFind(aij->colmap,in[j]+1,&col);
542: col--;
543: #else
544: col = aij->colmap[in[j]] - 1;
545: #endif
546: if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547: MatDisAssemble_MPIAIJ(mat);
548: col = in[j];
549: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550: B = aij->B;
551: b = (Mat_SeqAIJ*)B->data;
552: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553: rp2 = bj + bi[row];
554: ap2 = ba + bi[row];
555: rmax2 = bimax[row];
556: nrow2 = bilen[row];
557: low2 = 0;
558: high2 = nrow2;
559: bm = aij->B->rmap->n;
560: ba = b->a;
561: } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562: } else col = in[j];
563: nonew = b->nonew;
564: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565: }
566: }
567: } else {
568: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569: if (!aij->donotstash) {
570: mat->assembled = PETSC_FALSE;
571: if (roworiented) {
572: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
573: } else {
574: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
575: }
576: }
577: }
578: }
579: return(0);
580: }
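/*
   Example (editor's sketch): MatSetValues() dispatches to the routine above for MPIAIJ.
   Values destined for rows owned by another process are stashed and exchanged during
   assembly, so the usual pattern is to insert with global indices and then assemble.
   Here A is an assumed, preallocated N x N matrix and the values form a 1-D Laplacian.

     PetscInt    i,rstart,rend,N,ncols,cols[3];
     PetscScalar vals[3];
     MatGetOwnershipRange(A,&rstart,&rend);
     MatGetSize(A,&N,NULL);
     for (i=rstart; i<rend; i++) {
       ncols = 0;
       if (i > 0)   {cols[ncols] = i-1; vals[ncols] = -1.0; ncols++;}
       cols[ncols] = i; vals[ncols] = 2.0; ncols++;
       if (i < N-1) {cols[ncols] = i+1; vals[ncols] = -1.0; ncols++;}
       MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);
     }
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/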
584: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585: {
586: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
588: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
592: for (i=0; i<m; i++) {
593: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595: if (idxm[i] >= rstart && idxm[i] < rend) {
596: row = idxm[i] - rstart;
597: for (j=0; j<n; j++) {
598: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600: if (idxn[j] >= cstart && idxn[j] < cend) {
601: col = idxn[j] - cstart;
602: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
603: } else {
604: if (!aij->colmap) {
605: MatCreateColmap_MPIAIJ_Private(mat);
606: }
607: #if defined(PETSC_USE_CTABLE)
608: PetscTableFind(aij->colmap,idxn[j]+1,&col);
609: col--;
610: #else
611: col = aij->colmap[idxn[j]] - 1;
612: #endif
613: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614: else {
615: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
616: }
617: }
618: }
619: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620: }
621: return(0);
622: }
624: extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
628: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
629: {
630: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
632: PetscInt nstash,reallocs;
633: InsertMode addv;
636: if (aij->donotstash || mat->nooffprocentries) return(0);
638: /* make sure all processors are either in INSERTMODE or ADDMODE */
639: MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
640: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
641: mat->insertmode = addv; /* in case this processor had no cache */
643: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
644: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
645: PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
646: return(0);
647: }
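/*
   Example (editor's sketch): the Allreduce above enforces that all processes use the same
   InsertMode in a given assembly phase; the standard way to switch between INSERT_VALUES
   and ADD_VALUES on the same matrix is a flush assembly in between. row, col, v and w are
   placeholders assumed to exist.

     MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);
     MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);
     MatSetValues(A,1,&row,1,&col,&w,ADD_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/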
651: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
652: {
653: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
654: Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data;
656: PetscMPIInt n;
657: PetscInt i,j,rstart,ncols,flg;
658: PetscInt *row,*col;
659: PetscBool other_disassembled;
660: PetscScalar *val;
661: InsertMode addv = mat->insertmode;
663: /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
666: if (!aij->donotstash && !mat->nooffprocentries) {
667: while (1) {
668: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
669: if (!flg) break;
671: for (i=0; i<n; ) {
672: /* Now identify the consecutive vals belonging to the same row */
673: for (j=i,rstart=row[j]; j<n; j++) {
674: if (row[j] != rstart) break;
675: }
676: if (j < n) ncols = j-i;
677: else ncols = n-i;
678: /* Now assemble all these values with a single function call */
679: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
681: i = j;
682: }
683: }
684: MatStashScatterEnd_Private(&mat->stash);
685: }
686: MatAssemblyBegin(aij->A,mode);
687: MatAssemblyEnd(aij->A,mode);
689: /* determine if any processor has disassembled, if so we must
690: also disassemble ourselves, in order that we may reassemble. */
691: /*
692: if nonzero structure of submatrix B cannot change then we know that
693: no processor disassembled thus we can skip this stuff
694: */
695: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
696: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));
697: if (mat->was_assembled && !other_disassembled) {
698: MatDisAssemble_MPIAIJ(mat);
699: }
700: }
701: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702: MatSetUpMultiply_MPIAIJ(mat);
703: }
704: MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);
705: MatAssemblyBegin(aij->B,mode);
706: MatAssemblyEnd(aij->B,mode);
708: PetscFree2(aij->rowvalues,aij->rowindices);
710: aij->rowvalues = 0;
712: /* used by MatAXPY() */
713: a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
714: a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */
716: VecDestroy(&aij->diag);
717: if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
719: /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
720: if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
721: PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
722: MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));
723: }
724: return(0);
725: }
729: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
730: {
731: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
735: MatZeroEntries(l->A);
736: MatZeroEntries(l->B);
737: return(0);
738: }
742: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
743: {
744: Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
745: PetscInt *owners = A->rmap->range;
746: PetscInt n = A->rmap->n;
747: PetscSF sf;
748: PetscInt *lrows;
749: PetscSFNode *rrows;
750: PetscInt r, p = 0, len = 0;
754: /* Create SF where leaves are input rows and roots are owned rows */
755: PetscMalloc1(n, &lrows);
756: for (r = 0; r < n; ++r) lrows[r] = -1;
757: if (!A->nooffproczerorows) {PetscMalloc1(N, &rrows);}
758: for (r = 0; r < N; ++r) {
759: const PetscInt idx = rows[r];
760: if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
761: if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
762: PetscLayoutFindOwner(A->rmap,idx,&p);
763: }
764: if (A->nooffproczerorows) {
765: if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
766: lrows[len++] = idx - owners[p];
767: } else {
768: rrows[r].rank = p;
769: rrows[r].index = rows[r] - owners[p];
770: }
771: }
772: if (!A->nooffproczerorows) {
773: PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);
774: PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);
775: /* Collect flags for rows to be zeroed */
776: PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);
777: PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);
778: PetscSFDestroy(&sf);
779: /* Compress and put in row numbers */
780: for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
781: }
782: /* fix right hand side if needed */
783: if (x && b) {
784: const PetscScalar *xx;
785: PetscScalar *bb;
787: VecGetArrayRead(x, &xx);
788: VecGetArray(b, &bb);
789: for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
790: VecRestoreArrayRead(x, &xx);
791: VecRestoreArray(b, &bb);
792: }
793: /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
794: MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);
795: if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
796: MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);
797: } else if (diag != 0.0) {
798: MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);
799: if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
800: for (r = 0; r < len; ++r) {
801: const PetscInt row = lrows[r] + A->rmap->rstart;
802: MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);
803: }
804: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
805: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
806: } else {
807: MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);
808: }
809: PetscFree(lrows);
811: /* only change matrix nonzero state if pattern was allowed to be changed */
812: if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
813: PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
814: MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));
815: }
816: return(0);
817: }
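/*
   Example (editor's sketch): typical use of MatZeroRows() on a parallel AIJ matrix to impose
   Dirichlet conditions. Rows are given by global index and may be owned by other processes
   (unless MAT_NO_OFF_PROC_ZERO_ROWS is set); passing x and b also fixes the right-hand side
   as done above. A, x and b are assumed to exist; rows holds placeholder global row numbers.

     PetscInt rows[2] = {0,1};
     MatZeroRows(A,2,rows,1.0,x,b);
*/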
821: PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
822: {
823: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
824: PetscErrorCode ierr;
825: PetscMPIInt n = A->rmap->n;
826: PetscInt i,j,r,m,p = 0,len = 0;
827: PetscInt *lrows,*owners = A->rmap->range;
828: PetscSFNode *rrows;
829: PetscSF sf;
830: const PetscScalar *xx;
831: PetscScalar *bb,*mask;
832: Vec xmask,lmask;
833: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data;
834: const PetscInt *aj, *ii,*ridx;
835: PetscScalar *aa;
838: /* Create SF where leaves are input rows and roots are owned rows */
839: PetscMalloc1(n, &lrows);
840: for (r = 0; r < n; ++r) lrows[r] = -1;
841: PetscMalloc1(N, &rrows);
842: for (r = 0; r < N; ++r) {
843: const PetscInt idx = rows[r];
844: if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
845: if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
846: PetscLayoutFindOwner(A->rmap,idx,&p);
847: }
848: rrows[r].rank = p;
849: rrows[r].index = rows[r] - owners[p];
850: }
851: PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);
852: PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);
853: /* Collect flags for rows to be zeroed */
854: PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
855: PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
856: PetscSFDestroy(&sf);
857: /* Compress and put in row numbers */
858: for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
859: /* zero diagonal part of matrix */
860: MatZeroRowsColumns(l->A,len,lrows,diag,x,b);
861: /* handle off diagonal part of matrix */
862: MatGetVecs(A,&xmask,NULL);
863: VecDuplicate(l->lvec,&lmask);
864: VecGetArray(xmask,&bb);
865: for (i=0; i<len; i++) bb[lrows[i]] = 1;
866: VecRestoreArray(xmask,&bb);
867: VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
868: VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
869: VecDestroy(&xmask);
870: if (x) {
871: VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);
872: VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);
873: VecGetArrayRead(l->lvec,&xx);
874: VecGetArray(b,&bb);
875: }
876: VecGetArray(lmask,&mask);
877: /* remove zeroed rows of off diagonal matrix */
878: ii = aij->i;
879: for (i=0; i<len; i++) {
880: PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));
881: }
882: /* loop over all elements of off process part of matrix zeroing removed columns*/
883: if (aij->compressedrow.use) {
884: m = aij->compressedrow.nrows;
885: ii = aij->compressedrow.i;
886: ridx = aij->compressedrow.rindex;
887: for (i=0; i<m; i++) {
888: n = ii[i+1] - ii[i];
889: aj = aij->j + ii[i];
890: aa = aij->a + ii[i];
892: for (j=0; j<n; j++) {
893: if (PetscAbsScalar(mask[*aj])) {
894: if (b) bb[*ridx] -= *aa*xx[*aj];
895: *aa = 0.0;
896: }
897: aa++;
898: aj++;
899: }
900: ridx++;
901: }
902: } else { /* do not use compressed row format */
903: m = l->B->rmap->n;
904: for (i=0; i<m; i++) {
905: n = ii[i+1] - ii[i];
906: aj = aij->j + ii[i];
907: aa = aij->a + ii[i];
908: for (j=0; j<n; j++) {
909: if (PetscAbsScalar(mask[*aj])) {
910: if (b) bb[i] -= *aa*xx[*aj];
911: *aa = 0.0;
912: }
913: aa++;
914: aj++;
915: }
916: }
917: }
918: if (x) {
919: VecRestoreArray(b,&bb);
920: VecRestoreArrayRead(l->lvec,&xx);
921: }
922: VecRestoreArray(lmask,&mask);
923: VecDestroy(&lmask);
924: PetscFree(lrows);
926: /* only change matrix nonzero state if pattern was allowed to be changed */
927: if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
928: PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
929: MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));
930: }
931: return(0);
932: }
936: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
937: {
938: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
940: PetscInt nt;
943: VecGetLocalSize(xx,&nt);
944: if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
945: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
946: (*a->A->ops->mult)(a->A,xx,yy);
947: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
948: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
949: return(0);
950: }
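/*
   Editor's note (sketch): the routine above overlaps communication with computation. With
   the local part of A stored as a diagonal block a->A and an off-diagonal block a->B, the
   product is

     y = (a->A) * x_local + (a->B) * x_ghost

   where x_ghost (a->lvec) is gathered by the VecScatter while (a->A)*x_local is computed.
   From user code this is simply MatMult(A,x,y).
*/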
954: PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
955: {
956: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
960: MatMultDiagonalBlock(a->A,bb,xx);
961: return(0);
962: }
966: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
967: {
968: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
972: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
973: (*a->A->ops->multadd)(a->A,xx,yy,zz);
974: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
975: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
976: return(0);
977: }
981: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
982: {
983: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
985: PetscBool merged;
988: VecScatterGetMerged(a->Mvctx,&merged);
989: /* do nondiagonal part */
990: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
991: if (!merged) {
992: /* send it on its way */
993: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
994: /* do local part */
995: (*a->A->ops->multtranspose)(a->A,xx,yy);
996: /* receive remote parts: note this assumes the values are not actually */
997: /* added in yy until the next line, */
998: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
999: } else {
1000: /* do local part */
1001: (*a->A->ops->multtranspose)(a->A,xx,yy);
1002: /* send it on its way */
1003: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1004: /* values actually were received in the Begin() but we need to call this nop */
1005: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1006: }
1007: return(0);
1008: }
1012: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
1013: {
1014: MPI_Comm comm;
1015: Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1016: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1017: IS Me,Notme;
1019: PetscInt M,N,first,last,*notme,i;
1020: PetscMPIInt size;
1023: /* Easy test: symmetric diagonal block */
1024: Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1025: MatIsTranspose(Adia,Bdia,tol,f);
1026: if (!*f) return(0);
1027: PetscObjectGetComm((PetscObject)Amat,&comm);
1028: MPI_Comm_size(comm,&size);
1029: if (size == 1) return(0);
1031: /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1032: MatGetSize(Amat,&M,&N);
1033: MatGetOwnershipRange(Amat,&first,&last);
1034: PetscMalloc1((N-last+first),&notme);
1035: for (i=0; i<first; i++) notme[i] = i;
1036: for (i=last; i<M; i++) notme[i-last+first] = i;
1037: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);
1038: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
1039: MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
1040: Aoff = Aoffs[0];
1041: MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
1042: Boff = Boffs[0];
1043: MatIsTranspose(Aoff,Boff,tol,f);
1044: MatDestroyMatrices(1,&Aoffs);
1045: MatDestroyMatrices(1,&Boffs);
1046: ISDestroy(&Me);
1047: ISDestroy(&Notme);
1048: PetscFree(notme);
1049: return(0);
1050: }
1054: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1055: {
1056: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1060: /* do nondiagonal part */
1061: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1062: /* send it on its way */
1063: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1064: /* do local part */
1065: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1066: /* receive remote parts */
1067: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1068: return(0);
1069: }
1071: /*
1072: This only works correctly for square matrices where the subblock A->A is the
1073: diagonal block
1074: */
1077: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1078: {
1080: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1083: if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1084: if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1085: MatGetDiagonal(a->A,v);
1086: return(0);
1087: }
1091: PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1092: {
1093: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1097: MatScale(a->A,aa);
1098: MatScale(a->B,aa);
1099: return(0);
1100: }
1104: PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1105: {
1107: Mat_Redundant *redund = *redundant;
1108: PetscInt i;
1111: *redundant = NULL;
1112: if (redund){
1113: if (redund->matseq) { /* via MatGetSubMatrices() */
1114: ISDestroy(&redund->isrow);
1115: ISDestroy(&redund->iscol);
1116: MatDestroy(&redund->matseq[0]);
1117: PetscFree(redund->matseq);
1118: } else {
1119: PetscFree2(redund->send_rank,redund->recv_rank);
1120: PetscFree(redund->sbuf_j);
1121: PetscFree(redund->sbuf_a);
1122: for (i=0; i<redund->nrecvs; i++) {
1123: PetscFree(redund->rbuf_j[i]);
1124: PetscFree(redund->rbuf_a[i]);
1125: }
1126: PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);
1127: }
1129: if (redund->psubcomm) {
1130: PetscSubcommDestroy(&redund->psubcomm);
1131: }
1132: PetscFree(redund);
1133: }
1134: return(0);
1135: }
1139: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1140: {
1141: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1145: #if defined(PETSC_USE_LOG)
1146: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1147: #endif
1148: MatDestroy_Redundant(&aij->redundant);
1149: MatStashDestroy_Private(&mat->stash);
1150: VecDestroy(&aij->diag);
1151: MatDestroy(&aij->A);
1152: MatDestroy(&aij->B);
1153: #if defined(PETSC_USE_CTABLE)
1154: PetscTableDestroy(&aij->colmap);
1155: #else
1156: PetscFree(aij->colmap);
1157: #endif
1158: PetscFree(aij->garray);
1159: VecDestroy(&aij->lvec);
1160: VecScatterDestroy(&aij->Mvctx);
1161: PetscFree2(aij->rowvalues,aij->rowindices);
1162: PetscFree(aij->ld);
1163: PetscFree(mat->data);
1165: PetscObjectChangeTypeName((PetscObject)mat,0);
1166: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);
1167: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);
1168: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);
1169: PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);
1170: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);
1171: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);
1172: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);
1173: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);
1174: return(0);
1175: }
1179: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1180: {
1181: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1182: Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data;
1183: Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data;
1185: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
1186: int fd;
1187: PetscInt nz,header[4],*row_lengths,*range=0,rlen,i;
1188: PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1189: PetscScalar *column_values;
1190: PetscInt message_count,flowcontrolcount;
1191: FILE *file;
1194: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
1195: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
1196: nz = A->nz + B->nz;
1197: if (!rank) {
1198: header[0] = MAT_FILE_CLASSID;
1199: header[1] = mat->rmap->N;
1200: header[2] = mat->cmap->N;
1202: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1203: PetscViewerBinaryGetDescriptor(viewer,&fd);
1204: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
1205: /* get largest number of rows any processor has */
1206: rlen = mat->rmap->n;
1207: range = mat->rmap->range;
1208: for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1209: } else {
1210: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1211: rlen = mat->rmap->n;
1212: }
1214: /* load up the local row counts */
1215: PetscMalloc1((rlen+1),&row_lengths);
1216: for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1218: /* store the row lengths to the file */
1219: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1220: if (!rank) {
1221: PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);
1222: for (i=1; i<size; i++) {
1223: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1224: rlen = range[i+1] - range[i];
1225: MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));
1226: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);
1227: }
1228: PetscViewerFlowControlEndMaster(viewer,&message_count);
1229: } else {
1230: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1231: MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1232: PetscViewerFlowControlEndWorker(viewer,&message_count);
1233: }
1234: PetscFree(row_lengths);
1236: /* load up the local column indices */
1237: nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
1238: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));
1239: PetscMalloc1((nzmax+1),&column_indices);
1240: cnt = 0;
1241: for (i=0; i<mat->rmap->n; i++) {
1242: for (j=B->i[i]; j<B->i[i+1]; j++) {
1243: if ((col = garray[B->j[j]]) > cstart) break;
1244: column_indices[cnt++] = col;
1245: }
1246: for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1247: for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1248: }
1249: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1251: /* store the column indices to the file */
1252: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1253: if (!rank) {
1254: MPI_Status status;
1255: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
1256: for (i=1; i<size; i++) {
1257: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1258: MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1259: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1260: MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));
1261: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);
1262: }
1263: PetscViewerFlowControlEndMaster(viewer,&message_count);
1264: } else {
1265: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1266: MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1267: MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1268: PetscViewerFlowControlEndWorker(viewer,&message_count);
1269: }
1270: PetscFree(column_indices);
1272: /* load up the local column values */
1273: PetscMalloc1((nzmax+1),&column_values);
1274: cnt = 0;
1275: for (i=0; i<mat->rmap->n; i++) {
1276: for (j=B->i[i]; j<B->i[i+1]; j++) {
1277: if (garray[B->j[j]] > cstart) break;
1278: column_values[cnt++] = B->a[j];
1279: }
1280: for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1281: for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1282: }
1283: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1285: /* store the column values to the file */
1286: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1287: if (!rank) {
1288: MPI_Status status;
1289: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
1290: for (i=1; i<size; i++) {
1291: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1292: MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1293: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1294: MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));
1295: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);
1296: }
1297: PetscViewerFlowControlEndMaster(viewer,&message_count);
1298: } else {
1299: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1300: MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1301: MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
1302: PetscViewerFlowControlEndWorker(viewer,&message_count);
1303: }
1304: PetscFree(column_values);
1306: PetscViewerBinaryGetInfoPointer(viewer,&file);
1307: if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1308: return(0);
1309: }
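/*
   Example (editor's sketch): the binary writer above is reached by viewing the matrix with
   a binary viewer; the file written here can be read back later with MatLoad().

     PetscViewer viewer;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);
     MatView(A,viewer);
     PetscViewerDestroy(&viewer);
*/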
1311: #include <petscdraw.h>
1314: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1315: {
1316: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1317: PetscErrorCode ierr;
1318: PetscMPIInt rank = aij->rank,size = aij->size;
1319: PetscBool isdraw,iascii,isbinary;
1320: PetscViewer sviewer;
1321: PetscViewerFormat format;
1324: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1325: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1326: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1327: if (iascii) {
1328: PetscViewerGetFormat(viewer,&format);
1329: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1330: MatInfo info;
1331: PetscBool inodes;
1333: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
1334: MatGetInfo(mat,MAT_LOCAL,&info);
1335: MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);
1336: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
1337: if (!inodes) {
1338: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1339: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1340: } else {
1341: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1342: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1343: }
1344: MatGetInfo(aij->A,MAT_LOCAL,&info);
1345: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1346: MatGetInfo(aij->B,MAT_LOCAL,&info);
1347: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1348: PetscViewerFlush(viewer);
1349: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
1350: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
1351: VecScatterView(aij->Mvctx,viewer);
1352: return(0);
1353: } else if (format == PETSC_VIEWER_ASCII_INFO) {
1354: PetscInt inodecount,inodelimit,*inodes;
1355: MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);
1356: if (inodes) {
1357: PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);
1358: } else {
1359: PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");
1360: }
1361: return(0);
1362: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1363: return(0);
1364: }
1365: } else if (isbinary) {
1366: if (size == 1) {
1367: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1368: MatView(aij->A,viewer);
1369: } else {
1370: MatView_MPIAIJ_Binary(mat,viewer);
1371: }
1372: return(0);
1373: } else if (isdraw) {
1374: PetscDraw draw;
1375: PetscBool isnull;
1376: PetscViewerDrawGetDraw(viewer,0,&draw);
1377: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1378: }
1380: {
1381: /* assemble the entire matrix onto first processor. */
1382: Mat A;
1383: Mat_SeqAIJ *Aloc;
1384: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1385: MatScalar *a;
1386: const char *matname;
1388: MatCreate(PetscObjectComm((PetscObject)mat),&A);
1389: if (!rank) {
1390: MatSetSizes(A,M,N,M,N);
1391: } else {
1392: MatSetSizes(A,0,0,M,N);
1393: }
1394: /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1395: MatSetType(A,MATMPIAIJ);
1396: MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);
1397: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
1398: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
1400: /* copy over the A part */
1401: Aloc = (Mat_SeqAIJ*)aij->A->data;
1402: m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1403: row = mat->rmap->rstart;
1404: for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1405: for (i=0; i<m; i++) {
1406: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
1407: row++;
1408: a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1409: }
1410: aj = Aloc->j;
1411: for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1413: /* copy over the B part */
1414: Aloc = (Mat_SeqAIJ*)aij->B->data;
1415: m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1416: row = mat->rmap->rstart;
1417: PetscMalloc1((ai[m]+1),&cols);
1418: ct = cols;
1419: for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1420: for (i=0; i<m; i++) {
1421: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
1422: row++;
1423: a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1424: }
1425: PetscFree(ct);
1426: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1427: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1428: /*
1429: Everyone has to call to draw the matrix since the graphics waits are
1430: synchronized across all processors that share the PetscDraw object
1431: */
1432: PetscViewerGetSingleton(viewer,&sviewer);
1433: PetscObjectGetName((PetscObject)mat,&matname);
1434: if (!rank) {
1435: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,matname);
1436: MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);
1437: }
1438: PetscViewerRestoreSingleton(viewer,&sviewer);
1439: MatDestroy(&A);
1440: }
1441: return(0);
1442: }
1446: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1447: {
1449: PetscBool iascii,isdraw,issocket,isbinary;
1452: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1453: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1454: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1455: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1456: if (iascii || isdraw || isbinary || issocket) {
1457: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1458: }
1459: return(0);
1460: }
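/*
   Example (editor's sketch): requesting the per-process summary printed by the ASCII
   branch above.

     PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);
     MatView(A,PETSC_VIEWER_STDOUT_WORLD);
     PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);
*/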
1464: PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1465: {
1466: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1468: Vec bb1 = 0;
1469: PetscBool hasop;
1472: if (flag == SOR_APPLY_UPPER) {
1473: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1474: return(0);
1475: }
1477: if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1478: VecDuplicate(bb,&bb1);
1479: }
1481: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1482: if (flag & SOR_ZERO_INITIAL_GUESS) {
1483: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1484: its--;
1485: }
1487: while (its--) {
1488: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1489: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1491: /* update rhs: bb1 = bb - B*x */
1492: VecScale(mat->lvec,-1.0);
1493: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1495: /* local sweep */
1496: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);
1497: }
1498: } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1499: if (flag & SOR_ZERO_INITIAL_GUESS) {
1500: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1501: its--;
1502: }
1503: while (its--) {
1504: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1505: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1507: /* update rhs: bb1 = bb - B*x */
1508: VecScale(mat->lvec,-1.0);
1509: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1511: /* local sweep */
1512: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);
1513: }
1514: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1515: if (flag & SOR_ZERO_INITIAL_GUESS) {
1516: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1517: its--;
1518: }
1519: while (its--) {
1520: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1521: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1523: /* update rhs: bb1 = bb - B*x */
1524: VecScale(mat->lvec,-1.0);
1525: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1527: /* local sweep */
1528: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);
1529: }
1530: } else if (flag & SOR_EISENSTAT) {
1531: Vec xx1;
1533: VecDuplicate(bb,&xx1);
1534: (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);
1536: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1537: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1538: if (!mat->diag) {
1539: MatGetVecs(matin,&mat->diag,NULL);
1540: MatGetDiagonal(matin,mat->diag);
1541: }
1542: MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);
1543: if (hasop) {
1544: MatMultDiagonalBlock(matin,xx,bb1);
1545: } else {
1546: VecPointwiseMult(bb1,mat->diag,xx);
1547: }
1548: VecAYPX(bb1,(omega-2.0)/omega,bb);
1550: MatMultAdd(mat->B,mat->lvec,bb1,bb1);
1552: /* local sweep */
1553: (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);
1554: VecAXPY(xx,1.0,xx1);
1555: VecDestroy(&xx1);
1556: } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1558: VecDestroy(&bb1);
1559: return(0);
1560: }
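/*
   Example (editor's sketch): MatSOR() is usually exercised through the SOR preconditioner,
   which uses the local-sweep branches above (true parallel SOR is rejected by the final
   SETERRQ). A, b and x are assumed to exist.

     KSP ksp;
     PC  pc;
     KSPCreate(PETSC_COMM_WORLD,&ksp);
     KSPSetOperators(ksp,A,A);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCSOR);
     KSPSetFromOptions(ksp);
     KSPSolve(ksp,b,x);
     KSPDestroy(&ksp);
*/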
1564: PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1565: {
1566: Mat aA,aB,Aperm;
1567: const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1568: PetscScalar *aa,*ba;
1569: PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1570: PetscSF rowsf,sf;
1571: IS parcolp = NULL;
1572: PetscBool done;
1576: MatGetLocalSize(A,&m,&n);
1577: ISGetIndices(rowp,&rwant);
1578: ISGetIndices(colp,&cwant);
1579: PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);
1581: /* Invert row permutation to find out where my rows should go */
1582: PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);
1583: PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);
1584: PetscSFSetFromOptions(rowsf);
1585: for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1586: PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);
1587: PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);
1589: /* Invert column permutation to find out where my columns should go */
1590: PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1591: PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);
1592: PetscSFSetFromOptions(sf);
1593: for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1594: PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);
1595: PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);
1596: PetscSFDestroy(&sf);
1598: ISRestoreIndices(rowp,&rwant);
1599: ISRestoreIndices(colp,&cwant);
1600: MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);
1602: /* Find out where my gcols should go */
1603: MatGetSize(aB,NULL,&ng);
1604: PetscMalloc1(ng,&gcdest);
1605: PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1606: PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);
1607: PetscSFSetFromOptions(sf);
1608: PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);
1609: PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);
1610: PetscSFDestroy(&sf);
1612: PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);
1613: MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);
1614: MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);
1615: for (i=0; i<m; i++) {
1616: PetscInt row = rdest[i],rowner;
1617: PetscLayoutFindOwner(A->rmap,row,&rowner);
1618: for (j=ai[i]; j<ai[i+1]; j++) {
1619: PetscInt cowner,col = cdest[aj[j]];
1620: PetscLayoutFindOwner(A->cmap,col,&cowner); /* Could build an index for the columns to eliminate this search */
1621: if (rowner == cowner) dnnz[i]++;
1622: else onnz[i]++;
1623: }
1624: for (j=bi[i]; j<bi[i+1]; j++) {
1625: PetscInt cowner,col = gcdest[bj[j]];
1626: PetscLayoutFindOwner(A->cmap,col,&cowner);
1627: if (rowner == cowner) dnnz[i]++;
1628: else onnz[i]++;
1629: }
1630: }
1631: PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);
1632: PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);
1633: PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);
1634: PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);
1635: PetscSFDestroy(&rowsf);
1637: MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);
1638: MatSeqAIJGetArray(aA,&aa);
1639: MatSeqAIJGetArray(aB,&ba);
1640: for (i=0; i<m; i++) {
1641: PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1642: PetscInt j0,rowlen;
1643: rowlen = ai[i+1] - ai[i];
1644: for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so insert in batches */
1645: for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1646: MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);
1647: }
1648: rowlen = bi[i+1] - bi[i];
1649: for (j0=j=0; j<rowlen; j0=j) {
1650: for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1651: MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);
1652: }
1653: }
1654: MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);
1655: MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);
1656: MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);
1657: MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);
1658: MatSeqAIJRestoreArray(aA,&aa);
1659: MatSeqAIJRestoreArray(aB,&ba);
1660: PetscFree4(dnnz,onnz,tdnnz,tonnz);
1661: PetscFree3(work,rdest,cdest);
1662: PetscFree(gcdest);
1663: if (parcolp) {ISDestroy(&colp);}
1664: *B = Aperm;
1665: return(0);
1666: }
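/*
   Editor's note: an illustrative sketch, not part of mpiaij.c, of calling the public
   MatPermute() interface implemented above.  It builds a global reversal permutation
   (row i goes to row M-1-i, column j to column N-1-j) as the row and column index
   sets.  The function name ExamplePermuteReverse is hypothetical.
*/
static PetscErrorCode ExamplePermuteReverse(Mat A,Mat *Aperm)
{
  PetscErrorCode ierr;
  PetscInt       i,m,n,M,N,rstart,rend,cstart,cend,*ridx,*cidx;
  IS             rowp,colp;

  ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&ridx);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&cidx);CHKERRQ(ierr);
  for (i=0; i<m; i++) ridx[i] = M - 1 - (rstart + i);   /* destination of each locally owned row    */
  for (i=0; i<n; i++) cidx[i] = N - 1 - (cstart + i);   /* destination of each locally owned column */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),m,ridx,PETSC_OWN_POINTER,&rowp);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,cidx,PETSC_OWN_POINTER,&colp);CHKERRQ(ierr);
  ierr = MatPermute(A,rowp,colp,Aperm);CHKERRQ(ierr);
  ierr = ISDestroy(&rowp);CHKERRQ(ierr);
  ierr = ISDestroy(&colp);CHKERRQ(ierr);
  return 0;
}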
1670: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1671: {
1672: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1673: Mat A = mat->A,B = mat->B;
1675: PetscReal isend[5],irecv[5];
1678: info->block_size = 1.0;
1679: MatGetInfo(A,MAT_LOCAL,info);
1681: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1682: isend[3] = info->memory; isend[4] = info->mallocs;
1684: MatGetInfo(B,MAT_LOCAL,info);
1686: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1687: isend[3] += info->memory; isend[4] += info->mallocs;
1688: if (flag == MAT_LOCAL) {
1689: info->nz_used = isend[0];
1690: info->nz_allocated = isend[1];
1691: info->nz_unneeded = isend[2];
1692: info->memory = isend[3];
1693: info->mallocs = isend[4];
1694: } else if (flag == MAT_GLOBAL_MAX) {
1695: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));
1697: info->nz_used = irecv[0];
1698: info->nz_allocated = irecv[1];
1699: info->nz_unneeded = irecv[2];
1700: info->memory = irecv[3];
1701: info->mallocs = irecv[4];
1702: } else if (flag == MAT_GLOBAL_SUM) {
1703: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));
1705: info->nz_used = irecv[0];
1706: info->nz_allocated = irecv[1];
1707: info->nz_unneeded = irecv[2];
1708: info->memory = irecv[3];
1709: info->mallocs = irecv[4];
1710: }
1711: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1712: info->fill_ratio_needed = 0;
1713: info->factor_mallocs = 0;
1714: return(0);
1715: }
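/*
   Editor's note: an illustrative sketch, not part of mpiaij.c, of querying the matrix
   statistics assembled by the routine above via the public MatGetInfo() interface.
   The function name ExampleReportNonzeros is hypothetical.
*/
static PetscErrorCode ExampleReportNonzeros(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);   /* sums the per-process counts */
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"nz used %g, nz allocated %g, mallocs %g\n",
                     (double)info.nz_used,(double)info.nz_allocated,(double)info.mallocs);CHKERRQ(ierr);
  return 0;
}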
1719: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1720: {
1721: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1725: switch (op) {
1726: case MAT_NEW_NONZERO_LOCATIONS:
1727: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1728: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1729: case MAT_KEEP_NONZERO_PATTERN:
1730: case MAT_NEW_NONZERO_LOCATION_ERR:
1731: case MAT_USE_INODES:
1732: case MAT_IGNORE_ZERO_ENTRIES:
1733: MatCheckPreallocated(A,1);
1734: MatSetOption(a->A,op,flg);
1735: MatSetOption(a->B,op,flg);
1736: break;
1737: case MAT_ROW_ORIENTED:
1738: a->roworiented = flg;
1740: MatSetOption(a->A,op,flg);
1741: MatSetOption(a->B,op,flg);
1742: break;
1743: case MAT_NEW_DIAGONALS:
1744: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1745: break;
1746: case MAT_IGNORE_OFF_PROC_ENTRIES:
1747: a->donotstash = flg;
1748: break;
1749: case MAT_SPD:
1750: A->spd_set = PETSC_TRUE;
1751: A->spd = flg;
1752: if (flg) {
1753: A->symmetric = PETSC_TRUE;
1754: A->structurally_symmetric = PETSC_TRUE;
1755: A->symmetric_set = PETSC_TRUE;
1756: A->structurally_symmetric_set = PETSC_TRUE;
1757: }
1758: break;
1759: case MAT_SYMMETRIC:
1760: MatSetOption(a->A,op,flg);
1761: break;
1762: case MAT_STRUCTURALLY_SYMMETRIC:
1763: MatSetOption(a->A,op,flg);
1764: break;
1765: case MAT_HERMITIAN:
1766: MatSetOption(a->A,op,flg);
1767: break;
1768: case MAT_SYMMETRY_ETERNAL:
1769: MatSetOption(a->A,op,flg);
1770: break;
1771: default:
1772: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1773: }
1774: return(0);
1775: }
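/*
   Editor's note: an illustrative sketch, not part of mpiaij.c, of setting a few of the
   options handled in the switch above through the public MatSetOption() interface,
   typically before MatSetValues()/assembly.  The function name is hypothetical.
*/
static PetscErrorCode ExampleSetAssemblyOptions(Mat A)
{
  PetscErrorCode ierr;

  /* all entries will be generated on the owning process, so skip the off-process stash */
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  /* error out if the preallocation is exceeded instead of mallocing new space */
  ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  /* declare the operator symmetric positive definite */
  ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
  return 0;
}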
1779: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1780: {
1781: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1782: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1784: PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1785: PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1786: PetscInt *cmap,*idx_p;
1789: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1790: mat->getrowactive = PETSC_TRUE;
1792: if (!mat->rowvalues && (idx || v)) {
1793: /*
1794: allocate enough space to hold information from the longest row.
1795: */
1796: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1797: PetscInt max = 1,tmp;
1798: for (i=0; i<matin->rmap->n; i++) {
1799: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1800: if (max < tmp) max = tmp;
1801: }
1802: PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);
1803: }
1805: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1806: lrow = row - rstart;
1808: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1809: if (!v) {pvA = 0; pvB = 0;}
1810: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1811: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1812: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1813: nztot = nzA + nzB;
1815: cmap = mat->garray;
1816: if (v || idx) {
1817: if (nztot) {
1818: /* Sort by increasing column numbers, assuming A and B already sorted */
1819: PetscInt imark = -1;
1820: if (v) {
1821: *v = v_p = mat->rowvalues;
1822: for (i=0; i<nzB; i++) {
1823: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1824: else break;
1825: }
1826: imark = i;
1827: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1828: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1829: }
1830: if (idx) {
1831: *idx = idx_p = mat->rowindices;
1832: if (imark > -1) {
1833: for (i=0; i<imark; i++) {
1834: idx_p[i] = cmap[cworkB[i]];
1835: }
1836: } else {
1837: for (i=0; i<nzB; i++) {
1838: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1839: else break;
1840: }
1841: imark = i;
1842: }
1843: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1844: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1845: }
1846: } else {
1847: if (idx) *idx = 0;
1848: if (v) *v = 0;
1849: }
1850: }
1851: *nz = nztot;
1852: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1853: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1854: return(0);
1855: }
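/*
   Editor's note: an illustrative sketch, not part of mpiaij.c, of walking the locally
   owned rows through the public MatGetRow()/MatRestoreRow() pair implemented above.
   Only one row may be active at a time, and only local rows may be queried.  The
   function name ExampleCountNonzerosByRow is hypothetical.
*/
static PetscErrorCode ExampleCountNonzerosByRow(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    ierr = PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A),"row %D has %D nonzeros\n",row,ncols);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  ierr = PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);CHKERRQ(ierr);
  return 0;
}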
1859: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1860: {
1861: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1864: if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1865: aij->getrowactive = PETSC_FALSE;
1866: return(0);
1867: }
1871: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1872: {
1873: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1874: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1876: PetscInt i,j,cstart = mat->cmap->rstart;
1877: PetscReal sum = 0.0;
1878: MatScalar *v;
1881: if (aij->size == 1) {
1882: MatNorm(aij->A,type,norm);
1883: } else {
1884: if (type == NORM_FROBENIUS) {
1885: v = amat->a;
1886: for (i=0; i<amat->nz; i++) {
1887: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1888: }
1889: v = bmat->a;
1890: for (i=0; i<bmat->nz; i++) {
1891: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1892: }
1893: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
1894: *norm = PetscSqrtReal(*norm);
1895: } else if (type == NORM_1) { /* max column norm */
1896: PetscReal *tmp,*tmp2;
1897: PetscInt *jj,*garray = aij->garray;
1898: PetscCalloc1((mat->cmap->N+1),&tmp);
1899: PetscMalloc1((mat->cmap->N+1),&tmp2);
1900: *norm = 0.0;
1901: v = amat->a; jj = amat->j;
1902: for (j=0; j<amat->nz; j++) {
1903: tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
1904: }
1905: v = bmat->a; jj = bmat->j;
1906: for (j=0; j<bmat->nz; j++) {
1907: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1908: }
1909: MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
1910: for (j=0; j<mat->cmap->N; j++) {
1911: if (tmp2[j] > *norm) *norm = tmp2[j];
1912: }
1913: PetscFree(tmp);
1914: PetscFree(tmp2);
1915: } else if (type == NORM_INFINITY) { /* max row norm */
1916: PetscReal ntemp = 0.0;
1917: for (j=0; j<aij->A->rmap->n; j++) {
1918: v = amat->a + amat->i[j];
1919: sum = 0.0;
1920: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1921: sum += PetscAbsScalar(*v); v++;
1922: }
1923: v = bmat->a + bmat->i[j];
1924: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1925: sum += PetscAbsScalar(*v); v++;
1926: }
1927: if (sum > ntemp) ntemp = sum;
1928: }
1929: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));
1930: } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1931: }
1932: return(0);
1933: }
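/*
   Editor's note: an illustrative sketch, not part of mpiaij.c, of requesting the three
   norms computed by the routine above through the public MatNorm() interface.  The
   function name ExampleReportNorms is hypothetical.
*/
static PetscErrorCode ExampleReportNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscReal      fro,one,inf;

  ierr = MatNorm(A,NORM_FROBENIUS,&fro);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_1,&one);CHKERRQ(ierr);          /* max column sum */
  ierr = MatNorm(A,NORM_INFINITY,&inf);CHKERRQ(ierr);   /* max row sum    */
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"||A||_F %g  ||A||_1 %g  ||A||_inf %g\n",
                     (double)fro,(double)one,(double)inf);CHKERRQ(ierr);
  return 0;
}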
1937: PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1938: {
1939: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1940: Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1942: PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1943: PetscInt cstart = A->cmap->rstart,ncol;
1944: Mat B;
1945: MatScalar *array;
1948: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1950: ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1951: ai = Aloc->i; aj = Aloc->j;
1952: bi = Bloc->i; bj = Bloc->j;
1953: if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1954: PetscInt *d_nnz,*g_nnz,*o_nnz;
1955: PetscSFNode *oloc;
1956: PETSC_UNUSED PetscSF sf;
1958: PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);
1959: /* compute d_nnz for preallocation */
1960: PetscMemzero(d_nnz,na*sizeof(PetscInt));
1961: for (i=0; i<ai[ma]; i++) {
1962: d_nnz[aj[i]]++;
1963: aj[i] += cstart; /* global col index to be used by MatSetValues() */
1964: }
1965: /* compute local off-diagonal contributions */
1966: PetscMemzero(g_nnz,nb*sizeof(PetscInt));
1967: for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1968: /* map those to global */
1969: PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1970: PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);
1971: PetscSFSetFromOptions(sf);
1972: PetscMemzero(o_nnz,na*sizeof(PetscInt));
1973: PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);
1974: PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);
1975: PetscSFDestroy(&sf);
1977: MatCreate(PetscObjectComm((PetscObject)A),&B);
1978: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
1979: MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));
1980: MatSetType(B,((PetscObject)A)->type_name);
1981: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
1982: PetscFree4(d_nnz,o_nnz,g_nnz,oloc);
1983: } else {
1984: B = *matout;
1985: MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
1986: for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1987: }
1989: /* copy over the A part */
1990: array = Aloc->a;
1991: row = A->rmap->rstart;
1992: for (i=0; i<ma; i++) {
1993: ncol = ai[i+1]-ai[i];
1994: MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);
1995: row++;
1996: array += ncol; aj += ncol;
1997: }
1998: aj = Aloc->j;
1999: for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2001: /* copy over the B part */
2002: PetscCalloc1(bi[mb],&cols);
2003: array = Bloc->a;
2004: row = A->rmap->rstart;
2005: for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2006: cols_tmp = cols;
2007: for (i=0; i<mb; i++) {
2008: ncol = bi[i+1]-bi[i];
2009: MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);
2010: row++;
2011: array += ncol; cols_tmp += ncol;
2012: }
2013: PetscFree(cols);
2015: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2016: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2017: if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2018: *matout = B;
2019: } else {
2020: MatHeaderMerge(A,B);
2021: }
2022: return(0);
2023: }
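/*
   Editor's note: an illustrative sketch, not part of mpiaij.c, of the public
   MatTranspose() interface implemented above.  The reuse call assumes only the
   numerical values of A changed since the first transpose, not its nonzero pattern.
   The function name ExampleTransposeTwice is hypothetical.
*/
static PetscErrorCode ExampleTransposeTwice(Mat A)
{
  PetscErrorCode ierr;
  Mat            At;

  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* allocate and fill A^T          */
  /* ... use At here, e.g. in MatMult(At,x,y) ... */
  ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);   /* refill At after A's values change */
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  return 0;
}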
2027: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2028: {
2029: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2030: Mat a = aij->A,b = aij->B;
2032: PetscInt s1,s2,s3;
2035: MatGetLocalSize(mat,&s2,&s3);
2036: if (rr) {
2037: VecGetLocalSize(rr,&s1);
2038: if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2039: /* Overlap communication with computation. */
2040: VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2041: }
2042: if (ll) {
2043: VecGetLocalSize(ll,&s1);
2044: if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2045: (*b->ops->diagonalscale)(b,ll,0);
2046: }
2047: /* scale the diagonal block */
2048: (*a->ops->diagonalscale)(a,ll,rr);
2050: if (rr) {
2051: /* Do a scatter end and then right scale the off-diagonal block */
2052: VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2053: (*b->ops->diagonalscale)(b,0,aij->lvec);
2054: }
2055: return(0);
2056: }
2060: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2061: {
2062: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2066: MatSetUnfactored(a->A);
2067: return(0);
2068: }
2072: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
2073: {
2074: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2075: Mat a,b,c,d;
2076: PetscBool flg;
2080: a = matA->A; b = matA->B;
2081: c = matB->A; d = matB->B;
2083: MatEqual(a,c,&flg);
2084: if (flg) {
2085: MatEqual(b,d,&flg);
2086: }
2087: MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
2088: return(0);
2089: }
2093: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2094: {
2096: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2097: Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data;
2100: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2101: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2102: /* because of the column compression in the off-processor part of the matrix a->B,
2103: the number of columns in a->B and b->B may be different, hence we cannot call
2104: MatCopy() directly on the two parts. If need be, we can provide a more
2105: efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
2106: and then copying the submatrices */
2107: MatCopy_Basic(A,B,str);
2108: } else {
2109: MatCopy(a->A,b->A,str);
2110: MatCopy(a->B,b->B,str);
2111: }
2112: return(0);
2113: }
2117: PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2118: {
2122: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
2123: return(0);
2124: }
2126: /*
2127: Computes the number of nonzeros per row needed for preallocation when X and Y
2128: have different nonzero structure.
2129: */
2132: PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2133: {
2134: PetscInt i,j,k,nzx,nzy;
2137: /* Set the number of nonzeros in the new matrix */
2138: for (i=0; i<m; i++) {
2139: const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2140: nzx = xi[i+1] - xi[i];
2141: nzy = yi[i+1] - yi[i];
2142: nnz[i] = 0;
2143: for (j=0,k=0; j<nzx; j++) { /* Point in X */
2144: for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2145: if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */
2146: nnz[i]++;
2147: }
2148: for (; k<nzy; k++) nnz[i]++;
2149: }
2150: return(0);
2151: }
2153: /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2156: static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2157: {
2159: PetscInt m = Y->rmap->N;
2160: Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data;
2161: Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data;
2164: MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);
2165: return(0);
2166: }
2170: PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2171: {
2173: Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2174: PetscBLASInt bnz,one=1;
2175: Mat_SeqAIJ *x,*y;
2178: if (str == SAME_NONZERO_PATTERN) {
2179: PetscScalar alpha = a;
2180: x = (Mat_SeqAIJ*)xx->A->data;
2181: PetscBLASIntCast(x->nz,&bnz);
2182: y = (Mat_SeqAIJ*)yy->A->data;
2183: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2184: x = (Mat_SeqAIJ*)xx->B->data;
2185: y = (Mat_SeqAIJ*)yy->B->data;
2186: PetscBLASIntCast(x->nz,&bnz);
2187: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2188: PetscObjectStateIncrease((PetscObject)Y);
2189: } else if (str == SUBSET_NONZERO_PATTERN) {
2190: MatAXPY_Basic(Y,a,X,str);
2191: } else {
2192: Mat B;
2193: PetscInt *nnz_d,*nnz_o;
2194: PetscMalloc1(yy->A->rmap->N,&nnz_d);
2195: PetscMalloc1(yy->B->rmap->N,&nnz_o);
2196: MatCreate(PetscObjectComm((PetscObject)Y),&B);
2197: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2198: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2199: MatSetBlockSizesFromMats(B,Y,Y);
2200: MatSetType(B,MATMPIAIJ);
2201: MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);
2202: MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);
2203: MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);
2204: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2205: MatHeaderReplace(Y,B);
2206: PetscFree(nnz_d);
2207: PetscFree(nnz_o);
2208: }
2209: return(0);
2210: }
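/*
   Editor's note: an illustrative sketch, not part of mpiaij.c, of the public MatAXPY()
   interface implemented above.  Passing DIFFERENT_NONZERO_PATTERN exercises the
   preallocate-and-rebuild branch; SAME_NONZERO_PATTERN uses the fast BLAS path.
   The function name ExampleAXPY is hypothetical.
*/
static PetscErrorCode ExampleAXPY(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  /* Y <- Y + 2*X, where X and Y may have unrelated nonzero patterns */
  ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
  return 0;
}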
2212: extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2216: PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2217: {
2218: #if defined(PETSC_USE_COMPLEX)
2220: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2223: MatConjugate_SeqAIJ(aij->A);
2224: MatConjugate_SeqAIJ(aij->B);
2225: #else
2227: #endif
2228: return(0);
2229: }
2233: PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2234: {
2235: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2239: MatRealPart(a->A);
2240: MatRealPart(a->B);
2241: return(0);
2242: }
2246: PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2247: {
2248: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2252: MatImaginaryPart(a->A);
2253: MatImaginaryPart(a->B);
2254: return(0);
2255: }
2257: #if defined(PETSC_HAVE_PBGL)
2259: #include <boost/parallel/mpi/bsp_process_group.hpp>
2260: #include <boost/graph/distributed/ilu_default_graph.hpp>
2261: #include <boost/graph/distributed/ilu_0_block.hpp>
2262: #include <boost/graph/distributed/ilu_preconditioner.hpp>
2263: #include <boost/graph/distributed/petsc/interface.hpp>
2264: #include <boost/multi_array.hpp>
2265: #include <boost/parallel/distributed_property_map.hpp>
2269: /*
2270: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2271: */
2272: PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2273: {
2274: namespace petsc = boost::distributed::petsc;
2276: namespace graph_dist = boost::graph::distributed;
2277: using boost::graph::distributed::ilu_default::process_group_type;
2278: using boost::graph::ilu_permuted;
2280: PetscBool row_identity, col_identity;
2281: PetscContainer c;
2282: PetscInt m, n, M, N;
2286: if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2287: ISIdentity(isrow, &row_identity);
2288: ISIdentity(iscol, &col_identity);
2289: if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2291: process_group_type pg;
2292: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2293: lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2294: lgraph_type& level_graph = *lgraph_p;
2295: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2297: petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2298: ilu_permuted(level_graph);
2300: /* put together the new matrix */
2301: MatCreate(PetscObjectComm((PetscObject)A), fact);
2302: MatGetLocalSize(A, &m, &n);
2303: MatGetSize(A, &M, &N);
2304: MatSetSizes(fact, m, n, M, N);
2305: MatSetBlockSizesFromMats(fact,A,A);
2306: MatSetType(fact, ((PetscObject)A)->type_name);
2307: MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);
2308: MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);
2310: PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2311: PetscContainerSetPointer(c, lgraph_p);
2312: PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2313: PetscContainerDestroy(&c);
2314: return(0);
2315: }
2319: PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2320: {
2322: return(0);
2323: }
2327: /*
2328: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2329: */
2330: PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2331: {
2332: namespace graph_dist = boost::graph::distributed;
2334: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2335: lgraph_type *lgraph_p;
2336: PetscContainer c;
2340: PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);
2341: PetscContainerGetPointer(c, (void**) &lgraph_p);
2342: VecCopy(b, x);
2344: PetscScalar *array_x;
2345: VecGetArray(x, &array_x);
2346: PetscInt sx;
2347: VecGetSize(x, &sx);
2349: PetscScalar *array_b;
2350: VecGetArray(b, &array_b);
2351: PetscInt sb;
2352: VecGetSize(b, &sb);
2354: lgraph_type& level_graph = *lgraph_p;
2355: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2357: typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2358: array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]);
2359: array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]);
2361: typedef boost::iterator_property_map<array_ref_type::iterator,
2362: boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2363: gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2364: gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2366: ilu_set_solve(*lgraph_p, vector_b, vector_x);
2367: return(0);
2368: }
2369: #endif
2374: PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2375: {
2376: PetscMPIInt rank,size;
2377: MPI_Comm comm;
2379: PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2380: PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2381: PetscInt *rowrange = mat->rmap->range;
2382: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2383: Mat A = aij->A,B=aij->B,C=*matredundant;
2384: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2385: PetscScalar *sbuf_a;
2386: PetscInt nzlocal=a->nz+b->nz;
2387: PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2388: PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2389: PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2390: MatScalar *aworkA,*aworkB;
2391: PetscScalar *vals;
2392: PetscMPIInt tag1,tag2,tag3,imdex;
2393: MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2394: MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2395: MPI_Status recv_status,*send_status;
2396: PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2397: PetscInt **rbuf_j=NULL;
2398: PetscScalar **rbuf_a=NULL;
2399: Mat_Redundant *redund =NULL;
2400:
2402: PetscObjectGetComm((PetscObject)mat,&comm);
2403: MPI_Comm_rank(comm,&rank);
2404: MPI_Comm_size(comm,&size);
2405: MPI_Comm_rank(subcomm,&subrank);
2406: MPI_Comm_size(subcomm,&subsize);
2408: if (reuse == MAT_REUSE_MATRIX) {
2409: if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2410: if (subsize == 1) {
2411: Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2412: redund = c->redundant;
2413: } else {
2414: Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2415: redund = c->redundant;
2416: }
2417: if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2419: nsends = redund->nsends;
2420: nrecvs = redund->nrecvs;
2421: send_rank = redund->send_rank;
2422: recv_rank = redund->recv_rank;
2423: sbuf_nz = redund->sbuf_nz;
2424: rbuf_nz = redund->rbuf_nz;
2425: sbuf_j = redund->sbuf_j;
2426: sbuf_a = redund->sbuf_a;
2427: rbuf_j = redund->rbuf_j;
2428: rbuf_a = redund->rbuf_a;
2429: }
2431: if (reuse == MAT_INITIAL_MATRIX) {
2432: PetscInt nleftover,np_subcomm;
2434: /* get the destination processors' id send_rank, nsends and nrecvs */
2435: PetscMalloc2(size,&send_rank,size,&recv_rank);
2437: np_subcomm = size/nsubcomm;
2438: nleftover = size - nsubcomm*np_subcomm;
2440: /* block of code below is specific to INTERLACED */
2441: /* ------------------------------------------------*/
2442: nsends = 0; nrecvs = 0;
2443: for (i=0; i<size; i++) {
2444: if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2445: send_rank[nsends++] = i;
2446: recv_rank[nrecvs++] = i;
2447: }
2448: }
2449: if (rank >= size - nleftover) { /* this proc is a leftover processor */
2450: i = size-nleftover-1;
2451: j = 0;
2452: while (j < nsubcomm - nleftover) {
2453: send_rank[nsends++] = i;
2454: i--; j++;
2455: }
2456: }
2458: if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2459: for (i=0; i<nleftover; i++) {
2460: recv_rank[nrecvs++] = size-nleftover+i;
2461: }
2462: }
2463: /*----------------------------------------------*/
2465: /* allocate sbuf_j, sbuf_a */
2466: i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2467: PetscMalloc1(i,&sbuf_j);
2468: PetscMalloc1((nzlocal+1),&sbuf_a);
2469: /*
2470: PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);
2471: PetscSynchronizedFlush(comm,PETSC_STDOUT);
2472: */
2473: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2475: /* copy mat's local entries into the buffers */
2476: if (reuse == MAT_INITIAL_MATRIX) {
2477: rownz_max = 0;
2478: rptr = sbuf_j;
2479: cols = sbuf_j + rend-rstart + 1;
2480: vals = sbuf_a;
2481: rptr[0] = 0;
2482: for (i=0; i<rend-rstart; i++) {
2483: row = i + rstart;
2484: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2485: ncols = nzA + nzB;
2486: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2487: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2488: /* load the column indices for this row into cols */
2489: lwrite = 0;
2490: for (l=0; l<nzB; l++) {
2491: if ((ctmp = bmap[cworkB[l]]) < cstart) {
2492: vals[lwrite] = aworkB[l];
2493: cols[lwrite++] = ctmp;
2494: }
2495: }
2496: for (l=0; l<nzA; l++) {
2497: vals[lwrite] = aworkA[l];
2498: cols[lwrite++] = cstart + cworkA[l];
2499: }
2500: for (l=0; l<nzB; l++) {
2501: if ((ctmp = bmap[cworkB[l]]) >= cend) {
2502: vals[lwrite] = aworkB[l];
2503: cols[lwrite++] = ctmp;
2504: }
2505: }
2506: vals += ncols;
2507: cols += ncols;
2508: rptr[i+1] = rptr[i] + ncols;
2509: if (rownz_max < ncols) rownz_max = ncols;
2510: }
2511: if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2512: } else { /* only copy matrix values into sbuf_a */
2513: rptr = sbuf_j;
2514: vals = sbuf_a;
2515: rptr[0] = 0;
2516: for (i=0; i<rend-rstart; i++) {
2517: row = i + rstart;
2518: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2519: ncols = nzA + nzB;
2520: cworkB = b->j + b->i[i];
2521: aworkA = a->a + a->i[i];
2522: aworkB = b->a + b->i[i];
2523: lwrite = 0;
2524: for (l=0; l<nzB; l++) {
2525: if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2526: }
2527: for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2528: for (l=0; l<nzB; l++) {
2529: if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2530: }
2531: vals += ncols;
2532: rptr[i+1] = rptr[i] + ncols;
2533: }
2534: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2536: /* send nzlocal to others, and recv the others' nzlocal */
2537: /*--------------------------------------------------*/
2538: if (reuse == MAT_INITIAL_MATRIX) {
2539: PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);
2541: s_waits2 = s_waits3 + nsends;
2542: s_waits1 = s_waits2 + nsends;
2543: r_waits1 = s_waits1 + nsends;
2544: r_waits2 = r_waits1 + nrecvs;
2545: r_waits3 = r_waits2 + nrecvs;
2546: } else {
2547: PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);
2549: r_waits3 = s_waits3 + nsends;
2550: }
2552: PetscObjectGetNewTag((PetscObject)mat,&tag3);
2553: if (reuse == MAT_INITIAL_MATRIX) {
2554: /* get new tags to keep the communication clean */
2555: PetscObjectGetNewTag((PetscObject)mat,&tag1);
2556: PetscObjectGetNewTag((PetscObject)mat,&tag2);
2557: PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);
2559: /* post receives of other's nzlocal */
2560: for (i=0; i<nrecvs; i++) {
2561: MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);
2562: }
2563: /* send nzlocal to others */
2564: for (i=0; i<nsends; i++) {
2565: sbuf_nz[i] = nzlocal;
2566: MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);
2567: }
2568: /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2569: count = nrecvs;
2570: while (count) {
2571: MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);
2573: recv_rank[imdex] = recv_status.MPI_SOURCE;
2574: /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2575: PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);
2577: i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2579: rbuf_nz[imdex] += i + 2;
2581: PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);
2582: MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);
2583: count--;
2584: }
2585: /* wait on sends of nzlocal */
2586: if (nsends) {MPI_Waitall(nsends,s_waits1,send_status);}
2587: /* send mat->i,j to others, and recv from the others */
2588: /*------------------------------------------------*/
2589: for (i=0; i<nsends; i++) {
2590: j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2591: MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);
2592: }
2593: /* wait on receives of mat->i,j */
2594: /*------------------------------*/
2595: count = nrecvs;
2596: while (count) {
2597: MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);
2598: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2599: count--;
2600: }
2601: /* wait on sends of mat->i,j */
2602: /*---------------------------*/
2603: if (nsends) {
2604: MPI_Waitall(nsends,s_waits2,send_status);
2605: }
2606: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2608: /* post receives, send and receive mat->a */
2609: /*----------------------------------------*/
2610: for (imdex=0; imdex<nrecvs; imdex++) {
2611: MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);
2612: }
2613: for (i=0; i<nsends; i++) {
2614: MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);
2615: }
2616: count = nrecvs;
2617: while (count) {
2618: MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);
2619: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2620: count--;
2621: }
2622: if (nsends) {
2623: MPI_Waitall(nsends,s_waits3,send_status);
2624: }
2626: PetscFree2(s_waits3,send_status);
2628: /* create redundant matrix */
2629: /*-------------------------*/
2630: if (reuse == MAT_INITIAL_MATRIX) {
2631: const PetscInt *range;
2632: PetscInt rstart_sub,rend_sub,mloc_sub;
2634: /* compute rownz_max for preallocation */
2635: for (imdex=0; imdex<nrecvs; imdex++) {
2636: j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2637: rptr = rbuf_j[imdex];
2638: for (i=0; i<j; i++) {
2639: ncols = rptr[i+1] - rptr[i];
2640: if (rownz_max < ncols) rownz_max = ncols;
2641: }
2642: }
2644: MatCreate(subcomm,&C);
2646: /* get local size of redundant matrix
2647: - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2648: MatGetOwnershipRanges(mat,&range);
2649: rstart_sub = range[nsubcomm*subrank];
2650: if (subrank+1 < subsize) { /* not the last proc in subcomm */
2651: rend_sub = range[nsubcomm*(subrank+1)];
2652: } else {
2653: rend_sub = mat->rmap->N;
2654: }
2655: mloc_sub = rend_sub - rstart_sub;
2657: if (M == N) {
2658: MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);
2659: } else { /* non-square matrix */
2660: MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);
2661: }
2662: MatSetBlockSizesFromMats(C,mat,mat);
2663: MatSetFromOptions(C);
2664: MatSeqAIJSetPreallocation(C,rownz_max,NULL);
2665: MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);
2666: } else {
2667: C = *matredundant;
2668: }
2670: /* insert local matrix entries */
2671: rptr = sbuf_j;
2672: cols = sbuf_j + rend-rstart + 1;
2673: vals = sbuf_a;
2674: for (i=0; i<rend-rstart; i++) {
2675: row = i + rstart;
2676: ncols = rptr[i+1] - rptr[i];
2677: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2678: vals += ncols;
2679: cols += ncols;
2680: }
2681: /* insert received matrix entries */
2682: for (imdex=0; imdex<nrecvs; imdex++) {
2683: rstart = rowrange[recv_rank[imdex]];
2684: rend = rowrange[recv_rank[imdex]+1];
2685: /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2686: rptr = rbuf_j[imdex];
2687: cols = rbuf_j[imdex] + rend-rstart + 1;
2688: vals = rbuf_a[imdex];
2689: for (i=0; i<rend-rstart; i++) {
2690: row = i + rstart;
2691: ncols = rptr[i+1] - rptr[i];
2692: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2693: vals += ncols;
2694: cols += ncols;
2695: }
2696: }
2697: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2698: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2700: if (reuse == MAT_INITIAL_MATRIX) {
2701: *matredundant = C;
2703: /* create a supporting struct and attach it to C for reuse */
2704: PetscNewLog(C,&redund);
2705: if (subsize == 1) {
2706: Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2707: c->redundant = redund;
2708: } else {
2709: Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2710: c->redundant = redund;
2711: }
2713: redund->nzlocal = nzlocal;
2714: redund->nsends = nsends;
2715: redund->nrecvs = nrecvs;
2716: redund->send_rank = send_rank;
2717: redund->recv_rank = recv_rank;
2718: redund->sbuf_nz = sbuf_nz;
2719: redund->rbuf_nz = rbuf_nz;
2720: redund->sbuf_j = sbuf_j;
2721: redund->sbuf_a = sbuf_a;
2722: redund->rbuf_j = rbuf_j;
2723: redund->rbuf_a = rbuf_a;
2724: redund->psubcomm = NULL;
2725: }
2726: return(0);
2727: }
2731: PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2732: {
2734: MPI_Comm comm;
2735: PetscMPIInt size,subsize;
2736: PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2737: Mat_Redundant *redund=NULL;
2738: PetscSubcomm psubcomm=NULL;
2739: MPI_Comm subcomm_in=subcomm;
2740: Mat *matseq;
2741: IS isrow,iscol;
2744: if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2745: if (reuse == MAT_INITIAL_MATRIX) {
2746: /* create psubcomm, then get subcomm */
2747: PetscObjectGetComm((PetscObject)mat,&comm);
2748: MPI_Comm_size(comm,&size);
2749: if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2751: PetscSubcommCreate(comm,&psubcomm);
2752: PetscSubcommSetNumber(psubcomm,nsubcomm);
2753: PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);
2754: PetscSubcommSetFromOptions(psubcomm);
2755: subcomm = psubcomm->comm;
2756: } else { /* retrieve psubcomm and subcomm */
2757: PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);
2758: MPI_Comm_size(subcomm,&subsize);
2759: if (subsize == 1) {
2760: Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2761: redund = c->redundant;
2762: } else {
2763: Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2764: redund = c->redundant;
2765: }
2766: psubcomm = redund->psubcomm;
2767: }
2768: if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2769: MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);
2770: if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
2771: MPI_Comm_size(psubcomm->comm,&subsize);
2772: if (subsize == 1) {
2773: Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2774: c->redundant->psubcomm = psubcomm;
2775: } else {
2776: Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2777: c->redundant->psubcomm = psubcomm;
2778: }
2779: }
2780: return(0);
2781: }
2782: }
2784: /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2785: MPI_Comm_size(subcomm,&subsize);
2786: if (reuse == MAT_INITIAL_MATRIX) {
2787: /* create a local sequential matrix matseq[0] */
2788: mloc_sub = PETSC_DECIDE;
2789: PetscSplitOwnership(subcomm,&mloc_sub,&M);
2790: MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);
2791: rstart = rend - mloc_sub;
2792: ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);
2793: ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);
2794: } else { /* reuse == MAT_REUSE_MATRIX */
2795: if (subsize == 1) {
2796: Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2797: redund = c->redundant;
2798: } else {
2799: Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2800: redund = c->redundant;
2801: }
2803: isrow = redund->isrow;
2804: iscol = redund->iscol;
2805: matseq = redund->matseq;
2806: }
2807: MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);
2808: MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);
2810: if (reuse == MAT_INITIAL_MATRIX) {
2811: /* create a supporting struct and attach it to *matredundant for reuse */
2812: PetscNewLog(*matredundant,&redund);
2813: if (subsize == 1) {
2814: Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2815: c->redundant = redund;
2816: } else {
2817: Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2818: c->redundant = redund;
2819: }
2820: redund->isrow = isrow;
2821: redund->iscol = iscol;
2822: redund->matseq = matseq;
2823: redund->psubcomm = psubcomm;
2824: }
2825: return(0);
2826: }
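/*
   Editor's note: an illustrative sketch, not part of mpiaij.c, of the public
   MatGetRedundantMatrix() interface dispatched to the routines above.  Passing
   MPI_COMM_NULL lets PETSc build the subcommunicators itself; the reuse call refreshes
   the copies after A's values change while its nonzero pattern stays the same.  The
   function name ExampleRedundant is hypothetical.
*/
static PetscErrorCode ExampleRedundant(Mat A,PetscInt nsub,Mat *Ared)
{
  PetscErrorCode ierr;

  ierr = MatGetRedundantMatrix(A,nsub,MPI_COMM_NULL,MAT_INITIAL_MATRIX,Ared);CHKERRQ(ierr);
  /* ... change the numerical values of A (same pattern) ... */
  ierr = MatGetRedundantMatrix(A,nsub,MPI_COMM_NULL,MAT_REUSE_MATRIX,Ared);CHKERRQ(ierr);
  return 0;
}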
2830: PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2831: {
2832: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2834: PetscInt i,*idxb = 0;
2835: PetscScalar *va,*vb;
2836: Vec vtmp;
2839: MatGetRowMaxAbs(a->A,v,idx);
2840: VecGetArray(v,&va);
2841: if (idx) {
2842: for (i=0; i<A->rmap->n; i++) {
2843: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2844: }
2845: }
2847: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2848: if (idx) {
2849: PetscMalloc1(A->rmap->n,&idxb);
2850: }
2851: MatGetRowMaxAbs(a->B,vtmp,idxb);
2852: VecGetArray(vtmp,&vb);
2854: for (i=0; i<A->rmap->n; i++) {
2855: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2856: va[i] = vb[i];
2857: if (idx) idx[i] = a->garray[idxb[i]];
2858: }
2859: }
2861: VecRestoreArray(v,&va);
2862: VecRestoreArray(vtmp,&vb);
2863: PetscFree(idxb);
2864: VecDestroy(&vtmp);
2865: return(0);
2866: }
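/*
   Editor's note: an illustrative sketch, not part of mpiaij.c, of the public
   MatGetRowMaxAbs() interface implemented above.  On return, v holds the largest
   absolute entry of each locally owned row and idx[i] the global column where it was
   found.  The function name ExampleRowMaxAbs is hypothetical.
*/
static PetscErrorCode ExampleRowMaxAbs(Mat A)
{
  PetscErrorCode ierr;
  Vec            v;
  PetscInt       *idx,m;

  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetVecs(A,NULL,&v);CHKERRQ(ierr);        /* left vector: one entry per local row */
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
  /* ... inspect v and idx ... */
  ierr = PetscFree(idx);CHKERRQ(ierr);
  ierr = VecDestroy(&v);CHKERRQ(ierr);
  return 0;
}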
2870: PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2871: {
2872: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2874: PetscInt i,*idxb = 0;
2875: PetscScalar *va,*vb;
2876: Vec vtmp;
2879: MatGetRowMinAbs(a->A,v,idx);
2880: VecGetArray(v,&va);
2881: if (idx) {
2882: for (i=0; i<A->rmap->n; i++) {
2883: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2884: }
2885: }
2887: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2888: if (idx) {
2889: PetscMalloc1(A->rmap->n,&idxb);
2890: }
2891: MatGetRowMinAbs(a->B,vtmp,idxb);
2892: VecGetArray(vtmp,&vb);
2894: for (i=0; i<A->rmap->n; i++) {
2895: if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2896: va[i] = vb[i];
2897: if (idx) idx[i] = a->garray[idxb[i]];
2898: }
2899: }
2901: VecRestoreArray(v,&va);
2902: VecRestoreArray(vtmp,&vb);
2903: PetscFree(idxb);
2904: VecDestroy(&vtmp);
2905: return(0);
2906: }
2910: PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2911: {
2912: Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2913: PetscInt n = A->rmap->n;
2914: PetscInt cstart = A->cmap->rstart;
2915: PetscInt *cmap = mat->garray;
2916: PetscInt *diagIdx, *offdiagIdx;
2917: Vec diagV, offdiagV;
2918: PetscScalar *a, *diagA, *offdiagA;
2919: PetscInt r;
2923: PetscMalloc2(n,&diagIdx,n,&offdiagIdx);
2924: VecCreateSeq(PETSC_COMM_SELF, n, &diagV);
2925: VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);
2926: MatGetRowMin(mat->A, diagV, diagIdx);
2927: MatGetRowMin(mat->B, offdiagV, offdiagIdx);
2928: VecGetArray(v, &a);
2929: VecGetArray(diagV, &diagA);
2930: VecGetArray(offdiagV, &offdiagA);
2931: for (r = 0; r < n; ++r) {
2932: if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2933: a[r] = diagA[r];
2934: idx[r] = cstart + diagIdx[r];
2935: } else {
2936: a[r] = offdiagA[r];
2937: idx[r] = cmap[offdiagIdx[r]];
2938: }
2939: }
2940: VecRestoreArray(v, &a);
2941: VecRestoreArray(diagV, &diagA);
2942: VecRestoreArray(offdiagV, &offdiagA);
2943: VecDestroy(&diagV);
2944: VecDestroy(&offdiagV);
2945: PetscFree2(diagIdx, offdiagIdx);
2946: return(0);
2947: }
2951: PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2952: {
2953: Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2954: PetscInt n = A->rmap->n;
2955: PetscInt cstart = A->cmap->rstart;
2956: PetscInt *cmap = mat->garray;
2957: PetscInt *diagIdx, *offdiagIdx;
2958: Vec diagV, offdiagV;
2959: PetscScalar *a, *diagA, *offdiagA;
2960: PetscInt r;
2964: PetscMalloc2(n,&diagIdx,n,&offdiagIdx);
2965: VecCreateSeq(PETSC_COMM_SELF, n, &diagV);
2966: VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);
2967: MatGetRowMax(mat->A, diagV, diagIdx);
2968: MatGetRowMax(mat->B, offdiagV, offdiagIdx);
2969: VecGetArray(v, &a);
2970: VecGetArray(diagV, &diagA);
2971: VecGetArray(offdiagV, &offdiagA);
2972: for (r = 0; r < n; ++r) {
2973: if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2974: a[r] = diagA[r];
2975: idx[r] = cstart + diagIdx[r];
2976: } else {
2977: a[r] = offdiagA[r];
2978: idx[r] = cmap[offdiagIdx[r]];
2979: }
2980: }
2981: VecRestoreArray(v, &a);
2982: VecRestoreArray(diagV, &diagA);
2983: VecRestoreArray(offdiagV, &offdiagA);
2984: VecDestroy(&diagV);
2985: VecDestroy(&offdiagV);
2986: PetscFree2(diagIdx, offdiagIdx);
2987: return(0);
2988: }
2992: PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2993: {
2995: Mat *dummy;
2998: MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);
2999: *newmat = *dummy;
3000: PetscFree(dummy);
3001: return(0);
3002: }
3006: PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3007: {
3008: Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
3012: MatInvertBlockDiagonal(a->A,values);
3013: return(0);
3014: }
3018: static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3019: {
3021: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data;
3024: MatSetRandom(aij->A,rctx);
3025: MatSetRandom(aij->B,rctx);
3026: MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);
3027: MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);
3028: return(0);
3029: }
3031: /* -------------------------------------------------------------------*/
3032: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3033: MatGetRow_MPIAIJ,
3034: MatRestoreRow_MPIAIJ,
3035: MatMult_MPIAIJ,
3036: /* 4*/ MatMultAdd_MPIAIJ,
3037: MatMultTranspose_MPIAIJ,
3038: MatMultTransposeAdd_MPIAIJ,
3039: #if defined(PETSC_HAVE_PBGL)
3040: MatSolve_MPIAIJ,
3041: #else
3042: 0,
3043: #endif
3044: 0,
3045: 0,
3046: /*10*/ 0,
3047: 0,
3048: 0,
3049: MatSOR_MPIAIJ,
3050: MatTranspose_MPIAIJ,
3051: /*15*/ MatGetInfo_MPIAIJ,
3052: MatEqual_MPIAIJ,
3053: MatGetDiagonal_MPIAIJ,
3054: MatDiagonalScale_MPIAIJ,
3055: MatNorm_MPIAIJ,
3056: /*20*/ MatAssemblyBegin_MPIAIJ,
3057: MatAssemblyEnd_MPIAIJ,
3058: MatSetOption_MPIAIJ,
3059: MatZeroEntries_MPIAIJ,
3060: /*24*/ MatZeroRows_MPIAIJ,
3061: 0,
3062: #if defined(PETSC_HAVE_PBGL)
3063: 0,
3064: #else
3065: 0,
3066: #endif
3067: 0,
3068: 0,
3069: /*29*/ MatSetUp_MPIAIJ,
3070: #if defined(PETSC_HAVE_PBGL)
3071: 0,
3072: #else
3073: 0,
3074: #endif
3075: 0,
3076: 0,
3077: 0,
3078: /*34*/ MatDuplicate_MPIAIJ,
3079: 0,
3080: 0,
3081: 0,
3082: 0,
3083: /*39*/ MatAXPY_MPIAIJ,
3084: MatGetSubMatrices_MPIAIJ,
3085: MatIncreaseOverlap_MPIAIJ,
3086: MatGetValues_MPIAIJ,
3087: MatCopy_MPIAIJ,
3088: /*44*/ MatGetRowMax_MPIAIJ,
3089: MatScale_MPIAIJ,
3090: 0,
3091: 0,
3092: MatZeroRowsColumns_MPIAIJ,
3093: /*49*/ MatSetRandom_MPIAIJ,
3094: 0,
3095: 0,
3096: 0,
3097: 0,
3098: /*54*/ MatFDColoringCreate_MPIXAIJ,
3099: 0,
3100: MatSetUnfactored_MPIAIJ,
3101: MatPermute_MPIAIJ,
3102: 0,
3103: /*59*/ MatGetSubMatrix_MPIAIJ,
3104: MatDestroy_MPIAIJ,
3105: MatView_MPIAIJ,
3106: 0,
3107: MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3108: /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3109: MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3110: 0,
3111: 0,
3112: 0,
3113: /*69*/ MatGetRowMaxAbs_MPIAIJ,
3114: MatGetRowMinAbs_MPIAIJ,
3115: 0,
3116: MatSetColoring_MPIAIJ,
3117: 0,
3118: MatSetValuesAdifor_MPIAIJ,
3119: /*75*/ MatFDColoringApply_AIJ,
3120: 0,
3121: 0,
3122: 0,
3123: MatFindZeroDiagonals_MPIAIJ,
3124: /*80*/ 0,
3125: 0,
3126: 0,
3127: /*83*/ MatLoad_MPIAIJ,
3128: 0,
3129: 0,
3130: 0,
3131: 0,
3132: 0,
3133: /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3134: MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3135: MatMatMultNumeric_MPIAIJ_MPIAIJ,
3136: MatPtAP_MPIAIJ_MPIAIJ,
3137: MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3138: /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3139: 0,
3140: 0,
3141: 0,
3142: 0,
3143: /*99*/ 0,
3144: 0,
3145: 0,
3146: MatConjugate_MPIAIJ,
3147: 0,
3148: /*104*/MatSetValuesRow_MPIAIJ,
3149: MatRealPart_MPIAIJ,
3150: MatImaginaryPart_MPIAIJ,
3151: 0,
3152: 0,
3153: /*109*/0,
3154: MatGetRedundantMatrix_MPIAIJ,
3155: MatGetRowMin_MPIAIJ,
3156: 0,
3157: 0,
3158: /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3159: 0,
3160: 0,
3161: 0,
3162: 0,
3163: /*119*/0,
3164: 0,
3165: 0,
3166: 0,
3167: MatGetMultiProcBlock_MPIAIJ,
3168: /*124*/MatFindNonzeroRows_MPIAIJ,
3169: MatGetColumnNorms_MPIAIJ,
3170: MatInvertBlockDiagonal_MPIAIJ,
3171: 0,
3172: MatGetSubMatricesParallel_MPIAIJ,
3173: /*129*/0,
3174: MatTransposeMatMult_MPIAIJ_MPIAIJ,
3175: MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3176: MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3177: 0,
3178: /*134*/0,
3179: 0,
3180: 0,
3181: 0,
3182: 0,
3183: /*139*/0,
3184: 0,
3185: 0,
3186: MatFDColoringSetUp_MPIXAIJ
3187: };
3189: /* ----------------------------------------------------------------------------------------*/
3193: PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
3194: {
3195: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
3199: MatStoreValues(aij->A);
3200: MatStoreValues(aij->B);
3201: return(0);
3202: }
3206: PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
3207: {
3208: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
3212: MatRetrieveValues(aij->A);
3213: MatRetrieveValues(aij->B);
3214: return(0);
3215: }
3219: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3220: {
3221: Mat_MPIAIJ *b;
3225: PetscLayoutSetUp(B->rmap);
3226: PetscLayoutSetUp(B->cmap);
3227: b = (Mat_MPIAIJ*)B->data;
3229: if (!B->preallocated) {
3230: /* Explicitly create 2 MATSEQAIJ matrices. */
3231: MatCreate(PETSC_COMM_SELF,&b->A);
3232: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
3233: MatSetBlockSizesFromMats(b->A,B,B);
3234: MatSetType(b->A,MATSEQAIJ);
3235: PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);
3236: MatCreate(PETSC_COMM_SELF,&b->B);
3237: MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
3238: MatSetBlockSizesFromMats(b->B,B,B);
3239: MatSetType(b->B,MATSEQAIJ);
3240: PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);
3241: }
3243: MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
3244: MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
3245: B->preallocated = PETSC_TRUE;
3246: return(0);
3247: }
3251: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3252: {
3253: Mat mat;
3254: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3258: *newmat = 0;
3259: MatCreate(PetscObjectComm((PetscObject)matin),&mat);
3260: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
3261: MatSetBlockSizesFromMats(mat,matin,matin);
3262: MatSetType(mat,((PetscObject)matin)->type_name);
3263: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
3264: a = (Mat_MPIAIJ*)mat->data;
3266: mat->factortype = matin->factortype;
3267: mat->assembled = PETSC_TRUE;
3268: mat->insertmode = NOT_SET_VALUES;
3269: mat->preallocated = PETSC_TRUE;
3271: a->size = oldmat->size;
3272: a->rank = oldmat->rank;
3273: a->donotstash = oldmat->donotstash;
3274: a->roworiented = oldmat->roworiented;
3275: a->rowindices = 0;
3276: a->rowvalues = 0;
3277: a->getrowactive = PETSC_FALSE;
3279: PetscLayoutReference(matin->rmap,&mat->rmap);
3280: PetscLayoutReference(matin->cmap,&mat->cmap);
3282: if (oldmat->colmap) {
3283: #if defined(PETSC_USE_CTABLE)
3284: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
3285: #else
3286: PetscMalloc1((mat->cmap->N),&a->colmap);
3287: PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));
3288: PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));
3289: #endif
3290: } else a->colmap = 0;
3291: if (oldmat->garray) {
3292: PetscInt len;
3293: len = oldmat->B->cmap->n;
3294: PetscMalloc1((len+1),&a->garray);
3295: PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));
3296: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt)); }
3297: } else a->garray = 0;
3299: VecDuplicate(oldmat->lvec,&a->lvec);
3300: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);
3301: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
3302: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);
3303: MatDuplicate(oldmat->A,cpvalues,&a->A);
3304: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
3305: MatDuplicate(oldmat->B,cpvalues,&a->B);
3306: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);
3307: PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
3308: *newmat = mat;
3309: return(0);
3310: }
3316: PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3317: {
3318: PetscScalar *vals,*svals;
3319: MPI_Comm comm;
3321: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
3322: PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3323: PetscInt header[4],*rowlengths = 0,M,N,m,*cols;
3324: PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3325: PetscInt cend,cstart,n,*rowners,sizesset=1;
3326: int fd;
3327: PetscInt bs = newMat->rmap->bs;
3330: PetscObjectGetComm((PetscObject)viewer,&comm);
3331: MPI_Comm_size(comm,&size);
3332: MPI_Comm_rank(comm,&rank);
3333: if (!rank) {
3334: PetscViewerBinaryGetDescriptor(viewer,&fd);
3335: PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
3336: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3337: }
3339: PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");
3340: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
3341: PetscOptionsEnd();
3342: if (bs < 0) bs = 1;
3344: if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3346: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
3347: M = header[1]; N = header[2];
3348: /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
3349: if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3350: if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3352: /* If global sizes are set, check if they are consistent with that given in the file */
3353: if (sizesset) {
3354: MatGetSize(newMat,&grows,&gcols);
3355: }
3356: if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3357: if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3359: /* determine ownership of all (block) rows */
3360: if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3361: if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3362: else m = newMat->rmap->n; /* Set by user */
3364: PetscMalloc1((size+1),&rowners);
3365: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
3367: /* First process needs enough room for process with most rows */
3368: if (!rank) {
3369: mmax = rowners[1];
3370: for (i=2; i<=size; i++) {
3371: mmax = PetscMax(mmax, rowners[i]);
3372: }
3373: } else mmax = -1; /* unused, but compilers complain */
3375: rowners[0] = 0;
3376: for (i=2; i<=size; i++) {
3377: rowners[i] += rowners[i-1];
3378: }
3379: rstart = rowners[rank];
3380: rend = rowners[rank+1];
3382: /* distribute row lengths to all processors */
3383: PetscMalloc2(m,&ourlens,m,&offlens);
3384: if (!rank) {
3385: PetscBinaryRead(fd,ourlens,m,PETSC_INT);
3386: PetscMalloc1(mmax,&rowlengths);
3387: PetscCalloc1(size,&procsnz);
3388: for (j=0; j<m; j++) {
3389: procsnz[0] += ourlens[j];
3390: }
3391: for (i=1; i<size; i++) {
3392: PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);
3393: /* calculate the number of nonzeros on each processor */
3394: for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3395: procsnz[i] += rowlengths[j];
3396: }
3397: MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
3398: }
3399: PetscFree(rowlengths);
3400: } else {
3401: MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);
3402: }
3404: if (!rank) {
3405: /* determine max buffer needed and allocate it */
3406: maxnz = 0;
3407: for (i=0; i<size; i++) {
3408: maxnz = PetscMax(maxnz,procsnz[i]);
3409: }
3410: PetscMalloc1(maxnz,&cols);
3412: /* read in my part of the matrix column indices */
3413: nz = procsnz[0];
3414: PetscMalloc1(nz,&mycols);
3415: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
3417: /* read in everyone else's part and ship it off */
3418: for (i=1; i<size; i++) {
3419: nz = procsnz[i];
3420: PetscBinaryRead(fd,cols,nz,PETSC_INT);
3421: MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);
3422: }
3423: PetscFree(cols);
3424: } else {
3425: /* determine buffer space needed for message */
3426: nz = 0;
3427: for (i=0; i<m; i++) {
3428: nz += ourlens[i];
3429: }
3430: PetscMalloc1(nz,&mycols);
3432: /* receive message of column indices */
3433: MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);
3434: }
3436: /* determine column ownership if matrix is not square */
3437: if (N != M) {
3438: if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3439: else n = newMat->cmap->n;
3440: MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);
3441: cstart = cend - n;
3442: } else {
3443: cstart = rstart;
3444: cend = rend;
3445: n = cend - cstart;
3446: }
3448: /* loop over local rows, determining number of off diagonal entries */
3449: PetscMemzero(offlens,m*sizeof(PetscInt));
3450: jj = 0;
3451: for (i=0; i<m; i++) {
3452: for (j=0; j<ourlens[i]; j++) {
3453: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3454: jj++;
3455: }
3456: }
3458: for (i=0; i<m; i++) {
3459: ourlens[i] -= offlens[i];
3460: }
3461: if (!sizesset) {
3462: MatSetSizes(newMat,m,n,M,N);
3463: }
3465: if (bs > 1) {MatSetBlockSize(newMat,bs);}
3467: MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);
3469: for (i=0; i<m; i++) {
3470: ourlens[i] += offlens[i];
3471: }
3473: if (!rank) {
3474: PetscMalloc1((maxnz+1),&vals);
3476: /* read in my part of the matrix numerical values */
3477: nz = procsnz[0];
3478: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3480: /* insert into matrix */
3481: jj = rstart;
3482: smycols = mycols;
3483: svals = vals;
3484: for (i=0; i<m; i++) {
3485: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3486: smycols += ourlens[i];
3487: svals += ourlens[i];
3488: jj++;
3489: }
3491: /* read in other processors and ship out */
3492: for (i=1; i<size; i++) {
3493: nz = procsnz[i];
3494: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3495: MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);
3496: }
3497: PetscFree(procsnz);
3498: } else {
3499: /* receive numeric values */
3500: PetscMalloc1((nz+1),&vals);
3502: /* receive message of values*/
3503: MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);
3505: /* insert into matrix */
3506: jj = rstart;
3507: smycols = mycols;
3508: svals = vals;
3509: for (i=0; i<m; i++) {
3510: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3511: smycols += ourlens[i];
3512: svals += ourlens[i];
3513: jj++;
3514: }
3515: }
3516: PetscFree2(ourlens,offlens);
3517: PetscFree(vals);
3518: PetscFree(mycols);
3519: PetscFree(rowners);
3520: MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);
3521: MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);
3522: return(0);
3523: }
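/*
   A minimal usage sketch (illustrative, not part of the PETSc sources) of how the binary
   loader above is normally reached from user code; the file name "matrix.dat" is a
   placeholder chosen for this example.

.vb
      Mat         A;
      PetscViewer viewer;

      PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetType(A,MATMPIAIJ);
      MatLoad(A,viewer);
      PetscViewerDestroy(&viewer);
.ve
*/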
3527: PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3528: {
3530: IS iscol_local;
3531: PetscInt csize;
3534: ISGetLocalSize(iscol,&csize);
3535: if (call == MAT_REUSE_MATRIX) {
3536: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
3537: if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3538: } else {
3539: PetscInt cbs;
3540: ISGetBlockSize(iscol,&cbs);
3541: ISAllGather(iscol,&iscol_local);
3542: ISSetBlockSize(iscol_local,cbs);
3543: }
3544: MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
3545: if (call == MAT_INITIAL_MATRIX) {
3546: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
3547: ISDestroy(&iscol_local);
3548: }
3549: return(0);
3550: }
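/*
   An illustrative sketch of how the routine above is driven through the public
   MatGetSubMatrix(); the index sets are examples only (here the caller keeps all of its
   local rows and a caller-chosen column range, with ncol and firstcol as placeholder names).

.vb
      IS       isrow,iscol;
      Mat      sub;
      PetscInt rstart,rend;

      MatGetOwnershipRange(A,&rstart,&rend);
      ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);
      ISCreateStride(PETSC_COMM_WORLD,ncol,firstcol,1,&iscol);
      MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);
      MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);
.ve

   The second call queries the "ISAllGather" object composed on sub by the first call, which
   is why MAT_REUSE_MATRIX requires a matrix previously produced with MAT_INITIAL_MATRIX.
*/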
3552: extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3555: /*
3556:     Not great since it makes two copies of the submatrix: first a SeqAIJ
3557:   locally, and then the end result by concatenating the local matrices.
3558:   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3560: Note: This requires a sequential iscol with all indices.
3561: */
3562: PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3563: {
3565: PetscMPIInt rank,size;
3566: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3567: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3568: PetscBool allcolumns, colflag;
3569: Mat M,Mreuse;
3570: MatScalar *vwork,*aa;
3571: MPI_Comm comm;
3572: Mat_SeqAIJ *aij;
3575: PetscObjectGetComm((PetscObject)mat,&comm);
3576: MPI_Comm_rank(comm,&rank);
3577: MPI_Comm_size(comm,&size);
3579: ISIdentity(iscol,&colflag);
3580: ISGetLocalSize(iscol,&ncol);
3581: if (colflag && ncol == mat->cmap->N) {
3582: allcolumns = PETSC_TRUE;
3583: } else {
3584: allcolumns = PETSC_FALSE;
3585: }
3586: if (call == MAT_REUSE_MATRIX) {
3587: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);
3588: if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3589: MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);
3590: } else {
3591: MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);
3592: }
3594: /*
3595: m - number of local rows
3596: n - number of columns (same on all processors)
3597: rstart - first row in new global matrix generated
3598: */
3599: MatGetSize(Mreuse,&m,&n);
3600: MatGetBlockSizes(Mreuse,&bs,&cbs);
3601: if (call == MAT_INITIAL_MATRIX) {
3602: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3603: ii = aij->i;
3604: jj = aij->j;
3606: /*
3607: Determine the number of non-zeros in the diagonal and off-diagonal
3608: portions of the matrix in order to do correct preallocation
3609: */
3611: /* first get start and end of "diagonal" columns */
3612: if (csize == PETSC_DECIDE) {
3613: ISGetSize(isrow,&mglobal);
3614: if (mglobal == n) { /* square matrix */
3615: nlocal = m;
3616: } else {
3617: nlocal = n/size + ((n % size) > rank);
3618: }
3619: } else {
3620: nlocal = csize;
3621: }
3622: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
3623: rstart = rend - nlocal;
3624: if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3626: /* next, compute all the lengths */
3627: PetscMalloc1((2*m+1),&dlens);
3628: olens = dlens + m;
3629: for (i=0; i<m; i++) {
3630: jend = ii[i+1] - ii[i];
3631: olen = 0;
3632: dlen = 0;
3633: for (j=0; j<jend; j++) {
3634: if (*jj < rstart || *jj >= rend) olen++;
3635: else dlen++;
3636: jj++;
3637: }
3638: olens[i] = olen;
3639: dlens[i] = dlen;
3640: }
3641: MatCreate(comm,&M);
3642: MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);
3643: MatSetBlockSizes(M,bs,cbs);
3644: MatSetType(M,((PetscObject)mat)->type_name);
3645: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
3646: PetscFree(dlens);
3647: } else {
3648: PetscInt ml,nl;
3650: M = *newmat;
3651: MatGetLocalSize(M,&ml,&nl);
3652: if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3653: MatZeroEntries(M);
3654: /*
3655: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3656: rather than the slower MatSetValues().
3657: */
3658: M->was_assembled = PETSC_TRUE;
3659: M->assembled = PETSC_FALSE;
3660: }
3661: MatGetOwnershipRange(M,&rstart,&rend);
3662: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3663: ii = aij->i;
3664: jj = aij->j;
3665: aa = aij->a;
3666: for (i=0; i<m; i++) {
3667: row = rstart + i;
3668: nz = ii[i+1] - ii[i];
3669: cwork = jj; jj += nz;
3670: vwork = aa; aa += nz;
3671: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
3672: }
3674: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
3675: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
3676: *newmat = M;
3678: /* save submatrix used in processor for next request */
3679: if (call == MAT_INITIAL_MATRIX) {
3680: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
3681: MatDestroy(&Mreuse);
3682: }
3683: return(0);
3684: }
3688: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3689: {
3690: PetscInt m,cstart, cend,j,nnz,i,d;
3691: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3692: const PetscInt *JJ;
3693: PetscScalar *values;
3697: if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3699: PetscLayoutSetUp(B->rmap);
3700: PetscLayoutSetUp(B->cmap);
3701: m = B->rmap->n;
3702: cstart = B->cmap->rstart;
3703: cend = B->cmap->rend;
3704: rstart = B->rmap->rstart;
3706: PetscMalloc2(m,&d_nnz,m,&o_nnz);
3708: #if defined(PETSC_USE_DEBUG)
3709: for (i=0; i<m; i++) {
3710: nnz = Ii[i+1]- Ii[i];
3711: JJ = J + Ii[i];
3712: if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3713:     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3714:     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3715: }
3716: #endif
3718: for (i=0; i<m; i++) {
3719: nnz = Ii[i+1]- Ii[i];
3720: JJ = J + Ii[i];
3721: nnz_max = PetscMax(nnz_max,nnz);
3722: d = 0;
3723: for (j=0; j<nnz; j++) {
3724: if (cstart <= JJ[j] && JJ[j] < cend) d++;
3725: }
3726: d_nnz[i] = d;
3727: o_nnz[i] = nnz - d;
3728: }
3729: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
3730: PetscFree2(d_nnz,o_nnz);
3732: if (v) values = (PetscScalar*)v;
3733: else {
3734: PetscCalloc1((nnz_max+1),&values);
3735: }
3737: for (i=0; i<m; i++) {
3738: ii = i + rstart;
3739: nnz = Ii[i+1]- Ii[i];
3740: MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);
3741: }
3742: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3743: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3745: if (!v) {
3746: PetscFree(values);
3747: }
3748: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3749: return(0);
3750: }
3754: /*@
3755: MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3756: (the default parallel PETSc format).
3758: Collective on MPI_Comm
3760: Input Parameters:
3761: + B - the matrix
3762: . i - the indices into j for the start of each local row (starts with zero)
3763: . j - the column indices for each local row (starts with zero)
3764: - v - optional values in the matrix
3766: Level: developer
3768: Notes:
3769:    The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3770:    thus you CANNOT change the matrix entries by changing the values of v[] after you have
3771: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3773: The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3775:    The format which is used for the sparse matrix input is equivalent to a
3776:    row-major ordering, i.e. for the following matrix, the input data expected is
3777: as shown:
3779: 1 0 0
3780: 2 0 3 P0
3781: -------
3782: 4 5 6 P1
3784: Process0 [P0]: rows_owned=[0,1]
3785: i = {0,1,3} [size = nrow+1 = 2+1]
3786:         j =  {0,0,2}  [size = nz = 3]
3787:         v =  {1,2,3}  [size = nz = 3]
3789: Process1 [P1]: rows_owned=[2]
3790: i = {0,3} [size = nrow+1 = 1+1]
3791:         j =  {0,1,2}  [size = nz = 3]
3792:         v =  {4,5,6}  [size = nz = 3]
3794: .keywords: matrix, aij, compressed row, sparse, parallel
3796: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3797: MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3798: @*/
3799: PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3800: {
3804: PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3805: return(0);
3806: }
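/*
   A minimal sketch (assuming exactly two MPI ranks) that feeds the example data from the
   manual page above into MatMPIAIJSetPreallocationCSR(); it is illustrative only.

.vb
      Mat         A;
      PetscMPIInt rank;
      PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2}; PetscScalar v0[] = {1,2,3};
      PetscInt    i1[] = {0,3},   j1[] = {0,1,2}; PetscScalar v1[] = {4,5,6};

      MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,rank ? 1 : 2,PETSC_DECIDE,3,3);
      MatSetType(A,MATMPIAIJ);
      if (!rank) { MatMPIAIJSetPreallocationCSR(A,i0,j0,v0); }
      else       { MatMPIAIJSetPreallocationCSR(A,i1,j1,v1); }
.ve

   The routine preallocates, inserts the values, and assembles the matrix, so no further
   MatSetValues()/MatAssembly calls are needed for this data.
*/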
3810: /*@C
3811: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3812: (the default parallel PETSc format). For good matrix assembly performance
3813: the user should preallocate the matrix storage by setting the parameters
3814: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3815: performance can be increased by more than a factor of 50.
3817: Collective on MPI_Comm
3819: Input Parameters:
3820: + B - the matrix
3821: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3822: (same value is used for all local rows)
3823: . d_nnz - array containing the number of nonzeros in the various rows of the
3824: DIAGONAL portion of the local submatrix (possibly different for each row)
3825: or NULL, if d_nz is used to specify the nonzero structure.
3826: The size of this array is equal to the number of local rows, i.e 'm'.
3827: For matrices that will be factored, you must leave room for (and set)
3828: the diagonal entry even if it is zero.
3829: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3830: submatrix (same value is used for all local rows).
3831: - o_nnz - array containing the number of nonzeros in the various rows of the
3832: OFF-DIAGONAL portion of the local submatrix (possibly different for
3833: each row) or NULL, if o_nz is used to specify the nonzero
3834: structure. The size of this array is equal to the number
3835: of local rows, i.e 'm'.
3837: If the *_nnz parameter is given then the *_nz parameter is ignored
3839: The AIJ format (also called the Yale sparse matrix format or
3840: compressed row storage (CSR)), is fully compatible with standard Fortran 77
3841: storage. The stored row and column indices begin with zero.
3842: See Users-Manual: ch_mat for details.
3844: The parallel matrix is partitioned such that the first m0 rows belong to
3845: process 0, the next m1 rows belong to process 1, the next m2 rows belong
3846: to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3848: The DIAGONAL portion of the local submatrix of a processor can be defined
3849:    as the submatrix which is obtained by extracting the part corresponding to
3850: the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3851: first row that belongs to the processor, r2 is the last row belonging to
3852:    this processor, and c1-c2 is the range of indices of the local part of a
3853: vector suitable for applying the matrix to. This is an mxn matrix. In the
3854: common case of a square matrix, the row and column ranges are the same and
3855: the DIAGONAL part is also square. The remaining portion of the local
3856:    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3858: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3860: You can call MatGetInfo() to get information on how effective the preallocation was;
3861: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3862: You can also run with the option -info and look for messages with the string
3863: malloc in them to see if additional memory allocation was needed.
3865: Example usage:
3867: Consider the following 8x8 matrix with 34 non-zero values, that is
3868:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3869: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3870: as follows:
3872: .vb
3873: 1 2 0 | 0 3 0 | 0 4
3874: Proc0 0 5 6 | 7 0 0 | 8 0
3875: 9 0 10 | 11 0 0 | 12 0
3876: -------------------------------------
3877: 13 0 14 | 15 16 17 | 0 0
3878: Proc1 0 18 0 | 19 20 21 | 0 0
3879: 0 0 0 | 22 23 0 | 24 0
3880: -------------------------------------
3881: Proc2 25 26 27 | 0 0 28 | 29 0
3882: 30 0 0 | 31 32 33 | 0 34
3883: .ve
3885: This can be represented as a collection of submatrices as:
3887: .vb
3888: A B C
3889: D E F
3890: G H I
3891: .ve
3893: Where the submatrices A,B,C are owned by proc0, D,E,F are
3894: owned by proc1, G,H,I are owned by proc2.
3896: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3897: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3898: The 'M','N' parameters are 8,8, and have the same values on all procs.
3900: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3901: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3902: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3903: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3904:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3905:    matrix, and [DF] as another SeqAIJ matrix.
3907: When d_nz, o_nz parameters are specified, d_nz storage elements are
3908: allocated for every row of the local diagonal submatrix, and o_nz
3909: storage locations are allocated for every row of the OFF-DIAGONAL submat.
3910:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3911:    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3912: In this case, the values of d_nz,o_nz are:
3913: .vb
3914: proc0 : dnz = 2, o_nz = 2
3915: proc1 : dnz = 3, o_nz = 2
3916: proc2 : dnz = 1, o_nz = 4
3917: .ve
3918: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3919: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3920:    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3921: 34 values.
3923: When d_nnz, o_nnz parameters are specified, the storage is specified
3924:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3925: In the above case the values for d_nnz,o_nnz are:
3926: .vb
3927: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3928: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3929: proc2: d_nnz = [1,1] and o_nnz = [4,4]
3930: .ve
3931:    Here the space allocated is the sum of all the above values, i.e. 34, and
3932:    hence the preallocation is perfect.
3934: Level: intermediate
3936: .keywords: matrix, aij, compressed row, sparse, parallel
3938: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3939: MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3940: @*/
3941: PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3942: {
3948: PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
3949: return(0);
3950: }
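/*
   An illustrative sketch of calling MatMPIAIJSetPreallocation() with the per-row counts
   worked out in the 8x8 example above; only what rank 0 of the three ranks would execute is
   shown, and ranks 1 and 2 would pass their own local sizes and d_nnz/o_nnz arrays from the
   table above.

.vb
      Mat      A;
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,3,3,8,8);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve

   After preallocation the entries are inserted with MatSetValues() and the matrix is
   assembled as usual.
*/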
3954: /*@
3955: MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3956: CSR format the local rows.
3958: Collective on MPI_Comm
3960: Input Parameters:
3961: + comm - MPI communicator
3962: . m - number of local rows (Cannot be PETSC_DECIDE)
3963: . n - This value should be the same as the local size used in creating the
3964: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3965: calculated if N is given) For square matrices n is almost always m.
3966: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3967: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3968: . i - row indices
3969: . j - column indices
3970: - a - matrix values
3972: Output Parameter:
3973: . mat - the matrix
3975: Level: intermediate
3977: Notes:
3978: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3979: thus you CANNOT change the matrix entries by changing the values of a[] after you have
3980: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3982: The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3984:    The format which is used for the sparse matrix input is equivalent to a
3985:    row-major ordering, i.e. for the following matrix, the input data expected is
3986: as shown:
3988: 1 0 0
3989: 2 0 3 P0
3990: -------
3991: 4 5 6 P1
3993: Process0 [P0]: rows_owned=[0,1]
3994: i = {0,1,3} [size = nrow+1 = 2+1]
3995:         j =  {0,0,2}  [size = nz = 3]
3996:         v =  {1,2,3}  [size = nz = 3]
3998: Process1 [P1]: rows_owned=[2]
3999: i = {0,3} [size = nrow+1 = 1+1]
4000:         j =  {0,1,2}  [size = nz = 3]
4001:         v =  {4,5,6}  [size = nz = 3]
4003: .keywords: matrix, aij, compressed row, sparse, parallel
4005: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4006: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4007: @*/
4008: PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4009: {
4013: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4014: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4015: MatCreate(comm,mat);
4016: MatSetSizes(*mat,m,n,M,N);
4017: /* MatSetBlockSizes(M,bs,cbs); */
4018: MatSetType(*mat,MATMPIAIJ);
4019: MatMPIAIJSetPreallocationCSR(*mat,i,j,a);
4020: return(0);
4021: }
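/*
   A minimal two-rank sketch of MatCreateMPIAIJWithArrays() using the example data from the
   manual page above (illustrative only; since the arrays are copied, stack storage is fine).

.vb
      Mat         A;
      PetscMPIInt rank;
      PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2}; PetscScalar a0[] = {1,2,3};
      PetscInt    i1[] = {0,3},   j1[] = {0,1,2}; PetscScalar a1[] = {4,5,6};

      MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
      if (!rank) { MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i0,j0,a0,&A); }
      else       { MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,i1,j1,a1,&A); }
.ve
*/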
4025: /*@C
4026: MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4027: (the default parallel PETSc format). For good matrix assembly performance
4028: the user should preallocate the matrix storage by setting the parameters
4029: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4030: performance can be increased by more than a factor of 50.
4032: Collective on MPI_Comm
4034: Input Parameters:
4035: + comm - MPI communicator
4036: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4037: This value should be the same as the local size used in creating the
4038: y vector for the matrix-vector product y = Ax.
4039: . n - This value should be the same as the local size used in creating the
4040: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4041: calculated if N is given) For square matrices n is almost always m.
4042: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4043: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4044: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4045: (same value is used for all local rows)
4046: . d_nnz - array containing the number of nonzeros in the various rows of the
4047: DIAGONAL portion of the local submatrix (possibly different for each row)
4048: or NULL, if d_nz is used to specify the nonzero structure.
4049: The size of this array is equal to the number of local rows, i.e 'm'.
4050: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4051: submatrix (same value is used for all local rows).
4052: - o_nnz - array containing the number of nonzeros in the various rows of the
4053: OFF-DIAGONAL portion of the local submatrix (possibly different for
4054: each row) or NULL, if o_nz is used to specify the nonzero
4055: structure. The size of this array is equal to the number
4056: of local rows, i.e 'm'.
4058: Output Parameter:
4059: . A - the matrix
4061: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4062:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4063: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4065: Notes:
4066: If the *_nnz parameter is given then the *_nz parameter is ignored
4068: m,n,M,N parameters specify the size of the matrix, and its partitioning across
4069: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4070: storage requirements for this matrix.
4072: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4073:    processor then it must be used on all processors that share the object for
4074: that argument.
4076: The user MUST specify either the local or global matrix dimensions
4077: (possibly both).
4079: The parallel matrix is partitioned across processors such that the
4080: first m0 rows belong to process 0, the next m1 rows belong to
4081: process 1, the next m2 rows belong to process 2 etc.. where
4082: m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4083: values corresponding to [m x N] submatrix.
4085: The columns are logically partitioned with the n0 columns belonging
4086: to 0th partition, the next n1 columns belonging to the next
4087: partition etc.. where n0,n1,n2... are the input parameter 'n'.
4089: The DIAGONAL portion of the local submatrix on any given processor
4090: is the submatrix corresponding to the rows and columns m,n
4091: corresponding to the given processor. i.e diagonal matrix on
4092: process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4093: etc. The remaining portion of the local submatrix [m x (N-n)]
4094: constitute the OFF-DIAGONAL portion. The example below better
4095: illustrates this concept.
4097: For a square global matrix we define each processor's diagonal portion
4098: to be its local rows and the corresponding columns (a square submatrix);
4099: each processor's off-diagonal portion encompasses the remainder of the
4100: local matrix (a rectangular submatrix).
4102: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4104: When calling this routine with a single process communicator, a matrix of
4105: type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4106: type of communicator, use the construction mechanism:
4107: MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4109: By default, this format uses inodes (identical nodes) when possible.
4110: We search for consecutive rows with the same nonzero structure, thereby
4111: reusing matrix information to achieve increased efficiency.
4113: Options Database Keys:
4114: + -mat_no_inode - Do not use inodes
4115: . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4116: - -mat_aij_oneindex - Internally use indexing starting at 1
4117: rather than 0. Note that when calling MatSetValues(),
4118: the user still MUST index entries starting at 0!
4121: Example usage:
4123: Consider the following 8x8 matrix with 34 non-zero values, that is
4124:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4125: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4126: as follows:
4128: .vb
4129: 1 2 0 | 0 3 0 | 0 4
4130: Proc0 0 5 6 | 7 0 0 | 8 0
4131: 9 0 10 | 11 0 0 | 12 0
4132: -------------------------------------
4133: 13 0 14 | 15 16 17 | 0 0
4134: Proc1 0 18 0 | 19 20 21 | 0 0
4135: 0 0 0 | 22 23 0 | 24 0
4136: -------------------------------------
4137: Proc2 25 26 27 | 0 0 28 | 29 0
4138: 30 0 0 | 31 32 33 | 0 34
4139: .ve
4141: This can be represented as a collection of submatrices as:
4143: .vb
4144: A B C
4145: D E F
4146: G H I
4147: .ve
4149: Where the submatrices A,B,C are owned by proc0, D,E,F are
4150: owned by proc1, G,H,I are owned by proc2.
4152: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4153: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4154: The 'M','N' parameters are 8,8, and have the same values on all procs.
4156: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4157: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4158: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4159: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4160:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4161:    matrix, and [DF] as another SeqAIJ matrix.
4163: When d_nz, o_nz parameters are specified, d_nz storage elements are
4164: allocated for every row of the local diagonal submatrix, and o_nz
4165: storage locations are allocated for every row of the OFF-DIAGONAL submat.
4166:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4167:    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4168: In this case, the values of d_nz,o_nz are:
4169: .vb
4170: proc0 : dnz = 2, o_nz = 2
4171: proc1 : dnz = 3, o_nz = 2
4172: proc2 : dnz = 1, o_nz = 4
4173: .ve
4174: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4175: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4176:    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4177: 34 values.
4179: When d_nnz, o_nnz parameters are specified, the storage is specified
4180:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4181: In the above case the values for d_nnz,o_nnz are:
4182: .vb
4183: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4184: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4185: proc2: d_nnz = [1,1] and o_nnz = [4,4]
4186: .ve
4187:    Here the space allocated is the sum of all the above values, i.e. 34, and
4188:    hence the preallocation is perfect.
4190: Level: intermediate
4192: .keywords: matrix, aij, compressed row, sparse, parallel
4194: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4195: MPIAIJ, MatCreateMPIAIJWithArrays()
4196: @*/
4197: PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4198: {
4200: PetscMPIInt size;
4203: MatCreate(comm,A);
4204: MatSetSizes(*A,m,n,M,N);
4205: MPI_Comm_size(comm,&size);
4206: if (size > 1) {
4207: MatSetType(*A,MATMPIAIJ);
4208: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
4209: } else {
4210: MatSetType(*A,MATSEQAIJ);
4211: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
4212: }
4213: return(0);
4214: }
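/*
   Two illustrative creation sketches, equivalent in effect: the direct MatCreateAIJ() call
   and the recommended MatCreate()/MatSetFromOptions() paradigm mentioned in the manual page
   above. Here m,n,M,N,d_nz,o_nz stand for the caller's sizes and preallocation bounds.

.vb
      Mat A;

      MatCreateAIJ(PETSC_COMM_WORLD,m,n,M,N,d_nz,NULL,o_nz,NULL,&A);
.ve

   or

.vb
      Mat A;

      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetFromOptions(A);
      MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
      MatSeqAIJSetPreallocation(A,d_nz,NULL);
.ve

   In the second form both preallocation calls may safely be made; only the one matching the
   actual matrix type takes effect.
*/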
4218: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4219: {
4220: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4223: if (Ad) *Ad = a->A;
4224: if (Ao) *Ao = a->B;
4225: if (colmap) *colmap = a->garray;
4226: return(0);
4227: }
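/*
   An illustrative usage sketch for MatMPIAIJGetSeqAIJ() above; it hands back references to
   the internal diagonal and off-diagonal SeqAIJ blocks plus the off-diagonal column map, so
   nothing is copied and the returned matrices must not be destroyed by the caller.

.vb
      Mat            Ad,Ao;
      const PetscInt *colmap;

      MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
.ve

   Here colmap[k] is the global column index of local column k of Ao.
*/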
4231: PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4232: {
4234: PetscInt i;
4235: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4238: if (coloring->ctype == IS_COLORING_GLOBAL) {
4239: ISColoringValue *allcolors,*colors;
4240: ISColoring ocoloring;
4242: /* set coloring for diagonal portion */
4243: MatSetColoring_SeqAIJ(a->A,coloring);
4245: /* set coloring for off-diagonal portion */
4246: ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);
4247: PetscMalloc1((a->B->cmap->n+1),&colors);
4248: for (i=0; i<a->B->cmap->n; i++) {
4249: colors[i] = allcolors[a->garray[i]];
4250: }
4251: PetscFree(allcolors);
4252: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4253: MatSetColoring_SeqAIJ(a->B,ocoloring);
4254: ISColoringDestroy(&ocoloring);
4255: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4256: ISColoringValue *colors;
4257: PetscInt *larray;
4258: ISColoring ocoloring;
4260: /* set coloring for diagonal portion */
4261: PetscMalloc1((a->A->cmap->n+1),&larray);
4262: for (i=0; i<a->A->cmap->n; i++) {
4263: larray[i] = i + A->cmap->rstart;
4264: }
4265: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);
4266: PetscMalloc1((a->A->cmap->n+1),&colors);
4267: for (i=0; i<a->A->cmap->n; i++) {
4268: colors[i] = coloring->colors[larray[i]];
4269: }
4270: PetscFree(larray);
4271: ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);
4272: MatSetColoring_SeqAIJ(a->A,ocoloring);
4273: ISColoringDestroy(&ocoloring);
4275: /* set coloring for off-diagonal portion */
4276: PetscMalloc1((a->B->cmap->n+1),&larray);
4277: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);
4278: PetscMalloc1((a->B->cmap->n+1),&colors);
4279: for (i=0; i<a->B->cmap->n; i++) {
4280: colors[i] = coloring->colors[larray[i]];
4281: }
4282: PetscFree(larray);
4283: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4284: MatSetColoring_SeqAIJ(a->B,ocoloring);
4285: ISColoringDestroy(&ocoloring);
4286: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4287: return(0);
4288: }
4292: PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4293: {
4294: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4298: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
4299: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
4300: return(0);
4301: }
4305: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4306: {
4308: PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4309: PetscInt *indx;
4312: /* This routine will ONLY return MPIAIJ type matrix */
4313: MatGetSize(inmat,&m,&N);
4314: MatGetBlockSizes(inmat,&bs,&cbs);
4315: if (n == PETSC_DECIDE) {
4316: PetscSplitOwnership(comm,&n,&N);
4317: }
4318: /* Check sum(n) = N */
4319: MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
4320: if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4322: MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);
4323: rstart -= m;
4325: MatPreallocateInitialize(comm,m,n,dnz,onz);
4326: for (i=0; i<m; i++) {
4327: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);
4328: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
4329: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);
4330: }
4332: MatCreate(comm,outmat);
4333: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4334: MatSetBlockSizes(*outmat,bs,cbs);
4335: MatSetType(*outmat,MATMPIAIJ);
4336: MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
4337: MatPreallocateFinalize(dnz,onz);
4338: return(0);
4339: }
4343: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4344: {
4346: PetscInt m,N,i,rstart,nnz,Ii;
4347: PetscInt *indx;
4348: PetscScalar *values;
4351: MatGetSize(inmat,&m,&N);
4352: MatGetOwnershipRange(outmat,&rstart,NULL);
4353: for (i=0; i<m; i++) {
4354: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4355: Ii = i + rstart;
4356: MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
4357: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4358: }
4359: MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);
4360: MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);
4361: return(0);
4362: }
4366: /*@
4367: MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4368: matrices from each processor
4370: Collective on MPI_Comm
4372: Input Parameters:
4373: + comm - the communicators the parallel matrix will live on
4374: . inmat - the input sequential matrices
4375: . n - number of local columns (or PETSC_DECIDE)
4376: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4378: Output Parameter:
4379: . outmat - the parallel matrix generated
4381: Level: advanced
4383: Notes: The number of columns of the matrix in EACH processor MUST be the same.
4385: @*/
4386: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4387: {
4389: PetscMPIInt size;
4392: MPI_Comm_size(comm,&size);
4393: PetscLogEventBegin(MAT_Merge,inmat,0,0,0);
4394: if (size == 1) {
4395: if (scall == MAT_INITIAL_MATRIX) {
4396: MatDuplicate(inmat,MAT_COPY_VALUES,outmat);
4397: } else {
4398: MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);
4399: }
4400: } else {
4401: if (scall == MAT_INITIAL_MATRIX) {
4402: MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);
4403: }
4404: MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);
4405: }
4406: PetscLogEventEnd(MAT_Merge,inmat,0,0,0);
4407: return(0);
4408: }
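/*
   An illustrative usage sketch for the concatenation routine above: each rank supplies a
   SeqAIJ matrix seq with the same number of columns, and the rows are stacked in rank order.

.vb
      Mat par;

      MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);
      MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_REUSE_MATRIX,&par);
.ve
*/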
4412: PetscErrorCode MatFileSplit(Mat A,char *outfile)
4413: {
4414: PetscErrorCode ierr;
4415: PetscMPIInt rank;
4416: PetscInt m,N,i,rstart,nnz;
4417: size_t len;
4418: const PetscInt *indx;
4419: PetscViewer out;
4420: char *name;
4421: Mat B;
4422: const PetscScalar *values;
4425: MatGetLocalSize(A,&m,0);
4426: MatGetSize(A,0,&N);
4427: /* Should this be the type of the diagonal block of A? */
4428: MatCreate(PETSC_COMM_SELF,&B);
4429: MatSetSizes(B,m,N,m,N);
4430: MatSetBlockSizesFromMats(B,A,A);
4431: MatSetType(B,MATSEQAIJ);
4432: MatSeqAIJSetPreallocation(B,0,NULL);
4433: MatGetOwnershipRange(A,&rstart,0);
4434: for (i=0; i<m; i++) {
4435: MatGetRow(A,i+rstart,&nnz,&indx,&values);
4436: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
4437: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
4438: }
4439: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
4440: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
4442: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
4443: PetscStrlen(outfile,&len);
4444: PetscMalloc1((len+5),&name);
4445: sprintf(name,"%s.%d",outfile,rank);
4446: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);
4447: PetscFree(name);
4448: MatView(B,out);
4449: PetscViewerDestroy(&out);
4450: MatDestroy(&B);
4451: return(0);
4452: }
4454: extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4457: PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4458: {
4459: PetscErrorCode ierr;
4460: Mat_Merge_SeqsToMPI *merge;
4461: PetscContainer container;
4464: PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);
4465: if (container) {
4466: PetscContainerGetPointer(container,(void**)&merge);
4467: PetscFree(merge->id_r);
4468: PetscFree(merge->len_s);
4469: PetscFree(merge->len_r);
4470: PetscFree(merge->bi);
4471: PetscFree(merge->bj);
4472: PetscFree(merge->buf_ri[0]);
4473: PetscFree(merge->buf_ri);
4474: PetscFree(merge->buf_rj[0]);
4475: PetscFree(merge->buf_rj);
4476: PetscFree(merge->coi);
4477: PetscFree(merge->coj);
4478: PetscFree(merge->owners_co);
4479: PetscLayoutDestroy(&merge->rowmap);
4480: PetscFree(merge);
4481: PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);
4482: }
4483: MatDestroy_MPIAIJ(A);
4484: return(0);
4485: }
4487: #include <../src/mat/utils/freespace.h>
4488: #include <petscbt.h>
4492: PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4493: {
4494: PetscErrorCode ierr;
4495: MPI_Comm comm;
4496: Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data;
4497: PetscMPIInt size,rank,taga,*len_s;
4498: PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4499: PetscInt proc,m;
4500: PetscInt **buf_ri,**buf_rj;
4501: PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4502: PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
4503: MPI_Request *s_waits,*r_waits;
4504: MPI_Status *status;
4505: MatScalar *aa=a->a;
4506: MatScalar **abuf_r,*ba_i;
4507: Mat_Merge_SeqsToMPI *merge;
4508: PetscContainer container;
4511: PetscObjectGetComm((PetscObject)mpimat,&comm);
4512: PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);
4514: MPI_Comm_size(comm,&size);
4515: MPI_Comm_rank(comm,&rank);
4517: PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);
4518: PetscContainerGetPointer(container,(void**)&merge);
4520: bi = merge->bi;
4521: bj = merge->bj;
4522: buf_ri = merge->buf_ri;
4523: buf_rj = merge->buf_rj;
4525: PetscMalloc1(size,&status);
4526: owners = merge->rowmap->range;
4527: len_s = merge->len_s;
4529: /* send and recv matrix values */
4530: /*-----------------------------*/
4531: PetscObjectGetNewTag((PetscObject)mpimat,&taga);
4532: PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);
4534: PetscMalloc1((merge->nsend+1),&s_waits);
4535: for (proc=0,k=0; proc<size; proc++) {
4536: if (!len_s[proc]) continue;
4537: i = owners[proc];
4538: MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
4539: k++;
4540: }
4542: if (merge->nrecv) {MPI_Waitall(merge->nrecv,r_waits,status);}
4543: if (merge->nsend) {MPI_Waitall(merge->nsend,s_waits,status);}
4544: PetscFree(status);
4546: PetscFree(s_waits);
4547: PetscFree(r_waits);
4549: /* insert mat values of mpimat */
4550: /*----------------------------*/
4551: PetscMalloc1(N,&ba_i);
4552: PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);
4554: for (k=0; k<merge->nrecv; k++) {
4555: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4556: nrows = *(buf_ri_k[k]);
4557: nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */
4558:     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4559: }
4561: /* set values of ba */
4562: m = merge->rowmap->n;
4563: for (i=0; i<m; i++) {
4564: arow = owners[rank] + i;
4565: bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
4566: bnzi = bi[i+1] - bi[i];
4567: PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));
4569: /* add local non-zero vals of this proc's seqmat into ba */
4570: anzi = ai[arow+1] - ai[arow];
4571: aj = a->j + ai[arow];
4572: aa = a->a + ai[arow];
4573: nextaj = 0;
4574: for (j=0; nextaj<anzi; j++) {
4575: if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4576: ba_i[j] += aa[nextaj++];
4577: }
4578: }
4580: /* add received vals into ba */
4581: for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4582: /* i-th row */
4583: if (i == *nextrow[k]) {
4584: anzi = *(nextai[k]+1) - *nextai[k];
4585: aj = buf_rj[k] + *(nextai[k]);
4586: aa = abuf_r[k] + *(nextai[k]);
4587: nextaj = 0;
4588: for (j=0; nextaj<anzi; j++) {
4589: if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4590: ba_i[j] += aa[nextaj++];
4591: }
4592: }
4593: nextrow[k]++; nextai[k]++;
4594: }
4595: }
4596: MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
4597: }
4598: MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
4599: MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);
4601: PetscFree(abuf_r[0]);
4602: PetscFree(abuf_r);
4603: PetscFree(ba_i);
4604: PetscFree3(buf_ri_k,nextrow,nextai);
4605: PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);
4606: return(0);
4607: }
4609: extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4613: PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4614: {
4615: PetscErrorCode ierr;
4616: Mat B_mpi;
4617: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4618: PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4619: PetscInt **buf_rj,**buf_ri,**buf_ri_k;
4620: PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4621: PetscInt len,proc,*dnz,*onz,bs,cbs;
4622: PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4623: PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4624: MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
4625: MPI_Status *status;
4626: PetscFreeSpaceList free_space=NULL,current_space=NULL;
4627: PetscBT lnkbt;
4628: Mat_Merge_SeqsToMPI *merge;
4629: PetscContainer container;
4632: PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);
4634: /* make sure it is a PETSc comm */
4635: PetscCommDuplicate(comm,&comm,NULL);
4636: MPI_Comm_size(comm,&size);
4637: MPI_Comm_rank(comm,&rank);
4639: PetscNew(&merge);
4640: PetscMalloc1(size,&status);
4642: /* determine row ownership */
4643: /*---------------------------------------------------------*/
4644: PetscLayoutCreate(comm,&merge->rowmap);
4645: PetscLayoutSetLocalSize(merge->rowmap,m);
4646: PetscLayoutSetSize(merge->rowmap,M);
4647: PetscLayoutSetBlockSize(merge->rowmap,1);
4648: PetscLayoutSetUp(merge->rowmap);
4649: PetscMalloc1(size,&len_si);
4650: PetscMalloc1(size,&merge->len_s);
4652: m = merge->rowmap->n;
4653: owners = merge->rowmap->range;
4655: /* determine the number of messages to send, their lengths */
4656: /*---------------------------------------------------------*/
4657: len_s = merge->len_s;
4659: len = 0; /* length of buf_si[] */
4660: merge->nsend = 0;
4661: for (proc=0; proc<size; proc++) {
4662: len_si[proc] = 0;
4663: if (proc == rank) {
4664: len_s[proc] = 0;
4665: } else {
4666: len_si[proc] = owners[proc+1] - owners[proc] + 1;
4667: len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4668: }
4669: if (len_s[proc]) {
4670: merge->nsend++;
4671: nrows = 0;
4672: for (i=owners[proc]; i<owners[proc+1]; i++) {
4673: if (ai[i+1] > ai[i]) nrows++;
4674: }
4675: len_si[proc] = 2*(nrows+1);
4676: len += len_si[proc];
4677: }
4678: }
4680: /* determine the number and length of messages to receive for ij-structure */
4681: /*-------------------------------------------------------------------------*/
4682: PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);
4683: PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);
4685: /* post the Irecv of j-structure */
4686: /*-------------------------------*/
4687: PetscCommGetNewTag(comm,&tagj);
4688: PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);
4690: /* post the Isend of j-structure */
4691: /*--------------------------------*/
4692: PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);
4694: for (proc=0, k=0; proc<size; proc++) {
4695: if (!len_s[proc]) continue;
4696: i = owners[proc];
4697: MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
4698: k++;
4699: }
4701: /* receives and sends of j-structure are complete */
4702: /*------------------------------------------------*/
4703: if (merge->nrecv) {MPI_Waitall(merge->nrecv,rj_waits,status);}
4704: if (merge->nsend) {MPI_Waitall(merge->nsend,sj_waits,status);}
4706: /* send and recv i-structure */
4707: /*---------------------------*/
4708: PetscCommGetNewTag(comm,&tagi);
4709: PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);
4711: PetscMalloc1((len+1),&buf_s);
4712: buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4713: for (proc=0,k=0; proc<size; proc++) {
4714: if (!len_s[proc]) continue;
4715: /* form outgoing message for i-structure:
4716: buf_si[0]: nrows to be sent
4717: [1:nrows]: row index (global)
4718: [nrows+1:2*nrows+1]: i-structure index
4719: */
4720: /*-------------------------------------------*/
4721: nrows = len_si[proc]/2 - 1;
4722: buf_si_i = buf_si + nrows+1;
4723: buf_si[0] = nrows;
4724: buf_si_i[0] = 0;
4725: nrows = 0;
4726: for (i=owners[proc]; i<owners[proc+1]; i++) {
4727: anzi = ai[i+1] - ai[i];
4728: if (anzi) {
4729: buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4730: buf_si[nrows+1] = i-owners[proc]; /* local row index */
4731: nrows++;
4732: }
4733: }
4734: MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
4735: k++;
4736: buf_si += len_si[proc];
4737: }
4739: if (merge->nrecv) {MPI_Waitall(merge->nrecv,ri_waits,status);}
4740: if (merge->nsend) {MPI_Waitall(merge->nsend,si_waits,status);}
4742: PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);
4743: for (i=0; i<merge->nrecv; i++) {
4744: PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);
4745: }
4747: PetscFree(len_si);
4748: PetscFree(len_ri);
4749: PetscFree(rj_waits);
4750: PetscFree2(si_waits,sj_waits);
4751: PetscFree(ri_waits);
4752: PetscFree(buf_s);
4753: PetscFree(status);
4755: /* compute a local seq matrix in each processor */
4756: /*----------------------------------------------*/
4757: /* allocate bi array and free space for accumulating nonzero column info */
4758: PetscMalloc1((m+1),&bi);
4759: bi[0] = 0;
4761: /* create and initialize a linked list */
4762: nlnk = N+1;
4763: PetscLLCreate(N,N,nlnk,lnk,lnkbt);
4765: /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4766: len = ai[owners[rank+1]] - ai[owners[rank]];
4767: PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);
4769: current_space = free_space;
4771: /* determine symbolic info for each local row */
4772: PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);
4774: for (k=0; k<merge->nrecv; k++) {
4775: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4776: nrows = *buf_ri_k[k];
4777: nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4778:     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4779: }
4781: MatPreallocateInitialize(comm,m,n,dnz,onz);
4782: len = 0;
4783: for (i=0; i<m; i++) {
4784: bnzi = 0;
4785: /* add local non-zero cols of this proc's seqmat into lnk */
4786: arow = owners[rank] + i;
4787: anzi = ai[arow+1] - ai[arow];
4788: aj = a->j + ai[arow];
4789: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
4790: bnzi += nlnk;
4791: /* add received col data into lnk */
4792: for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4793: if (i == *nextrow[k]) { /* i-th row */
4794: anzi = *(nextai[k]+1) - *nextai[k];
4795: aj = buf_rj[k] + *nextai[k];
4796: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
4797: bnzi += nlnk;
4798: nextrow[k]++; nextai[k]++;
4799: }
4800: }
4801: if (len < bnzi) len = bnzi; /* =max(bnzi) */
4803: /* if free space is not available, make more free space */
4804: if (current_space->local_remaining<bnzi) {
4805:       PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);
4806: nspacedouble++;
4807: }
4808: /* copy data into free space, then initialize lnk */
4809: PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
4810: MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);
4812: current_space->array += bnzi;
4813: current_space->local_used += bnzi;
4814: current_space->local_remaining -= bnzi;
4816: bi[i+1] = bi[i] + bnzi;
4817: }
4819: PetscFree3(buf_ri_k,nextrow,nextai);
4821: PetscMalloc1((bi[m]+1),&bj);
4822: PetscFreeSpaceContiguous(&free_space,bj);
4823: PetscLLDestroy(lnk,lnkbt);
4825: /* create symbolic parallel matrix B_mpi */
4826: /*---------------------------------------*/
4827: MatGetBlockSizes(seqmat,&bs,&cbs);
4828: MatCreate(comm,&B_mpi);
4829: if (n==PETSC_DECIDE) {
4830: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);
4831: } else {
4832: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4833: }
4834: MatSetBlockSizes(B_mpi,bs,cbs);
4835: MatSetType(B_mpi,MATMPIAIJ);
4836: MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
4837: MatPreallocateFinalize(dnz,onz);
4838: MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
4840: /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4841: B_mpi->assembled = PETSC_FALSE;
4842: B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4843: merge->bi = bi;
4844: merge->bj = bj;
4845: merge->buf_ri = buf_ri;
4846: merge->buf_rj = buf_rj;
4847: merge->coi = NULL;
4848: merge->coj = NULL;
4849: merge->owners_co = NULL;
4851: PetscCommDestroy(&comm);
4853: /* attach the supporting struct to B_mpi for reuse */
4854: PetscContainerCreate(PETSC_COMM_SELF,&container);
4855: PetscContainerSetPointer(container,merge);
4856: PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
4857: PetscContainerDestroy(&container);
4858: *mpimat = B_mpi;
4860: PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);
4861: return(0);
4862: }
4866: /*@C
4867: MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4868: matrices from each processor
4870: Collective on MPI_Comm
4872: Input Parameters:
4873: + comm - the communicators the parallel matrix will live on
4874: . seqmat - the input sequential matrices
4875: . m - number of local rows (or PETSC_DECIDE)
4876: . n - number of local columns (or PETSC_DECIDE)
4877: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4879: Output Parameter:
4880: . mpimat - the parallel matrix generated
4882: Level: advanced
4884: Notes:
4885: The dimensions of the sequential matrix in each processor MUST be the same.
4886: The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4887: destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4888: @*/
4889: PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4890: {
4892: PetscMPIInt size;
4895: MPI_Comm_size(comm,&size);
4896: if (size == 1) {
4897: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4898: if (scall == MAT_INITIAL_MATRIX) {
4899: MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);
4900: } else {
4901: MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);
4902: }
4903: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4904: return(0);
4905: }
4906: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4907: if (scall == MAT_INITIAL_MATRIX) {
4908: MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);
4909: }
4910: MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);
4911: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4912: return(0);
4913: }
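/*
   An illustrative usage sketch for MatCreateMPIAIJSumSeqAIJ() above: every rank contributes
   a SeqAIJ matrix seq of identical dimensions and the parallel result holds their sum.

.vb
      Mat par;

      MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);
      MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&par);
.ve
*/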
4917: /*@
4918:    MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4919:    mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4920:    with MatGetSize().
4922: Not Collective
4924: Input Parameters:
4925: + A - the matrix
4926: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4928: Output Parameter:
4929: . A_loc - the local sequential matrix generated
4931: Level: developer
4933: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4935: @*/
4936: PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4937: {
4939: Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
4940: Mat_SeqAIJ *mat,*a,*b;
4941: PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4942: MatScalar *aa,*ba,*cam;
4943: PetscScalar *ca;
4944: PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4945: PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
4946: PetscBool match;
4947: MPI_Comm comm;
4948: PetscMPIInt size;
4951: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);
4952: if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4953: PetscObjectGetComm((PetscObject)A,&comm);
4954: MPI_Comm_size(comm,&size);
4955: if (size == 1 && scall == MAT_REUSE_MATRIX) return(0);
4956:
4957: PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);
4958: a = (Mat_SeqAIJ*)(mpimat->A)->data;
4959: b = (Mat_SeqAIJ*)(mpimat->B)->data;
4960: ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4961: aa = a->a; ba = b->a;
4962: if (scall == MAT_INITIAL_MATRIX) {
4963: if (size == 1) {
4964: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);
4965: return(0);
4966: }
4968: PetscMalloc1((1+am),&ci);
4969: ci[0] = 0;
4970: for (i=0; i<am; i++) {
4971: ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4972: }
4973: PetscMalloc1((1+ci[am]),&cj);
4974: PetscMalloc1((1+ci[am]),&ca);
4975: k = 0;
4976: for (i=0; i<am; i++) {
4977: ncols_o = bi[i+1] - bi[i];
4978: ncols_d = ai[i+1] - ai[i];
4979: /* off-diagonal portion of A */
4980: for (jo=0; jo<ncols_o; jo++) {
4981: col = cmap[*bj];
4982: if (col >= cstart) break;
4983: cj[k] = col; bj++;
4984: ca[k++] = *ba++;
4985: }
4986: /* diagonal portion of A */
4987: for (j=0; j<ncols_d; j++) {
4988: cj[k] = cstart + *aj++;
4989: ca[k++] = *aa++;
4990: }
4991: /* off-diagonal portion of A */
4992: for (j=jo; j<ncols_o; j++) {
4993: cj[k] = cmap[*bj++];
4994: ca[k++] = *ba++;
4995: }
4996: }
4997: /* put together the new matrix */
4998: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);
4999: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5000: /* Since these are PETSc arrays, change flags to free them as necessary. */
5001: mat = (Mat_SeqAIJ*)(*A_loc)->data;
5002: mat->free_a = PETSC_TRUE;
5003: mat->free_ij = PETSC_TRUE;
5004: mat->nonew = 0;
5005: } else if (scall == MAT_REUSE_MATRIX) {
5006: mat=(Mat_SeqAIJ*)(*A_loc)->data;
5007: ci = mat->i; cj = mat->j; cam = mat->a;
5008: for (i=0; i<am; i++) {
5009: /* off-diagonal portion of A */
5010: ncols_o = bi[i+1] - bi[i];
5011: for (jo=0; jo<ncols_o; jo++) {
5012: col = cmap[*bj];
5013: if (col >= cstart) break;
5014: *cam++ = *ba++; bj++;
5015: }
5016: /* diagonal portion of A */
5017: ncols_d = ai[i+1] - ai[i];
5018: for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5019: /* off-diagonal portion of A */
5020: for (j=jo; j<ncols_o; j++) {
5021: *cam++ = *ba++; bj++;
5022: }
5023: }
5024: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5025: PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);
5026: return(0);
5027: }
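/*
   An illustrative usage sketch for MatMPIAIJGetLocalMat() above; the merged local matrix
   owns its arrays (see the free_a/free_ij flags set above), so the caller destroys it when
   done.

.vb
      Mat Aloc;

      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
      MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);
      MatDestroy(&Aloc);
.ve
*/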
5031: /*@C
5032: MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5034: Not Collective
5036: Input Parameters:
5037: + A - the matrix
5038: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5039: - row, col - index sets of rows and columns to extract (or NULL)
5041: Output Parameter:
5042: . A_loc - the local sequential matrix generated
5044: Level: developer
5046: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5048: @*/
5049: PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5050: {
5051: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5053: PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5054: IS isrowa,iscola;
5055: Mat *aloc;
5056: PetscBool match;
5059: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);
5060: if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5061: PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);
5062: if (!row) {
5063: start = A->rmap->rstart; end = A->rmap->rend;
5064: ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
5065: } else {
5066: isrowa = *row;
5067: }
5068: if (!col) {
5069: start = A->cmap->rstart;
5070: cmap = a->garray;
5071: nzA = a->A->cmap->n;
5072: nzB = a->B->cmap->n;
5073: PetscMalloc1((nzA+nzB), &idx);
5074: ncols = 0;
5075: for (i=0; i<nzB; i++) {
5076: if (cmap[i] < start) idx[ncols++] = cmap[i];
5077: else break;
5078: }
5079: imark = i;
5080: for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5081: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5082: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);
5083: } else {
5084: iscola = *col;
5085: }
5086: if (scall != MAT_INITIAL_MATRIX) {
5087: PetscMalloc(sizeof(Mat),&aloc);
5088: aloc[0] = *A_loc;
5089: }
5090: MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);
5091: *A_loc = aloc[0];
5092: PetscFree(aloc);
5093: if (!row) {
5094: ISDestroy(&isrowa);
5095: }
5096: if (!col) {
5097: ISDestroy(&iscola);
5098: }
5099: PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);
5100: return(0);
5101: }
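/*
   A minimal usage sketch (illustrative only; assumes A is an already assembled MATMPIAIJ
   matrix and omits error checking):

     Mat A_loc;

     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
       ... use A_loc; later, after only the numerical values of A have changed ...
     MatMPIAIJGetLocalMatCondensed(A,MAT_REUSE_MATRIX,NULL,NULL,&A_loc);
     MatDestroy(&A_loc);
*/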
5105: /*@C
5106: MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5108: Collective on Mat
5110: Input Parameters:
5111: + A,B - the matrices in mpiaij format
5112: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5113: - rowb, colb - index sets of rows and columns of B to extract (or NULL)
5115: Output Parameter:
5116: + rowb, colb - index sets of rows and columns of B to extract
5117: - B_seq - the sequential matrix generated
5119: Level: developer
5121: @*/
5122: PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5123: {
5124: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5126: PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5127: IS isrowb,iscolb;
5128: Mat *bseq=NULL;
5131: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5132: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5133: }
5134: PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);
5136: if (scall == MAT_INITIAL_MATRIX) {
5137: start = A->cmap->rstart;
5138: cmap = a->garray;
5139: nzA = a->A->cmap->n;
5140: nzB = a->B->cmap->n;
5141: PetscMalloc1((nzA+nzB), &idx);
5142: ncols = 0;
5143: for (i=0; i<nzB; i++) { /* row < local row index */
5144: if (cmap[i] < start) idx[ncols++] = cmap[i];
5145: else break;
5146: }
5147: imark = i;
5148: for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
5149: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5150: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);
5151: ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);
5152: } else {
5153: if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5154: isrowb = *rowb; iscolb = *colb;
5155: PetscMalloc(sizeof(Mat),&bseq);
5156: bseq[0] = *B_seq;
5157: }
5158: MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);
5159: *B_seq = bseq[0];
5160: PetscFree(bseq);
5161: if (!rowb) {
5162: ISDestroy(&isrowb);
5163: } else {
5164: *rowb = isrowb;
5165: }
5166: if (!colb) {
5167: ISDestroy(&iscolb);
5168: } else {
5169: *colb = iscolb;
5170: }
5171: PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);
5172: return(0);
5173: }
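/*
   A minimal usage sketch (illustrative only; assumes A and B are assembled MATMPIAIJ
   matrices with A's column layout matching B's row layout, and omits error checking).
   Passing non-NULL rowb/colb keeps the index sets so the extraction can be repeated
   with MAT_REUSE_MATRIX:

     IS  rowb = NULL,colb = NULL;
     Mat B_seq;

     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
       ... use B_seq; later, after only the numerical values of B have changed ...
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);

     ISDestroy(&rowb);
     ISDestroy(&colb);
     MatDestroy(&B_seq);
*/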
5177: /*
5178: MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5179: of the OFF-DIAGONAL portion of the local part of A
5181: Collective on Mat
5183: Input Parameters:
5184: + A,B - the matrices in mpiaij format
5185: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5187: Output Parameter:
5188: + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5189: . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5190: . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5191: - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5193: Level: developer
5195: */
5196: PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5197: {
5198: VecScatter_MPI_General *gen_to,*gen_from;
5199: PetscErrorCode ierr;
5200: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5201: Mat_SeqAIJ *b_oth;
5202: VecScatter ctx =a->Mvctx;
5203: MPI_Comm comm;
5204: PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5205: PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5206: PetscScalar *rvalues,*svalues;
5207: MatScalar *b_otha,*bufa,*bufA;
5208: PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5209: MPI_Request *rwaits = NULL,*swaits = NULL;
5210: MPI_Status *sstatus,rstatus;
5211: PetscMPIInt jj,size;
5212: PetscInt *cols,sbs,rbs;
5213: PetscScalar *vals;
5216: PetscObjectGetComm((PetscObject)A,&comm);
5217: MPI_Comm_size(comm,&size);
5218: if (size == 1) return(0);
5220: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5221: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5222: }
5223: PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);
5224: MPI_Comm_rank(comm,&rank);
5226: gen_to = (VecScatter_MPI_General*)ctx->todata;
5227: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5228: rvalues = gen_from->values; /* reused here to hold the lengths of the rows to be received */
5229: svalues = gen_to->values; /* reused here to hold the lengths of the rows to be sent */
5230: nrecvs = gen_from->n;
5231: nsends = gen_to->n;
5233: PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);
5234: srow = gen_to->indices; /* local row index to be sent */
5235: sstarts = gen_to->starts;
5236: sprocs = gen_to->procs;
5237: sstatus = gen_to->sstatus;
5238: sbs = gen_to->bs;
5239: rstarts = gen_from->starts;
5240: rprocs = gen_from->procs;
5241: rbs = gen_from->bs;
5243: if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5244: if (scall == MAT_INITIAL_MATRIX) {
5245: /* i-array */
5246: /*---------*/
5247: /* post receives */
5248: for (i=0; i<nrecvs; i++) {
5249: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5250: nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5251: MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5252: }
5254: /* pack the outgoing message */
5255: PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);
5257: sstartsj[0] = 0;
5258: rstartsj[0] = 0;
5259: len = 0; /* total length of j or a array to be sent */
5260: k = 0;
5261: for (i=0; i<nsends; i++) {
5262: rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5263: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5264: for (j=0; j<nrows; j++) {
5265: row = srow[k] + B->rmap->range[rank]; /* global row idx */
5266: for (l=0; l<sbs; l++) {
5267: MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL); /* rowlength */
5269: rowlen[j*sbs+l] = ncols;
5271: len += ncols;
5272: MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);
5273: }
5274: k++;
5275: }
5276: MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);
5278: sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5279: }
5280: /* recvs and sends of i-array are completed */
5281: i = nrecvs;
5282: while (i--) {
5283: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5284: }
5285: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5287: /* allocate buffers for sending j and a arrays */
5288: PetscMalloc1((len+1),&bufj);
5289: PetscMalloc1((len+1),&bufa);
5291: /* create i-array of B_oth */
5292: PetscMalloc1((aBn+2),&b_othi);
5294: b_othi[0] = 0;
5295: len = 0; /* total length of j or a array to be received */
5296: k = 0;
5297: for (i=0; i<nrecvs; i++) {
5298: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5299: nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5300: for (j=0; j<nrows; j++) {
5301: b_othi[k+1] = b_othi[k] + rowlen[j];
5302: len += rowlen[j]; k++;
5303: }
5304: rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5305: }
5307: /* allocate space for j and a arrays of B_oth */
5308: PetscMalloc1((b_othi[aBn]+1),&b_othj);
5309: PetscMalloc1((b_othi[aBn]+1),&b_otha);
5311: /* j-array */
5312: /*---------*/
5313: /* post receives of j-array */
5314: for (i=0; i<nrecvs; i++) {
5315: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5316: MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5317: }
5319: /* pack the outgoing message j-array */
5320: k = 0;
5321: for (i=0; i<nsends; i++) {
5322: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5323: bufJ = bufj+sstartsj[i];
5324: for (j=0; j<nrows; j++) {
5325: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5326: for (ll=0; ll<sbs; ll++) {
5327: MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);
5328: for (l=0; l<ncols; l++) {
5329: *bufJ++ = cols[l];
5330: }
5331: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);
5332: }
5333: }
5334: MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);
5335: }
5337: /* recvs and sends of j-array are completed */
5338: i = nrecvs;
5339: while (i--) {
5340: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5341: }
5342: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5343: } else if (scall == MAT_REUSE_MATRIX) {
5344: sstartsj = *startsj_s;
5345: rstartsj = *startsj_r;
5346: bufa = *bufa_ptr;
5347: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5348: b_otha = b_oth->a;
5349: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5351: /* a-array */
5352: /*---------*/
5353: /* post receives of a-array */
5354: for (i=0; i<nrecvs; i++) {
5355: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5356: MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
5357: }
5359: /* pack the outgoing message a-array */
5360: k = 0;
5361: for (i=0; i<nsends; i++) {
5362: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5363: bufA = bufa+sstartsj[i];
5364: for (j=0; j<nrows; j++) {
5365: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5366: for (ll=0; ll<sbs; ll++) {
5367: MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);
5368: for (l=0; l<ncols; l++) {
5369: *bufA++ = vals[l];
5370: }
5371: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);
5372: }
5373: }
5374: MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
5375: }
5376: /* recvs and sends of a-array are completed */
5377: i = nrecvs;
5378: while (i--) {
5379: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5380: }
5381: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5382: PetscFree2(rwaits,swaits);
5384: if (scall == MAT_INITIAL_MATRIX) {
5385: /* put together the new matrix */
5386: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);
5388: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5389: /* Since these are PETSc arrays, change flags to free them as necessary. */
5390: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5391: b_oth->free_a = PETSC_TRUE;
5392: b_oth->free_ij = PETSC_TRUE;
5393: b_oth->nonew = 0;
5395: PetscFree(bufj);
5396: if (!startsj_s || !bufa_ptr) {
5397: PetscFree2(sstartsj,rstartsj);
5398: PetscFree(bufa_ptr);
5399: } else {
5400: *startsj_s = sstartsj;
5401: *startsj_r = rstartsj;
5402: *bufa_ptr = bufa;
5403: }
5404: }
5405: PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);
5406: return(0);
5407: }
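/*
   A developer-level usage sketch (illustrative only; assumes A and B are assembled
   MATMPIAIJ matrices with compatible layouts and omits error checking).  Passing
   non-NULL buffer pointers keeps the communication data so the values can be
   re-fetched with MAT_REUSE_MATRIX:

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa      = NULL;
     Mat       B_oth;

     MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
       ... later, after only the numerical values of B have changed ...
     MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);

     MatDestroy(&B_oth);
     PetscFree2(startsj_s,startsj_r);
     PetscFree(bufa);
*/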
5411: /*@C
5412: MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5414: Not Collective
5416: Input Parameters:
5417: . A - The matrix in mpiaij format
5419: Output Parameter:
5420: + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5421: . colmap - A map from global column index to local index into lvec
5422: - multScatter - A scatter from the argument of a matrix-vector product to lvec
5424: Level: developer
5426: @*/
5427: #if defined(PETSC_USE_CTABLE)
5428: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5429: #else
5430: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5431: #endif
5432: {
5433: Mat_MPIAIJ *a;
5440: a = (Mat_MPIAIJ*) A->data;
5441: if (lvec) *lvec = a->lvec;
5442: if (colmap) *colmap = a->colmap;
5443: if (multScatter) *multScatter = a->Mvctx;
5444: return(0);
5445: }
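/*
   A minimal usage sketch (illustrative only; assumes A is an assembled MATMPIAIJ matrix
   and a build without PETSC_USE_CTABLE, so colmap is a plain PetscInt array).  The
   returned objects are still owned by A and must not be destroyed by the caller:

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;

     MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
*/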
5447: PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5448: PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5449: PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5453: /*
5454: Computes C = A*B as (B'*A')', since forming A*B directly (MPIDense times MPIAIJ) is not supported
5456:              n                 p                  p
5457:            (   )             (   )              (   )
5458:        m   ( A )    *    n   ( B )     =    m   ( C )
5459:            (   )             (   )              (   )
5461: */
5462: PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5463: {
5465: Mat At,Bt,Ct;
5468: MatTranspose(A,MAT_INITIAL_MATRIX,&At);
5469: MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);
5470: MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);
5471: MatDestroy(&At);
5472: MatDestroy(&Bt);
5473: MatTranspose(Ct,MAT_REUSE_MATRIX,&C);
5474: MatDestroy(&Ct);
5475: return(0);
5476: }
5480: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5481: {
5483: PetscInt m=A->rmap->n,n=B->cmap->n;
5484: Mat Cmat;
5487: if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5488: MatCreate(PetscObjectComm((PetscObject)A),&Cmat);
5489: MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
5490: MatSetBlockSizesFromMats(Cmat,A,B);
5491: MatSetType(Cmat,MATMPIDENSE);
5492: MatMPIDenseSetPreallocation(Cmat,NULL);
5493: MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);
5494: MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);
5496: Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5498: *C = Cmat;
5499: return(0);
5500: }
5502: /* ----------------------------------------------------------------*/
5505: PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5506: {
5510: if (scall == MAT_INITIAL_MATRIX) {
5511: PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);
5512: MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);
5513: PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);
5514: }
5515: PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);
5516: MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);
5517: PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);
5518: return(0);
5519: }
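/*
   A minimal usage sketch (illustrative only; assumes A is an assembled MATMPIDENSE matrix
   and B an assembled MATMPIAIJ matrix with A->cmap->n == B->rmap->n, so that MatMatMult()
   dispatches to the routines above and returns a MATMPIDENSE product):

     Mat C;

     MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);
       ... use C = A*B ...
     MatDestroy(&C);
*/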
5521: #if defined(PETSC_HAVE_MUMPS)
5522: PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5523: #endif
5524: #if defined(PETSC_HAVE_PASTIX)
5525: PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5526: #endif
5527: #if defined(PETSC_HAVE_SUPERLU_DIST)
5528: PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5529: #endif
5530: #if defined(PETSC_HAVE_CLIQUE)
5531: PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5532: #endif
5534: /*MC
5535: MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5537: Options Database Keys:
5538: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5540: Level: beginner
5542: .seealso: MatCreateAIJ()
5543: M*/
5547: PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5548: {
5549: Mat_MPIAIJ *b;
5551: PetscMPIInt size;
5554: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
5556: PetscNewLog(B,&b);
5557: B->data = (void*)b;
5558: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
5559: B->assembled = PETSC_FALSE;
5560: B->insertmode = NOT_SET_VALUES;
5561: b->size = size;
5563: MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);
5565: /* build cache for off array entries formed */
5566: MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);
5568: b->donotstash = PETSC_FALSE;
5569: b->colmap = 0;
5570: b->garray = 0;
5571: b->roworiented = PETSC_TRUE;
5573: /* stuff used for matrix vector multiply */
5574: b->lvec = NULL;
5575: b->Mvctx = NULL;
5577: /* stuff for MatGetRow() */
5578: b->rowindices = 0;
5579: b->rowvalues = 0;
5580: b->getrowactive = PETSC_FALSE;
5582: /* flexible pointer used in CUSP/CUSPARSE classes */
5583: b->spptr = NULL;
5585: #if defined(PETSC_HAVE_MUMPS)
5586: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);
5587: #endif
5588: #if defined(PETSC_HAVE_PASTIX)
5589: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);
5590: #endif
5591: #if defined(PETSC_HAVE_SUPERLU_DIST)
5592: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);
5593: #endif
5594: #if defined(PETSC_HAVE_CLIQUE)
5595: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);
5596: #endif
5597: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);
5598: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);
5599: PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);
5600: PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);
5601: PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);
5602: PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);
5603: PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);
5604: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);
5605: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);
5606: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);
5607: PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);
5608: PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);
5609: PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);
5610: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);
5611: return(0);
5612: }
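/*
   A minimal usage sketch (illustrative only) of creating a MATMPIAIJ matrix through the
   public interface, which ends up in MatCreate_MPIAIJ() above; the preallocation counts
   (5 diagonal and 2 off-diagonal nonzeros per row) are arbitrary placeholders:

     Mat A;

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
       ... MatSetValues() ..., MatAssemblyBegin()/MatAssemblyEnd() ...
     MatDestroy(&A);
*/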
5616: /*@C
5617: MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5618: and "off-diagonal" part of the matrix in CSR format.
5620: Collective on MPI_Comm
5622: Input Parameters:
5623: + comm - MPI communicator
5624: . m - number of local rows (Cannot be PETSC_DECIDE)
5625: . n - This value should be the same as the local size used in creating the
5626: x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5627: calculated if N is given). For square matrices n is almost always m.
5628: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5629: . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5630: . i - row indices for "diagonal" portion of matrix
5631: . j - column indices
5632: . a - matrix values
5633: . oi - row indices for "off-diagonal" portion of matrix
5634: . oj - column indices
5635: - oa - matrix values
5637: Output Parameter:
5638: . mat - the matrix
5640: Level: advanced
5642: Notes:
5643: The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5644: must free the arrays once the matrix has been destroyed and not before.
5646: The i and j indices are 0 based
5648: See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5650: This sets local rows and cannot be used to set off-processor values.
5652: Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5653: legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5654: not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5655: the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5656: keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5657: communication if it is known that only local entries will be set.
5659: .keywords: matrix, aij, compressed row, sparse, parallel
5661: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5662: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5663: @*/
5664: PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5665: {
5667: Mat_MPIAIJ *maij;
5670: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5671: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5672: if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5673: MatCreate(comm,mat);
5674: MatSetSizes(*mat,m,n,M,N);
5675: MatSetType(*mat,MATMPIAIJ);
5676: maij = (Mat_MPIAIJ*) (*mat)->data;
5678: (*mat)->preallocated = PETSC_TRUE;
5680: PetscLayoutSetUp((*mat)->rmap);
5681: PetscLayoutSetUp((*mat)->cmap);
5683: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);
5684: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);
5686: MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);
5687: MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);
5688: MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);
5689: MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);
5691: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
5692: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
5693: MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
5694: return(0);
5695: }
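/*
   An illustrative sketch (with made-up values, not from the original source) of the
   split-array layout for a 4x4 matrix on two processes, each owning two rows and two
   columns.  The j indices of the "diagonal" block are local to the owned column range,
   while the oj indices of the "off-diagonal" block are global:

     Global matrix              process 0 (rows 0-1, cols 0-1)
       [ 1 . 2 . ]                i  = {0,1,2}   j  = {0,1}   a  = {1,1}
       [ . 1 . 2 ]                oi = {0,1,2}   oj = {2,3}   oa = {2,2}
       [ 3 . 1 . ]              process 1 (rows 2-3, cols 2-3)
       [ . 3 . 1 ]                i  = {0,1,2}   j  = {0,1}   a  = {1,1}
                                  oi = {0,1,2}   oj = {0,1}   oa = {3,3}

     MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);

   As noted above, all six arrays must remain valid until A has been destroyed.
*/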
5697: /*
5698: Special version for direct calls from Fortran
5699: */
5700: #include <petsc-private/fortranimpl.h>
5702: #if defined(PETSC_HAVE_FORTRAN_CAPS)
5703: #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5704: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5705: #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5706: #endif
5708: /* Redefine these macros so they can be used in a void function */
5709: #undef CHKERRQ
5710: #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5711: #undef SETERRQ2
5712: #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5713: #undef SETERRQ3
5714: #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5715: #undef SETERRQ
5716: #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5720: PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5721: {
5722: Mat mat = *mmat;
5723: PetscInt m = *mm, n = *mn;
5724: InsertMode addv = *maddv;
5725: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
5726: PetscScalar value;
5729: MatCheckPreallocated(mat,1);
5730: if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5732: #if defined(PETSC_USE_DEBUG)
5733: else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5734: #endif
5735: {
5736: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5737: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5738: PetscBool roworiented = aij->roworiented;
5740: /* Some variables required by the macros */
5741: Mat A = aij->A;
5742: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
5743: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5744: MatScalar *aa = a->a;
5745: PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5746: Mat B = aij->B;
5747: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
5748: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5749: MatScalar *ba = b->a;
5751: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5752: PetscInt nonew = a->nonew;
5753: MatScalar *ap1,*ap2;
5756: for (i=0; i<m; i++) {
5757: if (im[i] < 0) continue;
5758: #if defined(PETSC_USE_DEBUG)
5759: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5760: #endif
5761: if (im[i] >= rstart && im[i] < rend) {
5762: row = im[i] - rstart;
5763: lastcol1 = -1;
5764: rp1 = aj + ai[row];
5765: ap1 = aa + ai[row];
5766: rmax1 = aimax[row];
5767: nrow1 = ailen[row];
5768: low1 = 0;
5769: high1 = nrow1;
5770: lastcol2 = -1;
5771: rp2 = bj + bi[row];
5772: ap2 = ba + bi[row];
5773: rmax2 = bimax[row];
5774: nrow2 = bilen[row];
5775: low2 = 0;
5776: high2 = nrow2;
5778: for (j=0; j<n; j++) {
5779: if (roworiented) value = v[i*n+j];
5780: else value = v[i+j*m];
5781: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5782: if (in[j] >= cstart && in[j] < cend) {
5783: col = in[j] - cstart;
5784: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5785: } else if (in[j] < 0) continue;
5786: #if defined(PETSC_USE_DEBUG)
5787: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5788: #endif
5789: else {
5790: if (mat->was_assembled) {
5791: if (!aij->colmap) {
5792: MatCreateColmap_MPIAIJ_Private(mat);
5793: }
5794: #if defined(PETSC_USE_CTABLE)
5795: PetscTableFind(aij->colmap,in[j]+1,&col);
5796: col--;
5797: #else
5798: col = aij->colmap[in[j]] - 1;
5799: #endif
5800: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5801: MatDisAssemble_MPIAIJ(mat);
5802: col = in[j];
5803: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5804: B = aij->B;
5805: b = (Mat_SeqAIJ*)B->data;
5806: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5807: rp2 = bj + bi[row];
5808: ap2 = ba + bi[row];
5809: rmax2 = bimax[row];
5810: nrow2 = bilen[row];
5811: low2 = 0;
5812: high2 = nrow2;
5813: bm = aij->B->rmap->n;
5814: ba = b->a;
5815: }
5816: } else col = in[j];
5817: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5818: }
5819: }
5820: } else if (!aij->donotstash) {
5821: if (roworiented) {
5822: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5823: } else {
5824: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5825: }
5826: }
5827: }
5828: }
5829: PetscFunctionReturnVoid();
5830: }