Actual source code: mpiaij.c
2: #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
3: #include <petscblaslapack.h>
5: /*MC
6: MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
8: This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9: and MATMPIAIJ otherwise. As a result, for single process communicators,
10: MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
11: for communicators controlling multiple processes. It is recommended that you call both of
12: the above preallocation routines for simplicity.
14: Options Database Keys:
15: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
17: Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
18: enough of them exist.
20: Level: beginner
22: .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23: M*/
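/*
   Example (added for illustration; not part of the original source): a minimal sketch of the
   recommended usage described above -- create a MATAIJ matrix and call both preallocation
   routines so the same code works on one process or many. The function name and the 1-D
   Laplacian stencil are placeholders chosen only for the sketch.
*/
PetscErrorCode ExampleCreateTridiagonalAIJ(MPI_Comm comm,PetscInt n,Mat *A)
{
  PetscInt    i,rstart,rend,ncols,cols[3];
  PetscScalar v[3];

  MatCreate(comm,A);
  MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,n,n);
  MatSetType(*A,MATAIJ);
  /* call both preallocation routines; only the one matching the communicator size takes effect */
  MatSeqAIJSetPreallocation(*A,3,PETSC_NULL);
  MatMPIAIJSetPreallocation(*A,3,PETSC_NULL,1,PETSC_NULL);
  MatGetOwnershipRange(*A,&rstart,&rend);
  for (i=rstart; i<rend; i++) {
    ncols = 0;
    if (i > 0)   {cols[ncols] = i-1; v[ncols++] = -1.0;}
    cols[ncols] = i; v[ncols++] = 2.0;
    if (i < n-1) {cols[ncols] = i+1; v[ncols++] = -1.0;}
    MatSetValues(*A,1,&i,ncols,cols,v,INSERT_VALUES);
  }
  MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);
  return(0);
}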
25: /*MC
26: MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
28: This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29: and MATMPIAIJCRL otherwise. As a result, for single process communicators,
30: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31: for communicators controlling multiple processes. It is recommended that you call both of
32: the above preallocation routines for simplicity.
34: Options Database Keys:
35: . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
37: Level: beginner
39: .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40: M*/
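/*
   Example (added for illustration; not part of the original source): the CRL variants are most
   easily selected at run time with -mat_type aijcrl. A sketch, assuming A, comm, n, nz, dnz,
   and onz are declared by the caller and the type is left to the options database:

      MatCreate(comm,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);
      MatSetFromOptions(A);                                 picks up -mat_type aijcrl
      MatSeqAIJSetPreallocation(A,nz,PETSC_NULL);
      MatMPIAIJSetPreallocation(A,dnz,PETSC_NULL,onz,PETSC_NULL);
*/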
44: PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45: {
46: PetscErrorCode ierr;
47: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
48: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data;
49: Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data;
50: const PetscInt *ia,*ib;
51: const MatScalar *aa,*bb;
52: PetscInt na,nb,i,j,*rows,cnt=0,n0rows;
53: PetscInt m = M->rmap->n,rstart = M->rmap->rstart;
56: *keptrows = 0;
57: ia = a->i;
58: ib = b->i;
59: for (i=0; i<m; i++) {
60: na = ia[i+1] - ia[i];
61: nb = ib[i+1] - ib[i];
62: if (!na && !nb) {
63: cnt++;
64: goto ok1;
65: }
66: aa = a->a + ia[i];
67: for (j=0; j<na; j++) {
68: if (aa[j] != 0.0) goto ok1;
69: }
70: bb = b->a + ib[i];
71: for (j=0; j <nb; j++) {
72: if (bb[j] != 0.0) goto ok1;
73: }
74: cnt++;
75: ok1:;
76: }
77: MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);
78: if (!n0rows) return(0);
79: PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);
80: cnt = 0;
81: for (i=0; i<m; i++) {
82: na = ia[i+1] - ia[i];
83: nb = ib[i+1] - ib[i];
84: if (!na && !nb) continue;
85: aa = a->a + ia[i];
86: for(j=0; j<na;j++) {
87: if (aa[j] != 0.0) {
88: rows[cnt++] = rstart + i;
89: goto ok2;
90: }
91: }
92: bb = b->a + ib[i];
93: for (j=0; j<nb; j++) {
94: if (bb[j] != 0.0) {
95: rows[cnt++] = rstart + i;
96: goto ok2;
97: }
98: }
99: ok2:;
100: }
101: ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);
102: return(0);
103: }
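/*
   Example (added for illustration; not part of the original source): a sketch of exercising the
   routine above directly; normally it is reached through the matrix operations table. If no row
   of the parallel matrix is entirely zero, keptrows is returned as 0 (NULL). The function name
   is hypothetical.
*/
PetscErrorCode ExampleReportNonzeroRows(Mat A)
{
  IS       keptrows;
  PetscInt nlocal;

  MatFindNonZeroRows_MPIAIJ(A,&keptrows);
  if (keptrows) {
    ISGetLocalSize(keptrows,&nlocal);
    PetscPrintf(PETSC_COMM_SELF,"%D locally owned rows contain a nonzero\n",nlocal);
    ISDestroy(&keptrows);
  }
  return(0);
}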
107: PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108: {
110: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
111: PetscInt i,n,*garray = aij->garray;
112: Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data;
113: Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data;
114: PetscReal *work;
117: MatGetSize(A,PETSC_NULL,&n);
118: PetscMalloc(n*sizeof(PetscReal),&work);
119: PetscMemzero(work,n*sizeof(PetscReal));
120: if (type == NORM_2) {
121: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123: }
124: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126: }
127: } else if (type == NORM_1) {
128: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130: }
131: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133: }
134: } else if (type == NORM_INFINITY) {
135: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136: work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137: }
138: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139: work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140: }
142: } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143: if (type == NORM_INFINITY) {
144: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
145: } else {
146: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
147: }
148: PetscFree(work);
149: if (type == NORM_2) {
150: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151: }
152: return(0);
153: }
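/*
   Example (added for illustration; not part of the original source): the routine above implements
   MatGetColumnNorms() for MPIAIJ; a caller-side sketch. The norms array must have room for one
   entry per global column on every process. The function name is hypothetical.
*/
PetscErrorCode ExamplePrintLargestColumnNorm(Mat A)
{
  PetscInt  j,N;
  PetscReal *norms,max = 0.0;

  MatGetSize(A,PETSC_NULL,&N);
  PetscMalloc(N*sizeof(PetscReal),&norms);
  MatGetColumnNorms(A,NORM_2,norms);
  for (j=0; j<N; j++) max = PetscMax(max,norms[j]);
  PetscPrintf(((PetscObject)A)->comm,"largest column 2-norm %G\n",max);
  PetscFree(norms);
  return(0);
}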
157: /*
158: Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159: MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
161: Only for square matrices
162: */
163: PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164: {
165: PetscMPIInt rank,size;
166: PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
168: Mat mat;
169: Mat_SeqAIJ *gmata;
170: PetscMPIInt tag;
171: MPI_Status status;
172: PetscBool aij;
173: MatScalar *gmataa,*ao,*ad,*gmataarestore=0;
176: CHKMEMQ;
177: MPI_Comm_rank(comm,&rank);
178: MPI_Comm_size(comm,&size);
179: if (!rank) {
180: PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);
181: if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182: }
183: if (reuse == MAT_INITIAL_MATRIX) {
184: MatCreate(comm,&mat);
185: MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
186: MatSetType(mat,MATAIJ);
187: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
188: PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);
189: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
190: rowners[0] = 0;
191: for (i=2; i<=size; i++) {
192: rowners[i] += rowners[i-1];
193: }
194: rstart = rowners[rank];
195: rend = rowners[rank+1];
196: PetscObjectGetNewTag((PetscObject)mat,&tag);
197: if (!rank) {
198: gmata = (Mat_SeqAIJ*) gmat->data;
199: /* send row lengths to all processors */
200: for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201: for (i=1; i<size; i++) {
202: MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
203: }
204: /* determine the number of diagonal and off-diagonal entries in each row */
205: PetscMemzero(olens,m*sizeof(PetscInt));
206: PetscMalloc(m*sizeof(PetscInt),&ld);
207: PetscMemzero(ld,m*sizeof(PetscInt));
208: jj = 0;
209: for (i=0; i<m; i++) {
210: for (j=0; j<dlens[i]; j++) {
211: if (gmata->j[jj] < rstart) ld[i]++;
212: if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213: jj++;
214: }
215: }
216: /* send column indices to other processes */
217: for (i=1; i<size; i++) {
218: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219: MPI_Send(&nz,1,MPIU_INT,i,tag,comm);
220: MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);
221: }
223: /* send numerical values to other processes */
224: for (i=1; i<size; i++) {
225: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226: MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
227: }
228: gmataa = gmata->a;
229: gmataj = gmata->j;
231: } else {
232: /* receive row lengths */
233: MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);
234: /* receive column indices */
235: MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);
236: PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);
237: MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);
238: /* determine the number of diagonal and off-diagonal entries in each row */
239: PetscMemzero(olens,m*sizeof(PetscInt));
240: PetscMalloc(m*sizeof(PetscInt),&ld);
241: PetscMemzero(ld,m*sizeof(PetscInt));
242: jj = 0;
243: for (i=0; i<m; i++) {
244: for (j=0; j<dlens[i]; j++) {
245: if (gmataj[jj] < rstart) ld[i]++;
246: if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247: jj++;
248: }
249: }
250: /* receive numerical values */
251: PetscMemzero(gmataa,nz*sizeof(PetscScalar));
252: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
253: }
254: /* set preallocation */
255: for (i=0; i<m; i++) {
256: dlens[i] -= olens[i];
257: }
258: MatSeqAIJSetPreallocation(mat,0,dlens);
259: MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);
260:
261: for (i=0; i<m; i++) {
262: dlens[i] += olens[i];
263: }
264: cnt = 0;
265: for (i=0; i<m; i++) {
266: row = rstart + i;
267: MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);
268: cnt += dlens[i];
269: }
270: if (rank) {
271: PetscFree2(gmataa,gmataj);
272: }
273: PetscFree2(dlens,olens);
274: PetscFree(rowners);
275: ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276: *inmat = mat;
277: } else { /* column indices are already set; only need to move over numerical values from process 0 */
278: Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279: Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280: mat = *inmat;
281: PetscObjectGetNewTag((PetscObject)mat,&tag);
282: if (!rank) {
283: /* send numerical values to other processes */
284: gmata = (Mat_SeqAIJ*) gmat->data;
285: MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);
286: gmataa = gmata->a;
287: for (i=1; i<size; i++) {
288: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289: MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
290: }
291: nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292: } else {
293: /* receive numerical values from process 0*/
294: nz = Ad->nz + Ao->nz;
295: PetscMalloc(nz*sizeof(PetscScalar),&gmataa); gmataarestore = gmataa;
296: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
297: }
298: /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299: ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300: ad = Ad->a;
301: ao = Ao->a;
302: if (mat->rmap->n) {
303: i = 0;
304: nz = ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
305: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
306: }
307: for (i=1; i<mat->rmap->n; i++) {
308: nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
309: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
310: }
311: i--;
312: if (mat->rmap->n) {
313: nz = Ao->i[i+1] - Ao->i[i] - ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
314: }
315: if (rank) {
316: PetscFree(gmataarestore);
317: }
318: }
319: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
320: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
321: CHKMEMQ;
322: return(0);
323: }
325: /*
326: Local utility routine that creates a mapping from the global column
327: number to the local number in the off-diagonal part of the local
328: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable, at
329: a slightly higher hash table cost; without it it is not scalable (each processor
330: holds an order-N integer array) but access is fast.
331: */
334: PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335: {
336: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
338: PetscInt n = aij->B->cmap->n,i;
341: #if defined (PETSC_USE_CTABLE)
342: PetscTableCreate(n,&aij->colmap);
343: for (i=0; i<n; i++){
344: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
345: }
346: #else
347: PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);
348: PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));
349: PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));
350: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351: #endif
352: return(0);
353: }
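/*
   Example (added for illustration; not part of the original source): the colmap built above
   stores local indices shifted by one, so a lookup of zero means "this global column has no
   local counterpart in B". A lookup sketch mirroring the convention used later in this file;
   the function name is hypothetical.
*/
PetscErrorCode ExampleGlobalToLocalOffDiagColumn(Mat mat,PetscInt gcol,PetscInt *lcol)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  if (!aij->colmap) {
    CreateColmap_MPIAIJ_Private(mat);
  }
#if defined (PETSC_USE_CTABLE)
  PetscTableFind(aij->colmap,gcol+1,lcol);
  (*lcol)--;                          /* -1 now means "not present on this process" */
#else
  *lcol = aij->colmap[gcol] - 1;      /* -1 now means "not present on this process" */
#endif
  return(0);
}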
355: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356: { \
357: if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358: lastcol1 = col;\
359: while (high1-low1 > 5) { \
360: t = (low1+high1)/2; \
361: if (rp1[t] > col) high1 = t; \
362: else low1 = t; \
363: } \
364: for (_i=low1; _i<high1; _i++) { \
365: if (rp1[_i] > col) break; \
366: if (rp1[_i] == col) { \
367: if (addv == ADD_VALUES) ap1[_i] += value; \
368: else ap1[_i] = value; \
369: goto a_noinsert; \
370: } \
371: } \
372: if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373: if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
374: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375: MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376: N = nrow1++ - 1; a->nz++; high1++; \
377: /* shift up all the later entries in this row */ \
378: for (ii=N; ii>=_i; ii--) { \
379: rp1[ii+1] = rp1[ii]; \
380: ap1[ii+1] = ap1[ii]; \
381: } \
382: rp1[_i] = col; \
383: ap1[_i] = value; \
384: a_noinsert: ; \
385: ailen[row] = nrow1; \
386: }
389: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390: { \
391: if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392: lastcol2 = col;\
393: while (high2-low2 > 5) { \
394: t = (low2+high2)/2; \
395: if (rp2[t] > col) high2 = t; \
396: else low2 = t; \
397: } \
398: for (_i=low2; _i<high2; _i++) { \
399: if (rp2[_i] > col) break; \
400: if (rp2[_i] == col) { \
401: if (addv == ADD_VALUES) ap2[_i] += value; \
402: else ap2[_i] = value; \
403: goto b_noinsert; \
404: } \
405: } \
406: if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407: if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
408: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409: MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410: N = nrow2++ - 1; b->nz++; high2++; \
411: /* shift up all the later entries in this row */ \
412: for (ii=N; ii>=_i; ii--) { \
413: rp2[ii+1] = rp2[ii]; \
414: ap2[ii+1] = ap2[ii]; \
415: } \
416: rp2[_i] = col; \
417: ap2[_i] = value; \
418: b_noinsert: ; \
419: bilen[row] = nrow2; \
420: }
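/*
   Example (added for illustration; not part of the original source): the two macros above locate
   the insertion point with a short binary search that stops once the window is five entries or
   fewer, then finish with a linear scan. A plain-function restatement of that search, under the
   same assumption that the column indices rp[0..nrow-1] are sorted; the function name is
   hypothetical.
*/
static PetscInt ExampleFindColumn(const PetscInt *rp,PetscInt nrow,PetscInt col)
{
  PetscInt low = 0,high = nrow,t,i;

  while (high-low > 5) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] >= col) break;          /* i is the matching slot or the insertion point */
  }
  return i;
}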
424: PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425: {
426: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
427: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
429: PetscInt l,*garray = mat->garray,diag;
432: /* code only works for square matrices A */
434: /* find size of row to the left of the diagonal part */
435: MatGetOwnershipRange(A,&diag,0);
436: row = row - diag;
437: for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438: if (garray[b->j[b->i[row]+l]] > diag) break;
439: }
440: PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));
442: /* diagonal part */
443: PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));
445: /* right of diagonal part */
446: PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));
447: return(0);
448: }
452: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453: {
454: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
455: PetscScalar value;
457: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459: PetscBool roworiented = aij->roworiented;
461: /* Some Variables required in the macro */
462: Mat A = aij->A;
463: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
464: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465: MatScalar *aa = a->a;
466: PetscBool ignorezeroentries = a->ignorezeroentries;
467: Mat B = aij->B;
468: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
469: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470: MatScalar *ba = b->a;
472: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473: PetscInt nonew = a->nonew;
474: MatScalar *ap1,*ap2;
478: for (i=0; i<m; i++) {
479: if (im[i] < 0) continue;
480: #if defined(PETSC_USE_DEBUG)
481: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482: #endif
483: if (im[i] >= rstart && im[i] < rend) {
484: row = im[i] - rstart;
485: lastcol1 = -1;
486: rp1 = aj + ai[row];
487: ap1 = aa + ai[row];
488: rmax1 = aimax[row];
489: nrow1 = ailen[row];
490: low1 = 0;
491: high1 = nrow1;
492: lastcol2 = -1;
493: rp2 = bj + bi[row];
494: ap2 = ba + bi[row];
495: rmax2 = bimax[row];
496: nrow2 = bilen[row];
497: low2 = 0;
498: high2 = nrow2;
500: for (j=0; j<n; j++) {
501: if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503: if (in[j] >= cstart && in[j] < cend){
504: col = in[j] - cstart;
505: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506: } else if (in[j] < 0) continue;
507: #if defined(PETSC_USE_DEBUG)
508: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509: #endif
510: else {
511: if (mat->was_assembled) {
512: if (!aij->colmap) {
513: CreateColmap_MPIAIJ_Private(mat);
514: }
515: #if defined (PETSC_USE_CTABLE)
516: PetscTableFind(aij->colmap,in[j]+1,&col);
517: col--;
518: #else
519: col = aij->colmap[in[j]] - 1;
520: #endif
521: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522: DisAssemble_MPIAIJ(mat);
523: col = in[j];
524: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525: B = aij->B;
526: b = (Mat_SeqAIJ*)B->data;
527: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528: rp2 = bj + bi[row];
529: ap2 = ba + bi[row];
530: rmax2 = bimax[row];
531: nrow2 = bilen[row];
532: low2 = 0;
533: high2 = nrow2;
534: bm = aij->B->rmap->n;
535: ba = b->a;
536: }
537: } else col = in[j];
538: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539: }
540: }
541: } else {
542: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543: if (!aij->donotstash) {
544: if (roworiented) {
545: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
546: } else {
547: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
548: }
549: }
550: }
551: }
552: return(0);
553: }
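/*
   Example (added for illustration; not part of the original source): values set for rows owned
   by another process are not applied by the routine above; they are stashed and communicated
   during assembly. A caller-side sketch, assuming A is a preallocated MPIAIJ matrix and grow,
   gcol are global indices declared by the caller:

      PetscScalar one = 1.0;
      MatSetValues(A,1,&grow,1,&gcol,&one,ADD_VALUES);    any process may set this entry
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);             off-process values are scattered here
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);               and drained into A and B here
*/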
557: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
558: {
559: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
561: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
562: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
565: for (i=0; i<m; i++) {
566: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
567: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
568: if (idxm[i] >= rstart && idxm[i] < rend) {
569: row = idxm[i] - rstart;
570: for (j=0; j<n; j++) {
571: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
572: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
573: if (idxn[j] >= cstart && idxn[j] < cend){
574: col = idxn[j] - cstart;
575: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
576: } else {
577: if (!aij->colmap) {
578: CreateColmap_MPIAIJ_Private(mat);
579: }
580: #if defined (PETSC_USE_CTABLE)
581: PetscTableFind(aij->colmap,idxn[j]+1,&col);
582: col --;
583: #else
584: col = aij->colmap[idxn[j]] - 1;
585: #endif
586: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
587: else {
588: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
589: }
590: }
591: }
592: } else {
593: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
594: }
595: }
596: return(0);
597: }
599: extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
603: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
604: {
605: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
607: PetscInt nstash,reallocs;
608: InsertMode addv;
611: if (aij->donotstash || mat->nooffprocentries) {
612: return(0);
613: }
615: /* make sure all processors are either in INSERTMODE or ADDMODE */
616: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);
617: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
618: mat->insertmode = addv; /* in case this processor had no cache */
620: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
621: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
622: PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
623: return(0);
624: }
628: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
629: {
630: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
631: Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data;
633: PetscMPIInt n;
634: PetscInt i,j,rstart,ncols,flg;
635: PetscInt *row,*col;
636: PetscBool other_disassembled;
637: PetscScalar *val;
638: InsertMode addv = mat->insertmode;
640: /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
642: if (!aij->donotstash && !mat->nooffprocentries) {
643: while (1) {
644: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
645: if (!flg) break;
647: for (i=0; i<n;) {
648: /* Now identify the consecutive vals belonging to the same row */
649: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
650: if (j < n) ncols = j-i;
651: else ncols = n-i;
652: /* Now assemble all these values with a single function call */
653: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
654: i = j;
655: }
656: }
657: MatStashScatterEnd_Private(&mat->stash);
658: }
659: MatAssemblyBegin(aij->A,mode);
660: MatAssemblyEnd(aij->A,mode);
662: /* determine if any processor has disassembled; if so we must
663: also disassemble ourselves, in order that we may reassemble. */
664: /*
665: if nonzero structure of submatrix B cannot change then we know that
666: no processor disassembled thus we can skip this stuff
667: */
668: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
669: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);
670: if (mat->was_assembled && !other_disassembled) {
671: DisAssemble_MPIAIJ(mat);
672: }
673: }
674: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
675: MatSetUpMultiply_MPIAIJ(mat);
676: }
677: MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);
678: MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);
679: MatAssemblyBegin(aij->B,mode);
680: MatAssemblyEnd(aij->B,mode);
682: PetscFree2(aij->rowvalues,aij->rowindices);
683: aij->rowvalues = 0;
685: /* used by MatAXPY() */
686: a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
687: a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */
689: VecDestroy(&aij->diag);
690: if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
691: return(0);
692: }
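/*
   Example (added for illustration; not part of the original source): mixing INSERT_VALUES and
   ADD_VALUES requires an intermediate flush assembly, since the assembly routines above insist
   that all processes use a single InsertMode per assembly. A sketch, assuming A, row, col, v,
   and w are declared by the caller:

      MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);
      MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);
      MatSetValues(A,1,&row,1,&col,&w,ADD_VALUES);
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/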
696: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
697: {
698: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
702: MatZeroEntries(l->A);
703: MatZeroEntries(l->B);
704: return(0);
705: }
709: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
710: {
711: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
712: PetscErrorCode ierr;
713: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
714: PetscInt i,*owners = A->rmap->range;
715: PetscInt *nprocs,j,idx,nsends,row;
716: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
717: PetscInt *rvalues,count,base,slen,*source;
718: PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart;
719: MPI_Comm comm = ((PetscObject)A)->comm;
720: MPI_Request *send_waits,*recv_waits;
721: MPI_Status recv_status,*send_status;
722: const PetscScalar *xx;
723: PetscScalar *bb;
724: #if defined(PETSC_DEBUG)
725: PetscBool found = PETSC_FALSE;
726: #endif
729: /* first count number of contributors to each processor */
730: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
731: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
732: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
733: j = 0;
734: for (i=0; i<N; i++) {
735: if (lastidx > (idx = rows[i])) j = 0;
736: lastidx = idx;
737: for (; j<size; j++) {
738: if (idx >= owners[j] && idx < owners[j+1]) {
739: nprocs[2*j]++;
740: nprocs[2*j+1] = 1;
741: owner[i] = j;
742: #if defined(PETSC_DEBUG)
743: found = PETSC_TRUE;
744: #endif
745: break;
746: }
747: }
748: #if defined(PETSC_DEBUG)
749: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
750: found = PETSC_FALSE;
751: #endif
752: }
753: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
755: if (A->nooffproczerorows) {
756: if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
757: nrecvs = nsends;
758: nmax = N;
759: } else {
760: /* inform other processors of number of messages and max length*/
761: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
762: }
764: /* post receives: */
765: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
766: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
767: for (i=0; i<nrecvs; i++) {
768: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
769: }
771: /* do sends:
772: 1) starts[i] gives the starting index in svalues for stuff going to
773: the ith processor
774: */
775: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
776: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
777: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
778: starts[0] = 0;
779: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
780: for (i=0; i<N; i++) {
781: svalues[starts[owner[i]]++] = rows[i];
782: }
784: starts[0] = 0;
785: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
786: count = 0;
787: for (i=0; i<size; i++) {
788: if (nprocs[2*i+1]) {
789: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
790: }
791: }
792: PetscFree(starts);
794: base = owners[rank];
796: /* wait on receives */
797: PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
798: count = nrecvs; slen = 0;
799: while (count) {
800: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
801: /* unpack receives into our local space */
802: MPI_Get_count(&recv_status,MPIU_INT,&n);
803: source[imdex] = recv_status.MPI_SOURCE;
804: lens[imdex] = n;
805: slen += n;
806: count--;
807: }
808: PetscFree(recv_waits);
809:
810: /* move the data into the send scatter */
811: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
812: count = 0;
813: for (i=0; i<nrecvs; i++) {
814: values = rvalues + i*nmax;
815: for (j=0; j<lens[i]; j++) {
816: lrows[count++] = values[j] - base;
817: }
818: }
819: PetscFree(rvalues);
820: PetscFree2(lens,source);
821: PetscFree(owner);
822: PetscFree(nprocs);
823:
824: /* fix right hand side if needed */
825: if (x && b) {
826: VecGetArrayRead(x,&xx);
827: VecGetArray(b,&bb);
828: for (i=0; i<slen; i++) {
829: bb[lrows[i]] = diag*xx[lrows[i]];
830: }
831: VecRestoreArrayRead(x,&xx);
832: VecRestoreArray(b,&bb);
833: }
834: /*
835: Zero the required rows. If the "diagonal block" of the matrix
836: is square and the user wishes to set the diagonal we use separate
837: code so that MatSetValues() is not called for each diagonal entry, which would
838: allocate new memory, trigger lots of mallocs, and slow things down.
840: */
841: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
842: MatZeroRows(l->B,slen,lrows,0.0,0,0);
843: if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
844: MatZeroRows(l->A,slen,lrows,diag,0,0);
845: } else if (diag != 0.0) {
846: MatZeroRows(l->A,slen,lrows,0.0,0,0);
847: if (((Mat_SeqAIJ*)l->A->data)->nonew) {
848: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
849: MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
850: }
851: for (i = 0; i < slen; i++) {
852: row = lrows[i] + rstart;
853: MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
854: }
855: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
856: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
857: } else {
858: MatZeroRows(l->A,slen,lrows,0.0,0,0);
859: }
860: PetscFree(lrows);
862: /* wait on sends */
863: if (nsends) {
864: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
865: MPI_Waitall(nsends,send_waits,send_status);
866: PetscFree(send_status);
867: }
868: PetscFree(send_waits);
869: PetscFree(svalues);
870: return(0);
871: }
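/*
   Example (added for illustration; not part of the original source): a sketch of applying a
   Dirichlet condition through MatZeroRows(), which for MPIAIJ ends up in the routine above.
   rows[] holds global row numbers and may include rows owned by other processes; passing x and b
   also adjusts the right-hand side. The function name is hypothetical.
*/
PetscErrorCode ExampleApplyDirichlet(Mat A,PetscInt nrows,const PetscInt rows[],Vec x,Vec b)
{
  MatZeroRows(A,nrows,rows,1.0,x,b);   /* zero the rows and put 1.0 on the diagonal */
  return(0);
}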
875: PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
876: {
877: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
878: PetscErrorCode ierr;
879: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
880: PetscInt i,*owners = A->rmap->range;
881: PetscInt *nprocs,j,idx,nsends;
882: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
883: PetscInt *rvalues,count,base,slen,*source;
884: PetscInt *lens,*lrows,*values,m;
885: MPI_Comm comm = ((PetscObject)A)->comm;
886: MPI_Request *send_waits,*recv_waits;
887: MPI_Status recv_status,*send_status;
888: const PetscScalar *xx;
889: PetscScalar *bb,*mask;
890: Vec xmask,lmask;
891: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data;
892: const PetscInt *aj, *ii,*ridx;
893: PetscScalar *aa;
894: #if defined(PETSC_DEBUG)
895: PetscBool found = PETSC_FALSE;
896: #endif
899: /* first count number of contributors to each processor */
900: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
901: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
902: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
903: j = 0;
904: for (i=0; i<N; i++) {
905: if (lastidx > (idx = rows[i])) j = 0;
906: lastidx = idx;
907: for (; j<size; j++) {
908: if (idx >= owners[j] && idx < owners[j+1]) {
909: nprocs[2*j]++;
910: nprocs[2*j+1] = 1;
911: owner[i] = j;
912: #if defined(PETSC_DEBUG)
913: found = PETSC_TRUE;
914: #endif
915: break;
916: }
917: }
918: #if defined(PETSC_DEBUG)
919: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
920: found = PETSC_FALSE;
921: #endif
922: }
923: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
925: /* inform other processors of number of messages and max length*/
926: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
928: /* post receives: */
929: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
930: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
931: for (i=0; i<nrecvs; i++) {
932: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
933: }
935: /* do sends:
936: 1) starts[i] gives the starting index in svalues for stuff going to
937: the ith processor
938: */
939: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
940: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
941: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
942: starts[0] = 0;
943: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
944: for (i=0; i<N; i++) {
945: svalues[starts[owner[i]]++] = rows[i];
946: }
948: starts[0] = 0;
949: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
950: count = 0;
951: for (i=0; i<size; i++) {
952: if (nprocs[2*i+1]) {
953: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
954: }
955: }
956: PetscFree(starts);
958: base = owners[rank];
960: /* wait on receives */
961: PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
962: count = nrecvs; slen = 0;
963: while (count) {
964: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
965: /* unpack receives into our local space */
966: MPI_Get_count(&recv_status,MPIU_INT,&n);
967: source[imdex] = recv_status.MPI_SOURCE;
968: lens[imdex] = n;
969: slen += n;
970: count--;
971: }
972: PetscFree(recv_waits);
973:
974: /* move the data into the send scatter */
975: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
976: count = 0;
977: for (i=0; i<nrecvs; i++) {
978: values = rvalues + i*nmax;
979: for (j=0; j<lens[i]; j++) {
980: lrows[count++] = values[j] - base;
981: }
982: }
983: PetscFree(rvalues);
984: PetscFree2(lens,source);
985: PetscFree(owner);
986: PetscFree(nprocs);
987: /* lrows are the local rows to be zeroed, slen is the number of local rows */
989: /* zero diagonal part of matrix */
990: MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);
991:
992: /* handle off diagonal part of matrix */
993: MatGetVecs(A,&xmask,PETSC_NULL);
994: VecDuplicate(l->lvec,&lmask);
995: VecGetArray(xmask,&bb);
996: for (i=0; i<slen; i++) {
997: bb[lrows[i]] = 1;
998: }
999: VecRestoreArray(xmask,&bb);
1000: VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1001: VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1002: VecDestroy(&xmask);
1003: if (x) {
1004: VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);
1005: VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);
1006: VecGetArrayRead(l->lvec,&xx);
1007: VecGetArray(b,&bb);
1008: }
1009: VecGetArray(lmask,&mask);
1011: /* remove zeroed rows of off diagonal matrix */
1012: ii = aij->i;
1013: for (i=0; i<slen; i++) {
1014: PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));
1015: }
1017: /* loop over all elements of off process part of matrix zeroing removed columns*/
1018: if (aij->compressedrow.use){
1019: m = aij->compressedrow.nrows;
1020: ii = aij->compressedrow.i;
1021: ridx = aij->compressedrow.rindex;
1022: for (i=0; i<m; i++){
1023: n = ii[i+1] - ii[i];
1024: aj = aij->j + ii[i];
1025: aa = aij->a + ii[i];
1027: for (j=0; j<n; j++) {
1028: if (PetscAbsScalar(mask[*aj])) {
1029: if (b) bb[*ridx] -= *aa*xx[*aj];
1030: *aa = 0.0;
1031: }
1032: aa++;
1033: aj++;
1034: }
1035: ridx++;
1036: }
1037: } else { /* do not use compressed row format */
1038: m = l->B->rmap->n;
1039: for (i=0; i<m; i++) {
1040: n = ii[i+1] - ii[i];
1041: aj = aij->j + ii[i];
1042: aa = aij->a + ii[i];
1043: for (j=0; j<n; j++) {
1044: if (PetscAbsScalar(mask[*aj])) {
1045: if (b) bb[i] -= *aa*xx[*aj];
1046: *aa = 0.0;
1047: }
1048: aa++;
1049: aj++;
1050: }
1051: }
1052: }
1053: if (x) {
1054: VecRestoreArray(b,&bb);
1055: VecRestoreArrayRead(l->lvec,&xx);
1056: }
1057: VecRestoreArray(lmask,&mask);
1058: VecDestroy(&lmask);
1059: PetscFree(lrows);
1061: /* wait on sends */
1062: if (nsends) {
1063: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1064: MPI_Waitall(nsends,send_waits,send_status);
1065: PetscFree(send_status);
1066: }
1067: PetscFree(send_waits);
1068: PetscFree(svalues);
1070: return(0);
1071: }
1075: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1076: {
1077: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1079: PetscInt nt;
1082: VecGetLocalSize(xx,&nt);
1083: if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1084: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1085: (*a->A->ops->mult)(a->A,xx,yy);
1086: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1087: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1088: return(0);
1089: }
1093: PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1094: {
1095: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1099: MatMultDiagonalBlock(a->A,bb,xx);
1100: return(0);
1101: }
1105: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1106: {
1107: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1111: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1112: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1113: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1114: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1115: return(0);
1116: }
1120: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1121: {
1122: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1124: PetscBool merged;
1127: VecScatterGetMerged(a->Mvctx,&merged);
1128: /* do nondiagonal part */
1129: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1130: if (!merged) {
1131: /* send it on its way */
1132: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1133: /* do local part */
1134: (*a->A->ops->multtranspose)(a->A,xx,yy);
1135: /* receive remote parts: note this assumes the values are not actually */
1136: /* added in yy until the next line */
1137: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1138: } else {
1139: /* do local part */
1140: (*a->A->ops->multtranspose)(a->A,xx,yy);
1141: /* send it on its way */
1142: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1143: /* values actually were received in the Begin() but we need to call this nop */
1144: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1145: }
1146: return(0);
1147: }
1149: EXTERN_C_BEGIN
1152: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
1153: {
1154: MPI_Comm comm;
1155: Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1156: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1157: IS Me,Notme;
1159: PetscInt M,N,first,last,*notme,i;
1160: PetscMPIInt size;
1164: /* Easy test: symmetric diagonal block */
1165: Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1166: MatIsTranspose(Adia,Bdia,tol,f);
1167: if (!*f) return(0);
1168: PetscObjectGetComm((PetscObject)Amat,&comm);
1169: MPI_Comm_size(comm,&size);
1170: if (size == 1) return(0);
1172: /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1173: MatGetSize(Amat,&M,&N);
1174: MatGetOwnershipRange(Amat,&first,&last);
1175: PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);
1176: for (i=0; i<first; i++) notme[i] = i;
1177: for (i=last; i<M; i++) notme[i-last+first] = i;
1178: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);
1179: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
1180: MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
1181: Aoff = Aoffs[0];
1182: MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
1183: Boff = Boffs[0];
1184: MatIsTranspose(Aoff,Boff,tol,f);
1185: MatDestroyMatrices(1,&Aoffs);
1186: MatDestroyMatrices(1,&Boffs);
1187: ISDestroy(&Me);
1188: ISDestroy(&Notme);
1189: PetscFree(notme);
1190: return(0);
1191: }
1192: EXTERN_C_END
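/*
   Example (added for illustration; not part of the original source): a caller-side sketch of the
   transpose test implemented above; the tolerance and function name are arbitrary.
*/
PetscErrorCode ExampleCheckTranspose(Mat A,Mat B)
{
  PetscBool flg;

  MatIsTranspose(A,B,1.e-10,&flg);
  if (!flg) {
    PetscPrintf(((PetscObject)A)->comm,"B is not the transpose of A (to tolerance 1e-10)\n");
  }
  return(0);
}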
1196: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1197: {
1198: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1202: /* do nondiagonal part */
1203: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1204: /* send it on its way */
1205: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1206: /* do local part */
1207: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1208: /* receive remote parts */
1209: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1210: return(0);
1211: }
1213: /*
1214: This only works correctly for square matrices where the subblock A->A is the
1215: diagonal block
1216: */
1219: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1220: {
1222: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1225: if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1226: if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1227: MatGetDiagonal(a->A,v);
1228: return(0);
1229: }
1233: PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1234: {
1235: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1239: MatScale(a->A,aa);
1240: MatScale(a->B,aa);
1241: return(0);
1242: }
1246: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1247: {
1248: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1252: #if defined(PETSC_USE_LOG)
1253: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1254: #endif
1255: MatStashDestroy_Private(&mat->stash);
1256: VecDestroy(&aij->diag);
1257: MatDestroy(&aij->A);
1258: MatDestroy(&aij->B);
1259: #if defined (PETSC_USE_CTABLE)
1260: PetscTableDestroy(&aij->colmap);
1261: #else
1262: PetscFree(aij->colmap);
1263: #endif
1264: PetscFree(aij->garray);
1265: VecDestroy(&aij->lvec);
1266: VecScatterDestroy(&aij->Mvctx);
1267: PetscFree2(aij->rowvalues,aij->rowindices);
1268: PetscFree(aij->ld);
1269: PetscFree(mat->data);
1271: PetscObjectChangeTypeName((PetscObject)mat,0);
1272: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
1273: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
1274: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
1275: PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);
1276: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);
1277: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);
1278: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);
1279: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);
1280: return(0);
1281: }
1285: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1286: {
1287: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1288: Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data;
1289: Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data;
1290: PetscErrorCode ierr;
1291: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
1292: int fd;
1293: PetscInt nz,header[4],*row_lengths,*range=0,rlen,i;
1294: PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1295: PetscScalar *column_values;
1296: PetscInt message_count,flowcontrolcount;
1299: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
1300: MPI_Comm_size(((PetscObject)mat)->comm,&size);
1301: nz = A->nz + B->nz;
1302: if (!rank) {
1303: header[0] = MAT_FILE_CLASSID;
1304: header[1] = mat->rmap->N;
1305: header[2] = mat->cmap->N;
1306: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);
1307: PetscViewerBinaryGetDescriptor(viewer,&fd);
1308: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
1309: /* get largest number of rows any processor has */
1310: rlen = mat->rmap->n;
1311: range = mat->rmap->range;
1312: for (i=1; i<size; i++) {
1313: rlen = PetscMax(rlen,range[i+1] - range[i]);
1314: }
1315: } else {
1316: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);
1317: rlen = mat->rmap->n;
1318: }
1320: /* load up the local row counts */
1321: PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);
1322: for (i=0; i<mat->rmap->n; i++) {
1323: row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1324: }
1326: /* store the row lengths to the file */
1327: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1328: if (!rank) {
1329: PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);
1330: for (i=1; i<size; i++) {
1331: PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1332: rlen = range[i+1] - range[i];
1333: MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);
1334: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);
1335: }
1336: PetscViewerFlowControlEndMaster(viewer,message_count);
1337: } else {
1338: PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1339: MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1340: PetscViewerFlowControlEndWorker(viewer,message_count);
1341: }
1342: PetscFree(row_lengths);
1344: /* load up the local column indices */
1345: nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
1346: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);
1347: PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);
1348: cnt = 0;
1349: for (i=0; i<mat->rmap->n; i++) {
1350: for (j=B->i[i]; j<B->i[i+1]; j++) {
1351: if ( (col = garray[B->j[j]]) > cstart) break;
1352: column_indices[cnt++] = col;
1353: }
1354: for (k=A->i[i]; k<A->i[i+1]; k++) {
1355: column_indices[cnt++] = A->j[k] + cstart;
1356: }
1357: for (; j<B->i[i+1]; j++) {
1358: column_indices[cnt++] = garray[B->j[j]];
1359: }
1360: }
1361: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1363: /* store the column indices to the file */
1364: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1365: if (!rank) {
1366: MPI_Status status;
1367: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
1368: for (i=1; i<size; i++) {
1369: PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1370: MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
1371: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1372: MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);
1373: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);
1374: }
1375: PetscViewerFlowControlEndMaster(viewer,message_count);
1376: } else {
1377: PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1378: MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1379: MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1380: PetscViewerFlowControlEndWorker(viewer,message_count);
1381: }
1382: PetscFree(column_indices);
1384: /* load up the local column values */
1385: PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
1386: cnt = 0;
1387: for (i=0; i<mat->rmap->n; i++) {
1388: for (j=B->i[i]; j<B->i[i+1]; j++) {
1389: if ( garray[B->j[j]] > cstart) break;
1390: column_values[cnt++] = B->a[j];
1391: }
1392: for (k=A->i[i]; k<A->i[i+1]; k++) {
1393: column_values[cnt++] = A->a[k];
1394: }
1395: for (; j<B->i[i+1]; j++) {
1396: column_values[cnt++] = B->a[j];
1397: }
1398: }
1399: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1401: /* store the column values to the file */
1402: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1403: if (!rank) {
1404: MPI_Status status;
1405: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
1406: for (i=1; i<size; i++) {
1407: PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1408: MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
1409: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1410: MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);
1411: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);
1412: }
1413: PetscViewerFlowControlEndMaster(viewer,message_count);
1414: } else {
1415: PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1416: MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1417: MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);
1418: PetscViewerFlowControlEndWorker(viewer,message_count);
1419: }
1420: PetscFree(column_values);
1421: return(0);
1422: }
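/*
   Example (added for illustration; not part of the original source): the binary path above is
   reached through MatView() with a binary viewer; a save/load round-trip sketch. The file name
   and function name are arbitrary placeholders.
*/
PetscErrorCode ExampleSaveAndLoad(MPI_Comm comm,Mat A,Mat *B)
{
  PetscViewer viewer;

  PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_WRITE,&viewer);
  MatView(A,viewer);
  PetscViewerDestroy(&viewer);

  MatCreate(comm,B);
  MatSetType(*B,MATAIJ);
  PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);
  MatLoad(*B,viewer);
  PetscViewerDestroy(&viewer);
  return(0);
}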
1426: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1427: {
1428: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1429: PetscErrorCode ierr;
1430: PetscMPIInt rank = aij->rank,size = aij->size;
1431: PetscBool isdraw,iascii,isbinary;
1432: PetscViewer sviewer;
1433: PetscViewerFormat format;
1436: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1437: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1438: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1439: if (iascii) {
1440: PetscViewerGetFormat(viewer,&format);
1441: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1442: MatInfo info;
1443: PetscBool inodes;
1445: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
1446: MatGetInfo(mat,MAT_LOCAL,&info);
1447: MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);
1448: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
1449: if (!inodes) {
1450: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1451: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1452: } else {
1453: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1454: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1455: }
1456: MatGetInfo(aij->A,MAT_LOCAL,&info);
1457: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1458: MatGetInfo(aij->B,MAT_LOCAL,&info);
1459: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1460: PetscViewerFlush(viewer);
1461: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
1462: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
1463: VecScatterView(aij->Mvctx,viewer);
1464: return(0);
1465: } else if (format == PETSC_VIEWER_ASCII_INFO) {
1466: PetscInt inodecount,inodelimit,*inodes;
1467: MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);
1468: if (inodes) {
1469: PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);
1470: } else {
1471: PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");
1472: }
1473: return(0);
1474: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1475: return(0);
1476: }
1477: } else if (isbinary) {
1478: if (size == 1) {
1479: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1480: MatView(aij->A,viewer);
1481: } else {
1482: MatView_MPIAIJ_Binary(mat,viewer);
1483: }
1484: return(0);
1485: } else if (isdraw) {
1486: PetscDraw draw;
1487: PetscBool isnull;
1488: PetscViewerDrawGetDraw(viewer,0,&draw);
1489: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1490: }
1492: if (size == 1) {
1493: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1494: MatView(aij->A,viewer);
1495: } else {
1496: /* assemble the entire matrix onto first processor. */
1497: Mat A;
1498: Mat_SeqAIJ *Aloc;
1499: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1500: MatScalar *a;
1502: if (mat->rmap->N > 1024) {
1503: PetscBool flg = PETSC_FALSE;
1505: PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);
1506: if (!flg) {
1507: SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1508: }
1509: }
1511: MatCreate(((PetscObject)mat)->comm,&A);
1512: if (!rank) {
1513: MatSetSizes(A,M,N,M,N);
1514: } else {
1515: MatSetSizes(A,0,0,M,N);
1516: }
1517: /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1518: MatSetType(A,MATMPIAIJ);
1519: MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);
1520: PetscLogObjectParent(mat,A);
1522: /* copy over the A part */
1523: Aloc = (Mat_SeqAIJ*)aij->A->data;
1524: m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1525: row = mat->rmap->rstart;
1526: for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1527: for (i=0; i<m; i++) {
1528: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
1529: row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1530: }
1531: aj = Aloc->j;
1532: for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1534: /* copy over the B part */
1535: Aloc = (Mat_SeqAIJ*)aij->B->data;
1536: m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1537: row = mat->rmap->rstart;
1538: PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);
1539: ct = cols;
1540: for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1541: for (i=0; i<m; i++) {
1542: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
1543: row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1544: }
1545: PetscFree(ct);
1546: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1547: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1548: /*
1549: Everyone has to call to draw the matrix since the graphics waits are
1550: synchronized across all processors that share the PetscDraw object
1551: */
1552: PetscViewerGetSingleton(viewer,&sviewer);
1553: if (!rank) {
1554: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);
1555: /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1556: PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1557: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
1558: }
1559: PetscViewerRestoreSingleton(viewer,&sviewer);
1560: MatDestroy(&A);
1561: }
1562: return(0);
1563: }
1567: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1568: {
1570: PetscBool iascii,isdraw,issocket,isbinary;
1571:
1573: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1574: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1575: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1576: PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1577: if (iascii || isdraw || isbinary || issocket) {
1578: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1579: } else {
1580: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1581: }
1582: return(0);
1583: }
1587: PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1588: {
1589: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1591: Vec bb1 = 0;
1592: PetscBool hasop;
1595: if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1596: VecDuplicate(bb,&bb1);
1597: }
1599: if (flag == SOR_APPLY_UPPER) {
1600: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1601: return(0);
1602: }
1604: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1605: if (flag & SOR_ZERO_INITIAL_GUESS) {
1606: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1607: its--;
1608: }
1609:
1610: while (its--) {
1611: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1612: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1614: /* update rhs: bb1 = bb - B*x */
1615: VecScale(mat->lvec,-1.0);
1616: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1618: /* local sweep */
1619: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);
1620: }
1621: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1622: if (flag & SOR_ZERO_INITIAL_GUESS) {
1623: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1624: its--;
1625: }
1626: while (its--) {
1627: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1628: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1630: /* update rhs: bb1 = bb - B*x */
1631: VecScale(mat->lvec,-1.0);
1632: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1634: /* local sweep */
1635: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);
1636: }
1637: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1638: if (flag & SOR_ZERO_INITIAL_GUESS) {
1639: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1640: its--;
1641: }
1642: while (its--) {
1643: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1644: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1646: /* update rhs: bb1 = bb - B*x */
1647: VecScale(mat->lvec,-1.0);
1648: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1650: /* local sweep */
1651: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);
1652: }
1653: } else if (flag & SOR_EISENSTAT) {
1654: Vec xx1;
1656: VecDuplicate(bb,&xx1);
1657: (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);
1659: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1660: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1661: if (!mat->diag) {
1662: MatGetVecs(matin,&mat->diag,PETSC_NULL);
1663: MatGetDiagonal(matin,mat->diag);
1664: }
1665: MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);
1666: if (hasop) {
1667: MatMultDiagonalBlock(matin,xx,bb1);
1668: } else {
1669: VecPointwiseMult(bb1,mat->diag,xx);
1670: }
1671: VecAYPX(bb1,(omega-2.0)/omega,bb);
1673: MatMultAdd(mat->B,mat->lvec,bb1,bb1);
1675: /* local sweep */
1676: (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);
1677: VecAXPY(xx,1.0,xx1);
1678: VecDestroy(&xx1);
1679: } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1681: VecDestroy(&bb1);
1682: return(0);
1683: }
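/* A minimal usage sketch of the local SOR path implemented above, driven through the
   public MatSOR() entry point.  The function name SketchLocalSOR and the values of
   omega, its and lits are illustrative only; A, b and x are assumed to be an assembled
   parallel AIJ matrix and conforming vectors. */
#if 0
static PetscErrorCode SketchLocalSOR(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;
  PetscReal      omega  = 1.2;   /* relaxation parameter */
  PetscReal      fshift = 0.0;   /* no diagonal shift */
  PetscInt       its    = 5;     /* outer (parallel) iterations */
  PetscInt       lits   = 1;     /* local iterations per outer iteration */

  PetscFunctionBegin;
  /* local symmetric sweeps with a zero initial guess; the off-diagonal coupling is
     folded into the right-hand side between sweeps, as done in MatSOR_MPIAIJ() */
  ierr = MatSOR(A,b,omega,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),fshift,its,lits,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif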
1687: PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1688: {
1689: MPI_Comm comm,pcomm;
1690: PetscInt first,local_size,nrows;
1691: const PetscInt *rows;
1692: PetscMPIInt size;
1693: IS crowp,growp,irowp,lrowp,lcolp,icolp;
1697: PetscObjectGetComm((PetscObject)A,&comm);
1698: /* make a collective version of 'rowp' */
1699: PetscObjectGetComm((PetscObject)rowp,&pcomm);
1700: if (pcomm==comm) {
1701: crowp = rowp;
1702: } else {
1703: ISGetSize(rowp,&nrows);
1704: ISGetIndices(rowp,&rows);
1705: ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);
1706: ISRestoreIndices(rowp,&rows);
1707: }
1708: /* collect the global row permutation and invert it */
1709: ISAllGather(crowp,&growp);
1710: ISSetPermutation(growp);
1711: if (pcomm!=comm) {
1712: ISDestroy(&crowp);
1713: }
1714: ISInvertPermutation(growp,PETSC_DECIDE,&irowp);
1715: /* get the local target indices */
1716: MatGetOwnershipRange(A,&first,PETSC_NULL);
1717: MatGetLocalSize(A,&local_size,PETSC_NULL);
1718: ISGetIndices(irowp,&rows);
1719: ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);
1720: ISRestoreIndices(irowp,&rows);
1721: ISDestroy(&irowp);
1722: /* the column permutation is so much easier;
1723: make a local version of 'colp' and invert it */
1724: PetscObjectGetComm((PetscObject)colp,&pcomm);
1725: MPI_Comm_size(pcomm,&size);
1726: if (size==1) {
1727: lcolp = colp;
1728: } else {
1729: ISGetSize(colp,&nrows);
1730: ISGetIndices(colp,&rows);
1731: ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);
1732: }
1733: ISSetPermutation(lcolp);
1734: ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);
1735: ISSetPermutation(icolp);
1736: if (size>1) {
1737: ISRestoreIndices(colp,&rows);
1738: ISDestroy(&lcolp);
1739: }
1740: /* now we just get the submatrix */
1741: MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);
1742: /* clean up */
1743: ISDestroy(&lrowp);
1744: ISDestroy(&icolp);
1745: return(0);
1746: }
1750: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1751: {
1752: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1753: Mat A = mat->A,B = mat->B;
1755: PetscReal isend[5],irecv[5];
1758: info->block_size = 1.0;
1759: MatGetInfo(A,MAT_LOCAL,info);
1760: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1761: isend[3] = info->memory; isend[4] = info->mallocs;
1762: MatGetInfo(B,MAT_LOCAL,info);
1763: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1764: isend[3] += info->memory; isend[4] += info->mallocs;
1765: if (flag == MAT_LOCAL) {
1766: info->nz_used = isend[0];
1767: info->nz_allocated = isend[1];
1768: info->nz_unneeded = isend[2];
1769: info->memory = isend[3];
1770: info->mallocs = isend[4];
1771: } else if (flag == MAT_GLOBAL_MAX) {
1772: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);
1773: info->nz_used = irecv[0];
1774: info->nz_allocated = irecv[1];
1775: info->nz_unneeded = irecv[2];
1776: info->memory = irecv[3];
1777: info->mallocs = irecv[4];
1778: } else if (flag == MAT_GLOBAL_SUM) {
1779: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);
1780: info->nz_used = irecv[0];
1781: info->nz_allocated = irecv[1];
1782: info->nz_unneeded = irecv[2];
1783: info->memory = irecv[3];
1784: info->mallocs = irecv[4];
1785: }
1786: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1787: info->fill_ratio_needed = 0;
1788: info->factor_mallocs = 0;
1790: return(0);
1791: }
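/* A minimal sketch of querying the statistics assembled by MatGetInfo_MPIAIJ() above.
   The function name SketchReportNonzeros is illustrative; MatGetInfo() and MatInfo are
   the standard public interface. */
#if 0
static PetscErrorCode SketchReportNonzeros(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);   /* sum over all processes */
  ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %g, nz allocated %g, mallocs %g\n",
                     (double)info.nz_used,(double)info.nz_allocated,(double)info.mallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif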
1795: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1796: {
1797: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1801: switch (op) {
1802: case MAT_NEW_NONZERO_LOCATIONS:
1803: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1804: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1805: case MAT_KEEP_NONZERO_PATTERN:
1806: case MAT_NEW_NONZERO_LOCATION_ERR:
1807: case MAT_USE_INODES:
1808: case MAT_IGNORE_ZERO_ENTRIES:
1809: MatSetOption(a->A,op,flg);
1810: MatSetOption(a->B,op,flg);
1811: break;
1812: case MAT_ROW_ORIENTED:
1813: a->roworiented = flg;
1814: MatSetOption(a->A,op,flg);
1815: MatSetOption(a->B,op,flg);
1816: break;
1817: case MAT_NEW_DIAGONALS:
1818: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1819: break;
1820: case MAT_IGNORE_OFF_PROC_ENTRIES:
1821: a->donotstash = flg;
1822: break;
1823: case MAT_SPD:
1824: A->spd_set = PETSC_TRUE;
1825: A->spd = flg;
1826: if (flg) {
1827: A->symmetric = PETSC_TRUE;
1828: A->structurally_symmetric = PETSC_TRUE;
1829: A->symmetric_set = PETSC_TRUE;
1830: A->structurally_symmetric_set = PETSC_TRUE;
1831: }
1832: break;
1833: case MAT_SYMMETRIC:
1834: MatSetOption(a->A,op,flg);
1835: break;
1836: case MAT_STRUCTURALLY_SYMMETRIC:
1837: MatSetOption(a->A,op,flg);
1838: break;
1839: case MAT_HERMITIAN:
1840: MatSetOption(a->A,op,flg);
1841: break;
1842: case MAT_SYMMETRY_ETERNAL:
1843: MatSetOption(a->A,op,flg);
1844: break;
1845: default:
1846: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1847: }
1848: return(0);
1849: }
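/* A minimal sketch of setting the options handled by MatSetOption_MPIAIJ() above, for
   the common case where every process generates entries only for rows it owns.  The
   function name SketchSetAssemblyOptions is illustrative. */
#if 0
static PetscErrorCode SketchSetAssemblyOptions(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);    /* skip the off-process stash entirely */
  ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); /* error if preallocation is exceeded */
  PetscFunctionReturn(0);
}
#endif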
1853: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1854: {
1855: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1856: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1858: PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1859: PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1860: PetscInt *cmap,*idx_p;
1863: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1864: mat->getrowactive = PETSC_TRUE;
1866: if (!mat->rowvalues && (idx || v)) {
1867: /*
1868: allocate enough space to hold information from the longest row.
1869: */
1870: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1871: PetscInt max = 1,tmp;
1872: for (i=0; i<matin->rmap->n; i++) {
1873: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1874: if (max < tmp) { max = tmp; }
1875: }
1876: PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);
1877: }
1879: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1880: lrow = row - rstart;
1882: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1883: if (!v) {pvA = 0; pvB = 0;}
1884: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1885: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1886: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1887: nztot = nzA + nzB;
1889: cmap = mat->garray;
1890: if (v || idx) {
1891: if (nztot) {
1892: /* Sort by increasing column numbers, assuming A and B already sorted */
1893: PetscInt imark = -1;
1894: if (v) {
1895: *v = v_p = mat->rowvalues;
1896: for (i=0; i<nzB; i++) {
1897: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1898: else break;
1899: }
1900: imark = i;
1901: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1902: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1903: }
1904: if (idx) {
1905: *idx = idx_p = mat->rowindices;
1906: if (imark > -1) {
1907: for (i=0; i<imark; i++) {
1908: idx_p[i] = cmap[cworkB[i]];
1909: }
1910: } else {
1911: for (i=0; i<nzB; i++) {
1912: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1913: else break;
1914: }
1915: imark = i;
1916: }
1917: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1918: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1919: }
1920: } else {
1921: if (idx) *idx = 0;
1922: if (v) *v = 0;
1923: }
1924: }
1925: *nz = nztot;
1926: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1927: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1928: return(0);
1929: }
1933: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1934: {
1935: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1938: if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1939: aij->getrowactive = PETSC_FALSE;
1940: return(0);
1941: }
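/* A minimal sketch of the MatGetRow()/MatRestoreRow() protocol implemented above: only
   locally owned rows may be requested, and each MatGetRow() must be paired with a
   MatRestoreRow() before the next row is accessed.  The function name SketchSumOwnedRows
   is illustrative. */
#if 0
static PetscErrorCode SketchSumOwnedRows(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols,j;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       rowsum;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr   = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    rowsum = 0.0;
    for (j=0; j<ncols; j++) rowsum += vals[j];          /* columns arrive in global numbering, sorted */
    ierr   = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif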
1945: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1946: {
1947: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1948: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1950: PetscInt i,j,cstart = mat->cmap->rstart;
1951: PetscReal sum = 0.0;
1952: MatScalar *v;
1955: if (aij->size == 1) {
1956: MatNorm(aij->A,type,norm);
1957: } else {
1958: if (type == NORM_FROBENIUS) {
1959: v = amat->a;
1960: for (i=0; i<amat->nz; i++) {
1961: #if defined(PETSC_USE_COMPLEX)
1962: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1963: #else
1964: sum += (*v)*(*v); v++;
1965: #endif
1966: }
1967: v = bmat->a;
1968: for (i=0; i<bmat->nz; i++) {
1969: #if defined(PETSC_USE_COMPLEX)
1970: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1971: #else
1972: sum += (*v)*(*v); v++;
1973: #endif
1974: }
1975: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);
1976: *norm = PetscSqrtReal(*norm);
1977: } else if (type == NORM_1) { /* max column norm */
1978: PetscReal *tmp,*tmp2;
1979: PetscInt *jj,*garray = aij->garray;
1980: PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);
1981: PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);
1982: PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));
1983: *norm = 0.0;
1984: v = amat->a; jj = amat->j;
1985: for (j=0; j<amat->nz; j++) {
1986: tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
1987: }
1988: v = bmat->a; jj = bmat->j;
1989: for (j=0; j<bmat->nz; j++) {
1990: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1991: }
1992: MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);
1993: for (j=0; j<mat->cmap->N; j++) {
1994: if (tmp2[j] > *norm) *norm = tmp2[j];
1995: }
1996: PetscFree(tmp);
1997: PetscFree(tmp2);
1998: } else if (type == NORM_INFINITY) { /* max row norm */
1999: PetscReal ntemp = 0.0;
2000: for (j=0; j<aij->A->rmap->n; j++) {
2001: v = amat->a + amat->i[j];
2002: sum = 0.0;
2003: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2004: sum += PetscAbsScalar(*v); v++;
2005: }
2006: v = bmat->a + bmat->i[j];
2007: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2008: sum += PetscAbsScalar(*v); v++;
2009: }
2010: if (sum > ntemp) ntemp = sum;
2011: }
2012: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);
2013: } else {
2014: SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2015: }
2016: }
2017: return(0);
2018: }
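/* A minimal sketch of the norms supported by MatNorm_MPIAIJ() above; NORM_2 of a
   parallel matrix is not supported.  The function name SketchMatrixNorms is illustrative. */
#if 0
static PetscErrorCode SketchMatrixNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscReal      nrmf,nrm1,nrminf;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);          /* largest column sum of absolute values */
  ierr = MatNorm(A,NORM_INFINITY,&nrminf);CHKERRQ(ierr); /* largest row sum of absolute values */
  PetscFunctionReturn(0);
}
#endif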
2022: PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2023: {
2024: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2025: Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2027: PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2028: PetscInt cstart=A->cmap->rstart,ncol;
2029: Mat B;
2030: MatScalar *array;
2033: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place transpose");
2035: ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2036: ai = Aloc->i; aj = Aloc->j;
2037: bi = Bloc->i; bj = Bloc->j;
2038: if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2039: /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2040: PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);
2041: PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));
2042: for (i=0; i<ai[ma]; i++){
2043: d_nnz[aj[i]] ++;
2044: aj[i] += cstart; /* global col index to be used by MatSetValues() */
2045: }
2047: MatCreate(((PetscObject)A)->comm,&B);
2048: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
2049: MatSetType(B,((PetscObject)A)->type_name);
2050: MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);
2051: PetscFree(d_nnz);
2052: } else {
2053: B = *matout;
2054: }
2056: /* copy over the A part */
2057: array = Aloc->a;
2058: row = A->rmap->rstart;
2059: for (i=0; i<ma; i++) {
2060: ncol = ai[i+1]-ai[i];
2061: MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);
2062: row++; array += ncol; aj += ncol;
2063: }
2064: aj = Aloc->j;
2065: for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
2067: /* copy over the B part */
2068: PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);
2069: PetscMemzero(cols,bi[mb]*sizeof(PetscInt));
2070: array = Bloc->a;
2071: row = A->rmap->rstart;
2072: for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2073: cols_tmp = cols;
2074: for (i=0; i<mb; i++) {
2075: ncol = bi[i+1]-bi[i];
2076: MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);
2077: row++; array += ncol; cols_tmp += ncol;
2078: }
2079: PetscFree(cols);
2080:
2081: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2082: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2083: if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2084: *matout = B;
2085: } else {
2086: MatHeaderMerge(A,B);
2087: }
2088: return(0);
2089: }
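/* A minimal sketch of calling the transpose implemented above through MatTranspose();
   the in-place form is only allowed for square matrices, per the size check at the top
   of MatTranspose_MPIAIJ().  The function name SketchTranspose is illustrative. */
#if 0
static PetscErrorCode SketchTranspose(Mat A)
{
  PetscErrorCode ierr;
  Mat            At;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* new matrix At = A^T */
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);    /* in-place: A <- A^T (square only) */
  PetscFunctionReturn(0);
}
#endif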
2093: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2094: {
2095: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2096: Mat a = aij->A,b = aij->B;
2098: PetscInt s1,s2,s3;
2101: MatGetLocalSize(mat,&s2,&s3);
2102: if (rr) {
2103: VecGetLocalSize(rr,&s1);
2104: if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2105: /* Overlap communication with computation. */
2106: VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2107: }
2108: if (ll) {
2109: VecGetLocalSize(ll,&s1);
2110: if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2111: (*b->ops->diagonalscale)(b,ll,0);
2112: }
2113: /* scale the diagonal block */
2114: (*a->ops->diagonalscale)(a,ll,rr);
2116: if (rr) {
2117: /* Do a scatter end and then right scale the off-diagonal block */
2118: VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2119: (*b->ops->diagonalscale)(b,0,aij->lvec);
2120: }
2121:
2122: return(0);
2123: }
2127: PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2128: {
2129: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2133: MatSetBlockSize(a->A,bs);
2134: MatSetBlockSize(a->B,bs);
2135: PetscLayoutSetBlockSize(A->rmap,bs);
2136: PetscLayoutSetBlockSize(A->cmap,bs);
2137: return(0);
2138: }
2141: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2142: {
2143: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2147: MatSetUnfactored(a->A);
2148: return(0);
2149: }
2153: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
2154: {
2155: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2156: Mat a,b,c,d;
2157: PetscBool flg;
2161: a = matA->A; b = matA->B;
2162: c = matB->A; d = matB->B;
2164: MatEqual(a,c,&flg);
2165: if (flg) {
2166: MatEqual(b,d,&flg);
2167: }
2168: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);
2169: return(0);
2170: }
2174: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2175: {
2177: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2178: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2181: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2182: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2183: /* because of the column compression in the off-processor part of the matrix a->B,
2184: the number of columns in a->B and b->B may be different, hence we cannot call
2185: MatCopy() directly on the two parts. If need be, a copy more efficient than
2186: MatCopy_Basic() could be provided by first uncompressing the B matrices and
2187: then copying the submatrices */
2188: MatCopy_Basic(A,B,str);
2189: } else {
2190: MatCopy(a->A,b->A,str);
2191: MatCopy(a->B,b->B,str);
2192: }
2193: return(0);
2194: }
2198: PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2199: {
2203: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
2204: return(0);
2205: }
2209: PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2210: {
2212: PetscInt i;
2213: Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2214: PetscBLASInt bnz,one=1;
2215: Mat_SeqAIJ *x,*y;
2218: if (str == SAME_NONZERO_PATTERN) {
2219: PetscScalar alpha = a;
2220: x = (Mat_SeqAIJ *)xx->A->data;
2221: y = (Mat_SeqAIJ *)yy->A->data;
2222: bnz = PetscBLASIntCast(x->nz);
2223: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2224: x = (Mat_SeqAIJ *)xx->B->data;
2225: y = (Mat_SeqAIJ *)yy->B->data;
2226: bnz = PetscBLASIntCast(x->nz);
2227: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2228: } else if (str == SUBSET_NONZERO_PATTERN) {
2229: MatAXPY_SeqAIJ(yy->A,a,xx->A,str);
2231: x = (Mat_SeqAIJ *)xx->B->data;
2232: y = (Mat_SeqAIJ *)yy->B->data;
2233: if (y->xtoy && y->XtoY != xx->B) {
2234: PetscFree(y->xtoy);
2235: MatDestroy(&y->XtoY);
2236: }
2237: if (!y->xtoy) { /* get xtoy */
2238: MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
2239: y->XtoY = xx->B;
2240: PetscObjectReference((PetscObject)xx->B);
2241: }
2242: for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2243: } else {
2244: Mat B;
2245: PetscInt *nnz_d,*nnz_o;
2246: PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);
2247: PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);
2248: MatCreate(((PetscObject)Y)->comm,&B);
2249: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2250: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2251: MatSetType(B,MATMPIAIJ);
2252: MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);
2253: MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);
2254: MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);
2255: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2256: MatHeaderReplace(Y,B);
2257: PetscFree(nnz_d);
2258: PetscFree(nnz_o);
2259: }
2260: return(0);
2261: }
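/* A minimal sketch of Y <- a*X + Y for the three pattern cases handled above; the
   SAME_NONZERO_PATTERN path reduces to BLAS axpy on the stored values.  The function
   name SketchAXPY and the scalar 2.0 are illustrative. */
#if 0
static PetscErrorCode SketchAXPY(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  /* use SUBSET_NONZERO_PATTERN when X's nonzeros are contained in Y's, and
     DIFFERENT_NONZERO_PATTERN when the patterns are unrelated (Y is then rebuilt) */
  PetscFunctionReturn(0);
}
#endif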
2263: extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2267: PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2268: {
2269: #if defined(PETSC_USE_COMPLEX)
2271: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2274: MatConjugate_SeqAIJ(aij->A);
2275: MatConjugate_SeqAIJ(aij->B);
2276: #else
2278: #endif
2279: return(0);
2280: }
2284: PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2285: {
2286: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2290: MatRealPart(a->A);
2291: MatRealPart(a->B);
2292: return(0);
2293: }
2297: PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2298: {
2299: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2303: MatImaginaryPart(a->A);
2304: MatImaginaryPart(a->B);
2305: return(0);
2306: }
2308: #ifdef PETSC_HAVE_PBGL
2310: #include <boost/parallel/mpi/bsp_process_group.hpp>
2311: #include <boost/graph/distributed/ilu_default_graph.hpp>
2312: #include <boost/graph/distributed/ilu_0_block.hpp>
2313: #include <boost/graph/distributed/ilu_preconditioner.hpp>
2314: #include <boost/graph/distributed/petsc/interface.hpp>
2315: #include <boost/multi_array.hpp>
2316: #include <boost/parallel/distributed_property_map.hpp>
2320: /*
2321: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2322: */
2323: PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2324: {
2325: namespace petsc = boost::distributed::petsc;
2326:
2327: namespace graph_dist = boost::graph::distributed;
2328: using boost::graph::distributed::ilu_default::process_group_type;
2329: using boost::graph::ilu_permuted;
2331: PetscBool row_identity, col_identity;
2332: PetscContainer c;
2333: PetscInt m, n, M, N;
2334: PetscErrorCode ierr;
2337: if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2338: ISIdentity(isrow, &row_identity);
2339: ISIdentity(iscol, &col_identity);
2340: if (!row_identity || !col_identity) {
2341: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2342: }
2344: process_group_type pg;
2345: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2346: lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2347: lgraph_type& level_graph = *lgraph_p;
2348: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2350: petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2351: ilu_permuted(level_graph);
2353: /* put together the new matrix */
2354: MatCreate(((PetscObject)A)->comm, fact);
2355: MatGetLocalSize(A, &m, &n);
2356: MatGetSize(A, &M, &N);
2357: MatSetSizes(fact, m, n, M, N);
2358: MatSetType(fact, ((PetscObject)A)->type_name);
2359: MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);
2360: MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);
2362: PetscContainerCreate(((PetscObject)A)->comm, &c);
2363: PetscContainerSetPointer(c, lgraph_p);
2364: PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2365: PetscContainerDestroy(&c);
2366: return(0);
2367: }
2371: PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2372: {
2374: return(0);
2375: }
2379: /*
2380: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2381: */
2382: PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2383: {
2384: namespace graph_dist = boost::graph::distributed;
2386: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2387: lgraph_type* lgraph_p;
2388: PetscContainer c;
2392: PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);
2393: PetscContainerGetPointer(c, (void **) &lgraph_p);
2394: VecCopy(b, x);
2396: PetscScalar* array_x;
2397: VecGetArray(x, &array_x);
2398: PetscInt sx;
2399: VecGetSize(x, &sx);
2400:
2401: PetscScalar* array_b;
2402: VecGetArray(b, &array_b);
2403: PetscInt sb;
2404: VecGetSize(b, &sb);
2406: lgraph_type& level_graph = *lgraph_p;
2407: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2409: typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2410: array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
2411: ref_x(array_x, boost::extents[num_vertices(graph)]);
2413: typedef boost::iterator_property_map<array_ref_type::iterator,
2414: boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2415: gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2416: vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2417:
2418: ilu_set_solve(*lgraph_p, vector_b, vector_x);
2420: return(0);
2421: }
2422: #endif
2424: typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2425: PetscInt nzlocal,nsends,nrecvs;
2426: PetscMPIInt *send_rank,*recv_rank;
2427: PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2428: PetscScalar *sbuf_a,**rbuf_a;
2429: PetscErrorCode (*Destroy)(Mat);
2430: } Mat_Redundant;
2434: PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2435: {
2436: PetscErrorCode ierr;
2437: Mat_Redundant *redund=(Mat_Redundant*)ptr;
2438: PetscInt i;
2441: PetscFree2(redund->send_rank,redund->recv_rank);
2442: PetscFree(redund->sbuf_j);
2443: PetscFree(redund->sbuf_a);
2444: for (i=0; i<redund->nrecvs; i++){
2445: PetscFree(redund->rbuf_j[i]);
2446: PetscFree(redund->rbuf_a[i]);
2447: }
2448: PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);
2449: PetscFree(redund);
2450: return(0);
2451: }
2455: PetscErrorCode MatDestroy_MatRedundant(Mat A)
2456: {
2457: PetscErrorCode ierr;
2458: PetscContainer container;
2459: Mat_Redundant *redund=PETSC_NULL;
2462: PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);
2463: if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2464: PetscContainerGetPointer(container,(void **)&redund);
2465: A->ops->destroy = redund->Destroy;
2466: PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);
2467: if (A->ops->destroy) {
2468: (*A->ops->destroy)(A);
2469: }
2470: return(0);
2471: }
2475: PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2476: {
2477: PetscMPIInt rank,size;
2478: MPI_Comm comm=((PetscObject)mat)->comm;
2480: PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
2481: PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2482: PetscInt *rowrange=mat->rmap->range;
2483: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2484: Mat A=aij->A,B=aij->B,C=*matredundant;
2485: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2486: PetscScalar *sbuf_a;
2487: PetscInt nzlocal=a->nz+b->nz;
2488: PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2489: PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2490: PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2491: MatScalar *aworkA,*aworkB;
2492: PetscScalar *vals;
2493: PetscMPIInt tag1,tag2,tag3,imdex;
2494: MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2495: *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2496: MPI_Status recv_status,*send_status;
2497: PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2498: PetscInt **rbuf_j=PETSC_NULL;
2499: PetscScalar **rbuf_a=PETSC_NULL;
2500: Mat_Redundant *redund=PETSC_NULL;
2501: PetscContainer container;
2504: MPI_Comm_rank(comm,&rank);
2505: MPI_Comm_size(comm,&size);
2507: if (reuse == MAT_REUSE_MATRIX) {
2508: MatGetSize(C,&M,&N);
2509: if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2510: MatGetLocalSize(C,&M,&N);
2511: if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2512: PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);
2513: if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2514: PetscContainerGetPointer(container,(void **)&redund);
2515: if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2517: nsends = redund->nsends;
2518: nrecvs = redund->nrecvs;
2519: send_rank = redund->send_rank;
2520: recv_rank = redund->recv_rank;
2521: sbuf_nz = redund->sbuf_nz;
2522: rbuf_nz = redund->rbuf_nz;
2523: sbuf_j = redund->sbuf_j;
2524: sbuf_a = redund->sbuf_a;
2525: rbuf_j = redund->rbuf_j;
2526: rbuf_a = redund->rbuf_a;
2527: }
2529: if (reuse == MAT_INITIAL_MATRIX){
2530: PetscMPIInt subrank,subsize;
2531: PetscInt nleftover,np_subcomm;
2532: /* get the destination processors' id send_rank, nsends and nrecvs */
2533: MPI_Comm_rank(subcomm,&subrank);
2534: MPI_Comm_size(subcomm,&subsize);
2535: PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2536: np_subcomm = size/nsubcomm;
2537: nleftover = size - nsubcomm*np_subcomm;
2538: nsends = 0; nrecvs = 0;
2539: for (i=0; i<size; i++){ /* i=rank*/
2540: if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2541: send_rank[nsends] = i; nsends++;
2542: recv_rank[nrecvs++] = i;
2543: }
2544: }
2545: if (rank >= size - nleftover){/* this proc is a leftover processor */
2546: i = size-nleftover-1;
2547: j = 0;
2548: while (j < nsubcomm - nleftover){
2549: send_rank[nsends++] = i;
2550: i--; j++;
2551: }
2552: }
2554: if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2555: for (i=0; i<nleftover; i++){
2556: recv_rank[nrecvs++] = size-nleftover+i;
2557: }
2558: }
2560: /* allocate sbuf_j, sbuf_a */
2561: i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2562: PetscMalloc(i*sizeof(PetscInt),&sbuf_j);
2563: PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);
2564: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2566: /* copy mat's local entries into the buffers */
2567: if (reuse == MAT_INITIAL_MATRIX){
2568: rownz_max = 0;
2569: rptr = sbuf_j;
2570: cols = sbuf_j + rend-rstart + 1;
2571: vals = sbuf_a;
2572: rptr[0] = 0;
2573: for (i=0; i<rend-rstart; i++){
2574: row = i + rstart;
2575: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2576: ncols = nzA + nzB;
2577: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2578: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2579: /* load the column indices for this row into cols */
2580: lwrite = 0;
2581: for (l=0; l<nzB; l++) {
2582: if ((ctmp = bmap[cworkB[l]]) < cstart){
2583: vals[lwrite] = aworkB[l];
2584: cols[lwrite++] = ctmp;
2585: }
2586: }
2587: for (l=0; l<nzA; l++){
2588: vals[lwrite] = aworkA[l];
2589: cols[lwrite++] = cstart + cworkA[l];
2590: }
2591: for (l=0; l<nzB; l++) {
2592: if ((ctmp = bmap[cworkB[l]]) >= cend){
2593: vals[lwrite] = aworkB[l];
2594: cols[lwrite++] = ctmp;
2595: }
2596: }
2597: vals += ncols;
2598: cols += ncols;
2599: rptr[i+1] = rptr[i] + ncols;
2600: if (rownz_max < ncols) rownz_max = ncols;
2601: }
2602: if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2603: } else { /* only copy matrix values into sbuf_a */
2604: rptr = sbuf_j;
2605: vals = sbuf_a;
2606: rptr[0] = 0;
2607: for (i=0; i<rend-rstart; i++){
2608: row = i + rstart;
2609: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2610: ncols = nzA + nzB;
2611: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2612: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2613: lwrite = 0;
2614: for (l=0; l<nzB; l++) {
2615: if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2616: }
2617: for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2618: for (l=0; l<nzB; l++) {
2619: if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2620: }
2621: vals += ncols;
2622: rptr[i+1] = rptr[i] + ncols;
2623: }
2624: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2626: /* send nzlocal to others, and recv other's nzlocal */
2627: /*--------------------------------------------------*/
2628: if (reuse == MAT_INITIAL_MATRIX){
2629: PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2630: s_waits2 = s_waits3 + nsends;
2631: s_waits1 = s_waits2 + nsends;
2632: r_waits1 = s_waits1 + nsends;
2633: r_waits2 = r_waits1 + nrecvs;
2634: r_waits3 = r_waits2 + nrecvs;
2635: } else {
2636: PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2637: r_waits3 = s_waits3 + nsends;
2638: }
2640: PetscObjectGetNewTag((PetscObject)mat,&tag3);
2641: if (reuse == MAT_INITIAL_MATRIX){
2642: /* get new tags to keep the communication clean */
2643: PetscObjectGetNewTag((PetscObject)mat,&tag1);
2644: PetscObjectGetNewTag((PetscObject)mat,&tag2);
2645: PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);
2647: /* post receives of other's nzlocal */
2648: for (i=0; i<nrecvs; i++){
2649: MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);
2650: }
2651: /* send nzlocal to others */
2652: for (i=0; i<nsends; i++){
2653: sbuf_nz[i] = nzlocal;
2654: MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);
2655: }
2656: /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2657: count = nrecvs;
2658: while (count) {
2659: MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);
2660: recv_rank[imdex] = recv_status.MPI_SOURCE;
2661: /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2662: PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);
2664: i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2665: rbuf_nz[imdex] += i + 2;
2666: PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);
2667: MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);
2668: count--;
2669: }
2670: /* wait on sends of nzlocal */
2671: if (nsends) {MPI_Waitall(nsends,s_waits1,send_status);}
2672: /* send mat->i,j to others, and recv from others */
2673: /*------------------------------------------------*/
2674: for (i=0; i<nsends; i++){
2675: j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2676: MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);
2677: }
2678: /* wait on receives of mat->i,j */
2679: /*------------------------------*/
2680: count = nrecvs;
2681: while (count) {
2682: MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);
2683: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2684: count--;
2685: }
2686: /* wait on sends of mat->i,j */
2687: /*---------------------------*/
2688: if (nsends) {
2689: MPI_Waitall(nsends,s_waits2,send_status);
2690: }
2691: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2693: /* post receives, send and receive mat->a */
2694: /*----------------------------------------*/
2695: for (imdex=0; imdex<nrecvs; imdex++) {
2696: MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);
2697: }
2698: for (i=0; i<nsends; i++){
2699: MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);
2700: }
2701: count = nrecvs;
2702: while (count) {
2703: MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);
2704: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2705: count--;
2706: }
2707: if (nsends) {
2708: MPI_Waitall(nsends,s_waits3,send_status);
2709: }
2711: PetscFree2(s_waits3,send_status);
2713: /* create redundant matrix */
2714: /*-------------------------*/
2715: if (reuse == MAT_INITIAL_MATRIX){
2716: /* compute rownz_max for preallocation */
2717: for (imdex=0; imdex<nrecvs; imdex++){
2718: j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2719: rptr = rbuf_j[imdex];
2720: for (i=0; i<j; i++){
2721: ncols = rptr[i+1] - rptr[i];
2722: if (rownz_max < ncols) rownz_max = ncols;
2723: }
2724: }
2726: MatCreate(subcomm,&C);
2727: MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);
2728: MatSetFromOptions(C);
2729: MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);
2730: MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);
2731: } else {
2732: C = *matredundant;
2733: }
2735: /* insert local matrix entries */
2736: rptr = sbuf_j;
2737: cols = sbuf_j + rend-rstart + 1;
2738: vals = sbuf_a;
2739: for (i=0; i<rend-rstart; i++){
2740: row = i + rstart;
2741: ncols = rptr[i+1] - rptr[i];
2742: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2743: vals += ncols;
2744: cols += ncols;
2745: }
2746: /* insert received matrix entries */
2747: for (imdex=0; imdex<nrecvs; imdex++){
2748: rstart = rowrange[recv_rank[imdex]];
2749: rend = rowrange[recv_rank[imdex]+1];
2750: rptr = rbuf_j[imdex];
2751: cols = rbuf_j[imdex] + rend-rstart + 1;
2752: vals = rbuf_a[imdex];
2753: for (i=0; i<rend-rstart; i++){
2754: row = i + rstart;
2755: ncols = rptr[i+1] - rptr[i];
2756: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2757: vals += ncols;
2758: cols += ncols;
2759: }
2760: }
2761: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2762: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2763: MatGetSize(C,&M,&N);
2764: if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2765: if (reuse == MAT_INITIAL_MATRIX) {
2766: PetscContainer container;
2767: *matredundant = C;
2768: /* create a supporting struct and attach it to C for reuse */
2769: PetscNewLog(C,Mat_Redundant,&redund);
2770: PetscContainerCreate(PETSC_COMM_SELF,&container);
2771: PetscContainerSetPointer(container,redund);
2772: PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);
2773: PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);
2774: PetscContainerDestroy(&container);
2776: redund->nzlocal = nzlocal;
2777: redund->nsends = nsends;
2778: redund->nrecvs = nrecvs;
2779: redund->send_rank = send_rank;
2780: redund->recv_rank = recv_rank;
2781: redund->sbuf_nz = sbuf_nz;
2782: redund->rbuf_nz = rbuf_nz;
2783: redund->sbuf_j = sbuf_j;
2784: redund->sbuf_a = sbuf_a;
2785: redund->rbuf_j = rbuf_j;
2786: redund->rbuf_a = rbuf_a;
2788: redund->Destroy = C->ops->destroy;
2789: C->ops->destroy = MatDestroy_MatRedundant;
2790: }
2791: return(0);
2792: }
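/* A minimal sketch of the simplest redundant layout: every process receives a complete
   copy of the parallel matrix on PETSC_COMM_SELF (nsubcomm equal to the number of
   processes).  More general layouts, as used by PCREDUNDANT, pass a genuine
   subcommunicator and a matching mlocal_sub.  The function name SketchRedundantCopy is
   illustrative. */
#if 0
static PetscErrorCode SketchRedundantCopy(Mat A,Mat *Ared)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;
  PetscInt       M;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = MatGetSize(A,&M,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetRedundantMatrix(A,size,PETSC_COMM_SELF,M,MAT_INITIAL_MATRIX,Ared);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif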
2796: PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2797: {
2798: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2800: PetscInt i,*idxb = 0;
2801: PetscScalar *va,*vb;
2802: Vec vtmp;
2805: MatGetRowMaxAbs(a->A,v,idx);
2806: VecGetArray(v,&va);
2807: if (idx) {
2808: for (i=0; i<A->rmap->n; i++) {
2809: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2810: }
2811: }
2813: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2814: if (idx) {
2815: PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);
2816: }
2817: MatGetRowMaxAbs(a->B,vtmp,idxb);
2818: VecGetArray(vtmp,&vb);
2820: for (i=0; i<A->rmap->n; i++){
2821: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2822: va[i] = vb[i];
2823: if (idx) idx[i] = a->garray[idxb[i]];
2824: }
2825: }
2827: VecRestoreArray(v,&va);
2828: VecRestoreArray(vtmp,&vb);
2829: PetscFree(idxb);
2830: VecDestroy(&vtmp);
2831: return(0);
2832: }
2836: PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2837: {
2838: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2840: PetscInt i,*idxb = 0;
2841: PetscScalar *va,*vb;
2842: Vec vtmp;
2845: MatGetRowMinAbs(a->A,v,idx);
2846: VecGetArray(v,&va);
2847: if (idx) {
2848: for (i=0; i<A->rmap->n; i++) { /* loop over the locally owned rows; v and idx have rmap->n entries */
2849: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2850: }
2851: }
2853: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2854: if (idx) {
2855: PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);
2856: }
2857: MatGetRowMinAbs(a->B,vtmp,idxb);
2858: VecGetArray(vtmp,&vb);
2860: for (i=0; i<A->rmap->n; i++){
2861: if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2862: va[i] = vb[i];
2863: if (idx) idx[i] = a->garray[idxb[i]];
2864: }
2865: }
2867: VecRestoreArray(v,&va);
2868: VecRestoreArray(vtmp,&vb);
2869: PetscFree(idxb);
2870: VecDestroy(&vtmp);
2871: return(0);
2872: }
2876: PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2877: {
2878: Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2879: PetscInt n = A->rmap->n;
2880: PetscInt cstart = A->cmap->rstart;
2881: PetscInt *cmap = mat->garray;
2882: PetscInt *diagIdx, *offdiagIdx;
2883: Vec diagV, offdiagV;
2884: PetscScalar *a, *diagA, *offdiagA;
2885: PetscInt r;
2889: PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);
2890: VecCreateSeq(PETSC_COMM_SELF, n, &diagV);    /* sequential work vectors must live on a one-process communicator */
2891: VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);
2892: MatGetRowMin(mat->A, diagV, diagIdx);
2893: MatGetRowMin(mat->B, offdiagV, offdiagIdx);
2894: VecGetArray(v, &a);
2895: VecGetArray(diagV, &diagA);
2896: VecGetArray(offdiagV, &offdiagA);
2897: for(r = 0; r < n; ++r) {
2898: if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2899: a[r] = diagA[r];
2900: idx[r] = cstart + diagIdx[r];
2901: } else {
2902: a[r] = offdiagA[r];
2903: idx[r] = cmap[offdiagIdx[r]];
2904: }
2905: }
2906: VecRestoreArray(v, &a);
2907: VecRestoreArray(diagV, &diagA);
2908: VecRestoreArray(offdiagV, &offdiagA);
2909: VecDestroy(&diagV);
2910: VecDestroy(&offdiagV);
2911: PetscFree2(diagIdx, offdiagIdx);
2912: return(0);
2913: }
2917: PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2918: {
2919: Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2920: PetscInt n = A->rmap->n;
2921: PetscInt cstart = A->cmap->rstart;
2922: PetscInt *cmap = mat->garray;
2923: PetscInt *diagIdx, *offdiagIdx;
2924: Vec diagV, offdiagV;
2925: PetscScalar *a, *diagA, *offdiagA;
2926: PetscInt r;
2930: PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);
2931: VecCreateSeq(PETSC_COMM_SELF, n, &diagV);    /* sequential work vectors must live on a one-process communicator */
2932: VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);
2933: MatGetRowMax(mat->A, diagV, diagIdx);
2934: MatGetRowMax(mat->B, offdiagV, offdiagIdx);
2935: VecGetArray(v, &a);
2936: VecGetArray(diagV, &diagA);
2937: VecGetArray(offdiagV, &offdiagA);
2938: for(r = 0; r < n; ++r) {
2939: if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2940: a[r] = diagA[r];
2941: idx[r] = cstart + diagIdx[r];
2942: } else {
2943: a[r] = offdiagA[r];
2944: idx[r] = cmap[offdiagIdx[r]];
2945: }
2946: }
2947: VecRestoreArray(v, &a);
2948: VecRestoreArray(diagV, &diagA);
2949: VecRestoreArray(offdiagV, &offdiagA);
2950: VecDestroy(&diagV);
2951: VecDestroy(&offdiagV);
2952: PetscFree2(diagIdx, offdiagIdx);
2953: return(0);
2954: }
2958: PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2959: {
2961: Mat *dummy;
2964: MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);
2965: *newmat = *dummy;
2966: PetscFree(dummy);
2967: return(0);
2968: }
2970: extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2974: PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2975: {
2976: Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
2980: MatInvertBlockDiagonal(a->A,values);
2981: return(0);
2982: }
2985: /* -------------------------------------------------------------------*/
2986: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2987: MatGetRow_MPIAIJ,
2988: MatRestoreRow_MPIAIJ,
2989: MatMult_MPIAIJ,
2990: /* 4*/ MatMultAdd_MPIAIJ,
2991: MatMultTranspose_MPIAIJ,
2992: MatMultTransposeAdd_MPIAIJ,
2993: #ifdef PETSC_HAVE_PBGL
2994: MatSolve_MPIAIJ,
2995: #else
2996: 0,
2997: #endif
2998: 0,
2999: 0,
3000: /*10*/ 0,
3001: 0,
3002: 0,
3003: MatSOR_MPIAIJ,
3004: MatTranspose_MPIAIJ,
3005: /*15*/ MatGetInfo_MPIAIJ,
3006: MatEqual_MPIAIJ,
3007: MatGetDiagonal_MPIAIJ,
3008: MatDiagonalScale_MPIAIJ,
3009: MatNorm_MPIAIJ,
3010: /*20*/ MatAssemblyBegin_MPIAIJ,
3011: MatAssemblyEnd_MPIAIJ,
3012: MatSetOption_MPIAIJ,
3013: MatZeroEntries_MPIAIJ,
3014: /*24*/ MatZeroRows_MPIAIJ,
3015: 0,
3016: #ifdef PETSC_HAVE_PBGL
3017: 0,
3018: #else
3019: 0,
3020: #endif
3021: 0,
3022: 0,
3023: /*29*/ MatSetUpPreallocation_MPIAIJ,
3024: #ifdef PETSC_HAVE_PBGL
3025: 0,
3026: #else
3027: 0,
3028: #endif
3029: 0,
3030: 0,
3031: 0,
3032: /*34*/ MatDuplicate_MPIAIJ,
3033: 0,
3034: 0,
3035: 0,
3036: 0,
3037: /*39*/ MatAXPY_MPIAIJ,
3038: MatGetSubMatrices_MPIAIJ,
3039: MatIncreaseOverlap_MPIAIJ,
3040: MatGetValues_MPIAIJ,
3041: MatCopy_MPIAIJ,
3042: /*44*/ MatGetRowMax_MPIAIJ,
3043: MatScale_MPIAIJ,
3044: 0,
3045: 0,
3046: MatZeroRowsColumns_MPIAIJ,
3047: /*49*/ MatSetBlockSize_MPIAIJ,
3048: 0,
3049: 0,
3050: 0,
3051: 0,
3052: /*54*/ MatFDColoringCreate_MPIAIJ,
3053: 0,
3054: MatSetUnfactored_MPIAIJ,
3055: MatPermute_MPIAIJ,
3056: 0,
3057: /*59*/ MatGetSubMatrix_MPIAIJ,
3058: MatDestroy_MPIAIJ,
3059: MatView_MPIAIJ,
3060: 0,
3061: 0,
3062: /*64*/ 0,
3063: 0,
3064: 0,
3065: 0,
3066: 0,
3067: /*69*/ MatGetRowMaxAbs_MPIAIJ,
3068: MatGetRowMinAbs_MPIAIJ,
3069: 0,
3070: MatSetColoring_MPIAIJ,
3071: #if defined(PETSC_HAVE_ADIC)
3072: MatSetValuesAdic_MPIAIJ,
3073: #else
3074: 0,
3075: #endif
3076: MatSetValuesAdifor_MPIAIJ,
3077: /*75*/ MatFDColoringApply_AIJ,
3078: 0,
3079: 0,
3080: 0,
3081: 0,
3082: /*80*/ 0,
3083: 0,
3084: 0,
3085: /*83*/ MatLoad_MPIAIJ,
3086: 0,
3087: 0,
3088: 0,
3089: 0,
3090: 0,
3091: /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3092: MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3093: MatMatMultNumeric_MPIAIJ_MPIAIJ,
3094: MatPtAP_Basic,
3095: MatPtAPSymbolic_MPIAIJ,
3096: /*94*/ MatPtAPNumeric_MPIAIJ,
3097: 0,
3098: 0,
3099: 0,
3100: 0,
3101: /*99*/ 0,
3102: MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3103: MatPtAPNumeric_MPIAIJ_MPIAIJ,
3104: MatConjugate_MPIAIJ,
3105: 0,
3106: /*104*/MatSetValuesRow_MPIAIJ,
3107: MatRealPart_MPIAIJ,
3108: MatImaginaryPart_MPIAIJ,
3109: 0,
3110: 0,
3111: /*109*/0,
3112: MatGetRedundantMatrix_MPIAIJ,
3113: MatGetRowMin_MPIAIJ,
3114: 0,
3115: 0,
3116: /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3117: 0,
3118: 0,
3119: 0,
3120: 0,
3121: /*119*/0,
3122: 0,
3123: 0,
3124: 0,
3125: MatGetMultiProcBlock_MPIAIJ,
3126: /*124*/MatFindNonZeroRows_MPIAIJ,
3127: MatGetColumnNorms_MPIAIJ,
3128: MatInvertBlockDiagonal_MPIAIJ,
3129: 0,
3130: MatGetSubMatricesParallel_MPIAIJ,
3131: /*129*/0
3132: };
3134: /* ----------------------------------------------------------------------------------------*/
3136: EXTERN_C_BEGIN
3139: PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
3140: {
3141: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
3145: MatStoreValues(aij->A);
3146: MatStoreValues(aij->B);
3147: return(0);
3148: }
3149: EXTERN_C_END
3151: EXTERN_C_BEGIN
3154: PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
3155: {
3156: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
3160: MatRetrieveValues(aij->A);
3161: MatRetrieveValues(aij->B);
3162: return(0);
3163: }
3164: EXTERN_C_END
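/* A minimal sketch of the store/retrieve pair registered above: stash the numerical
   values of an assembled matrix and restore them later, which requires freezing the
   nonzero pattern first.  The function name SketchStoreRetrieve is illustrative. */
#if 0
static PetscErrorCode SketchStoreRetrieve(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr); /* pattern may not change */
  ierr = MatStoreValues(A);CHKERRQ(ierr);
  ierr = MatZeroEntries(A);CHKERRQ(ierr);     /* ... or any other modification of the values ... */
  ierr = MatRetrieveValues(A);CHKERRQ(ierr);  /* values are back to the stored state */
  PetscFunctionReturn(0);
}
#endif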
3166: EXTERN_C_BEGIN
3169: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3170: {
3171: Mat_MPIAIJ *b;
3173: PetscInt i;
3176: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3177: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3178: if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3179: if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3181: PetscLayoutSetBlockSize(B->rmap,1);
3182: PetscLayoutSetBlockSize(B->cmap,1);
3183: PetscLayoutSetUp(B->rmap);
3184: PetscLayoutSetUp(B->cmap);
3185: if (d_nnz) {
3186: for (i=0; i<B->rmap->n; i++) {
3187: if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3188: }
3189: }
3190: if (o_nnz) {
3191: for (i=0; i<B->rmap->n; i++) {
3192: if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3193: }
3194: }
3195: b = (Mat_MPIAIJ*)B->data;
3197: if (!B->preallocated) {
3198: /* Explicitly create 2 MATSEQAIJ matrices. */
3199: MatCreate(PETSC_COMM_SELF,&b->A);
3200: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
3201: MatSetType(b->A,MATSEQAIJ);
3202: PetscLogObjectParent(B,b->A);
3203: MatCreate(PETSC_COMM_SELF,&b->B);
3204: MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
3205: MatSetType(b->B,MATSEQAIJ);
3206: PetscLogObjectParent(B,b->B);
3207: }
3209: MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
3210: MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
3211: B->preallocated = PETSC_TRUE;
3212: return(0);
3213: }
3214: EXTERN_C_END
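/* A minimal sketch of creating and preallocating a parallel AIJ matrix through the
   public interface that ends up in MatMPIAIJSetPreallocation_MPIAIJ() above.  The
   function name SketchCreateMPIAIJ and the per-row bounds 5 and 2 are illustrative;
   exact per-row counts can be passed through the d_nnz/o_nnz arrays instead. */
#if 0
static PetscErrorCode SketchCreateMPIAIJ(MPI_Comm comm,PetscInt mlocal,PetscInt nlocal,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);                        /* MATMPIAIJ on more than one process */
  ierr = MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(*A,5,PETSC_NULL);CHKERRQ(ierr);   /* covers the one-process case */
  PetscFunctionReturn(0);
}
#endif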
3218: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3219: {
3220: Mat mat;
3221: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3225: *newmat = 0;
3226: MatCreate(((PetscObject)matin)->comm,&mat);
3227: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
3228: MatSetType(mat,((PetscObject)matin)->type_name);
3229: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
3230: a = (Mat_MPIAIJ*)mat->data;
3231:
3232: mat->factortype = matin->factortype;
3233: mat->rmap->bs = matin->rmap->bs;
3234: mat->assembled = PETSC_TRUE;
3235: mat->insertmode = NOT_SET_VALUES;
3236: mat->preallocated = PETSC_TRUE;
3238: a->size = oldmat->size;
3239: a->rank = oldmat->rank;
3240: a->donotstash = oldmat->donotstash;
3241: a->roworiented = oldmat->roworiented;
3242: a->rowindices = 0;
3243: a->rowvalues = 0;
3244: a->getrowactive = PETSC_FALSE;
3246: PetscLayoutReference(matin->rmap,&mat->rmap);
3247: PetscLayoutReference(matin->cmap,&mat->cmap);
3249: if (oldmat->colmap) {
3250: #if defined (PETSC_USE_CTABLE)
3251: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
3252: #else
3253: PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);
3254: PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));
3255: PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));
3256: #endif
3257: } else a->colmap = 0;
3258: if (oldmat->garray) {
3259: PetscInt len;
3260: len = oldmat->B->cmap->n;
3261: PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);
3262: PetscLogObjectMemory(mat,len*sizeof(PetscInt));
3263: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt)); }
3264: } else a->garray = 0;
3265:
3266: VecDuplicate(oldmat->lvec,&a->lvec);
3267: PetscLogObjectParent(mat,a->lvec);
3268: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
3269: PetscLogObjectParent(mat,a->Mvctx);
3270: MatDuplicate(oldmat->A,cpvalues,&a->A);
3271: PetscLogObjectParent(mat,a->A);
3272: MatDuplicate(oldmat->B,cpvalues,&a->B);
3273: PetscLogObjectParent(mat,a->B);
3274: PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
3275: *newmat = mat;
3276: return(0);
3277: }
3283: PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3284: {
3285: PetscScalar *vals,*svals;
3286: MPI_Comm comm = ((PetscObject)viewer)->comm;
3288: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
3289: PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3290: PetscInt header[4],*rowlengths = 0,M,N,m,*cols;
3291: PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3292: PetscInt cend,cstart,n,*rowners,sizesset=1;
3293: int fd;
3296: MPI_Comm_size(comm,&size);
3297: MPI_Comm_rank(comm,&rank);
3298: if (!rank) {
3299: PetscViewerBinaryGetDescriptor(viewer,&fd);
3300: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
3301: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3302: }
3304: if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3306: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
3307: M = header[1]; N = header[2];
3308: /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3309: if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3310: if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3311:
3312: /* If global sizes are set, check if they are consistent with that given in the file */
3313: if (sizesset) {
3314: MatGetSize(newMat,&grows,&gcols);
3315: }
3316: if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3317: if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3319: /* determine ownership of all rows */
3320: if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3321: else m = newMat->rmap->n; /* Set by user */
3322:
3323: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
3324: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
3326: /* First process needs enough room for process with most rows */
3327: if (!rank) {
3328: mmax = rowners[1];
3329: for (i=2; i<size; i++) {
3330: mmax = PetscMax(mmax,rowners[i]);
3331: }
3332: } else mmax = m;
3334: rowners[0] = 0;
3335: for (i=2; i<=size; i++) {
3336: rowners[i] += rowners[i-1];
3337: }
3338: rstart = rowners[rank];
3339: rend = rowners[rank+1];
3341: /* distribute row lengths to all processors */
3342: PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);
3343: if (!rank) {
3344: PetscBinaryRead(fd,ourlens,m,PETSC_INT);
3345: PetscMalloc(m*sizeof(PetscInt),&rowlengths);
3346: PetscMalloc(size*sizeof(PetscInt),&procsnz);
3347: PetscMemzero(procsnz,size*sizeof(PetscInt));
3348: for (j=0; j<m; j++) {
3349: procsnz[0] += ourlens[j];
3350: }
3351: for (i=1; i<size; i++) {
3352: PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);
3353: /* calculate the number of nonzeros on each processor */
3354: for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3355: procsnz[i] += rowlengths[j];
3356: }
3357: MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
3358: }
3359: PetscFree(rowlengths);
3360: } else {
3361: MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);
3362: }
3364: if (!rank) {
3365: /* determine max buffer needed and allocate it */
3366: maxnz = 0;
3367: for (i=0; i<size; i++) {
3368: maxnz = PetscMax(maxnz,procsnz[i]);
3369: }
3370: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
3372: /* read in my part of the matrix column indices */
3373: nz = procsnz[0];
3374: PetscMalloc(nz*sizeof(PetscInt),&mycols);
3375: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
3377: /* read in everyone else's and ship them off */
3378: for (i=1; i<size; i++) {
3379: nz = procsnz[i];
3380: PetscBinaryRead(fd,cols,nz,PETSC_INT);
3381: MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);
3382: }
3383: PetscFree(cols);
3384: } else {
3385: /* determine buffer space needed for message */
3386: nz = 0;
3387: for (i=0; i<m; i++) {
3388: nz += ourlens[i];
3389: }
3390: PetscMalloc(nz*sizeof(PetscInt),&mycols);
3392: /* receive message of column indices*/
3393: MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);
3394: }
3396: /* determine column ownership if matrix is not square */
3397: if (N != M) {
3398: if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3399: else n = newMat->cmap->n;
3400: MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);
3401: cstart = cend - n;
3402: } else {
3403: cstart = rstart;
3404: cend = rend;
3405: n = cend - cstart;
3406: }
3408: /* loop over local rows, determining number of off diagonal entries */
3409: PetscMemzero(offlens,m*sizeof(PetscInt));
3410: jj = 0;
3411: for (i=0; i<m; i++) {
3412: for (j=0; j<ourlens[i]; j++) {
3413: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3414: jj++;
3415: }
3416: }
3418: for (i=0; i<m; i++) {
3419: ourlens[i] -= offlens[i];
3420: }
3421: if (!sizesset) {
3422: MatSetSizes(newMat,m,n,M,N);
3423: }
3424: MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);
3426: for (i=0; i<m; i++) {
3427: ourlens[i] += offlens[i];
3428: }
3430: if (!rank) {
3431: PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);
3433: /* read in my part of the matrix numerical values */
3434: nz = procsnz[0];
3435: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3436:
3437: /* insert into matrix */
3438: jj = rstart;
3439: smycols = mycols;
3440: svals = vals;
3441: for (i=0; i<m; i++) {
3442: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3443: smycols += ourlens[i];
3444: svals += ourlens[i];
3445: jj++;
3446: }
3448: /* read in other processors and ship out */
3449: for (i=1; i<size; i++) {
3450: nz = procsnz[i];
3451: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3452: MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);
3453: }
3454: PetscFree(procsnz);
3455: } else {
3456: /* receive numeric values */
3457: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
3459: /* receive message of values*/
3460: MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);
3462: /* insert into matrix */
3463: jj = rstart;
3464: smycols = mycols;
3465: svals = vals;
3466: for (i=0; i<m; i++) {
3467: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3468: smycols += ourlens[i];
3469: svals += ourlens[i];
3470: jj++;
3471: }
3472: }
3473: PetscFree2(ourlens,offlens);
3474: PetscFree(vals);
3475: PetscFree(mycols);
3476: PetscFree(rowners);
3478: MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);
3479: MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);
3480: return(0);
3481: }
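/* A minimal sketch of loading a parallel AIJ matrix from a PETSc binary file via the
   MatLoad_MPIAIJ() routine above.  The function name SketchLoadMatrix and the filename
   argument are illustrative; sizes are taken from the file unless they were set on the
   matrix beforehand. */
#if 0
static PetscErrorCode SketchLoadMatrix(MPI_Comm comm,const char filename[],Mat *A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,filename,FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);
  ierr = MatLoad(*A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif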
3485: PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3486: {
3488: IS iscol_local;
3489: PetscInt csize;
3492: ISGetLocalSize(iscol,&csize);
3493: if (call == MAT_REUSE_MATRIX) {
3494: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
3495: if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3496: } else {
3497: ISAllGather(iscol,&iscol_local);
3498: }
3499: MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
3500: if (call == MAT_INITIAL_MATRIX) {
3501: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
3502: ISDestroy(&iscol_local);
3503: }
3504: return(0);
3505: }
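/* A minimal sketch of extracting a parallel submatrix through MatGetSubMatrix(): each
   process lists, in global numbering, the owned rows and columns it wants, here the
   part of the leading M/2 x N/2 principal block that it owns.  The function name
   SketchGetPrincipalSubMatrix is illustrative. */
#if 0
static PetscErrorCode SketchGetPrincipalSubMatrix(Mat A,Mat *sub)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,cstart,cend,M,N,nrowloc,ncolloc;
  IS             isrow,iscol;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
  nrowloc = PetscMax(0,PetscMin(rend,M/2)-rstart);   /* owned rows below M/2 */
  ncolloc = PetscMax(0,PetscMin(cend,N/2)-cstart);   /* owned columns below N/2 */
  ierr = ISCreateStride(((PetscObject)A)->comm,nrowloc,rstart,1,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(((PetscObject)A)->comm,ncolloc,cstart,1,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,sub);CHKERRQ(ierr);
  ierr = ISDestroy(&isrow);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif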
3509: /*
3510: Not ideal, since it makes two copies of the submatrix: first a local SeqAIJ
3511: on each process, and then the final result obtained by concatenating the local matrices.
3512: Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3514: Note: This requires a sequential iscol containing all of the requested column indices.
3515: */
3516: PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3517: {
3519: PetscMPIInt rank,size;
3520: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j;
3521: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3522: Mat *local,M,Mreuse;
3523: MatScalar *vwork,*aa;
3524: MPI_Comm comm = ((PetscObject)mat)->comm;
3525: Mat_SeqAIJ *aij;
3529: MPI_Comm_rank(comm,&rank);
3530: MPI_Comm_size(comm,&size);
3532: if (call == MAT_REUSE_MATRIX) {
3533: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
3534: if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3535: local = &Mreuse;
3536: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
3537: } else {
3538: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
3539: Mreuse = *local;
3540: PetscFree(local);
3541: }
3543: /*
3544: m - number of local rows
3545: n - number of columns (same on all processors)
3546: rstart - first row in new global matrix generated
3547: */
3548: MatGetSize(Mreuse,&m,&n);
3549: if (call == MAT_INITIAL_MATRIX) {
3550: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3551: ii = aij->i;
3552: jj = aij->j;
3554: /*
3555: Determine the number of non-zeros in the diagonal and off-diagonal
3556: portions of the matrix in order to do correct preallocation
3557: */
3559: /* first get start and end of "diagonal" columns */
3560: if (csize == PETSC_DECIDE) {
3561: ISGetSize(isrow,&mglobal);
3562: if (mglobal == n) { /* square matrix */
3563: nlocal = m;
3564: } else {
3565: nlocal = n/size + ((n % size) > rank);
3566: }
3567: } else {
3568: nlocal = csize;
3569: }
3570: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
3571: rstart = rend - nlocal;
3572: if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3574: /* next, compute all the lengths */
3575: PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);
3576: olens = dlens + m;
3577: for (i=0; i<m; i++) {
3578: jend = ii[i+1] - ii[i];
3579: olen = 0;
3580: dlen = 0;
3581: for (j=0; j<jend; j++) {
3582: if (*jj < rstart || *jj >= rend) olen++;
3583: else dlen++;
3584: jj++;
3585: }
3586: olens[i] = olen;
3587: dlens[i] = dlen;
3588: }
3589: MatCreate(comm,&M);
3590: MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);
3591: MatSetType(M,((PetscObject)mat)->type_name);
3592: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
3593: PetscFree(dlens);
3594: } else {
3595: PetscInt ml,nl;
3597: M = *newmat;
3598: MatGetLocalSize(M,&ml,&nl);
3599: if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3600: MatZeroEntries(M);
3601: /*
3602: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3603: rather than the slower MatSetValues().
3604: */
3605: M->was_assembled = PETSC_TRUE;
3606: M->assembled = PETSC_FALSE;
3607: }
3608: MatGetOwnershipRange(M,&rstart,&rend);
3609: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3610: ii = aij->i;
3611: jj = aij->j;
3612: aa = aij->a;
3613: for (i=0; i<m; i++) {
3614: row = rstart + i;
3615: nz = ii[i+1] - ii[i];
3616: cwork = jj; jj += nz;
3617: vwork = aa; aa += nz;
3618: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
3619: }
3621: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
3622: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
3623: *newmat = M;
3625: /* save submatrix used in processor for next request */
3626: if (call == MAT_INITIAL_MATRIX) {
3627: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
3628: MatDestroy(&Mreuse);
3629: }
3631: return(0);
3632: }
3634: EXTERN_C_BEGIN
3637: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3638: {
3639: PetscInt m,cstart, cend,j,nnz,i,d;
3640: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3641: const PetscInt *JJ;
3642: PetscScalar *values;
3646:   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3648: PetscLayoutSetBlockSize(B->rmap,1);
3649: PetscLayoutSetBlockSize(B->cmap,1);
3650: PetscLayoutSetUp(B->rmap);
3651: PetscLayoutSetUp(B->cmap);
3652: m = B->rmap->n;
3653: cstart = B->cmap->rstart;
3654: cend = B->cmap->rend;
3655: rstart = B->rmap->rstart;
3657: PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);
3659: #if defined(PETSC_USE_DEBUG)
3660: for (i=0; i<m; i++) {
3661: nnz = Ii[i+1]- Ii[i];
3662: JJ = J + Ii[i];
3663:     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3664:     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3665:     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3666: }
3667: #endif
3669: for (i=0; i<m; i++) {
3670: nnz = Ii[i+1]- Ii[i];
3671: JJ = J + Ii[i];
3672: nnz_max = PetscMax(nnz_max,nnz);
3673: d = 0;
3674: for (j=0; j<nnz; j++) {
3675: if (cstart <= JJ[j] && JJ[j] < cend) d++;
3676: }
3677: d_nnz[i] = d;
3678: o_nnz[i] = nnz - d;
3679: }
3680: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
3681: PetscFree2(d_nnz,o_nnz);
3683: if (v) values = (PetscScalar*)v;
3684: else {
3685: PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);
3686: PetscMemzero(values,nnz_max*sizeof(PetscScalar));
3687: }
3689: for (i=0; i<m; i++) {
3690: ii = i + rstart;
3691: nnz = Ii[i+1]- Ii[i];
3692: MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);
3693: }
3694: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3695: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3697: if (!v) {
3698: PetscFree(values);
3699: }
3700: return(0);
3701: }
3702: EXTERN_C_END
3706: /*@
3707: MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3708: (the default parallel PETSc format).
3710: Collective on MPI_Comm
3712: Input Parameters:
3713: + B - the matrix
3714: . i - the indices into j for the start of each local row (starts with zero)
3715: . j - the column indices for each local row (starts with zero)
3716: - v - optional values in the matrix
3718: Level: developer
3720: Notes:
3721:        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3722:      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3723: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3725: The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3727:        The format used for the sparse matrix input is equivalent to a
3728:     row-major ordering, i.e. for the following matrix, the input data expected is
3729:     as shown:
3731: 1 0 0
3732: 2 0 3 P0
3733: -------
3734: 4 5 6 P1
3736: Process0 [P0]: rows_owned=[0,1]
3737: i = {0,1,3} [size = nrow+1 = 2+1]
3738:         j = {0,0,2}  [size = nz = 3]
3739:         v = {1,2,3}  [size = nz = 3]
3741: Process1 [P1]: rows_owned=[2]
3742: i = {0,3} [size = nrow+1 = 1+1]
3743:         j = {0,1,2}  [size = nz = 3]
3744:         v = {4,5,6}  [size = nz = 3]
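   A minimal usage sketch for the layout above, as seen from process 0 (error
   checking and program setup omitted; variable names are illustrative only):
.vb
   Mat         B;
   PetscInt    i[] = {0,1,3};           /* local CSR row pointers, 0-based  */
   PetscInt    j[] = {0,0,2};           /* global column indices, 0-based   */
   PetscScalar v[] = {1.0,2.0,3.0};

   MatCreate(PETSC_COMM_WORLD,&B);
   MatSetSizes(B,2,PETSC_DECIDE,3,3);   /* 2 local rows on process 0        */
   MatSetType(B,MATMPIAIJ);
   MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve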
3746: .keywords: matrix, aij, compressed row, sparse, parallel
3748: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3749: MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3750: @*/
3751: PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3752: {
3756: PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3757: return(0);
3758: }
3762: /*@C
3763: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3764: (the default parallel PETSc format). For good matrix assembly performance
3765: the user should preallocate the matrix storage by setting the parameters
3766: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3767: performance can be increased by more than a factor of 50.
3769: Collective on MPI_Comm
3771: Input Parameters:
3772: + A - the matrix
3773: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3774: (same value is used for all local rows)
3775: . d_nnz - array containing the number of nonzeros in the various rows of the
3776: DIAGONAL portion of the local submatrix (possibly different for each row)
3777: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3778: The size of this array is equal to the number of local rows, i.e 'm'.
3779: You must leave room for the diagonal entry even if it is zero.
3780: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3781: submatrix (same value is used for all local rows).
3782: - o_nnz - array containing the number of nonzeros in the various rows of the
3783: OFF-DIAGONAL portion of the local submatrix (possibly different for
3784: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3785: structure. The size of this array is equal to the number
3786: of local rows, i.e 'm'.
3788: If the *_nnz parameter is given then the *_nz parameter is ignored
3790: The AIJ format (also called the Yale sparse matrix format or
3791: compressed row storage (CSR)), is fully compatible with standard Fortran 77
3792: storage. The stored row and column indices begin with zero.
3793: See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3795: The parallel matrix is partitioned such that the first m0 rows belong to
3796: process 0, the next m1 rows belong to process 1, the next m2 rows belong
3797: to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3799: The DIAGONAL portion of the local submatrix of a processor can be defined
3800:    as the submatrix which is obtained by extracting the part corresponding to
3801:    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3802:    first row that belongs to the processor, r2 is the last row belonging to
3803:    this processor, and c1-c2 is the range of indices of the local part of a
3804:    vector suitable for applying the matrix to. This is an m x n matrix. In the
3805:    common case of a square matrix, the row and column ranges are the same and
3806:    the DIAGONAL part is also square. The remaining portion of the local
3807:    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3809: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3811: You can call MatGetInfo() to get information on how effective the preallocation was;
3812:    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3813: You can also run with the option -info and look for messages with the string
3814: malloc in them to see if additional memory allocation was needed.
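   For example, the preallocation can be checked with a sketch along these lines
   (error checking omitted):
.vb
   MatInfo info;
   MatGetInfo(A,MAT_LOCAL,&info);
   PetscPrintf(PETSC_COMM_SELF,"mallocs %g, nz_allocated %g, nz_used %g, nz_unneeded %g\n",
               info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);
.ve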
3816: Example usage:
3817:
3818: Consider the following 8x8 matrix with 34 non-zero values, that is
3819:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3820:    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3821: as follows:
3823: .vb
3824: 1 2 0 | 0 3 0 | 0 4
3825: Proc0 0 5 6 | 7 0 0 | 8 0
3826: 9 0 10 | 11 0 0 | 12 0
3827: -------------------------------------
3828: 13 0 14 | 15 16 17 | 0 0
3829: Proc1 0 18 0 | 19 20 21 | 0 0
3830: 0 0 0 | 22 23 0 | 24 0
3831: -------------------------------------
3832: Proc2 25 26 27 | 0 0 28 | 29 0
3833: 30 0 0 | 31 32 33 | 0 34
3834: .ve
3836: This can be represented as a collection of submatrices as:
3838: .vb
3839: A B C
3840: D E F
3841: G H I
3842: .ve
3844: Where the submatrices A,B,C are owned by proc0, D,E,F are
3845: owned by proc1, G,H,I are owned by proc2.
3847: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3848: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3849: The 'M','N' parameters are 8,8, and have the same values on all procs.
3851: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3852: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3853: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3854:    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3855:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3856:    matrix, and [DF] as another SeqAIJ matrix.
3858: When d_nz, o_nz parameters are specified, d_nz storage elements are
3859: allocated for every row of the local diagonal submatrix, and o_nz
3860: storage locations are allocated for every row of the OFF-DIAGONAL submat.
3861:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3862:    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3863: In this case, the values of d_nz,o_nz are:
3864: .vb
3865: proc0 : dnz = 2, o_nz = 2
3866: proc1 : dnz = 3, o_nz = 2
3867: proc2 : dnz = 1, o_nz = 4
3868: .ve
3869: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3870:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3871:    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3872: 34 values.
3874: When d_nnz, o_nnz parameters are specified, the storage is specified
3875:    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3876: In the above case the values for d_nnz,o_nnz are:
3877: .vb
3878: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3879: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3880: proc2: d_nnz = [1,1] and o_nnz = [4,4]
3881: .ve
3882:    Here the space allocated is the sum of all the above values, i.e. 34, and
3883:    hence the pre-allocation is perfect.
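   On process 0 of this example, the corresponding call would look roughly like
   the following sketch (error checking omitted):
.vb
   PetscInt d_nnz[] = {2,2,2};
   PetscInt o_nnz[] = {2,2,2};
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve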
3885: Level: intermediate
3887: .keywords: matrix, aij, compressed row, sparse, parallel
3889: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3890: MPIAIJ, MatGetInfo()
3891: @*/
3892: PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3893: {
3897: PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
3898: return(0);
3899: }
3903: /*@
3904:    MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
3905:    rows in standard CSR format.
3907: Collective on MPI_Comm
3909: Input Parameters:
3910: + comm - MPI communicator
3911: . m - number of local rows (Cannot be PETSC_DECIDE)
3912: . n - This value should be the same as the local size used in creating the
3913: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3914: calculated if N is given) For square matrices n is almost always m.
3915: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3916: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3917: . i - row indices
3918: . j - column indices
3919: - a - matrix values
3921: Output Parameter:
3922: . mat - the matrix
3924: Level: intermediate
3926: Notes:
3927: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3928: thus you CANNOT change the matrix entries by changing the values of a[] after you have
3929: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3931: The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3933:        The format used for the sparse matrix input is equivalent to a
3934:     row-major ordering, i.e. for the following matrix, the input data expected is
3935:     as shown:
3937: 1 0 0
3938: 2 0 3 P0
3939: -------
3940: 4 5 6 P1
3942: Process0 [P0]: rows_owned=[0,1]
3943: i = {0,1,3} [size = nrow+1 = 2+1]
3944:         j = {0,0,2}  [size = nz = 3]
3945:         v = {1,2,3}  [size = nz = 3]
3947: Process1 [P1]: rows_owned=[2]
3948: i = {0,3} [size = nrow+1 = 1+1]
3949:         j = {0,1,2}  [size = nz = 3]
3950:         v = {4,5,6}  [size = nz = 3]
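   A minimal sketch of the corresponding call, as seen from process 0 (error
   checking omitted; variable names are illustrative only):
.vb
   Mat         mat;
   PetscInt    i[] = {0,1,3};
   PetscInt    j[] = {0,0,2};
   PetscScalar v[] = {1.0,2.0,3.0};

   MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&mat);
.ve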
3952: .keywords: matrix, aij, compressed row, sparse, parallel
3954: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3955: MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3956: @*/
3957: PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3958: {
3962: if (i[0]) {
3963: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3964: }
3965: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3966: MatCreate(comm,mat);
3967: MatSetSizes(*mat,m,n,M,N);
3968: MatSetType(*mat,MATMPIAIJ);
3969: MatMPIAIJSetPreallocationCSR(*mat,i,j,a);
3970: return(0);
3971: }
3975: /*@C
3976: MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3977: (the default parallel PETSc format). For good matrix assembly performance
3978: the user should preallocate the matrix storage by setting the parameters
3979: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3980: performance can be increased by more than a factor of 50.
3982: Collective on MPI_Comm
3984: Input Parameters:
3985: + comm - MPI communicator
3986: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3987: This value should be the same as the local size used in creating the
3988: y vector for the matrix-vector product y = Ax.
3989: . n - This value should be the same as the local size used in creating the
3990: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3991: calculated if N is given) For square matrices n is almost always m.
3992: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3993: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3994: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3995: (same value is used for all local rows)
3996: . d_nnz - array containing the number of nonzeros in the various rows of the
3997: DIAGONAL portion of the local submatrix (possibly different for each row)
3998: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3999: The size of this array is equal to the number of local rows, i.e 'm'.
4000: You must leave room for the diagonal entry even if it is zero.
4001: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4002: submatrix (same value is used for all local rows).
4003: - o_nnz - array containing the number of nonzeros in the various rows of the
4004: OFF-DIAGONAL portion of the local submatrix (possibly different for
4005: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4006: structure. The size of this array is equal to the number
4007: of local rows, i.e 'm'.
4009: Output Parameter:
4010: . A - the matrix
4012: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4013:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4014:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4016: Notes:
4017: If the *_nnz parameter is given then the *_nz parameter is ignored
4019: m,n,M,N parameters specify the size of the matrix, and its partitioning across
4020: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4021: storage requirements for this matrix.
4023: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4024:    processor then it must be used on all processors that share the object for
4025: that argument.
4027: The user MUST specify either the local or global matrix dimensions
4028: (possibly both).
4030: The parallel matrix is partitioned across processors such that the
4031: first m0 rows belong to process 0, the next m1 rows belong to
4032:    process 1, the next m2 rows belong to process 2, etc., where
4033:    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4034:    values corresponding to an [m x N] submatrix.
4036: The columns are logically partitioned with the n0 columns belonging
4037:    to the 0th partition, the next n1 columns belonging to the next
4038:    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4040: The DIAGONAL portion of the local submatrix on any given processor
4041: is the submatrix corresponding to the rows and columns m,n
4042:    owned by the given processor, i.e. the diagonal matrix on
4043:    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4044:    etc. The remaining portion of the local submatrix [m x (N-n)]
4045:    constitutes the OFF-DIAGONAL portion. The example below better
4046: illustrates this concept.
4048: For a square global matrix we define each processor's diagonal portion
4049: to be its local rows and the corresponding columns (a square submatrix);
4050: each processor's off-diagonal portion encompasses the remainder of the
4051: local matrix (a rectangular submatrix).
4053: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4055: When calling this routine with a single process communicator, a matrix of
4056: type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4057: type of communicator, use the construction mechanism:
4058: MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4059:
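   Spelled out, that construction sequence is (a sketch; error checking omitted):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetType(A,MATMPIAIJ);
   MatSetSizes(A,m,n,M,N);
   MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);
.ve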
4060: By default, this format uses inodes (identical nodes) when possible.
4061: We search for consecutive rows with the same nonzero structure, thereby
4062: reusing matrix information to achieve increased efficiency.
4064: Options Database Keys:
4065: + -mat_no_inode - Do not use inodes
4066: . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4067: - -mat_aij_oneindex - Internally use indexing starting at 1
4068: rather than 0. Note that when calling MatSetValues(),
4069: the user still MUST index entries starting at 0!
4072: Example usage:
4073:
4074: Consider the following 8x8 matrix with 34 non-zero values, that is
4075:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4076:    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4077: as follows:
4079: .vb
4080: 1 2 0 | 0 3 0 | 0 4
4081: Proc0 0 5 6 | 7 0 0 | 8 0
4082: 9 0 10 | 11 0 0 | 12 0
4083: -------------------------------------
4084: 13 0 14 | 15 16 17 | 0 0
4085: Proc1 0 18 0 | 19 20 21 | 0 0
4086: 0 0 0 | 22 23 0 | 24 0
4087: -------------------------------------
4088: Proc2 25 26 27 | 0 0 28 | 29 0
4089: 30 0 0 | 31 32 33 | 0 34
4090: .ve
4092: This can be represented as a collection of submatrices as:
4094: .vb
4095: A B C
4096: D E F
4097: G H I
4098: .ve
4100: Where the submatrices A,B,C are owned by proc0, D,E,F are
4101: owned by proc1, G,H,I are owned by proc2.
4103: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4104: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4105: The 'M','N' parameters are 8,8, and have the same values on all procs.
4107: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4108: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4109: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4110:    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4111:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4112:    matrix, and [DF] as another SeqAIJ matrix.
4114: When d_nz, o_nz parameters are specified, d_nz storage elements are
4115: allocated for every row of the local diagonal submatrix, and o_nz
4116: storage locations are allocated for every row of the OFF-DIAGONAL submat.
4117:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4118:    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4119: In this case, the values of d_nz,o_nz are:
4120: .vb
4121: proc0 : dnz = 2, o_nz = 2
4122: proc1 : dnz = 3, o_nz = 2
4123: proc2 : dnz = 1, o_nz = 4
4124: .ve
4125: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4126:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4127:    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4128: 34 values.
4130: When d_nnz, o_nnz parameters are specified, the storage is specified
4131:    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4132: In the above case the values for d_nnz,o_nnz are:
4133: .vb
4134: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4135: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4136: proc2: d_nnz = [1,1] and o_nnz = [4,4]
4137: .ve
4138:    Here the space allocated is the sum of all the above values, i.e. 34, and
4139:    hence the pre-allocation is perfect.
4141: Level: intermediate
4143: .keywords: matrix, aij, compressed row, sparse, parallel
4145: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4146: MPIAIJ, MatCreateMPIAIJWithArrays()
4147: @*/
4148: PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4149: {
4151: PetscMPIInt size;
4154: MatCreate(comm,A);
4155: MatSetSizes(*A,m,n,M,N);
4156: MPI_Comm_size(comm,&size);
4157: if (size > 1) {
4158: MatSetType(*A,MATMPIAIJ);
4159: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
4160: } else {
4161: MatSetType(*A,MATSEQAIJ);
4162: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
4163: }
4164: return(0);
4165: }
4169: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4170: {
4171: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4174: *Ad = a->A;
4175: *Ao = a->B;
4176: *colmap = a->garray;
4177: return(0);
4178: }
4182: PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4183: {
4185: PetscInt i;
4186: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4189: if (coloring->ctype == IS_COLORING_GLOBAL) {
4190: ISColoringValue *allcolors,*colors;
4191: ISColoring ocoloring;
4193: /* set coloring for diagonal portion */
4194: MatSetColoring_SeqAIJ(a->A,coloring);
4196: /* set coloring for off-diagonal portion */
4197: ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
4198: PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);
4199: for (i=0; i<a->B->cmap->n; i++) {
4200: colors[i] = allcolors[a->garray[i]];
4201: }
4202: PetscFree(allcolors);
4203: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4204: MatSetColoring_SeqAIJ(a->B,ocoloring);
4205: ISColoringDestroy(&ocoloring);
4206: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4207: ISColoringValue *colors;
4208: PetscInt *larray;
4209: ISColoring ocoloring;
4211: /* set coloring for diagonal portion */
4212: PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);
4213: for (i=0; i<a->A->cmap->n; i++) {
4214: larray[i] = i + A->cmap->rstart;
4215: }
4216: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);
4217: PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);
4218: for (i=0; i<a->A->cmap->n; i++) {
4219: colors[i] = coloring->colors[larray[i]];
4220: }
4221: PetscFree(larray);
4222: ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);
4223: MatSetColoring_SeqAIJ(a->A,ocoloring);
4224: ISColoringDestroy(&ocoloring);
4226: /* set coloring for off-diagonal portion */
4227: PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);
4228: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);
4229: PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);
4230: for (i=0; i<a->B->cmap->n; i++) {
4231: colors[i] = coloring->colors[larray[i]];
4232: }
4233: PetscFree(larray);
4234: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4235: MatSetColoring_SeqAIJ(a->B,ocoloring);
4236: ISColoringDestroy(&ocoloring);
4237: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4239: return(0);
4240: }
4242: #if defined(PETSC_HAVE_ADIC)
4245: PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4246: {
4247: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4251: MatSetValuesAdic_SeqAIJ(a->A,advalues);
4252: MatSetValuesAdic_SeqAIJ(a->B,advalues);
4253: return(0);
4254: }
4255: #endif
4259: PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4260: {
4261: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4265: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
4266: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
4267: return(0);
4268: }
4272: /*@
4273:       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4274: matrices from each processor
4276: Collective on MPI_Comm
4278: Input Parameters:
4279: +    comm - the communicator the parallel matrix will live on
4280: .    inmat - the input sequential matrix (one per process)
4281: . n - number of local columns (or PETSC_DECIDE)
4282: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4284: Output Parameter:
4285: . outmat - the parallel matrix generated
4287: Level: advanced
4289: Notes: The number of columns of the matrix in EACH processor MUST be the same.
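    A typical call is sketched below, where inmat is the local sequential matrix
    on each process (names are illustrative; error checking omitted):
.vb
    Mat outmat;
    MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
.ve
    Note that MatMerge() destroys inmat as part of creating the parallel matrix.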
4291: @*/
4292: PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4293: {
4295: PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz;
4296: PetscInt *indx;
4297: PetscScalar *values;
4300: MatGetSize(inmat,&m,&N);
4301: if (scall == MAT_INITIAL_MATRIX){
4302: /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4303: if (n == PETSC_DECIDE){
4304: PetscSplitOwnership(comm,&n,&N);
4305: }
4306: MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);
4307: rstart -= m;
4309: MatPreallocateInitialize(comm,m,n,dnz,onz);
4310: for (i=0;i<m;i++) {
4311: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
4312: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
4313: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
4314: }
4315: /* This routine will ONLY return MPIAIJ type matrix */
4316: MatCreate(comm,outmat);
4317: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4318: MatSetType(*outmat,MATMPIAIJ);
4319: MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
4320: MatPreallocateFinalize(dnz,onz);
4321:
4322: } else if (scall == MAT_REUSE_MATRIX){
4323: MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);
4324: } else {
4325: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4326: }
4328: for (i=0;i<m;i++) {
4329: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4330: Ii = i + rstart;
4331: MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
4332: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4333: }
4334: MatDestroy(&inmat);
4335: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
4336: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
4338: return(0);
4339: }
4343: PetscErrorCode MatFileSplit(Mat A,char *outfile)
4344: {
4345: PetscErrorCode ierr;
4346: PetscMPIInt rank;
4347: PetscInt m,N,i,rstart,nnz;
4348: size_t len;
4349: const PetscInt *indx;
4350: PetscViewer out;
4351: char *name;
4352: Mat B;
4353: const PetscScalar *values;
4356: MatGetLocalSize(A,&m,0);
4357: MatGetSize(A,0,&N);
4358: /* Should this be the type of the diagonal block of A? */
4359: MatCreate(PETSC_COMM_SELF,&B);
4360: MatSetSizes(B,m,N,m,N);
4361: MatSetType(B,MATSEQAIJ);
4362: MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
4363: MatGetOwnershipRange(A,&rstart,0);
4364: for (i=0;i<m;i++) {
4365: MatGetRow(A,i+rstart,&nnz,&indx,&values);
4366: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
4367: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
4368: }
4369: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
4370: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
4372: MPI_Comm_rank(((PetscObject)A)->comm,&rank);
4373: PetscStrlen(outfile,&len);
4374: PetscMalloc((len+5)*sizeof(char),&name);
4375: sprintf(name,"%s.%d",outfile,rank);
4376: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);
4377: PetscFree(name);
4378: MatView(B,out);
4379: PetscViewerDestroy(&out);
4380: MatDestroy(&B);
4381: return(0);
4382: }
4384: extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4387: PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4388: {
4389: PetscErrorCode ierr;
4390: Mat_Merge_SeqsToMPI *merge;
4391: PetscContainer container;
4394: PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);
4395: if (container) {
4396: PetscContainerGetPointer(container,(void **)&merge);
4397: PetscFree(merge->id_r);
4398: PetscFree(merge->len_s);
4399: PetscFree(merge->len_r);
4400: PetscFree(merge->bi);
4401: PetscFree(merge->bj);
4402: PetscFree(merge->buf_ri[0]);
4403: PetscFree(merge->buf_ri);
4404: PetscFree(merge->buf_rj[0]);
4405: PetscFree(merge->buf_rj);
4406: PetscFree(merge->coi);
4407: PetscFree(merge->coj);
4408: PetscFree(merge->owners_co);
4409: PetscLayoutDestroy(&merge->rowmap);
4410: PetscFree(merge);
4411: PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);
4412: }
4413: MatDestroy_MPIAIJ(A);
4414: return(0);
4415: }
4417: #include <../src/mat/utils/freespace.h>
4418: #include <petscbt.h>
4422: /*@C
4423:       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4424: matrices from each processor
4426: Collective on MPI_Comm
4428: Input Parameters:
4429: +    comm - the communicator the parallel matrix will live on
4430: .    seqmat - the input sequential matrix (one per process)
4431: . m - number of local rows (or PETSC_DECIDE)
4432: . n - number of local columns (or PETSC_DECIDE)
4433: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4435: Output Parameter:
4436: . mpimat - the parallel matrix generated
4438: Level: advanced
4440: Notes:
4441: The dimensions of the sequential matrix in each processor MUST be the same.
4442:      The input seqmat is stored inside the container "Mat_Merge_SeqsToMPI", and will be
4443:      destroyed when mpimat is destroyed. Use PetscObjectQuery() to access seqmat.
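     A typical calling sequence is sketched below (names are illustrative; error
     checking omitted):
.vb
     Mat mpimat;
     MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);
     /* ... update the numerical values of seqmat, keeping its nonzero pattern ... */
     MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);
.ve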
4444: @*/
4445: PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4446: {
4447: PetscErrorCode ierr;
4448: MPI_Comm comm=((PetscObject)mpimat)->comm;
4449: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4450: PetscMPIInt size,rank,taga,*len_s;
4451: PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4452: PetscInt proc,m;
4453: PetscInt **buf_ri,**buf_rj;
4454: PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4455: PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
4456: MPI_Request *s_waits,*r_waits;
4457: MPI_Status *status;
4458: MatScalar *aa=a->a;
4459: MatScalar **abuf_r,*ba_i;
4460: Mat_Merge_SeqsToMPI *merge;
4461: PetscContainer container;
4464: PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);
4466: MPI_Comm_size(comm,&size);
4467: MPI_Comm_rank(comm,&rank);
4469: PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);
4470: PetscContainerGetPointer(container,(void **)&merge);
4472: bi = merge->bi;
4473: bj = merge->bj;
4474: buf_ri = merge->buf_ri;
4475: buf_rj = merge->buf_rj;
4477: PetscMalloc(size*sizeof(MPI_Status),&status);
4478: owners = merge->rowmap->range;
4479: len_s = merge->len_s;
4481: /* send and recv matrix values */
4482: /*-----------------------------*/
4483: PetscObjectGetNewTag((PetscObject)mpimat,&taga);
4484: PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);
4486: PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);
4487: for (proc=0,k=0; proc<size; proc++){
4488: if (!len_s[proc]) continue;
4489: i = owners[proc];
4490: MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
4491: k++;
4492: }
4494: if (merge->nrecv) {MPI_Waitall(merge->nrecv,r_waits,status);}
4495: if (merge->nsend) {MPI_Waitall(merge->nsend,s_waits,status);}
4496: PetscFree(status);
4498: PetscFree(s_waits);
4499: PetscFree(r_waits);
4501: /* insert mat values of mpimat */
4502: /*----------------------------*/
4503: PetscMalloc(N*sizeof(PetscScalar),&ba_i);
4504: PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);
4506: for (k=0; k<merge->nrecv; k++){
4507: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4508: nrows = *(buf_ri_k[k]);
4509: nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */
4510:     nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */
4511: }
4513: /* set values of ba */
4514: m = merge->rowmap->n;
4515: for (i=0; i<m; i++) {
4516: arow = owners[rank] + i;
4517: bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
4518: bnzi = bi[i+1] - bi[i];
4519: PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));
4521: /* add local non-zero vals of this proc's seqmat into ba */
4522: anzi = ai[arow+1] - ai[arow];
4523: aj = a->j + ai[arow];
4524: aa = a->a + ai[arow];
4525: nextaj = 0;
4526: for (j=0; nextaj<anzi; j++){
4527: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4528: ba_i[j] += aa[nextaj++];
4529: }
4530: }
4532: /* add received vals into ba */
4533: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4534: /* i-th row */
4535: if (i == *nextrow[k]) {
4536: anzi = *(nextai[k]+1) - *nextai[k];
4537: aj = buf_rj[k] + *(nextai[k]);
4538: aa = abuf_r[k] + *(nextai[k]);
4539: nextaj = 0;
4540: for (j=0; nextaj<anzi; j++){
4541: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4542: ba_i[j] += aa[nextaj++];
4543: }
4544: }
4545: nextrow[k]++; nextai[k]++;
4546: }
4547: }
4548: MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
4549: }
4550: MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
4551: MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);
4553: PetscFree(abuf_r[0]);
4554: PetscFree(abuf_r);
4555: PetscFree(ba_i);
4556: PetscFree3(buf_ri_k,nextrow,nextai);
4557: PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);
4558: return(0);
4559: }
4561: extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4565: PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4566: {
4567: PetscErrorCode ierr;
4568: Mat B_mpi;
4569: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4570: PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4571: PetscInt **buf_rj,**buf_ri,**buf_ri_k;
4572: PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4573: PetscInt len,proc,*dnz,*onz;
4574: PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4575: PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4576: MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
4577: MPI_Status *status;
4578: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
4579: PetscBT lnkbt;
4580: Mat_Merge_SeqsToMPI *merge;
4581: PetscContainer container;
4584: PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);
4586: /* make sure it is a PETSc comm */
4587: PetscCommDuplicate(comm,&comm,PETSC_NULL);
4588: MPI_Comm_size(comm,&size);
4589: MPI_Comm_rank(comm,&rank);
4591: PetscNew(Mat_Merge_SeqsToMPI,&merge);
4592: PetscMalloc(size*sizeof(MPI_Status),&status);
4594: /* determine row ownership */
4595: /*---------------------------------------------------------*/
4596: PetscLayoutCreate(comm,&merge->rowmap);
4597: PetscLayoutSetLocalSize(merge->rowmap,m);
4598: PetscLayoutSetSize(merge->rowmap,M);
4599: PetscLayoutSetBlockSize(merge->rowmap,1);
4600: PetscLayoutSetUp(merge->rowmap);
4601: PetscMalloc(size*sizeof(PetscMPIInt),&len_si);
4602: PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);
4604: m = merge->rowmap->n;
4605: M = merge->rowmap->N;
4606: owners = merge->rowmap->range;
4608: /* determine the number of messages to send, their lengths */
4609: /*---------------------------------------------------------*/
4610: len_s = merge->len_s;
4612: len = 0; /* length of buf_si[] */
4613: merge->nsend = 0;
4614: for (proc=0; proc<size; proc++){
4615: len_si[proc] = 0;
4616: if (proc == rank){
4617: len_s[proc] = 0;
4618: } else {
4619: len_si[proc] = owners[proc+1] - owners[proc] + 1;
4620: len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4621: }
4622: if (len_s[proc]) {
4623: merge->nsend++;
4624: nrows = 0;
4625: for (i=owners[proc]; i<owners[proc+1]; i++){
4626: if (ai[i+1] > ai[i]) nrows++;
4627: }
4628: len_si[proc] = 2*(nrows+1);
4629: len += len_si[proc];
4630: }
4631: }
4633: /* determine the number and length of messages to receive for ij-structure */
4634: /*-------------------------------------------------------------------------*/
4635: PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);
4636: PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);
4638: /* post the Irecv of j-structure */
4639: /*-------------------------------*/
4640: PetscCommGetNewTag(comm,&tagj);
4641: PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);
4643: /* post the Isend of j-structure */
4644: /*--------------------------------*/
4645: PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);
4647: for (proc=0, k=0; proc<size; proc++){
4648: if (!len_s[proc]) continue;
4649: i = owners[proc];
4650: MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
4651: k++;
4652: }
4654: /* receives and sends of j-structure are complete */
4655: /*------------------------------------------------*/
4656: if (merge->nrecv) {MPI_Waitall(merge->nrecv,rj_waits,status);}
4657: if (merge->nsend) {MPI_Waitall(merge->nsend,sj_waits,status);}
4659: /* send and recv i-structure */
4660: /*---------------------------*/
4661: PetscCommGetNewTag(comm,&tagi);
4662: PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);
4664: PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);
4665: buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4666: for (proc=0,k=0; proc<size; proc++){
4667: if (!len_s[proc]) continue;
4668: /* form outgoing message for i-structure:
4669: buf_si[0]: nrows to be sent
4670:          [1:nrows]: local row index (relative to the receiving process's first row)
4671: [nrows+1:2*nrows+1]: i-structure index
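         (illustrative example, not part of the original comment: two nonempty
          local rows 3 and 7 with 4 and 2 nonzeros would give
          buf_si = {2, 3, 7, 0, 4, 6}, of length len_si = 2*(nrows+1) = 6)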
4672: */
4673: /*-------------------------------------------*/
4674: nrows = len_si[proc]/2 - 1;
4675: buf_si_i = buf_si + nrows+1;
4676: buf_si[0] = nrows;
4677: buf_si_i[0] = 0;
4678: nrows = 0;
4679: for (i=owners[proc]; i<owners[proc+1]; i++){
4680: anzi = ai[i+1] - ai[i];
4681: if (anzi) {
4682: buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4683: buf_si[nrows+1] = i-owners[proc]; /* local row index */
4684: nrows++;
4685: }
4686: }
4687: MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
4688: k++;
4689: buf_si += len_si[proc];
4690: }
4692: if (merge->nrecv) {MPI_Waitall(merge->nrecv,ri_waits,status);}
4693: if (merge->nsend) {MPI_Waitall(merge->nsend,si_waits,status);}
4695: PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);
4696: for (i=0; i<merge->nrecv; i++){
4697: PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);
4698: }
4700: PetscFree(len_si);
4701: PetscFree(len_ri);
4702: PetscFree(rj_waits);
4703: PetscFree2(si_waits,sj_waits);
4704: PetscFree(ri_waits);
4705: PetscFree(buf_s);
4706: PetscFree(status);
4708: /* compute a local seq matrix in each processor */
4709: /*----------------------------------------------*/
4710: /* allocate bi array and free space for accumulating nonzero column info */
4711: PetscMalloc((m+1)*sizeof(PetscInt),&bi);
4712: bi[0] = 0;
4714: /* create and initialize a linked list */
4715: nlnk = N+1;
4716: PetscLLCreate(N,N,nlnk,lnk,lnkbt);
4718: /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4719: len = 0;
4720: len = ai[owners[rank+1]] - ai[owners[rank]];
4721: PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);
4722: current_space = free_space;
4724: /* determine symbolic info for each local row */
4725: PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);
4727: for (k=0; k<merge->nrecv; k++){
4728: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4729: nrows = *buf_ri_k[k];
4730: nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4731:     nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */
4732: }
4734: MatPreallocateInitialize(comm,m,n,dnz,onz);
4735: len = 0;
4736: for (i=0;i<m;i++) {
4737: bnzi = 0;
4738: /* add local non-zero cols of this proc's seqmat into lnk */
4739: arow = owners[rank] + i;
4740: anzi = ai[arow+1] - ai[arow];
4741: aj = a->j + ai[arow];
4742: PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);
4743: bnzi += nlnk;
4744: /* add received col data into lnk */
4745: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4746: if (i == *nextrow[k]) { /* i-th row */
4747: anzi = *(nextai[k]+1) - *nextai[k];
4748: aj = buf_rj[k] + *nextai[k];
4749: PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);
4750: bnzi += nlnk;
4751: nextrow[k]++; nextai[k]++;
4752: }
4753: }
4754: if (len < bnzi) len = bnzi; /* =max(bnzi) */
4756: /* if free space is not available, make more free space */
4757: if (current_space->local_remaining<bnzi) {
4758:       PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);
4759: nspacedouble++;
4760: }
4761: /* copy data into free space, then initialize lnk */
4762: PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
4763: MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);
4765: current_space->array += bnzi;
4766: current_space->local_used += bnzi;
4767: current_space->local_remaining -= bnzi;
4769: bi[i+1] = bi[i] + bnzi;
4770: }
4772: PetscFree3(buf_ri_k,nextrow,nextai);
4774: PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);
4775: PetscFreeSpaceContiguous(&free_space,bj);
4776: PetscLLDestroy(lnk,lnkbt);
4778: /* create symbolic parallel matrix B_mpi */
4779: /*---------------------------------------*/
4780: MatCreate(comm,&B_mpi);
4781: if (n==PETSC_DECIDE) {
4782: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);
4783: } else {
4784: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4785: }
4786: MatSetType(B_mpi,MATMPIAIJ);
4787: MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
4788: MatPreallocateFinalize(dnz,onz);
4790: /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4791: B_mpi->assembled = PETSC_FALSE;
4792: B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4793: merge->bi = bi;
4794: merge->bj = bj;
4795: merge->buf_ri = buf_ri;
4796: merge->buf_rj = buf_rj;
4797: merge->coi = PETSC_NULL;
4798: merge->coj = PETSC_NULL;
4799: merge->owners_co = PETSC_NULL;
4801: PetscCommDestroy(&comm);
4803: /* attach the supporting struct to B_mpi for reuse */
4804: PetscContainerCreate(PETSC_COMM_SELF,&container);
4805: PetscContainerSetPointer(container,merge);
4806: PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
4807: PetscContainerDestroy(&container);
4808: *mpimat = B_mpi;
4810: PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);
4811: return(0);
4812: }
4816: PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4817: {
4818: PetscErrorCode ierr;
4821: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4822: if (scall == MAT_INITIAL_MATRIX){
4823: MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);
4824: }
4825: MatMerge_SeqsToMPINumeric(seqmat,*mpimat);
4826: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4827: return(0);
4828: }
4832: /*@
4833:     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4834:           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4835:           with MatGetSize()
4837: Not Collective
4839: Input Parameters:
4840: + A - the matrix
4841: -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4843: Output Parameter:
4844: . A_loc - the local sequential matrix generated
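    Notes:
    A minimal usage sketch (error checking omitted):
.vb
    Mat A_loc;
    MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
    /* ... use A_loc; after the values of A change, refresh the copy ... */
    MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
    MatDestroy(&A_loc);
.ve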
4846: Level: developer
4848: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4850: @*/
4851: PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4852: {
4853: PetscErrorCode ierr;
4854: Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
4855: Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4856: PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4857: MatScalar *aa=a->a,*ba=b->a,*cam;
4858: PetscScalar *ca;
4859: PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4860: PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
4861: PetscBool match;
4864: PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);
4865: if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4866: PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);
4867: if (scall == MAT_INITIAL_MATRIX){
4868: PetscMalloc((1+am)*sizeof(PetscInt),&ci);
4869: ci[0] = 0;
4870: for (i=0; i<am; i++){
4871: ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4872: }
4873: PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);
4874: PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);
4875: k = 0;
4876: for (i=0; i<am; i++) {
4877: ncols_o = bi[i+1] - bi[i];
4878: ncols_d = ai[i+1] - ai[i];
4879: /* off-diagonal portion of A */
4880: for (jo=0; jo<ncols_o; jo++) {
4881: col = cmap[*bj];
4882: if (col >= cstart) break;
4883: cj[k] = col; bj++;
4884: ca[k++] = *ba++;
4885: }
4886: /* diagonal portion of A */
4887: for (j=0; j<ncols_d; j++) {
4888: cj[k] = cstart + *aj++;
4889: ca[k++] = *aa++;
4890: }
4891: /* off-diagonal portion of A */
4892: for (j=jo; j<ncols_o; j++) {
4893: cj[k] = cmap[*bj++];
4894: ca[k++] = *ba++;
4895: }
4896: }
4897: /* put together the new matrix */
4898: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);
4899: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4900: /* Since these are PETSc arrays, change flags to free them as necessary. */
4901: mat = (Mat_SeqAIJ*)(*A_loc)->data;
4902: mat->free_a = PETSC_TRUE;
4903: mat->free_ij = PETSC_TRUE;
4904: mat->nonew = 0;
4905: } else if (scall == MAT_REUSE_MATRIX){
4906: mat=(Mat_SeqAIJ*)(*A_loc)->data;
4907: ci = mat->i; cj = mat->j; cam = mat->a;
4908: for (i=0; i<am; i++) {
4909: /* off-diagonal portion of A */
4910: ncols_o = bi[i+1] - bi[i];
4911: for (jo=0; jo<ncols_o; jo++) {
4912: col = cmap[*bj];
4913: if (col >= cstart) break;
4914: *cam++ = *ba++; bj++;
4915: }
4916: /* diagonal portion of A */
4917: ncols_d = ai[i+1] - ai[i];
4918: for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4919: /* off-diagonal portion of A */
4920: for (j=jo; j<ncols_o; j++) {
4921: *cam++ = *ba++; bj++;
4922: }
4923: }
4924: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4925: PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);
4926: return(0);
4927: }
4931: /*@C
4932: MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4934: Not Collective
4936: Input Parameters:
4937: + A - the matrix
4938: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4939: - row, col - index sets of rows and columns to extract (or PETSC_NULL)
4941: Output Parameter:
4942: . A_loc - the local sequential matrix generated
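    Notes:
    A minimal usage sketch, letting the routine pick the rows and nonzero columns
    itself (error checking omitted):
.vb
    Mat A_loc;
    MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve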
4944: Level: developer
4946: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4948: @*/
4949: PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4950: {
4951: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4952: PetscErrorCode ierr;
4953: PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4954: IS isrowa,iscola;
4955: Mat *aloc;
4956: PetscBool match;
4959: PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);
4960: if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4961: PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);
4962: if (!row){
4963: start = A->rmap->rstart; end = A->rmap->rend;
4964: ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
4965: } else {
4966: isrowa = *row;
4967: }
4968: if (!col){
4969: start = A->cmap->rstart;
4970: cmap = a->garray;
4971: nzA = a->A->cmap->n;
4972: nzB = a->B->cmap->n;
4973: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
4974: ncols = 0;
4975: for (i=0; i<nzB; i++) {
4976: if (cmap[i] < start) idx[ncols++] = cmap[i];
4977: else break;
4978: }
4979: imark = i;
4980: for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4981: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4982: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);
4983: } else {
4984: iscola = *col;
4985: }
4986: if (scall != MAT_INITIAL_MATRIX){
4987: PetscMalloc(sizeof(Mat),&aloc);
4988: aloc[0] = *A_loc;
4989: }
4990: MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);
4991: *A_loc = aloc[0];
4992: PetscFree(aloc);
4993: if (!row){
4994: ISDestroy(&isrowa);
4995: }
4996: if (!col){
4997: ISDestroy(&iscola);
4998: }
4999: PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);
5000: return(0);
5001: }
5005: /*@C
5006:     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5008: Collective on Mat
5010: Input Parameters:
5011: + A,B - the matrices in mpiaij format
5012: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5013: - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5015: Output Parameter:
5016: + rowb, colb - index sets of rows and columns of B to extract
5017: .    brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's local rows
5018: - B_seq - the sequential matrix generated
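    Notes:
    A sketch of the intended calling pattern (names are illustrative; error
    checking omitted):
.vb
    IS       rowb = PETSC_NULL,colb = PETSC_NULL;
    PetscInt brstart;
    Mat      B_seq;
    MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);
    /* ... later, with the same nonzero pattern, reuse the index sets ... */
    MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);
.ve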
5020: Level: developer
5022: @*/
5023: PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
5024: {
5025: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5026: PetscErrorCode ierr;
5027: PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5028: IS isrowb,iscolb;
5029: Mat *bseq;
5030:
5032: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5033: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5034: }
5035: PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);
5036:
5037: if (scall == MAT_INITIAL_MATRIX){
5038: start = A->cmap->rstart;
5039: cmap = a->garray;
5040: nzA = a->A->cmap->n;
5041: nzB = a->B->cmap->n;
5042: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
5043: ncols = 0;
5044: for (i=0; i<nzB; i++) { /* row < local row index */
5045: if (cmap[i] < start) idx[ncols++] = cmap[i];
5046: else break;
5047: }
5048: imark = i;
5049: for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
5050: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5051: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);
5052: *brstart = imark;
5053: ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);
5054: } else {
5055: if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5056: isrowb = *rowb; iscolb = *colb;
5057: PetscMalloc(sizeof(Mat),&bseq);
5058: bseq[0] = *B_seq;
5059: }
5060: MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);
5061: *B_seq = bseq[0];
5062: PetscFree(bseq);
5063: if (!rowb){
5064: ISDestroy(&isrowb);
5065: } else {
5066: *rowb = isrowb;
5067: }
5068: if (!colb){
5069: ISDestroy(&iscolb);
5070: } else {
5071: *colb = iscolb;
5072: }
5073: PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);
5074: return(0);
5075: }
5079: /*@C
5080:     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5081:     of the OFF-DIAGONAL portion of the local A
5083: Collective on Mat
5085: Input Parameters:
5086: + A,B - the matrices in mpiaij format
5087: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5088: . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5089: . startsj_r - similar to startsj for receives
5090: - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5092: Output Parameter:
5093: .    B_oth - the sequential matrix generated
5095: Level: developer
5097: @*/
5098: PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5099: {
5100: VecScatter_MPI_General *gen_to,*gen_from;
5101: PetscErrorCode ierr;
5102: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5103: Mat_SeqAIJ *b_oth;
5104: VecScatter ctx=a->Mvctx;
5105: MPI_Comm comm=((PetscObject)ctx)->comm;
5106: PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5107: PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5108: PetscScalar *rvalues,*svalues;
5109: MatScalar *b_otha,*bufa,*bufA;
5110: PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5111: MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5112: MPI_Status *sstatus,rstatus;
5113: PetscMPIInt jj;
5114: PetscInt *cols,sbs,rbs;
5115: PetscScalar *vals;
5118: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5119:     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5120: }
5121: PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);
5122: MPI_Comm_rank(comm,&rank);
5124: gen_to = (VecScatter_MPI_General*)ctx->todata;
5125: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5126:   rvalues = gen_from->values; /* holds the lengths of the rows to be received */
5127:   svalues = gen_to->values;   /* holds the lengths of the rows to be sent */
5128: nrecvs = gen_from->n;
5129: nsends = gen_to->n;
5131: PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);
5132: srow = gen_to->indices; /* local row index to be sent */
5133: sstarts = gen_to->starts;
5134: sprocs = gen_to->procs;
5135: sstatus = gen_to->sstatus;
5136: sbs = gen_to->bs;
5137: rstarts = gen_from->starts;
5138: rprocs = gen_from->procs;
5139: rbs = gen_from->bs;
5141: if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5142: if (scall == MAT_INITIAL_MATRIX){
5143: /* i-array */
5144: /*---------*/
5145: /* post receives */
5146: for (i=0; i<nrecvs; i++){
5147: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5148: nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5149: MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5150: }
5152: /* pack the outgoing message */
5153: PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);
5154: sstartsj[0] = 0; rstartsj[0] = 0;
5155: len = 0; /* total length of j or a array to be sent */
5156: k = 0;
5157: for (i=0; i<nsends; i++){
5158: rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5159: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5160: for (j=0; j<nrows; j++) {
5161: row = srow[k] + B->rmap->range[rank]; /* global row idx */
5162: for (l=0; l<sbs; l++){
5163: MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL); /* rowlength */
5164: rowlen[j*sbs+l] = ncols;
5165: len += ncols;
5166: MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);
5167: }
5168: k++;
5169: }
5170: MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);
5171: sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5172: }
5173: /* recvs and sends of i-array are completed */
5174: i = nrecvs;
5175: while (i--) {
5176: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5177: }
5178: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5180: /* allocate buffers for sending j and a arrays */
5181: PetscMalloc((len+1)*sizeof(PetscInt),&bufj);
5182: PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);
5184: /* create i-array of B_oth */
5185: PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);
5186: b_othi[0] = 0;
5187: len = 0; /* total length of j or a array to be received */
5188: k = 0;
5189: for (i=0; i<nrecvs; i++){
5190: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5191:       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5192: for (j=0; j<nrows; j++) {
5193: b_othi[k+1] = b_othi[k] + rowlen[j];
5194: len += rowlen[j]; k++;
5195: }
5196: rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5197: }
5199:     /* allocate space for j and a arrays of B_oth */
5200: PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);
5201: PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);
5203: /* j-array */
5204: /*---------*/
5205: /* post receives of j-array */
5206: for (i=0; i<nrecvs; i++){
5207: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5208: MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5209: }
5211: /* pack the outgoing message j-array */
5212: k = 0;
5213: for (i=0; i<nsends; i++){
5214: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5215: bufJ = bufj+sstartsj[i];
5216: for (j=0; j<nrows; j++) {
5217: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5218: for (ll=0; ll<sbs; ll++){
5219: MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);
5220: for (l=0; l<ncols; l++){
5221: *bufJ++ = cols[l];
5222: }
5223: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);
5224: }
5225: }
5226: MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);
5227: }
5229: /* recvs and sends of j-array are completed */
5230: i = nrecvs;
5231: while (i--) {
5232: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5233: }
5234: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5235: } else if (scall == MAT_REUSE_MATRIX){
5236: sstartsj = *startsj;
5237: rstartsj = *startsj_r;
5238: bufa = *bufa_ptr;
5239: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5240: b_otha = b_oth->a;
5241: } else {
5242:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse value: must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5243: }
5245: /* a-array */
5246: /*---------*/
5247: /* post receives of a-array */
5248: for (i=0; i<nrecvs; i++){
5249: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5250: MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
5251: }
5253: /* pack the outgoing message a-array */
5254: k = 0;
5255: for (i=0; i<nsends; i++){
5256: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5257: bufA = bufa+sstartsj[i];
5258: for (j=0; j<nrows; j++) {
5259: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5260: for (ll=0; ll<sbs; ll++){
5261: MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);
5262: for (l=0; l<ncols; l++){
5263: *bufA++ = vals[l];
5264: }
5265: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);
5266: }
5267: }
5268: MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
5269: }
5270: /* recvs and sends of a-array are completed */
5271: i = nrecvs;
5272: while (i--) {
5273: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5274: }
5275: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5276: PetscFree2(rwaits,swaits);
5278: if (scall == MAT_INITIAL_MATRIX){
5279: /* put together the new matrix */
5280: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);
5282: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5283: /* Since these are PETSc arrays, change flags to free them as necessary. */
5284: b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
5285: b_oth->free_a = PETSC_TRUE;
5286: b_oth->free_ij = PETSC_TRUE;
5287: b_oth->nonew = 0;
5289: PetscFree(bufj);
5290: if (!startsj || !bufa_ptr){
5291: PetscFree2(sstartsj,rstartsj);
5292:       PetscFree(bufa); /* the caller is not saving the send buffer for reuse */
5293: } else {
5294: *startsj = sstartsj;
5295: *startsj_r = rstartsj;
5296: *bufa_ptr = bufa;
5297: }
5298: }
5299: PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);
5300: return(0);
5301: }
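/* A hypothetical usage sketch (not part of the library source): build B_oth once, then
   refresh only its numerical values through the saved message buffers when B changes
   numerically but not structurally. */
PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
MatScalar *bufa    = PETSC_NULL;
Mat        B_oth   = PETSC_NULL;

MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);
/* B's values change; its nonzero pattern and parallel layout stay fixed */
MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);
PetscFree2(startsj,startsj_r);   /* allocated together with PetscMalloc2() in the routine above */
PetscFree(bufa);
MatDestroy(&B_oth);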
5305: /*@C
5306: MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5308: Not Collective
5310:    Input Parameter:
5311: . A - The matrix in mpiaij format
5313:    Output Parameters:
5314: + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5315: . colmap - A map from global column index to local index into lvec
5316: - multScatter - A scatter from the argument of a matrix-vector product to lvec
5318: Level: developer
5320: @*/
5321: #if defined (PETSC_USE_CTABLE)
5322: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5323: #else
5324: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5325: #endif
5326: {
5327: Mat_MPIAIJ *a;
5334: a = (Mat_MPIAIJ *) A->data;
5335: if (lvec) *lvec = a->lvec;
5336: if (colmap) *colmap = a->colmap;
5337: if (multScatter) *multScatter = a->Mvctx;
5338: return(0);
5339: }
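/* Illustrative sketch (an assumption for exposition, not part of this file): the returned
   objects are the matrix's own internal data, so the caller must not destroy them. */
Vec        lvec;
VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
PetscTable colmap;
#else
PetscInt  *colmap;
#endif
MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
/* lvec receives the off-process entries of x during MatMult(A,x,y), via the scatter Mvctx */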
5341: EXTERN_C_BEGIN
5342: extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5343: extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5344: extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5345: EXTERN_C_END
5349: /*
5350:     Computes C = A*B as (B'*A')', since computing the MPIDense*MPIAIJ product directly is untenable
5352:            n           p              p
5353:          (   )       (   )          (   )
5354:       m  ( A )  *  n ( B )   =   m  ( C )
5355:          (   )       (   )          (   )
5357: */
5358: PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5359: {
5360: PetscErrorCode ierr;
5361: Mat At,Bt,Ct;
5364: MatTranspose(A,MAT_INITIAL_MATRIX,&At);
5365: MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);
5366: MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);
5367: MatDestroy(&At);
5368: MatDestroy(&Bt);
5369: MatTranspose(Ct,MAT_REUSE_MATRIX,&C);
5370: MatDestroy(&Ct);
5371: return(0);
5372: }
5376: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5377: {
5379: PetscInt m=A->rmap->n,n=B->cmap->n;
5380: Mat Cmat;
5383:   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5384: MatCreate(((PetscObject)A)->comm,&Cmat);
5385: MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
5386: MatSetType(Cmat,MATMPIDENSE);
5387: MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);
5388: MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);
5389: MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);
5390: *C = Cmat;
5391: return(0);
5392: }
5394: /* ----------------------------------------------------------------*/
5397: PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5398: {
5402: if (scall == MAT_INITIAL_MATRIX){
5403: MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);
5404: }
5405: MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);
5406: return(0);
5407: }
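/* Non-authoritative usage sketch: with A of type MATMPIDENSE and B of type MATMPIAIJ,
   MatMatMult() reaches the symbolic/numeric routines above through the composed
   "MatMatMult_mpidense_mpiaij_C" function (see MatCreate_MPIAIJ() below). */
Mat C;
MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);
/* reuse C after the values of A or B change but the sizes do not */
MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);
MatDestroy(&C);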
5409: EXTERN_C_BEGIN
5410: #if defined(PETSC_HAVE_MUMPS)
5411: extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5412: #endif
5413: #if defined(PETSC_HAVE_PASTIX)
5414: extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5415: #endif
5416: #if defined(PETSC_HAVE_SUPERLU_DIST)
5417: extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5418: #endif
5419: #if defined(PETSC_HAVE_SPOOLES)
5420: extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5421: #endif
5422: EXTERN_C_END
5424: /*MC
5425: MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5427: Options Database Keys:
5428: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5430: Level: beginner
5432: .seealso: MatCreateMPIAIJ()
5433: M*/
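/* Illustrative sketch (for exposition only): the usual way to obtain a MATMPIAIJ matrix,
   with rough per-row preallocation estimates (5 nonzeros in the diagonal block, 2 in the
   off-diagonal block); M and N stand for assumed global sizes. */
Mat A;
MatCreate(PETSC_COMM_WORLD,&A);
MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
MatSetType(A,MATMPIAIJ);                        /* or -mat_type mpiaij with MatSetFromOptions() */
MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
/* then MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual */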
5435: EXTERN_C_BEGIN
5438: PetscErrorCode MatCreate_MPIAIJ(Mat B)
5439: {
5440: Mat_MPIAIJ *b;
5442: PetscMPIInt size;
5445: MPI_Comm_size(((PetscObject)B)->comm,&size);
5447: PetscNewLog(B,Mat_MPIAIJ,&b);
5448: B->data = (void*)b;
5449: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
5450: B->rmap->bs = 1;
5451: B->assembled = PETSC_FALSE;
5453: B->insertmode = NOT_SET_VALUES;
5454: b->size = size;
5455: MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);
5457: /* build cache for off array entries formed */
5458: MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);
5459: b->donotstash = PETSC_FALSE;
5460: b->colmap = 0;
5461: b->garray = 0;
5462: b->roworiented = PETSC_TRUE;
5464: /* stuff used for matrix vector multiply */
5465: b->lvec = PETSC_NULL;
5466: b->Mvctx = PETSC_NULL;
5468: /* stuff for MatGetRow() */
5469: b->rowindices = 0;
5470: b->rowvalues = 0;
5471: b->getrowactive = PETSC_FALSE;
5473: #if defined(PETSC_HAVE_SPOOLES)
5474: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5475: "MatGetFactor_mpiaij_spooles",
5476: MatGetFactor_mpiaij_spooles);
5477: #endif
5478: #if defined(PETSC_HAVE_MUMPS)
5479: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5480: "MatGetFactor_aij_mumps",
5481: MatGetFactor_aij_mumps);
5482: #endif
5483: #if defined(PETSC_HAVE_PASTIX)
5484: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5485: "MatGetFactor_mpiaij_pastix",
5486: MatGetFactor_mpiaij_pastix);
5487: #endif
5488: #if defined(PETSC_HAVE_SUPERLU_DIST)
5489: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5490: "MatGetFactor_mpiaij_superlu_dist",
5491: MatGetFactor_mpiaij_superlu_dist);
5492: #endif
5493: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5494: "MatStoreValues_MPIAIJ",
5495: MatStoreValues_MPIAIJ);
5496: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5497: "MatRetrieveValues_MPIAIJ",
5498: MatRetrieveValues_MPIAIJ);
5499: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5500: "MatGetDiagonalBlock_MPIAIJ",
5501: MatGetDiagonalBlock_MPIAIJ);
5502: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5503: "MatIsTranspose_MPIAIJ",
5504: MatIsTranspose_MPIAIJ);
5505: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5506: "MatMPIAIJSetPreallocation_MPIAIJ",
5507: MatMPIAIJSetPreallocation_MPIAIJ);
5508: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5509: "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5510: MatMPIAIJSetPreallocationCSR_MPIAIJ);
5511: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5512: "MatDiagonalScaleLocal_MPIAIJ",
5513: MatDiagonalScaleLocal_MPIAIJ);
5514: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5515: "MatConvert_MPIAIJ_MPIAIJPERM",
5516: MatConvert_MPIAIJ_MPIAIJPERM);
5517: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5518: "MatConvert_MPIAIJ_MPIAIJCRL",
5519: MatConvert_MPIAIJ_MPIAIJCRL);
5520: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5521: "MatConvert_MPIAIJ_MPISBAIJ",
5522: MatConvert_MPIAIJ_MPISBAIJ);
5523: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5524: "MatMatMult_MPIDense_MPIAIJ",
5525: MatMatMult_MPIDense_MPIAIJ);
5526: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5527: "MatMatMultSymbolic_MPIDense_MPIAIJ",
5528: MatMatMultSymbolic_MPIDense_MPIAIJ);
5529: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5530: "MatMatMultNumeric_MPIDense_MPIAIJ",
5531: MatMatMultNumeric_MPIDense_MPIAIJ);
5532: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);
5533: return(0);
5534: }
5535: EXTERN_C_END
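/* Illustrative sketch (an assumption for exposition): the PetscObjectComposeFunctionDynamic()
   calls above are what let generic entry points dispatch on MATMPIAIJ. For example, converting
   an assembled MPIAIJ matrix whose nonzero pattern is symmetric reaches
   MatConvert_MPIAIJ_MPISBAIJ() through the composed "MatConvert_mpiaij_mpisbaij_C" string: */
Mat S;
MatConvert(A,MATMPISBAIJ,MAT_INITIAL_MATRIX,&S);
MatDestroy(&S);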
5539: /*@
5540:    MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5541:          and "off-diagonal" parts of the matrix in CSR format.
5543: Collective on MPI_Comm
5545: Input Parameters:
5546: + comm - MPI communicator
5547: .  m - number of local rows (cannot be PETSC_DECIDE)
5548: .  n - this value should be the same as the local size used in creating the
5549:        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5550:        calculated if N is given); for square matrices n is almost always m
5551: .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5552: .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5553: . i - row indices for "diagonal" portion of matrix
5554: .  j - column indices for "diagonal" portion of matrix
5555: .  a - matrix values for "diagonal" portion of matrix
5556: .  oi - row indices for "off-diagonal" portion of matrix
5557: .  oj - column indices for "off-diagonal" portion of matrix (global column indices)
5558: -  oa - matrix values for "off-diagonal" portion of matrix
5560: Output Parameter:
5561: . mat - the matrix
5563: Level: advanced
5565: Notes:
5566:     The i, j, a, oi, oj, and oa arrays are NOT copied by this routine into the internal format used by PETSc; the user
5567:     must free these arrays once the matrix has been destroyed, and not before.
5569:     The i, j, oi, and oj indices are 0 based
5570:
5571: See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5573: This sets local rows and cannot be used to set off-processor values.
5575: You cannot later use MatSetValues() to change values in this matrix.
5577: .keywords: matrix, aij, compressed row, sparse, parallel
5579: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5580: MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5581: @*/
5582: PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5583: PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5584: {
5586: Mat_MPIAIJ *maij;
5589: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5590: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5591: if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5592: MatCreate(comm,mat);
5593: MatSetSizes(*mat,m,n,M,N);
5594: MatSetType(*mat,MATMPIAIJ);
5595: maij = (Mat_MPIAIJ*) (*mat)->data;
5596: maij->donotstash = PETSC_TRUE;
5597: (*mat)->preallocated = PETSC_TRUE;
5599: PetscLayoutSetBlockSize((*mat)->rmap,1);
5600: PetscLayoutSetBlockSize((*mat)->cmap,1);
5601: PetscLayoutSetUp((*mat)->rmap);
5602: PetscLayoutSetUp((*mat)->cmap);
5604: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);
5605: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);
5607: MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);
5608: MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);
5609: MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);
5610: MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);
5612: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
5613: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
5614: return(0);
5615: }
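/* Hypothetical usage sketch (not part of this file): each process wraps its already-split
   CSR data without a copy; i/j/a describe the "diagonal" block (local column indices
   0..n-1) and oi/oj/oa the "off-diagonal" block with global column indices. The arrays
   below stand for application-owned data and must outlive the matrix. */
Mat mat;
MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                               i,j,a,oi,oj,oa,&mat);
/* use mat; MatSetValues() may not be used to change its entries */
MatDestroy(&mat);
/* only now may the application free i, j, a, oi, oj, oa */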
5617: /*
5618: Special version for direct calls from Fortran
5619: */
5620: #include <private/fortranimpl.h>
5622: #if defined(PETSC_HAVE_FORTRAN_CAPS)
5623: #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5624: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5625: #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5626: #endif
5628: /* Redefine these macros so they can be used in a void function */
5629: #undef CHKERRQ
5630: #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5631: #undef SETERRQ2
5632: #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5633: #undef SETERRQ
5634: #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5636: EXTERN_C_BEGIN
5639: void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5640: {
5641: Mat mat = *mmat;
5642: PetscInt m = *mm, n = *mn;
5643: InsertMode addv = *maddv;
5644: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
5645: PetscScalar value;
5646: PetscErrorCode ierr;
5648: MatPreallocated(mat);
5649: if (mat->insertmode == NOT_SET_VALUES) {
5650: mat->insertmode = addv;
5651: }
5652: #if defined(PETSC_USE_DEBUG)
5653: else if (mat->insertmode != addv) {
5654: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5655: }
5656: #endif
5657: {
5658: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5659: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5660: PetscBool roworiented = aij->roworiented;
5662:     /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros below */
5663: Mat A = aij->A;
5664: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
5665: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5666: MatScalar *aa = a->a;
5667: PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5668: Mat B = aij->B;
5669: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
5670: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5671: MatScalar *ba = b->a;
5673: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5674: PetscInt nonew = a->nonew;
5675: MatScalar *ap1,*ap2;
5678: for (i=0; i<m; i++) {
5679: if (im[i] < 0) continue;
5680: #if defined(PETSC_USE_DEBUG)
5681: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5682: #endif
5683: if (im[i] >= rstart && im[i] < rend) {
5684: row = im[i] - rstart;
5685: lastcol1 = -1;
5686: rp1 = aj + ai[row];
5687: ap1 = aa + ai[row];
5688: rmax1 = aimax[row];
5689: nrow1 = ailen[row];
5690: low1 = 0;
5691: high1 = nrow1;
5692: lastcol2 = -1;
5693: rp2 = bj + bi[row];
5694: ap2 = ba + bi[row];
5695: rmax2 = bimax[row];
5696: nrow2 = bilen[row];
5697: low2 = 0;
5698: high2 = nrow2;
5700: for (j=0; j<n; j++) {
5701: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5702: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5703: if (in[j] >= cstart && in[j] < cend){
5704: col = in[j] - cstart;
5705: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5706: } else if (in[j] < 0) continue;
5707: #if defined(PETSC_USE_DEBUG)
5708: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5709: #endif
5710: else {
5711: if (mat->was_assembled) {
5712: if (!aij->colmap) {
5713: CreateColmap_MPIAIJ_Private(mat);
5714: }
5715: #if defined (PETSC_USE_CTABLE)
5716: PetscTableFind(aij->colmap,in[j]+1,&col);
5717: col--;
5718: #else
5719: col = aij->colmap[in[j]] - 1;
5720: #endif
5721: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5722: DisAssemble_MPIAIJ(mat);
5723: col = in[j];
5724: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5725: B = aij->B;
5726: b = (Mat_SeqAIJ*)B->data;
5727: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5728: rp2 = bj + bi[row];
5729: ap2 = ba + bi[row];
5730: rmax2 = bimax[row];
5731: nrow2 = bilen[row];
5732: low2 = 0;
5733: high2 = nrow2;
5734: bm = aij->B->rmap->n;
5735: ba = b->a;
5736: }
5737: } else col = in[j];
5738: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5739: }
5740: }
5741: } else {
5742: if (!aij->donotstash) {
5743: if (roworiented) {
5744: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5745: } else {
5746: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5747: }
5748: }
5749: }
5750: }}
5751: PetscFunctionReturnVoid();
5752: }
5753: EXTERN_C_END