Actual source code: mpiaij.c
petsc-3.3-p7 2013-05-11
2: #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
3: #include <petscblaslapack.h>
5: /*MC
6: MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
8: This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9: and MATMPIAIJ otherwise. As a result, for single process communicators,
10: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11: for communicators controlling multiple processes. It is recommended that you call both of
12: the above preallocation routines for simplicity.
14: Options Database Keys:
15: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
17: Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
18: enough exist.
20: Level: beginner
22: .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23: M*/
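/*
   Example (a minimal sketch, not part of the PETSc sources): creating an AIJ matrix and
   calling both preallocation routines, as recommended above. The communicator comm, the
   local sizes m and n, and the per-row nonzero estimates (5 diagonal, 2 off-diagonal)
   are illustrative assumptions only.

      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(A,MATAIJ);
      MatSeqAIJSetPreallocation(A,5,PETSC_NULL);              (takes effect when comm has one process)
      MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL); (takes effect when comm has several processes)
*/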
25: /*MC
26: MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
28: This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29: and MATMPIAIJCRL otherwise. As a result, for single process communicators,
30: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31: for communicators controlling multiple processes. It is recommended that you call both of
32: the above preallocation routines for simplicity.
34: Options Database Keys:
35: . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
37: Level: beginner
39: .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40: M*/
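/*
   Example (a hedged sketch, not taken from the PETSc sources): selecting the "aijcrl"
   format at run time via the options database key documented above; comm, M, and N are
   placeholders.

      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      MatSetFromOptions(A);                        (run with -mat_type aijcrl)
      MatSeqAIJSetPreallocation(A,5,PETSC_NULL);
      MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
*/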
44: PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
45: {
46: PetscErrorCode ierr;
47: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
48: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data;
49: Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data;
50: const PetscInt *ia,*ib;
51: const MatScalar *aa,*bb;
52: PetscInt na,nb,i,j,*rows,cnt=0,n0rows;
53: PetscInt m = M->rmap->n,rstart = M->rmap->rstart;
56: *keptrows = 0;
57: ia = a->i;
58: ib = b->i;
59: for (i=0; i<m; i++) {
60: na = ia[i+1] - ia[i];
61: nb = ib[i+1] - ib[i];
62: if (!na && !nb) {
63: cnt++;
64: goto ok1;
65: }
66: aa = a->a + ia[i];
67: for (j=0; j<na; j++) {
68: if (aa[j] != 0.0) goto ok1;
69: }
70: bb = b->a + ib[i];
71: for (j=0; j <nb; j++) {
72: if (bb[j] != 0.0) goto ok1;
73: }
74: cnt++;
75: ok1:;
76: }
77: MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);
78: if (!n0rows) return(0);
79: PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);
80: cnt = 0;
81: for (i=0; i<m; i++) {
82: na = ia[i+1] - ia[i];
83: nb = ib[i+1] - ib[i];
84: if (!na && !nb) continue;
85: aa = a->a + ia[i];
86: for(j=0; j<na;j++) {
87: if (aa[j] != 0.0) {
88: rows[cnt++] = rstart + i;
89: goto ok2;
90: }
91: }
92: bb = b->a + ib[i];
93: for (j=0; j<nb; j++) {
94: if (bb[j] != 0.0) {
95: rows[cnt++] = rstart + i;
96: goto ok2;
97: }
98: }
99: ok2:;
100: }
101: ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);
102: return(0);
103: }
107: PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
108: {
109: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data;
111: PetscInt i,rstart,nrows,*rows;
114: *zrows = PETSC_NULL;
115: MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);
116: MatGetOwnershipRange(M,&rstart,PETSC_NULL);
117: for (i=0; i<nrows; i++) rows[i] += rstart;
118: ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);
119: return(0);
120: }
124: PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
125: {
127: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
128: PetscInt i,n,*garray = aij->garray;
129: Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data;
130: Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data;
131: PetscReal *work;
134: MatGetSize(A,PETSC_NULL,&n);
135: PetscMalloc(n*sizeof(PetscReal),&work);
136: PetscMemzero(work,n*sizeof(PetscReal));
137: if (type == NORM_2) {
138: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
139: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
140: }
141: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
142: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
143: }
144: } else if (type == NORM_1) {
145: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
146: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
147: }
148: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
149: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
150: }
151: } else if (type == NORM_INFINITY) {
152: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
153: work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
154: }
155: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
156: work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
157: }
159: } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
160: if (type == NORM_INFINITY) {
161: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
162: } else {
163: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
164: }
165: PetscFree(work);
166: if (type == NORM_2) {
167: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
168: }
169: return(0);
170: }
174: /*
175: Distributes a SeqAIJ matrix across a set of processes. Code stolen from
176: MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178: Only for square matrices
179: */
180: PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
181: {
182: PetscMPIInt rank,size;
183: PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
185: Mat mat;
186: Mat_SeqAIJ *gmata;
187: PetscMPIInt tag;
188: MPI_Status status;
189: PetscBool aij;
190: MatScalar *gmataa,*ao,*ad,*gmataarestore=0;
193: CHKMEMQ;
194: MPI_Comm_rank(comm,&rank);
195: MPI_Comm_size(comm,&size);
196: if (!rank) {
197: PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);
198: if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
199: }
200: if (reuse == MAT_INITIAL_MATRIX) {
201: MatCreate(comm,&mat);
202: MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
203: if (!rank) {
204: bses[0] = gmat->rmap->bs;
205: bses[1] = gmat->cmap->bs;
206: }
207: MPI_Bcast(bses,2,MPIU_INT,0,comm);
208: MatSetBlockSizes(mat,bses[0],bses[1]);
209: MatSetType(mat,MATAIJ);
210: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
211: PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);
212: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
213: rowners[0] = 0;
214: for (i=2; i<=size; i++) {
215: rowners[i] += rowners[i-1];
216: }
217: rstart = rowners[rank];
218: rend = rowners[rank+1];
219: PetscObjectGetNewTag((PetscObject)mat,&tag);
220: if (!rank) {
221: gmata = (Mat_SeqAIJ*) gmat->data;
222: /* send row lengths to all processors */
223: for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
224: for (i=1; i<size; i++) {
225: MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
226: }
227: /* determine the number of diagonal and off-diagonal entries in each row */
228: PetscMemzero(olens,m*sizeof(PetscInt));
229: PetscMalloc(m*sizeof(PetscInt),&ld);
230: PetscMemzero(ld,m*sizeof(PetscInt));
231: jj = 0;
232: for (i=0; i<m; i++) {
233: for (j=0; j<dlens[i]; j++) {
234: if (gmata->j[jj] < rstart) ld[i]++;
235: if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236: jj++;
237: }
238: }
239: /* send column indices to other processes */
240: for (i=1; i<size; i++) {
241: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242: MPI_Send(&nz,1,MPIU_INT,i,tag,comm);
243: MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);
244: }
246: /* send numerical values to other processes */
247: for (i=1; i<size; i++) {
248: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249: MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
250: }
251: gmataa = gmata->a;
252: gmataj = gmata->j;
254: } else {
255: /* receive row lengths */
256: MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);
257: /* receive column indices */
258: MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);
259: PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);
260: MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);
261: /* determine the number of diagonal and off-diagonal entries in each row */
262: PetscMemzero(olens,m*sizeof(PetscInt));
263: PetscMalloc(m*sizeof(PetscInt),&ld);
264: PetscMemzero(ld,m*sizeof(PetscInt));
265: jj = 0;
266: for (i=0; i<m; i++) {
267: for (j=0; j<dlens[i]; j++) {
268: if (gmataj[jj] < rstart) ld[i]++;
269: if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
270: jj++;
271: }
272: }
273: /* receive numerical values */
274: PetscMemzero(gmataa,nz*sizeof(PetscScalar));
275: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
276: }
277: /* set preallocation */
278: for (i=0; i<m; i++) {
279: dlens[i] -= olens[i];
280: }
281: MatSeqAIJSetPreallocation(mat,0,dlens);
282: MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);
283:
284: for (i=0; i<m; i++) {
285: dlens[i] += olens[i];
286: }
287: cnt = 0;
288: for (i=0; i<m; i++) {
289: row = rstart + i;
290: MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);
291: cnt += dlens[i];
292: }
293: if (rank) {
294: PetscFree2(gmataa,gmataj);
295: }
296: PetscFree2(dlens,olens);
297: PetscFree(rowners);
298: ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299: *inmat = mat;
300: } else { /* column indices are already set; only need to move over numerical values from process 0 */
301: Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
302: Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
303: mat = *inmat;
304: PetscObjectGetNewTag((PetscObject)mat,&tag);
305: if (!rank) {
306: /* send numerical values to other processes */
307: gmata = (Mat_SeqAIJ*) gmat->data;
308: MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);
309: gmataa = gmata->a;
310: for (i=1; i<size; i++) {
311: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
312: MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
313: }
314: nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
315: } else {
316: /* receive numerical values from process 0*/
317: nz = Ad->nz + Ao->nz;
318: PetscMalloc(nz*sizeof(PetscScalar),&gmataa); gmataarestore = gmataa;
319: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
320: }
321: /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
322: ld = ((Mat_MPIAIJ*)(mat->data))->ld;
323: ad = Ad->a;
324: ao = Ao->a;
325: if (mat->rmap->n) {
326: i = 0;
327: nz = ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
328: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
329: }
330: for (i=1; i<mat->rmap->n; i++) {
331: nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
332: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
333: }
334: i--;
335: if (mat->rmap->n) {
336: nz = Ao->i[i+1] - Ao->i[i] - ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
337: }
338: if (rank) {
339: PetscFree(gmataarestore);
340: }
341: }
342: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
343: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
344: CHKMEMQ;
345: return(0);
346: }
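/*
   A minimal sketch (an assumption, not taken from the PETSc sources) of how the routine
   above might be driven for MAT_INITIAL_MATRIX: gseq is assumed to be a square SeqAIJ
   matrix that exists on rank 0 and is ignored on the remaining ranks, M is its global
   size, and mpimat receives the distributed MPIAIJ matrix.

      PetscInt m = PETSC_DECIDE;
      PetscSplitOwnership(comm,&m,&M);                          (choose a local row count)
      MatDistribute_MPIAIJ(comm,gseq,m,MAT_INITIAL_MATRIX,&mpimat);
*/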
348: /*
349: Local utility routine that creates a mapping from the global column
350: number to the local number in the off-diagonal part of the local
351: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
352: a slightly higher hash table cost; without it, it is not scalable (each processor
353: has an order N integer array) but is fast to access.
354: */
357: PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358: {
359: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
361: PetscInt n = aij->B->cmap->n,i;
364: if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365: #if defined (PETSC_USE_CTABLE)
366: PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);
367: for (i=0; i<n; i++){
368: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);
369: }
370: #else
371: PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);
372: PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));
373: PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));
374: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
375: #endif
376: return(0);
377: }
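/*
   For reference, the map built above is queried as follows elsewhere in this file (see
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()); gcol and lcol are placeholder names
   for a global column index and the corresponding local off-diagonal column. A result
   of -1 after the decrement means gcol is not present in the off-diagonal part.

   #if defined (PETSC_USE_CTABLE)
      PetscTableFind(aij->colmap,gcol+1,&lcol);
      lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif
*/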
379: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
380: { \
381: if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
382: lastcol1 = col;\
383: while (high1-low1 > 5) { \
384: t = (low1+high1)/2; \
385: if (rp1[t] > col) high1 = t; \
386: else low1 = t; \
387: } \
388: for (_i=low1; _i<high1; _i++) { \
389: if (rp1[_i] > col) break; \
390: if (rp1[_i] == col) { \
391: if (addv == ADD_VALUES) ap1[_i] += value; \
392: else ap1[_i] = value; \
393: goto a_noinsert; \
394: } \
395: } \
396: if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
397: if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
398: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
399: MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
400: N = nrow1++ - 1; a->nz++; high1++; \
401: /* shift up all the later entries in this row */ \
402: for (ii=N; ii>=_i; ii--) { \
403: rp1[ii+1] = rp1[ii]; \
404: ap1[ii+1] = ap1[ii]; \
405: } \
406: rp1[_i] = col; \
407: ap1[_i] = value; \
408: a_noinsert: ; \
409: ailen[row] = nrow1; \
410: }
413: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
414: { \
415: if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
416: lastcol2 = col;\
417: while (high2-low2 > 5) { \
418: t = (low2+high2)/2; \
419: if (rp2[t] > col) high2 = t; \
420: else low2 = t; \
421: } \
422: for (_i=low2; _i<high2; _i++) { \
423: if (rp2[_i] > col) break; \
424: if (rp2[_i] == col) { \
425: if (addv == ADD_VALUES) ap2[_i] += value; \
426: else ap2[_i] = value; \
427: goto b_noinsert; \
428: } \
429: } \
430: if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
431: if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
432: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
433: MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
434: N = nrow2++ - 1; b->nz++; high2++; \
435: /* shift up all the later entries in this row */ \
436: for (ii=N; ii>=_i; ii--) { \
437: rp2[ii+1] = rp2[ii]; \
438: ap2[ii+1] = ap2[ii]; \
439: } \
440: rp2[_i] = col; \
441: ap2[_i] = value; \
442: b_noinsert: ; \
443: bilen[row] = nrow2; \
444: }
448: PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449: {
450: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
451: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
453: PetscInt l,*garray = mat->garray,diag;
456: /* code only works for square matrices A */
458: /* find size of row to the left of the diagonal part */
459: MatGetOwnershipRange(A,&diag,0);
460: row = row - diag;
461: for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462: if (garray[b->j[b->i[row]+l]] > diag) break;
463: }
464: PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));
466: /* diagonal part */
467: PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));
469: /* right of diagonal part */
470: PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));
471: return(0);
472: }
476: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477: {
478: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
479: PetscScalar value;
481: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
482: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483: PetscBool roworiented = aij->roworiented;
485: /* Some Variables required in the macro */
486: Mat A = aij->A;
487: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
488: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489: MatScalar *aa = a->a;
490: PetscBool ignorezeroentries = a->ignorezeroentries;
491: Mat B = aij->B;
492: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
493: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494: MatScalar *ba = b->a;
496: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497: PetscInt nonew;
498: MatScalar *ap1,*ap2;
502: for (i=0; i<m; i++) {
503: if (im[i] < 0) continue;
504: #if defined(PETSC_USE_DEBUG)
505: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
506: #endif
507: if (im[i] >= rstart && im[i] < rend) {
508: row = im[i] - rstart;
509: lastcol1 = -1;
510: rp1 = aj + ai[row];
511: ap1 = aa + ai[row];
512: rmax1 = aimax[row];
513: nrow1 = ailen[row];
514: low1 = 0;
515: high1 = nrow1;
516: lastcol2 = -1;
517: rp2 = bj + bi[row];
518: ap2 = ba + bi[row];
519: rmax2 = bimax[row];
520: nrow2 = bilen[row];
521: low2 = 0;
522: high2 = nrow2;
524: for (j=0; j<n; j++) {
525: if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
526: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527: if (in[j] >= cstart && in[j] < cend){
528: col = in[j] - cstart;
529: nonew = a->nonew;
530: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531: } else if (in[j] < 0) continue;
532: #if defined(PETSC_USE_DEBUG)
533: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534: #endif
535: else {
536: if (mat->was_assembled) {
537: if (!aij->colmap) {
538: MatCreateColmap_MPIAIJ_Private(mat);
539: }
540: #if defined (PETSC_USE_CTABLE)
541: PetscTableFind(aij->colmap,in[j]+1,&col);
542: col--;
543: #else
544: col = aij->colmap[in[j]] - 1;
545: #endif
546: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
547: MatDisAssemble_MPIAIJ(mat);
548: col = in[j];
549: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550: B = aij->B;
551: b = (Mat_SeqAIJ*)B->data;
552: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553: rp2 = bj + bi[row];
554: ap2 = ba + bi[row];
555: rmax2 = bimax[row];
556: nrow2 = bilen[row];
557: low2 = 0;
558: high2 = nrow2;
559: bm = aij->B->rmap->n;
560: ba = b->a;
561: }
562: } else col = in[j];
563: nonew = b->nonew;
564: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565: }
566: }
567: } else {
568: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569: if (!aij->donotstash) {
570: mat->assembled = PETSC_FALSE;
571: if (roworiented) {
572: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
573: } else {
574: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
575: }
576: }
577: }
578: }
579: return(0);
580: }
584: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585: {
586: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
588: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
592: for (i=0; i<m; i++) {
593: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595: if (idxm[i] >= rstart && idxm[i] < rend) {
596: row = idxm[i] - rstart;
597: for (j=0; j<n; j++) {
598: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600: if (idxn[j] >= cstart && idxn[j] < cend){
601: col = idxn[j] - cstart;
602: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
603: } else {
604: if (!aij->colmap) {
605: MatCreateColmap_MPIAIJ_Private(mat);
606: }
607: #if defined (PETSC_USE_CTABLE)
608: PetscTableFind(aij->colmap,idxn[j]+1,&col);
609: col --;
610: #else
611: col = aij->colmap[idxn[j]] - 1;
612: #endif
613: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614: else {
615: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
616: }
617: }
618: }
619: } else {
620: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
621: }
622: }
623: return(0);
624: }
626: extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
630: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
631: {
632: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
634: PetscInt nstash,reallocs;
635: InsertMode addv;
638: if (aij->donotstash || mat->nooffprocentries) {
639: return(0);
640: }
642: /* make sure all processors are either in INSERTMODE or ADDMODE */
643: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);
644: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
645: mat->insertmode = addv; /* in case this processor had no cache */
647: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
648: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
649: PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
650: return(0);
651: }
655: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
656: {
657: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
658: Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data;
660: PetscMPIInt n;
661: PetscInt i,j,rstart,ncols,flg;
662: PetscInt *row,*col;
663: PetscBool other_disassembled;
664: PetscScalar *val;
665: InsertMode addv = mat->insertmode;
667: /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
669: if (!aij->donotstash && !mat->nooffprocentries) {
670: while (1) {
671: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
672: if (!flg) break;
674: for (i=0; i<n;) {
675: /* Now identify the consecutive vals belonging to the same row */
676: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
677: if (j < n) ncols = j-i;
678: else ncols = n-i;
679: /* Now assemble all these values with a single function call */
680: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
681: i = j;
682: }
683: }
684: MatStashScatterEnd_Private(&mat->stash);
685: }
686: MatAssemblyBegin(aij->A,mode);
687: MatAssemblyEnd(aij->A,mode);
689: /* determine if any processor has disassembled, if so we must
690: also disassemble ourselves, in order that we may reassemble. */
691: /*
692: if nonzero structure of submatrix B cannot change then we know that
693: no processor disassembled thus we can skip this stuff
694: */
695: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
696: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);
697: if (mat->was_assembled && !other_disassembled) {
698: MatDisAssemble_MPIAIJ(mat);
699: }
700: }
701: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702: MatSetUpMultiply_MPIAIJ(mat);
703: }
704: MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);
705: MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);
706: MatAssemblyBegin(aij->B,mode);
707: MatAssemblyEnd(aij->B,mode);
709: PetscFree2(aij->rowvalues,aij->rowindices);
710: aij->rowvalues = 0;
712: /* used by MatAXPY() */
713: a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
714: a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */
716: VecDestroy(&aij->diag);
717: if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718: return(0);
719: }
723: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
724: {
725: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
729: MatZeroEntries(l->A);
730: MatZeroEntries(l->B);
731: return(0);
732: }
736: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
737: {
738: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
739: PetscErrorCode ierr;
740: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
741: PetscInt i,*owners = A->rmap->range;
742: PetscInt *nprocs,j,idx,nsends,row;
743: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
744: PetscInt *rvalues,count,base,slen,*source;
745: PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart;
746: MPI_Comm comm = ((PetscObject)A)->comm;
747: MPI_Request *send_waits,*recv_waits;
748: MPI_Status recv_status,*send_status;
749: const PetscScalar *xx;
750: PetscScalar *bb;
751: #if defined(PETSC_DEBUG)
752: PetscBool found = PETSC_FALSE;
753: #endif
756: /* first count number of contributors to each processor */
757: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
758: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
759: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
760: j = 0;
761: for (i=0; i<N; i++) {
762: if (lastidx > (idx = rows[i])) j = 0;
763: lastidx = idx;
764: for (; j<size; j++) {
765: if (idx >= owners[j] && idx < owners[j+1]) {
766: nprocs[2*j]++;
767: nprocs[2*j+1] = 1;
768: owner[i] = j;
769: #if defined(PETSC_DEBUG)
770: found = PETSC_TRUE;
771: #endif
772: break;
773: }
774: }
775: #if defined(PETSC_DEBUG)
776: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
777: found = PETSC_FALSE;
778: #endif
779: }
780: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
782: if (A->nooffproczerorows) {
783: if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
784: nrecvs = nsends;
785: nmax = N;
786: } else {
787: /* inform other processors of number of messages and max length*/
788: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
789: }
791: /* post receives: */
792: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
793: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
794: for (i=0; i<nrecvs; i++) {
795: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
796: }
798: /* do sends:
799: 1) starts[i] gives the starting index in svalues for stuff going to
800: the ith processor
801: */
802: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
803: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
804: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
805: starts[0] = 0;
806: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
807: for (i=0; i<N; i++) {
808: svalues[starts[owner[i]]++] = rows[i];
809: }
811: starts[0] = 0;
812: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
813: count = 0;
814: for (i=0; i<size; i++) {
815: if (nprocs[2*i+1]) {
816: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
817: }
818: }
819: PetscFree(starts);
821: base = owners[rank];
823: /* wait on receives */
824: PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
825: count = nrecvs; slen = 0;
826: while (count) {
827: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
828: /* unpack receives into our local space */
829: MPI_Get_count(&recv_status,MPIU_INT,&n);
830: source[imdex] = recv_status.MPI_SOURCE;
831: lens[imdex] = n;
832: slen += n;
833: count--;
834: }
835: PetscFree(recv_waits);
836:
837: /* move the data into the send scatter */
838: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
839: count = 0;
840: for (i=0; i<nrecvs; i++) {
841: values = rvalues + i*nmax;
842: for (j=0; j<lens[i]; j++) {
843: lrows[count++] = values[j] - base;
844: }
845: }
846: PetscFree(rvalues);
847: PetscFree2(lens,source);
848: PetscFree(owner);
849: PetscFree(nprocs);
850:
851: /* fix right hand side if needed */
852: if (x && b) {
853: VecGetArrayRead(x,&xx);
854: VecGetArray(b,&bb);
855: for (i=0; i<slen; i++) {
856: bb[lrows[i]] = diag*xx[lrows[i]];
857: }
858: VecRestoreArrayRead(x,&xx);
859: VecRestoreArray(b,&bb);
860: }
861: /*
862: Zero the required rows. If the "diagonal block" of the matrix
863: is square and the user wishes to set the diagonal, we use separate
864: code so that MatSetValues() is not called for each diagonal entry,
865: which would allocate new memory, cause many mallocs, and slow things down.
867: */
868: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
869: MatZeroRows(l->B,slen,lrows,0.0,0,0);
870: if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
871: MatZeroRows(l->A,slen,lrows,diag,0,0);
872: } else if (diag != 0.0) {
873: MatZeroRows(l->A,slen,lrows,0.0,0,0);
874: if (((Mat_SeqAIJ*)l->A->data)->nonew) {
875: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
876: MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
877: }
878: for (i = 0; i < slen; i++) {
879: row = lrows[i] + rstart;
880: MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
881: }
882: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
883: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
884: } else {
885: MatZeroRows(l->A,slen,lrows,0.0,0,0);
886: }
887: PetscFree(lrows);
889: /* wait on sends */
890: if (nsends) {
891: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
892: MPI_Waitall(nsends,send_waits,send_status);
893: PetscFree(send_status);
894: }
895: PetscFree(send_waits);
896: PetscFree(svalues);
897: return(0);
898: }
902: PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
903: {
904: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
905: PetscErrorCode ierr;
906: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
907: PetscInt i,*owners = A->rmap->range;
908: PetscInt *nprocs,j,idx,nsends;
909: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
910: PetscInt *rvalues,count,base,slen,*source;
911: PetscInt *lens,*lrows,*values,m;
912: MPI_Comm comm = ((PetscObject)A)->comm;
913: MPI_Request *send_waits,*recv_waits;
914: MPI_Status recv_status,*send_status;
915: const PetscScalar *xx;
916: PetscScalar *bb,*mask;
917: Vec xmask,lmask;
918: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data;
919: const PetscInt *aj, *ii,*ridx;
920: PetscScalar *aa;
921: #if defined(PETSC_DEBUG)
922: PetscBool found = PETSC_FALSE;
923: #endif
926: /* first count number of contributors to each processor */
927: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
928: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
929: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
930: j = 0;
931: for (i=0; i<N; i++) {
932: if (lastidx > (idx = rows[i])) j = 0;
933: lastidx = idx;
934: for (; j<size; j++) {
935: if (idx >= owners[j] && idx < owners[j+1]) {
936: nprocs[2*j]++;
937: nprocs[2*j+1] = 1;
938: owner[i] = j;
939: #if defined(PETSC_DEBUG)
940: found = PETSC_TRUE;
941: #endif
942: break;
943: }
944: }
945: #if defined(PETSC_DEBUG)
946: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
947: found = PETSC_FALSE;
948: #endif
949: }
950: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
952: /* inform other processors of number of messages and max length*/
953: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
955: /* post receives: */
956: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
957: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
958: for (i=0; i<nrecvs; i++) {
959: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
960: }
962: /* do sends:
963: 1) starts[i] gives the starting index in svalues for stuff going to
964: the ith processor
965: */
966: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
967: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
968: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
969: starts[0] = 0;
970: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
971: for (i=0; i<N; i++) {
972: svalues[starts[owner[i]]++] = rows[i];
973: }
975: starts[0] = 0;
976: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
977: count = 0;
978: for (i=0; i<size; i++) {
979: if (nprocs[2*i+1]) {
980: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
981: }
982: }
983: PetscFree(starts);
985: base = owners[rank];
987: /* wait on receives */
988: PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
989: count = nrecvs; slen = 0;
990: while (count) {
991: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
992: /* unpack receives into our local space */
993: MPI_Get_count(&recv_status,MPIU_INT,&n);
994: source[imdex] = recv_status.MPI_SOURCE;
995: lens[imdex] = n;
996: slen += n;
997: count--;
998: }
999: PetscFree(recv_waits);
1000:
1001: /* move the data into the send scatter */
1002: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
1003: count = 0;
1004: for (i=0; i<nrecvs; i++) {
1005: values = rvalues + i*nmax;
1006: for (j=0; j<lens[i]; j++) {
1007: lrows[count++] = values[j] - base;
1008: }
1009: }
1010: PetscFree(rvalues);
1011: PetscFree2(lens,source);
1012: PetscFree(owner);
1013: PetscFree(nprocs);
1014: /* lrows are the local rows to be zeroed, slen is the number of local rows */
1016: /* zero diagonal part of matrix */
1017: MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);
1018:
1019: /* handle off diagonal part of matrix */
1020: MatGetVecs(A,&xmask,PETSC_NULL);
1021: VecDuplicate(l->lvec,&lmask);
1022: VecGetArray(xmask,&bb);
1023: for (i=0; i<slen; i++) {
1024: bb[lrows[i]] = 1;
1025: }
1026: VecRestoreArray(xmask,&bb);
1027: VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1028: VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1029: VecDestroy(&xmask);
1030: if (x) {
1031: VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);
1032: VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);
1033: VecGetArrayRead(l->lvec,&xx);
1034: VecGetArray(b,&bb);
1035: }
1036: VecGetArray(lmask,&mask);
1038: /* remove zeroed rows of off diagonal matrix */
1039: ii = aij->i;
1040: for (i=0; i<slen; i++) {
1041: PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));
1042: }
1044: /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1045: if (aij->compressedrow.use){
1046: m = aij->compressedrow.nrows;
1047: ii = aij->compressedrow.i;
1048: ridx = aij->compressedrow.rindex;
1049: for (i=0; i<m; i++){
1050: n = ii[i+1] - ii[i];
1051: aj = aij->j + ii[i];
1052: aa = aij->a + ii[i];
1054: for (j=0; j<n; j++) {
1055: if (PetscAbsScalar(mask[*aj])) {
1056: if (b) bb[*ridx] -= *aa*xx[*aj];
1057: *aa = 0.0;
1058: }
1059: aa++;
1060: aj++;
1061: }
1062: ridx++;
1063: }
1064: } else { /* do not use compressed row format */
1065: m = l->B->rmap->n;
1066: for (i=0; i<m; i++) {
1067: n = ii[i+1] - ii[i];
1068: aj = aij->j + ii[i];
1069: aa = aij->a + ii[i];
1070: for (j=0; j<n; j++) {
1071: if (PetscAbsScalar(mask[*aj])) {
1072: if (b) bb[i] -= *aa*xx[*aj];
1073: *aa = 0.0;
1074: }
1075: aa++;
1076: aj++;
1077: }
1078: }
1079: }
1080: if (x) {
1081: VecRestoreArray(b,&bb);
1082: VecRestoreArrayRead(l->lvec,&xx);
1083: }
1084: VecRestoreArray(lmask,&mask);
1085: VecDestroy(&lmask);
1086: PetscFree(lrows);
1088: /* wait on sends */
1089: if (nsends) {
1090: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1091: MPI_Waitall(nsends,send_waits,send_status);
1092: PetscFree(send_status);
1093: }
1094: PetscFree(send_waits);
1095: PetscFree(svalues);
1097: return(0);
1098: }
1102: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103: {
1104: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1106: PetscInt nt;
1109: VecGetLocalSize(xx,&nt);
1110: if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1112: (*a->A->ops->mult)(a->A,xx,yy);
1113: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1114: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1115: return(0);
1116: }
1120: PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121: {
1122: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1126: MatMultDiagonalBlock(a->A,bb,xx);
1127: return(0);
1128: }
1132: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133: {
1134: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1138: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1139: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1140: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1141: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1142: return(0);
1143: }
1147: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148: {
1149: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1151: PetscBool merged;
1154: VecScatterGetMerged(a->Mvctx,&merged);
1155: /* do nondiagonal part */
1156: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1157: if (!merged) {
1158: /* send it on its way */
1159: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1160: /* do local part */
1161: (*a->A->ops->multtranspose)(a->A,xx,yy);
1162: /* receive remote parts: note this assumes the values are not actually */
1163: /* added in yy until the next line, */
1164: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1165: } else {
1166: /* do local part */
1167: (*a->A->ops->multtranspose)(a->A,xx,yy);
1168: /* send it on its way */
1169: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1170: /* values actually were received in the Begin() but we need to call this nop */
1171: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1172: }
1173: return(0);
1174: }
1176: EXTERN_C_BEGIN
1179: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
1180: {
1181: MPI_Comm comm;
1182: Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1183: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1184: IS Me,Notme;
1186: PetscInt M,N,first,last,*notme,i;
1187: PetscMPIInt size;
1191: /* Easy test: symmetric diagonal block */
1192: Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1193: MatIsTranspose(Adia,Bdia,tol,f);
1194: if (!*f) return(0);
1195: PetscObjectGetComm((PetscObject)Amat,&comm);
1196: MPI_Comm_size(comm,&size);
1197: if (size == 1) return(0);
1199: /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1200: MatGetSize(Amat,&M,&N);
1201: MatGetOwnershipRange(Amat,&first,&last);
1202: PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);
1203: for (i=0; i<first; i++) notme[i] = i;
1204: for (i=last; i<M; i++) notme[i-last+first] = i;
1205: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);
1206: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
1207: MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
1208: Aoff = Aoffs[0];
1209: MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
1210: Boff = Boffs[0];
1211: MatIsTranspose(Aoff,Boff,tol,f);
1212: MatDestroyMatrices(1,&Aoffs);
1213: MatDestroyMatrices(1,&Boffs);
1214: ISDestroy(&Me);
1215: ISDestroy(&Notme);
1216: PetscFree(notme);
1217: return(0);
1218: }
1219: EXTERN_C_END
1223: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1224: {
1225: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1229: /* do nondiagonal part */
1230: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1231: /* send it on its way */
1232: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1233: /* do local part */
1234: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1235: /* receive remote parts */
1236: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1237: return(0);
1238: }
1240: /*
1241: This only works correctly for square matrices where the subblock A->A is the
1242: diagonal block
1243: */
1246: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1247: {
1249: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1252: if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1253: if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1254: MatGetDiagonal(a->A,v);
1255: return(0);
1256: }
1260: PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1261: {
1262: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1266: MatScale(a->A,aa);
1267: MatScale(a->B,aa);
1268: return(0);
1269: }
1273: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1274: {
1275: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1279: #if defined(PETSC_USE_LOG)
1280: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1281: #endif
1282: MatStashDestroy_Private(&mat->stash);
1283: VecDestroy(&aij->diag);
1284: MatDestroy(&aij->A);
1285: MatDestroy(&aij->B);
1286: #if defined (PETSC_USE_CTABLE)
1287: PetscTableDestroy(&aij->colmap);
1288: #else
1289: PetscFree(aij->colmap);
1290: #endif
1291: PetscFree(aij->garray);
1292: VecDestroy(&aij->lvec);
1293: VecScatterDestroy(&aij->Mvctx);
1294: PetscFree2(aij->rowvalues,aij->rowindices);
1295: PetscFree(aij->ld);
1296: PetscFree(mat->data);
1298: PetscObjectChangeTypeName((PetscObject)mat,0);
1299: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
1300: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
1301: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
1302: PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);
1303: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);
1304: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);
1305: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);
1306: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);
1307: return(0);
1308: }
1312: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1313: {
1314: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1315: Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data;
1316: Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data;
1317: PetscErrorCode ierr;
1318: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
1319: int fd;
1320: PetscInt nz,header[4],*row_lengths,*range=0,rlen,i;
1321: PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1322: PetscScalar *column_values;
1323: PetscInt message_count,flowcontrolcount;
1326: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
1327: MPI_Comm_size(((PetscObject)mat)->comm,&size);
1328: nz = A->nz + B->nz;
1329: if (!rank) {
1330: header[0] = MAT_FILE_CLASSID;
1331: header[1] = mat->rmap->N;
1332: header[2] = mat->cmap->N;
1333: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);
1334: PetscViewerBinaryGetDescriptor(viewer,&fd);
1335: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
1336: /* get largest number of rows any processor has */
1337: rlen = mat->rmap->n;
1338: range = mat->rmap->range;
1339: for (i=1; i<size; i++) {
1340: rlen = PetscMax(rlen,range[i+1] - range[i]);
1341: }
1342: } else {
1343: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);
1344: rlen = mat->rmap->n;
1345: }
1347: /* load up the local row counts */
1348: PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);
1349: for (i=0; i<mat->rmap->n; i++) {
1350: row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1351: }
1353: /* store the row lengths to the file */
1354: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1355: if (!rank) {
1356: PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);
1357: for (i=1; i<size; i++) {
1358: PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1359: rlen = range[i+1] - range[i];
1360: MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);
1361: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);
1362: }
1363: PetscViewerFlowControlEndMaster(viewer,message_count);
1364: } else {
1365: PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1366: MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1367: PetscViewerFlowControlEndWorker(viewer,message_count);
1368: }
1369: PetscFree(row_lengths);
1371: /* load up the local column indices */
1372: nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
1373: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);
1374: PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);
1375: cnt = 0;
1376: for (i=0; i<mat->rmap->n; i++) {
1377: for (j=B->i[i]; j<B->i[i+1]; j++) {
1378: if ( (col = garray[B->j[j]]) > cstart) break;
1379: column_indices[cnt++] = col;
1380: }
1381: for (k=A->i[i]; k<A->i[i+1]; k++) {
1382: column_indices[cnt++] = A->j[k] + cstart;
1383: }
1384: for (; j<B->i[i+1]; j++) {
1385: column_indices[cnt++] = garray[B->j[j]];
1386: }
1387: }
1388: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1390: /* store the column indices to the file */
1391: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1392: if (!rank) {
1393: MPI_Status status;
1394: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
1395: for (i=1; i<size; i++) {
1396: PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1397: MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
1398: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1399: MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);
1400: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);
1401: }
1402: PetscViewerFlowControlEndMaster(viewer,message_count);
1403: } else {
1404: PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1405: MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1406: MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1407: PetscViewerFlowControlEndWorker(viewer,message_count);
1408: }
1409: PetscFree(column_indices);
1411: /* load up the local column values */
1412: PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
1413: cnt = 0;
1414: for (i=0; i<mat->rmap->n; i++) {
1415: for (j=B->i[i]; j<B->i[i+1]; j++) {
1416: if ( garray[B->j[j]] > cstart) break;
1417: column_values[cnt++] = B->a[j];
1418: }
1419: for (k=A->i[i]; k<A->i[i+1]; k++) {
1420: column_values[cnt++] = A->a[k];
1421: }
1422: for (; j<B->i[i+1]; j++) {
1423: column_values[cnt++] = B->a[j];
1424: }
1425: }
1426: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1428: /* store the column values to the file */
1429: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1430: if (!rank) {
1431: MPI_Status status;
1432: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
1433: for (i=1; i<size; i++) {
1434: PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1435: MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
1436: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1437: MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);
1438: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);
1439: }
1440: PetscViewerFlowControlEndMaster(viewer,message_count);
1441: } else {
1442: PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1443: MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1444: MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);
1445: PetscViewerFlowControlEndWorker(viewer,message_count);
1446: }
1447: PetscFree(column_values);
1448: return(0);
1449: }
1453: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1454: {
1455: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1456: PetscErrorCode ierr;
1457: PetscMPIInt rank = aij->rank,size = aij->size;
1458: PetscBool isdraw,iascii,isbinary;
1459: PetscViewer sviewer;
1460: PetscViewerFormat format;
1463: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1464: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1465: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1466: if (iascii) {
1467: PetscViewerGetFormat(viewer,&format);
1468: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1469: MatInfo info;
1470: PetscBool inodes;
1472: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
1473: MatGetInfo(mat,MAT_LOCAL,&info);
1474: MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);
1475: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
1476: if (!inodes) {
1477: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1478: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1479: } else {
1480: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1481: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1482: }
1483: MatGetInfo(aij->A,MAT_LOCAL,&info);
1484: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1485: MatGetInfo(aij->B,MAT_LOCAL,&info);
1486: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1487: PetscViewerFlush(viewer);
1488: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
1489: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
1490: VecScatterView(aij->Mvctx,viewer);
1491: return(0);
1492: } else if (format == PETSC_VIEWER_ASCII_INFO) {
1493: PetscInt inodecount,inodelimit,*inodes;
1494: MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);
1495: if (inodes) {
1496: PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);
1497: } else {
1498: PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");
1499: }
1500: return(0);
1501: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1502: return(0);
1503: }
1504: } else if (isbinary) {
1505: if (size == 1) {
1506: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1507: MatView(aij->A,viewer);
1508: } else {
1509: MatView_MPIAIJ_Binary(mat,viewer);
1510: }
1511: return(0);
1512: } else if (isdraw) {
1513: PetscDraw draw;
1514: PetscBool isnull;
1515: PetscViewerDrawGetDraw(viewer,0,&draw);
1516: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1517: }
1519: if (size == 1) {
1520: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1521: MatView(aij->A,viewer);
1522: } else {
1523: /* assemble the entire matrix onto first processor. */
1524: Mat A;
1525: Mat_SeqAIJ *Aloc;
1526: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1527: MatScalar *a;
1529: if (mat->rmap->N > 1024) {
1530: PetscBool flg = PETSC_FALSE;
1532: PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);
1533: if (!flg) {
1534: SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1535: }
1536: }
1538: MatCreate(((PetscObject)mat)->comm,&A);
1539: if (!rank) {
1540: MatSetSizes(A,M,N,M,N);
1541: } else {
1542: MatSetSizes(A,0,0,M,N);
1543: }
1544: /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1545: MatSetType(A,MATMPIAIJ);
1546: MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);
1547: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
1548: PetscLogObjectParent(mat,A);
1550: /* copy over the A part */
1551: Aloc = (Mat_SeqAIJ*)aij->A->data;
1552: m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1553: row = mat->rmap->rstart;
1554: for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1555: for (i=0; i<m; i++) {
1556: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
1557: row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1558: }
1559: aj = Aloc->j;
1560: for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1562: /* copy over the B part */
1563: Aloc = (Mat_SeqAIJ*)aij->B->data;
1564: m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1565: row = mat->rmap->rstart;
1566: PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);
1567: ct = cols;
1568: for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1569: for (i=0; i<m; i++) {
1570: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
1571: row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1572: }
1573: PetscFree(ct);
1574: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1575: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1576: /*
1577: Everyone has to make this call to draw the matrix since the graphics waits are
1578: synchronized across all processors that share the PetscDraw object
1579: */
1580: PetscViewerGetSingleton(viewer,&sviewer);
1581: if (!rank) {
1582: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);
1583: /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1584: PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1585: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
1586: }
1587: PetscViewerRestoreSingleton(viewer,&sviewer);
1588: MatDestroy(&A);
1589: }
1590: return(0);
1591: }
1595: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1596: {
1598: PetscBool iascii,isdraw,issocket,isbinary;
1599:
1601: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1602: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1603: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1604: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1605: if (iascii || isdraw || isbinary || issocket) {
1606: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1607: } else {
1608: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1609: }
1610: return(0);
1611: }
1615: PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1616: {
1617: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1619: Vec bb1 = 0;
1620: PetscBool hasop;
1623: if (flag == SOR_APPLY_UPPER) {
1624: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1625: return(0);
1626: }
1628: if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1629: VecDuplicate(bb,&bb1);
1630: }
1632: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1633: if (flag & SOR_ZERO_INITIAL_GUESS) {
1634: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1635: its--;
1636: }
1637:
1638: while (its--) {
1639: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1640: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1642: /* update rhs: bb1 = bb - B*x */
1643: VecScale(mat->lvec,-1.0);
1644: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1646: /* local sweep */
1647: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);
1648: }
1649: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1650: if (flag & SOR_ZERO_INITIAL_GUESS) {
1651: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1652: its--;
1653: }
1654: while (its--) {
1655: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1656: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1658: /* update rhs: bb1 = bb - B*x */
1659: VecScale(mat->lvec,-1.0);
1660: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1662: /* local sweep */
1663: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);
1664: }
1665: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1666: if (flag & SOR_ZERO_INITIAL_GUESS) {
1667: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1668: its--;
1669: }
1670: while (its--) {
1671: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1672: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1674: /* update rhs: bb1 = bb - B*x */
1675: VecScale(mat->lvec,-1.0);
1676: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1678: /* local sweep */
1679: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);
1680: }
1681: } else if (flag & SOR_EISENSTAT) {
1682: Vec xx1;
1684: VecDuplicate(bb,&xx1);
1685: (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);
1687: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1688: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1689: if (!mat->diag) {
1690: MatGetVecs(matin,&mat->diag,PETSC_NULL);
1691: MatGetDiagonal(matin,mat->diag);
1692: }
1693: MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);
1694: if (hasop) {
1695: MatMultDiagonalBlock(matin,xx,bb1);
1696: } else {
1697: VecPointwiseMult(bb1,mat->diag,xx);
1698: }
1699: VecAYPX(bb1,(omega-2.0)/omega,bb);
1701: MatMultAdd(mat->B,mat->lvec,bb1,bb1);
1703: /* local sweep */
1704: (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);
1705: VecAXPY(xx,1.0,xx1);
1706: VecDestroy(&xx1);
1707: } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1709: VecDestroy(&bb1);
1710: return(0);
1711: }
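/*
   Illustrative sketch, not part of mpiaij.c: driving the local SOR sweeps implemented in
   MatSOR_MPIAIJ() above through the public MatSOR() call (the same path is reached via
   -pc_type sor).  Sketch_LocalSOR is a made-up name; A is assumed assembled and square,
   b and x conforming vectors; error checking is omitted to match this listing.
*/
#include <petscmat.h>
static PetscErrorCode Sketch_LocalSOR(Mat A,Vec b,Vec x)
{
  /* two iterations of one symmetric local sweep each, zero initial guess, omega = 1.0, no shift */
  MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);
  return(0);
}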
1715: PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1716: {
1717: MPI_Comm comm;
1718: PetscInt first,local_rowsize,local_colsize;
1719: const PetscInt *rows;
1720: IS crowp,growp,irowp,lrowp,lcolp,icolp;
1724: PetscObjectGetComm((PetscObject)A,&comm);
1725: /* make a collective version of 'rowp'; this tolerates users who pass serial index sets */
1726: ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);
1727: /* collect the global row permutation and invert it */
1728: ISAllGather(crowp,&growp);
1729: ISSetPermutation(growp);
1730: ISDestroy(&crowp);
1731: ISInvertPermutation(growp,PETSC_DECIDE,&irowp);
1732: ISDestroy(&growp);
1733: /* get the local target indices */
1734: MatGetOwnershipRange(A,&first,PETSC_NULL);
1735: MatGetLocalSize(A,&local_rowsize,&local_colsize);
1736: ISGetIndices(irowp,&rows);
1737: ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);
1738: ISRestoreIndices(irowp,&rows);
1739: ISDestroy(&irowp);
1740: /* the column permutation is so much easier;
1741: make a local version of 'colp' and invert it */
1742: ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);
1743: ISSetPermutation(lcolp);
1744: ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);
1745: ISDestroy(&lcolp);
1746: /* now we just get the submatrix */
1747: MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);
1748: /* clean up */
1749: ISDestroy(&lrowp);
1750: ISDestroy(&icolp);
1751: return(0);
1752: }
1756: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1757: {
1758: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1759: Mat A = mat->A,B = mat->B;
1761: PetscReal isend[5],irecv[5];
1764: info->block_size = 1.0;
1765: MatGetInfo(A,MAT_LOCAL,info);
1766: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1767: isend[3] = info->memory; isend[4] = info->mallocs;
1768: MatGetInfo(B,MAT_LOCAL,info);
1769: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1770: isend[3] += info->memory; isend[4] += info->mallocs;
1771: if (flag == MAT_LOCAL) {
1772: info->nz_used = isend[0];
1773: info->nz_allocated = isend[1];
1774: info->nz_unneeded = isend[2];
1775: info->memory = isend[3];
1776: info->mallocs = isend[4];
1777: } else if (flag == MAT_GLOBAL_MAX) {
1778: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);
1779: info->nz_used = irecv[0];
1780: info->nz_allocated = irecv[1];
1781: info->nz_unneeded = irecv[2];
1782: info->memory = irecv[3];
1783: info->mallocs = irecv[4];
1784: } else if (flag == MAT_GLOBAL_SUM) {
1785: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);
1786: info->nz_used = irecv[0];
1787: info->nz_allocated = irecv[1];
1788: info->nz_unneeded = irecv[2];
1789: info->memory = irecv[3];
1790: info->mallocs = irecv[4];
1791: }
1792: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1793: info->fill_ratio_needed = 0;
1794: info->factor_mallocs = 0;
1796: return(0);
1797: }
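/*
   Illustrative sketch, not part of mpiaij.c: querying the statistics assembled by
   MatGetInfo_MPIAIJ() above.  Sketch_PrintInfo is a made-up name; error checking is
   omitted to match this listing.
*/
#include <petscmat.h>
static PetscErrorCode Sketch_PrintInfo(Mat A)
{
  MatInfo  info;
  MPI_Comm comm;
  PetscObjectGetComm((PetscObject)A,&comm);
  MatGetInfo(A,MAT_GLOBAL_SUM,&info);   /* or MAT_LOCAL / MAT_GLOBAL_MAX, as handled above */
  PetscPrintf(comm,"nonzeros used %g, allocated %g, mallocs during assembly %g\n",
              (double)info.nz_used,(double)info.nz_allocated,(double)info.mallocs);
  return(0);
}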
1801: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1802: {
1803: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1807: switch (op) {
1808: case MAT_NEW_NONZERO_LOCATIONS:
1809: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1810: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1811: case MAT_KEEP_NONZERO_PATTERN:
1812: case MAT_NEW_NONZERO_LOCATION_ERR:
1813: case MAT_USE_INODES:
1814: case MAT_IGNORE_ZERO_ENTRIES:
1815: MatCheckPreallocated(A,1);
1816: MatSetOption(a->A,op,flg);
1817: MatSetOption(a->B,op,flg);
1818: break;
1819: case MAT_ROW_ORIENTED:
1820: a->roworiented = flg;
1821: MatSetOption(a->A,op,flg);
1822: MatSetOption(a->B,op,flg);
1823: break;
1824: case MAT_NEW_DIAGONALS:
1825: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1826: break;
1827: case MAT_IGNORE_OFF_PROC_ENTRIES:
1828: a->donotstash = flg;
1829: break;
1830: case MAT_SPD:
1831: A->spd_set = PETSC_TRUE;
1832: A->spd = flg;
1833: if (flg) {
1834: A->symmetric = PETSC_TRUE;
1835: A->structurally_symmetric = PETSC_TRUE;
1836: A->symmetric_set = PETSC_TRUE;
1837: A->structurally_symmetric_set = PETSC_TRUE;
1838: }
1839: break;
1840: case MAT_SYMMETRIC:
1841: MatSetOption(a->A,op,flg);
1842: break;
1843: case MAT_STRUCTURALLY_SYMMETRIC:
1844: MatSetOption(a->A,op,flg);
1845: break;
1846: case MAT_HERMITIAN:
1847: MatSetOption(a->A,op,flg);
1848: break;
1849: case MAT_SYMMETRY_ETERNAL:
1850: MatSetOption(a->A,op,flg);
1851: break;
1852: default:
1853: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1854: }
1855: return(0);
1856: }
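/*
   Illustrative sketch, not part of mpiaij.c: a few of the options handled by
   MatSetOption_MPIAIJ() above, as a caller would set them before the assembly they are
   meant to influence.  Sketch_SetOptions is a made-up name; error checking is omitted to
   match this listing.
*/
#include <petscmat.h>
static PetscErrorCode Sketch_SetOptions(Mat A)
{
  MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);  /* sets a->donotstash, so off-process entries are dropped */
  MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);     /* forwarded to both a->A and a->B */
  MatSetOption(A,MAT_SPD,PETSC_TRUE);                      /* also marks A symmetric, as coded above */
  return(0);
}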
1860: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1861: {
1862: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1863: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1865: PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1866: PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1867: PetscInt *cmap,*idx_p;
1870: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1871: mat->getrowactive = PETSC_TRUE;
1873: if (!mat->rowvalues && (idx || v)) {
1874: /*
1875: allocate enough space to hold information from the longest row.
1876: */
1877: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1878: PetscInt max = 1,tmp;
1879: for (i=0; i<matin->rmap->n; i++) {
1880: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1881: if (max < tmp) { max = tmp; }
1882: }
1883: PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);
1884: }
1886: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1887: lrow = row - rstart;
1889: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1890: if (!v) {pvA = 0; pvB = 0;}
1891: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1892: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1893: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1894: nztot = nzA + nzB;
1896: cmap = mat->garray;
1897: if (v || idx) {
1898: if (nztot) {
1899: /* Sort by increasing column numbers, assuming A and B already sorted */
1900: PetscInt imark = -1;
1901: if (v) {
1902: *v = v_p = mat->rowvalues;
1903: for (i=0; i<nzB; i++) {
1904: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1905: else break;
1906: }
1907: imark = i;
1908: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1909: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1910: }
1911: if (idx) {
1912: *idx = idx_p = mat->rowindices;
1913: if (imark > -1) {
1914: for (i=0; i<imark; i++) {
1915: idx_p[i] = cmap[cworkB[i]];
1916: }
1917: } else {
1918: for (i=0; i<nzB; i++) {
1919: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1920: else break;
1921: }
1922: imark = i;
1923: }
1924: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1925: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1926: }
1927: } else {
1928: if (idx) *idx = 0;
1929: if (v) *v = 0;
1930: }
1931: }
1932: *nz = nztot;
1933: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1934: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1935: return(0);
1936: }
1940: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1941: {
1942: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1945: if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1946: aij->getrowactive = PETSC_FALSE;
1947: return(0);
1948: }
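/*
   Illustrative sketch, not part of mpiaij.c: the MatGetRow()/MatRestoreRow() pairing that the
   two routines above support, restricted (as enforced above) to locally owned rows, one row
   at a time.  Sketch_ScanRows is a made-up name; error checking is omitted to match this
   listing.
*/
#include <petscmat.h>
static PetscErrorCode Sketch_ScanRows(Mat A)
{
  PetscInt          row,rstart,rend,ncols,j;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscReal         rowsum = 0.0;
  MatGetOwnershipRange(A,&rstart,&rend);
  for (row=rstart; row<rend; row++) {
    MatGetRow(A,row,&ncols,&cols,&vals);
    for (j=0; j<ncols; j++) rowsum += PetscAbsScalar(vals[j]);  /* cols[] are global column indices */
    MatRestoreRow(A,row,&ncols,&cols,&vals);
  }
  return(0);
}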
1952: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1953: {
1954: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1955: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1957: PetscInt i,j,cstart = mat->cmap->rstart;
1958: PetscReal sum = 0.0;
1959: MatScalar *v;
1962: if (aij->size == 1) {
1963: MatNorm(aij->A,type,norm);
1964: } else {
1965: if (type == NORM_FROBENIUS) {
1966: v = amat->a;
1967: for (i=0; i<amat->nz; i++) {
1968: #if defined(PETSC_USE_COMPLEX)
1969: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1970: #else
1971: sum += (*v)*(*v); v++;
1972: #endif
1973: }
1974: v = bmat->a;
1975: for (i=0; i<bmat->nz; i++) {
1976: #if defined(PETSC_USE_COMPLEX)
1977: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1978: #else
1979: sum += (*v)*(*v); v++;
1980: #endif
1981: }
1982: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);
1983: *norm = PetscSqrtReal(*norm);
1984: } else if (type == NORM_1) { /* max column norm */
1985: PetscReal *tmp,*tmp2;
1986: PetscInt *jj,*garray = aij->garray;
1987: PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);
1988: PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);
1989: PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));
1990: *norm = 0.0;
1991: v = amat->a; jj = amat->j;
1992: for (j=0; j<amat->nz; j++) {
1993: tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
1994: }
1995: v = bmat->a; jj = bmat->j;
1996: for (j=0; j<bmat->nz; j++) {
1997: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1998: }
1999: MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);
2000: for (j=0; j<mat->cmap->N; j++) {
2001: if (tmp2[j] > *norm) *norm = tmp2[j];
2002: }
2003: PetscFree(tmp);
2004: PetscFree(tmp2);
2005: } else if (type == NORM_INFINITY) { /* max row norm */
2006: PetscReal ntemp = 0.0;
2007: for (j=0; j<aij->A->rmap->n; j++) {
2008: v = amat->a + amat->i[j];
2009: sum = 0.0;
2010: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2011: sum += PetscAbsScalar(*v); v++;
2012: }
2013: v = bmat->a + bmat->i[j];
2014: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2015: sum += PetscAbsScalar(*v); v++;
2016: }
2017: if (sum > ntemp) ntemp = sum;
2018: }
2019: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);
2020: } else {
2021: SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2022: }
2023: }
2024: return(0);
2025: }
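/*
   Illustrative sketch, not part of mpiaij.c: the norms supported by MatNorm_MPIAIJ() above;
   NORM_2 is rejected with PETSC_ERR_SUP.  Sketch_Norms is a made-up name; error checking is
   omitted to match this listing.
*/
#include <petscmat.h>
static PetscErrorCode Sketch_Norms(Mat A)
{
  PetscReal nrm1,nrmf,nrminf;
  MatNorm(A,NORM_1,&nrm1);          /* max column sum */
  MatNorm(A,NORM_FROBENIUS,&nrmf);
  MatNorm(A,NORM_INFINITY,&nrminf); /* max row sum */
  return(0);
}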
2029: PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2030: {
2031: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2032: Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2034: PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2035: PetscInt cstart=A->cmap->rstart,ncol;
2036: Mat B;
2037: MatScalar *array;
2040: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2042: ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2043: ai = Aloc->i; aj = Aloc->j;
2044: bi = Bloc->i; bj = Bloc->j;
2045: if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2046: /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2047: PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);
2048: PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));
2049: for (i=0; i<ai[ma]; i++){
2050: d_nnz[aj[i]] ++;
2051: aj[i] += cstart; /* global col index to be used by MatSetValues() */
2052: }
2054: MatCreate(((PetscObject)A)->comm,&B);
2055: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
2056: MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);
2057: MatSetType(B,((PetscObject)A)->type_name);
2058: MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);
2059: MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
2060: PetscFree(d_nnz);
2061: } else {
2062: B = *matout;
2063: MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2064: for (i=0; i<ai[ma]; i++){
2065: aj[i] += cstart; /* global col index to be used by MatSetValues() */
2066: }
2067: }
2069: /* copy over the A part */
2070: array = Aloc->a;
2071: row = A->rmap->rstart;
2072: for (i=0; i<ma; i++) {
2073: ncol = ai[i+1]-ai[i];
2074: MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);
2075: row++; array += ncol; aj += ncol;
2076: }
2077: aj = Aloc->j;
2078: for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col index */
2080: /* copy over the B part */
2081: PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);
2082: PetscMemzero(cols,bi[mb]*sizeof(PetscInt));
2083: array = Bloc->a;
2084: row = A->rmap->rstart;
2085: for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2086: cols_tmp = cols;
2087: for (i=0; i<mb; i++) {
2088: ncol = bi[i+1]-bi[i];
2089: MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);
2090: row++; array += ncol; cols_tmp += ncol;
2091: }
2092: PetscFree(cols);
2093:
2094: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2095: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2096: if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2097: *matout = B;
2098: } else {
2099: MatHeaderMerge(A,B);
2100: }
2101: return(0);
2102: }
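/*
   Illustrative sketch, not part of mpiaij.c: the two calling modes handled by
   MatTranspose_MPIAIJ() above.  Sketch_Transpose is a made-up name; A is assumed square for
   the in-place call; error checking is omitted to match this listing.
*/
#include <petscmat.h>
static PetscErrorCode Sketch_Transpose(Mat A)
{
  Mat At;
  MatTranspose(A,MAT_INITIAL_MATRIX,&At);  /* new matrix, preallocated from d_nnz as above */
  MatDestroy(&At);
  MatTranspose(A,MAT_REUSE_MATRIX,&A);     /* in-place; only legal for square A, per the check above */
  return(0);
}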
2106: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2107: {
2108: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2109: Mat a = aij->A,b = aij->B;
2111: PetscInt s1,s2,s3;
2114: MatGetLocalSize(mat,&s2,&s3);
2115: if (rr) {
2116: VecGetLocalSize(rr,&s1);
2117: if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2118: /* Overlap communication with computation. */
2119: VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2120: }
2121: if (ll) {
2122: VecGetLocalSize(ll,&s1);
2123: if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2124: (*b->ops->diagonalscale)(b,ll,0);
2125: }
2126: /* scale the diagonal block */
2127: (*a->ops->diagonalscale)(a,ll,rr);
2129: if (rr) {
2130: /* Do a scatter end and then right scale the off-diagonal block */
2131: VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2132: (*b->ops->diagonalscale)(b,0,aij->lvec);
2133: }
2134:
2135: return(0);
2136: }
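/*
   Illustrative sketch, not part of mpiaij.c: row/column scaling through MatDiagonalScale(),
   which the routine above implements by overlapping the scatter of the right vector with the
   scaling of the diagonal block.  Sketch_Scale is a made-up name; error checking is omitted
   to match this listing.
*/
#include <petscmat.h>
static PetscErrorCode Sketch_Scale(Mat A)
{
  Vec l,r;
  MatGetVecs(A,&r,&l);      /* r conforms to the columns, l to the rows */
  VecSet(l,2.0);
  VecSet(r,0.5);
  MatDiagonalScale(A,l,r);  /* A <- diag(l) * A * diag(r) */
  VecDestroy(&l);
  VecDestroy(&r);
  return(0);
}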
2140: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2141: {
2142: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2146: MatSetUnfactored(a->A);
2147: return(0);
2148: }
2152: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
2153: {
2154: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2155: Mat a,b,c,d;
2156: PetscBool flg;
2160: a = matA->A; b = matA->B;
2161: c = matB->A; d = matB->B;
2163: MatEqual(a,c,&flg);
2164: if (flg) {
2165: MatEqual(b,d,&flg);
2166: }
2167: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);
2168: return(0);
2169: }
2173: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2174: {
2176: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2177: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2180: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2181: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2182: /* because of the column compression in the off-process part of the matrix a->B,
2183: the number of columns in a->B and b->B may be different, hence we cannot call
2184: MatCopy() directly on the two parts. If need be, a copy more efficient than
2185: MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2186: then copying the submatrices */
2187: MatCopy_Basic(A,B,str);
2188: } else {
2189: MatCopy(a->A,b->A,str);
2190: MatCopy(a->B,b->B,str);
2191: }
2192: return(0);
2193: }
2197: PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2198: {
2202: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
2203: return(0);
2204: }
2208: /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2209: static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2210: {
2211: PetscInt i,m=Y->rmap->N;
2212: Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data;
2213: Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data;
2214: const PetscInt *xi = x->i,*yi = y->i;
2217: /* Set the number of nonzeros in the new matrix */
2218: for(i=0; i<m; i++) {
2219: PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2220: const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2221: nnz[i] = 0;
2222: for (j=0,k=0; j<nzx; j++) { /* Point in X */
2223: for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2224: if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */
2225: nnz[i]++;
2226: }
2227: for (; k<nzy; k++) nnz[i]++;
2228: }
2229: return(0);
2230: }
2234: PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2235: {
2237: PetscInt i;
2238: Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2239: PetscBLASInt bnz,one=1;
2240: Mat_SeqAIJ *x,*y;
2243: if (str == SAME_NONZERO_PATTERN) {
2244: PetscScalar alpha = a;
2245: x = (Mat_SeqAIJ *)xx->A->data;
2246: y = (Mat_SeqAIJ *)yy->A->data;
2247: bnz = PetscBLASIntCast(x->nz);
2248: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2249: x = (Mat_SeqAIJ *)xx->B->data;
2250: y = (Mat_SeqAIJ *)yy->B->data;
2251: bnz = PetscBLASIntCast(x->nz);
2252: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2253: } else if (str == SUBSET_NONZERO_PATTERN) {
2254: MatAXPY_SeqAIJ(yy->A,a,xx->A,str);
2256: x = (Mat_SeqAIJ *)xx->B->data;
2257: y = (Mat_SeqAIJ *)yy->B->data;
2258: if (y->xtoy && y->XtoY != xx->B) {
2259: PetscFree(y->xtoy);
2260: MatDestroy(&y->XtoY);
2261: }
2262: if (!y->xtoy) { /* get xtoy */
2263: MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
2264: y->XtoY = xx->B;
2265: PetscObjectReference((PetscObject)xx->B);
2266: }
2267: for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2268: } else {
2269: Mat B;
2270: PetscInt *nnz_d,*nnz_o;
2271: PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);
2272: PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);
2273: MatCreate(((PetscObject)Y)->comm,&B);
2274: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2275: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2276: MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);
2277: MatSetType(B,MATMPIAIJ);
2278: MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);
2279: MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);
2280: MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);
2281: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2282: MatHeaderReplace(Y,B);
2283: PetscFree(nnz_d);
2284: PetscFree(nnz_o);
2285: }
2286: return(0);
2287: }
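/*
   Illustrative sketch, not part of mpiaij.c: the three MatStructure branches taken by
   MatAXPY_MPIAIJ() above.  Sketch_AXPY is a made-up name; in practice only the call matching
   the actual relationship between the patterns of X and Y would be issued; error checking is
   omitted to match this listing.
*/
#include <petscmat.h>
static PetscErrorCode Sketch_AXPY(Mat Y,Mat X)
{
  MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);      /* straight BLAS axpy on the stored values */
  MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);    /* X's pattern contained in Y's; uses the xtoy map */
  MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN); /* general case; Y is rebuilt with new preallocation */
  return(0);
}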
2289: extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2293: PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2294: {
2295: #if defined(PETSC_USE_COMPLEX)
2297: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2300: MatConjugate_SeqAIJ(aij->A);
2301: MatConjugate_SeqAIJ(aij->B);
2302: #else
2304: #endif
2305: return(0);
2306: }
2310: PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2311: {
2312: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2316: MatRealPart(a->A);
2317: MatRealPart(a->B);
2318: return(0);
2319: }
2323: PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2324: {
2325: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2329: MatImaginaryPart(a->A);
2330: MatImaginaryPart(a->B);
2331: return(0);
2332: }
2334: #ifdef PETSC_HAVE_PBGL
2336: #include <boost/parallel/mpi/bsp_process_group.hpp>
2337: #include <boost/graph/distributed/ilu_default_graph.hpp>
2338: #include <boost/graph/distributed/ilu_0_block.hpp>
2339: #include <boost/graph/distributed/ilu_preconditioner.hpp>
2340: #include <boost/graph/distributed/petsc/interface.hpp>
2341: #include <boost/multi_array.hpp>
2342: #include <boost/parallel/distributed_property_map.hpp>
2346: /*
2347: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2348: */
2349: PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2350: {
2351: namespace petsc = boost::distributed::petsc;
2352:
2353: namespace graph_dist = boost::graph::distributed;
2354: using boost::graph::distributed::ilu_default::process_group_type;
2355: using boost::graph::ilu_permuted;
2357: PetscBool row_identity, col_identity;
2358: PetscContainer c;
2359: PetscInt m, n, M, N;
2360: PetscErrorCode ierr;
2363: if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2364: ISIdentity(isrow, &row_identity);
2365: ISIdentity(iscol, &col_identity);
2366: if (!row_identity || !col_identity) {
2367: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2368: }
2370: process_group_type pg;
2371: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2372: lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2373: lgraph_type& level_graph = *lgraph_p;
2374: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2376: petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2377: ilu_permuted(level_graph);
2379: /* put together the new matrix */
2380: MatCreate(((PetscObject)A)->comm, fact);
2381: MatGetLocalSize(A, &m, &n);
2382: MatGetSize(A, &M, &N);
2383: MatSetSizes(fact, m, n, M, N);
2384: MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);
2385: MatSetType(fact, ((PetscObject)A)->type_name);
2386: MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);
2387: MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);
2389: PetscContainerCreate(((PetscObject)A)->comm, &c);
2390: PetscContainerSetPointer(c, lgraph_p);
2391: PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2392: PetscContainerDestroy(&c);
2393: return(0);
2394: }
2398: PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2399: {
2401: return(0);
2402: }
2406: /*
2407: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2408: */
2409: PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2410: {
2411: namespace graph_dist = boost::graph::distributed;
2413: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2414: lgraph_type* lgraph_p;
2415: PetscContainer c;
2419: PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);
2420: PetscContainerGetPointer(c, (void **) &lgraph_p);
2421: VecCopy(b, x);
2423: PetscScalar* array_x;
2424: VecGetArray(x, &array_x);
2425: PetscInt sx;
2426: VecGetSize(x, &sx);
2427:
2428: PetscScalar* array_b;
2429: VecGetArray(b, &array_b);
2430: PetscInt sb;
2431: VecGetSize(b, &sb);
2433: lgraph_type& level_graph = *lgraph_p;
2434: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2436: typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2437: array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
2438: ref_x(array_x, boost::extents[num_vertices(graph)]);
2440: typedef boost::iterator_property_map<array_ref_type::iterator,
2441: boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2442: gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2443: vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2444:
2445: ilu_set_solve(*lgraph_p, vector_b, vector_x);
2447: return(0);
2448: }
2449: #endif
2451: typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2452: PetscInt nzlocal,nsends,nrecvs;
2453: PetscMPIInt *send_rank,*recv_rank;
2454: PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2455: PetscScalar *sbuf_a,**rbuf_a;
2456: PetscErrorCode (*Destroy)(Mat);
2457: } Mat_Redundant;
2461: PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2462: {
2463: PetscErrorCode ierr;
2464: Mat_Redundant *redund=(Mat_Redundant*)ptr;
2465: PetscInt i;
2468: PetscFree2(redund->send_rank,redund->recv_rank);
2469: PetscFree(redund->sbuf_j);
2470: PetscFree(redund->sbuf_a);
2471: for (i=0; i<redund->nrecvs; i++){
2472: PetscFree(redund->rbuf_j[i]);
2473: PetscFree(redund->rbuf_a[i]);
2474: }
2475: PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);
2476: PetscFree(redund);
2477: return(0);
2478: }
2482: PetscErrorCode MatDestroy_MatRedundant(Mat A)
2483: {
2484: PetscErrorCode ierr;
2485: PetscContainer container;
2486: Mat_Redundant *redund=PETSC_NULL;
2489: PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);
2490: if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2491: PetscContainerGetPointer(container,(void **)&redund);
2492: A->ops->destroy = redund->Destroy;
2493: PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);
2494: if (A->ops->destroy) {
2495: (*A->ops->destroy)(A);
2496: }
2497: return(0);
2498: }
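/*
   Illustrative sketch, not part of mpiaij.c: the PetscContainer compose/query pattern used
   above to hang the Mat_Redundant struct off the redundant matrix, applied to a generic user
   struct.  MyCtx, MyCtxDestroy and the key "MyCtx" are made-up names; error checking is
   omitted to match this listing.
*/
#include <petscmat.h>
typedef struct { PetscInt nlevels; } MyCtx;

static PetscErrorCode MyCtxDestroy(void *ptr)
{
  PetscFree(ptr);
  return(0);
}

static PetscErrorCode Sketch_AttachAndQuery(Mat A)
{
  MyCtx          *ctx;
  PetscContainer container;
  /* attach */
  PetscNew(MyCtx,&ctx);
  ctx->nlevels = 3;
  PetscContainerCreate(PETSC_COMM_SELF,&container);
  PetscContainerSetPointer(container,ctx);
  PetscContainerSetUserDestroy(container,MyCtxDestroy);
  PetscObjectCompose((PetscObject)A,"MyCtx",(PetscObject)container);
  PetscContainerDestroy(&container);   /* the matrix now holds the only reference */
  /* query it back, as MatDestroy_MatRedundant() does for "Mat_Redundant" */
  PetscObjectQuery((PetscObject)A,"MyCtx",(PetscObject*)&container);
  PetscContainerGetPointer(container,(void**)&ctx);
  return(0);
}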
2502: PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2503: {
2504: PetscMPIInt rank,size;
2505: MPI_Comm comm=((PetscObject)mat)->comm;
2507: PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
2508: PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2509: PetscInt *rowrange=mat->rmap->range;
2510: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2511: Mat A=aij->A,B=aij->B,C=*matredundant;
2512: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2513: PetscScalar *sbuf_a;
2514: PetscInt nzlocal=a->nz+b->nz;
2515: PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2516: PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2517: PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2518: MatScalar *aworkA,*aworkB;
2519: PetscScalar *vals;
2520: PetscMPIInt tag1,tag2,tag3,imdex;
2521: MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2522: *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2523: MPI_Status recv_status,*send_status;
2524: PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2525: PetscInt **rbuf_j=PETSC_NULL;
2526: PetscScalar **rbuf_a=PETSC_NULL;
2527: Mat_Redundant *redund=PETSC_NULL;
2528: PetscContainer container;
2531: MPI_Comm_rank(comm,&rank);
2532: MPI_Comm_size(comm,&size);
2534: if (reuse == MAT_REUSE_MATRIX) {
2535: MatGetSize(C,&M,&N);
2536: if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2537: MatGetLocalSize(C,&M,&N);
2538: if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2539: PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);
2540: if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2541: PetscContainerGetPointer(container,(void **)&redund);
2542: if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2544: nsends = redund->nsends;
2545: nrecvs = redund->nrecvs;
2546: send_rank = redund->send_rank;
2547: recv_rank = redund->recv_rank;
2548: sbuf_nz = redund->sbuf_nz;
2549: rbuf_nz = redund->rbuf_nz;
2550: sbuf_j = redund->sbuf_j;
2551: sbuf_a = redund->sbuf_a;
2552: rbuf_j = redund->rbuf_j;
2553: rbuf_a = redund->rbuf_a;
2554: }
2556: if (reuse == MAT_INITIAL_MATRIX){
2557: PetscMPIInt subrank,subsize;
2558: PetscInt nleftover,np_subcomm;
2559: /* determine the destination ranks send_rank, and the counts nsends and nrecvs */
2560: MPI_Comm_rank(subcomm,&subrank);
2561: MPI_Comm_size(subcomm,&subsize);
2562: PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2563: np_subcomm = size/nsubcomm;
2564: nleftover = size - nsubcomm*np_subcomm;
2565: nsends = 0; nrecvs = 0;
2566: for (i=0; i<size; i++){ /* i=rank*/
2567: if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2568: send_rank[nsends] = i; nsends++;
2569: recv_rank[nrecvs++] = i;
2570: }
2571: }
2572: if (rank >= size - nleftover){/* this proc is a leftover processor */
2573: i = size-nleftover-1;
2574: j = 0;
2575: while (j < nsubcomm - nleftover){
2576: send_rank[nsends++] = i;
2577: i--; j++;
2578: }
2579: }
2581: if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2582: for (i=0; i<nleftover; i++){
2583: recv_rank[nrecvs++] = size-nleftover+i;
2584: }
2585: }
2587: /* allocate sbuf_j, sbuf_a */
2588: i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2589: PetscMalloc(i*sizeof(PetscInt),&sbuf_j);
2590: PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);
2591: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2593: /* copy mat's local entries into the buffers */
2594: if (reuse == MAT_INITIAL_MATRIX){
2595: rownz_max = 0;
2596: rptr = sbuf_j;
2597: cols = sbuf_j + rend-rstart + 1;
2598: vals = sbuf_a;
2599: rptr[0] = 0;
2600: for (i=0; i<rend-rstart; i++){
2601: row = i + rstart;
2602: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2603: ncols = nzA + nzB;
2604: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2605: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2606: /* load the column indices for this row into cols */
2607: lwrite = 0;
2608: for (l=0; l<nzB; l++) {
2609: if ((ctmp = bmap[cworkB[l]]) < cstart){
2610: vals[lwrite] = aworkB[l];
2611: cols[lwrite++] = ctmp;
2612: }
2613: }
2614: for (l=0; l<nzA; l++){
2615: vals[lwrite] = aworkA[l];
2616: cols[lwrite++] = cstart + cworkA[l];
2617: }
2618: for (l=0; l<nzB; l++) {
2619: if ((ctmp = bmap[cworkB[l]]) >= cend){
2620: vals[lwrite] = aworkB[l];
2621: cols[lwrite++] = ctmp;
2622: }
2623: }
2624: vals += ncols;
2625: cols += ncols;
2626: rptr[i+1] = rptr[i] + ncols;
2627: if (rownz_max < ncols) rownz_max = ncols;
2628: }
2629: if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2630: } else { /* only copy matrix values into sbuf_a */
2631: rptr = sbuf_j;
2632: vals = sbuf_a;
2633: rptr[0] = 0;
2634: for (i=0; i<rend-rstart; i++){
2635: row = i + rstart;
2636: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2637: ncols = nzA + nzB;
2638: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2639: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2640: lwrite = 0;
2641: for (l=0; l<nzB; l++) {
2642: if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2643: }
2644: for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2645: for (l=0; l<nzB; l++) {
2646: if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2647: }
2648: vals += ncols;
2649: rptr[i+1] = rptr[i] + ncols;
2650: }
2651: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2653: /* send nzlocal to others, and recv other's nzlocal */
2654: /*--------------------------------------------------*/
2655: if (reuse == MAT_INITIAL_MATRIX){
2656: PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2657: s_waits2 = s_waits3 + nsends;
2658: s_waits1 = s_waits2 + nsends;
2659: r_waits1 = s_waits1 + nsends;
2660: r_waits2 = r_waits1 + nrecvs;
2661: r_waits3 = r_waits2 + nrecvs;
2662: } else {
2663: PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2664: r_waits3 = s_waits3 + nsends;
2665: }
2667: PetscObjectGetNewTag((PetscObject)mat,&tag3);
2668: if (reuse == MAT_INITIAL_MATRIX){
2669: /* get new tags to keep the communication clean */
2670: PetscObjectGetNewTag((PetscObject)mat,&tag1);
2671: PetscObjectGetNewTag((PetscObject)mat,&tag2);
2672: PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);
2674: /* post receives of other's nzlocal */
2675: for (i=0; i<nrecvs; i++){
2676: MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);
2677: }
2678: /* send nzlocal to others */
2679: for (i=0; i<nsends; i++){
2680: sbuf_nz[i] = nzlocal;
2681: MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);
2682: }
2683: /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2684: count = nrecvs;
2685: while (count) {
2686: MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);
2687: recv_rank[imdex] = recv_status.MPI_SOURCE;
2688: /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2689: PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);
2691: i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2692: rbuf_nz[imdex] += i + 2;
2693: PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);
2694: MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);
2695: count--;
2696: }
2697: /* wait on sends of nzlocal */
2698: if (nsends) {MPI_Waitall(nsends,s_waits1,send_status);}
2699: /* send mat->i,j to others, and recv from other's */
2700: /*------------------------------------------------*/
2701: for (i=0; i<nsends; i++){
2702: j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2703: MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);
2704: }
2705: /* wait on receives of mat->i,j */
2706: /*------------------------------*/
2707: count = nrecvs;
2708: while (count) {
2709: MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);
2710: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2711: count--;
2712: }
2713: /* wait on sends of mat->i,j */
2714: /*---------------------------*/
2715: if (nsends) {
2716: MPI_Waitall(nsends,s_waits2,send_status);
2717: }
2718: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2720: /* post receives, send and receive mat->a */
2721: /*----------------------------------------*/
2722: for (imdex=0; imdex<nrecvs; imdex++) {
2723: MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);
2724: }
2725: for (i=0; i<nsends; i++){
2726: MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);
2727: }
2728: count = nrecvs;
2729: while (count) {
2730: MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);
2731: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2732: count--;
2733: }
2734: if (nsends) {
2735: MPI_Waitall(nsends,s_waits3,send_status);
2736: }
2738: PetscFree2(s_waits3,send_status);
2740: /* create redundant matrix */
2741: /*-------------------------*/
2742: if (reuse == MAT_INITIAL_MATRIX){
2743: /* compute rownz_max for preallocation */
2744: for (imdex=0; imdex<nrecvs; imdex++){
2745: j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2746: rptr = rbuf_j[imdex];
2747: for (i=0; i<j; i++){
2748: ncols = rptr[i+1] - rptr[i];
2749: if (rownz_max < ncols) rownz_max = ncols;
2750: }
2751: }
2753: MatCreate(subcomm,&C);
2754: MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);
2755: MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);
2756: MatSetFromOptions(C);
2757: MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);
2758: MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);
2759: } else {
2760: C = *matredundant;
2761: }
2763: /* insert local matrix entries */
2764: rptr = sbuf_j;
2765: cols = sbuf_j + rend-rstart + 1;
2766: vals = sbuf_a;
2767: for (i=0; i<rend-rstart; i++){
2768: row = i + rstart;
2769: ncols = rptr[i+1] - rptr[i];
2770: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2771: vals += ncols;
2772: cols += ncols;
2773: }
2774: /* insert received matrix entries */
2775: for (imdex=0; imdex<nrecvs; imdex++){
2776: rstart = rowrange[recv_rank[imdex]];
2777: rend = rowrange[recv_rank[imdex]+1];
2778: rptr = rbuf_j[imdex];
2779: cols = rbuf_j[imdex] + rend-rstart + 1;
2780: vals = rbuf_a[imdex];
2781: for (i=0; i<rend-rstart; i++){
2782: row = i + rstart;
2783: ncols = rptr[i+1] - rptr[i];
2784: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2785: vals += ncols;
2786: cols += ncols;
2787: }
2788: }
2789: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2790: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2791: MatGetSize(C,&M,&N);
2792: if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2793: if (reuse == MAT_INITIAL_MATRIX) {
2794: PetscContainer container;
2795: *matredundant = C;
2796: /* create a supporting struct and attach it to C for reuse */
2797: PetscNewLog(C,Mat_Redundant,&redund);
2798: PetscContainerCreate(PETSC_COMM_SELF,&container);
2799: PetscContainerSetPointer(container,redund);
2800: PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);
2801: PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);
2802: PetscContainerDestroy(&container);
2804: redund->nzlocal = nzlocal;
2805: redund->nsends = nsends;
2806: redund->nrecvs = nrecvs;
2807: redund->send_rank = send_rank;
2808: redund->recv_rank = recv_rank;
2809: redund->sbuf_nz = sbuf_nz;
2810: redund->rbuf_nz = rbuf_nz;
2811: redund->sbuf_j = sbuf_j;
2812: redund->sbuf_a = sbuf_a;
2813: redund->rbuf_j = rbuf_j;
2814: redund->rbuf_a = rbuf_a;
2816: redund->Destroy = C->ops->destroy;
2817: C->ops->destroy = MatDestroy_MatRedundant;
2818: }
2819: return(0);
2820: }
2824: PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2825: {
2826: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2828: PetscInt i,*idxb = 0;
2829: PetscScalar *va,*vb;
2830: Vec vtmp;
2833: MatGetRowMaxAbs(a->A,v,idx);
2834: VecGetArray(v,&va);
2835: if (idx) {
2836: for (i=0; i<A->rmap->n; i++) {
2837: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2838: }
2839: }
2841: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2842: if (idx) {
2843: PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);
2844: }
2845: MatGetRowMaxAbs(a->B,vtmp,idxb);
2846: VecGetArray(vtmp,&vb);
2848: for (i=0; i<A->rmap->n; i++){
2849: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2850: va[i] = vb[i];
2851: if (idx) idx[i] = a->garray[idxb[i]];
2852: }
2853: }
2855: VecRestoreArray(v,&va);
2856: VecRestoreArray(vtmp,&vb);
2857: PetscFree(idxb);
2858: VecDestroy(&vtmp);
2859: return(0);
2860: }
2864: PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2865: {
2866: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2868: PetscInt i,*idxb = 0;
2869: PetscScalar *va,*vb;
2870: Vec vtmp;
2873: MatGetRowMinAbs(a->A,v,idx);
2874: VecGetArray(v,&va);
2875: if (idx) {
2876: for (i=0; i<A->cmap->n; i++) {
2877: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2878: }
2879: }
2881: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2882: if (idx) {
2883: PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);
2884: }
2885: MatGetRowMinAbs(a->B,vtmp,idxb);
2886: VecGetArray(vtmp,&vb);
2888: for (i=0; i<A->rmap->n; i++){
2889: if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2890: va[i] = vb[i];
2891: if (idx) idx[i] = a->garray[idxb[i]];
2892: }
2893: }
2895: VecRestoreArray(v,&va);
2896: VecRestoreArray(vtmp,&vb);
2897: PetscFree(idxb);
2898: VecDestroy(&vtmp);
2899: return(0);
2900: }
2904: PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2905: {
2906: Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2907: PetscInt n = A->rmap->n;
2908: PetscInt cstart = A->cmap->rstart;
2909: PetscInt *cmap = mat->garray;
2910: PetscInt *diagIdx, *offdiagIdx;
2911: Vec diagV, offdiagV;
2912: PetscScalar *a, *diagA, *offdiagA;
2913: PetscInt r;
2917: PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);
2918: VecCreateSeq(((PetscObject)A)->comm, n, &diagV);
2919: VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);
2920: MatGetRowMin(mat->A, diagV, diagIdx);
2921: MatGetRowMin(mat->B, offdiagV, offdiagIdx);
2922: VecGetArray(v, &a);
2923: VecGetArray(diagV, &diagA);
2924: VecGetArray(offdiagV, &offdiagA);
2925: for(r = 0; r < n; ++r) {
2926: if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2927: a[r] = diagA[r];
2928: idx[r] = cstart + diagIdx[r];
2929: } else {
2930: a[r] = offdiagA[r];
2931: idx[r] = cmap[offdiagIdx[r]];
2932: }
2933: }
2934: VecRestoreArray(v, &a);
2935: VecRestoreArray(diagV, &diagA);
2936: VecRestoreArray(offdiagV, &offdiagA);
2937: VecDestroy(&diagV);
2938: VecDestroy(&offdiagV);
2939: PetscFree2(diagIdx, offdiagIdx);
2940: return(0);
2941: }
2945: PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2946: {
2947: Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2948: PetscInt n = A->rmap->n;
2949: PetscInt cstart = A->cmap->rstart;
2950: PetscInt *cmap = mat->garray;
2951: PetscInt *diagIdx, *offdiagIdx;
2952: Vec diagV, offdiagV;
2953: PetscScalar *a, *diagA, *offdiagA;
2954: PetscInt r;
2958: PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);
2959: VecCreateSeq(PETSC_COMM_SELF, n, &diagV);
2960: VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);
2961: MatGetRowMax(mat->A, diagV, diagIdx);
2962: MatGetRowMax(mat->B, offdiagV, offdiagIdx);
2963: VecGetArray(v, &a);
2964: VecGetArray(diagV, &diagA);
2965: VecGetArray(offdiagV, &offdiagA);
2966: for(r = 0; r < n; ++r) {
2967: if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2968: a[r] = diagA[r];
2969: idx[r] = cstart + diagIdx[r];
2970: } else {
2971: a[r] = offdiagA[r];
2972: idx[r] = cmap[offdiagIdx[r]];
2973: }
2974: }
2975: VecRestoreArray(v, &a);
2976: VecRestoreArray(diagV, &diagA);
2977: VecRestoreArray(offdiagV, &offdiagA);
2978: VecDestroy(&diagV);
2979: VecDestroy(&offdiagV);
2980: PetscFree2(diagIdx, offdiagIdx);
2981: return(0);
2982: }
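/*
   Illustrative sketch, not part of mpiaij.c: the row-extrema routines above, which return one
   value per locally owned row plus (optionally) the global column index where it occurs.
   Sketch_RowExtrema is a made-up name; error checking is omitted to match this listing.
*/
#include <petscmat.h>
static PetscErrorCode Sketch_RowExtrema(Mat A)
{
  Vec      rext;
  PetscInt m,*loc;
  MatGetVecs(A,PETSC_NULL,&rext);            /* left-type vector: one entry per local row */
  MatGetLocalSize(A,&m,PETSC_NULL);
  PetscMalloc(m*sizeof(PetscInt),&loc);
  MatGetRowMaxAbs(A,rext,loc);               /* loc[i] = global column of the largest |entry| in row i */
  MatGetRowMin(A,rext,loc);                  /* analogous, smallest entry per row */
  PetscFree(loc);
  VecDestroy(&rext);
  return(0);
}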
2986: PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2987: {
2989: Mat *dummy;
2992: MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);
2993: *newmat = *dummy;
2994: PetscFree(dummy);
2995: return(0);
2996: }
2998: extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3002: PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3003: {
3004: Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
3008: MatInvertBlockDiagonal(a->A,values);
3009: return(0);
3010: }
3013: /* -------------------------------------------------------------------*/
3014: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3015: MatGetRow_MPIAIJ,
3016: MatRestoreRow_MPIAIJ,
3017: MatMult_MPIAIJ,
3018: /* 4*/ MatMultAdd_MPIAIJ,
3019: MatMultTranspose_MPIAIJ,
3020: MatMultTransposeAdd_MPIAIJ,
3021: #ifdef PETSC_HAVE_PBGL
3022: MatSolve_MPIAIJ,
3023: #else
3024: 0,
3025: #endif
3026: 0,
3027: 0,
3028: /*10*/ 0,
3029: 0,
3030: 0,
3031: MatSOR_MPIAIJ,
3032: MatTranspose_MPIAIJ,
3033: /*15*/ MatGetInfo_MPIAIJ,
3034: MatEqual_MPIAIJ,
3035: MatGetDiagonal_MPIAIJ,
3036: MatDiagonalScale_MPIAIJ,
3037: MatNorm_MPIAIJ,
3038: /*20*/ MatAssemblyBegin_MPIAIJ,
3039: MatAssemblyEnd_MPIAIJ,
3040: MatSetOption_MPIAIJ,
3041: MatZeroEntries_MPIAIJ,
3042: /*24*/ MatZeroRows_MPIAIJ,
3043: 0,
3044: #ifdef PETSC_HAVE_PBGL
3045: 0,
3046: #else
3047: 0,
3048: #endif
3049: 0,
3050: 0,
3051: /*29*/ MatSetUp_MPIAIJ,
3052: #ifdef PETSC_HAVE_PBGL
3053: 0,
3054: #else
3055: 0,
3056: #endif
3057: 0,
3058: 0,
3059: 0,
3060: /*34*/ MatDuplicate_MPIAIJ,
3061: 0,
3062: 0,
3063: 0,
3064: 0,
3065: /*39*/ MatAXPY_MPIAIJ,
3066: MatGetSubMatrices_MPIAIJ,
3067: MatIncreaseOverlap_MPIAIJ,
3068: MatGetValues_MPIAIJ,
3069: MatCopy_MPIAIJ,
3070: /*44*/ MatGetRowMax_MPIAIJ,
3071: MatScale_MPIAIJ,
3072: 0,
3073: 0,
3074: MatZeroRowsColumns_MPIAIJ,
3075: /*49*/ 0,
3076: 0,
3077: 0,
3078: 0,
3079: 0,
3080: /*54*/ MatFDColoringCreate_MPIAIJ,
3081: 0,
3082: MatSetUnfactored_MPIAIJ,
3083: 0, /* MatPermute_MPIAIJ, impl currently broken */
3084: 0,
3085: /*59*/ MatGetSubMatrix_MPIAIJ,
3086: MatDestroy_MPIAIJ,
3087: MatView_MPIAIJ,
3088: 0,
3089: 0,
3090: /*64*/ 0,
3091: 0,
3092: 0,
3093: 0,
3094: 0,
3095: /*69*/ MatGetRowMaxAbs_MPIAIJ,
3096: MatGetRowMinAbs_MPIAIJ,
3097: 0,
3098: MatSetColoring_MPIAIJ,
3099: #if defined(PETSC_HAVE_ADIC)
3100: MatSetValuesAdic_MPIAIJ,
3101: #else
3102: 0,
3103: #endif
3104: MatSetValuesAdifor_MPIAIJ,
3105: /*75*/ MatFDColoringApply_AIJ,
3106: 0,
3107: 0,
3108: 0,
3109: MatFindZeroDiagonals_MPIAIJ,
3110: /*80*/ 0,
3111: 0,
3112: 0,
3113: /*83*/ MatLoad_MPIAIJ,
3114: 0,
3115: 0,
3116: 0,
3117: 0,
3118: 0,
3119: /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3120: MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3121: MatMatMultNumeric_MPIAIJ_MPIAIJ,
3122: MatPtAP_Basic,
3123: MatPtAPSymbolic_MPIAIJ,
3124: /*94*/ MatPtAPNumeric_MPIAIJ,
3125: 0,
3126: 0,
3127: 0,
3128: 0,
3129: /*99*/ 0,
3130: MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3131: MatPtAPNumeric_MPIAIJ_MPIAIJ,
3132: MatConjugate_MPIAIJ,
3133: 0,
3134: /*104*/MatSetValuesRow_MPIAIJ,
3135: MatRealPart_MPIAIJ,
3136: MatImaginaryPart_MPIAIJ,
3137: 0,
3138: 0,
3139: /*109*/0,
3140: MatGetRedundantMatrix_MPIAIJ,
3141: MatGetRowMin_MPIAIJ,
3142: 0,
3143: 0,
3144: /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3145: 0,
3146: 0,
3147: 0,
3148: 0,
3149: /*119*/0,
3150: 0,
3151: 0,
3152: 0,
3153: MatGetMultiProcBlock_MPIAIJ,
3154: /*124*/MatFindNonzeroRows_MPIAIJ,
3155: MatGetColumnNorms_MPIAIJ,
3156: MatInvertBlockDiagonal_MPIAIJ,
3157: 0,
3158: MatGetSubMatricesParallel_MPIAIJ,
3159: /*129*/0,
3160: MatTransposeMatMult_MPIAIJ_MPIAIJ,
3161: MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3162: MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3163: 0,
3164: /*134*/0,
3165: 0,
3166: 0,
3167: 0,
3168: 0
3169: };
3171: /* ----------------------------------------------------------------------------------------*/
3173: EXTERN_C_BEGIN
3176: PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
3177: {
3178: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
3182: MatStoreValues(aij->A);
3183: MatStoreValues(aij->B);
3184: return(0);
3185: }
3186: EXTERN_C_END
3188: EXTERN_C_BEGIN
3191: PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
3192: {
3193: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
3197: MatRetrieveValues(aij->A);
3198: MatRetrieveValues(aij->B);
3199: return(0);
3200: }
3201: EXTERN_C_END
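/*
   Illustrative sketch, not part of mpiaij.c: the store/retrieve pair above, which snapshots
   the numerical values of both the diagonal and off-diagonal blocks and restores them later,
   provided the nonzero pattern is frozen first.  Sketch_StoreRetrieve is a made-up name;
   error checking is omitted to match this listing.
*/
#include <petscmat.h>
static PetscErrorCode Sketch_StoreRetrieve(Mat A)
{
  MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);  /* required before MatStoreValues() */
  MatStoreValues(A);
  /* ... overwrite entries, e.g. MatZeroEntries(A) plus MatSetValues() and reassembly ... */
  MatRetrieveValues(A);                                   /* values are back to the stored copy */
  return(0);
}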
3203: EXTERN_C_BEGIN
3206: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3207: {
3208: Mat_MPIAIJ *b;
3210: PetscInt i;
3211: PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3214: if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3215: if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3216: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3217: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3218: if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3219: if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3221: PetscLayoutSetUp(B->rmap);
3222: PetscLayoutSetUp(B->cmap);
3223: if (d_nnz) {
3224: for (i=0; i<B->rmap->n; i++) {
3225: if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3226: }
3227: }
3228: if (o_nnz) {
3229: for (i=0; i<B->rmap->n; i++) {
3230: if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3231: }
3232: }
3233: b = (Mat_MPIAIJ*)B->data;
3235: if (!B->preallocated) {
3236: /* Explicitly create 2 MATSEQAIJ matrices. */
3237: MatCreate(PETSC_COMM_SELF,&b->A);
3238: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
3239: MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);
3240: MatSetType(b->A,MATSEQAIJ);
3241: PetscLogObjectParent(B,b->A);
3242: MatCreate(PETSC_COMM_SELF,&b->B);
3243: MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
3244: MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);
3245: MatSetType(b->B,MATSEQAIJ);
3246: PetscLogObjectParent(B,b->B);
3247: }
3249: MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
3250: MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
3251: /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3252: if (!d_realalloc) {MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);}
3253: if (!o_realalloc) {MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);}
3254: B->preallocated = PETSC_TRUE;
3255: return(0);
3256: }
3257: EXTERN_C_END
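/*
   Illustrative sketch, not part of mpiaij.c: a caller-side preallocation of an MPIAIJ matrix
   using the routine above, with per-row counts for the diagonal (d_nnz) and off-diagonal
   (o_nnz) blocks.  Sketch_Preallocate, the local column size m, and the stencil-like counts
   are made up; error checking is omitted to match this listing.
*/
#include <petscmat.h>
static PetscErrorCode Sketch_Preallocate(MPI_Comm comm,PetscInt m,PetscInt M,PetscInt N,Mat *B)
{
  PetscInt i,*d_nnz,*o_nnz;
  MatCreate(comm,B);
  MatSetSizes(*B,m,m,M,N);
  MatSetType(*B,MATMPIAIJ);
  PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);
  for (i=0; i<m; i++) { d_nnz[i] = 3; o_nnz[i] = 2; }  /* e.g. a 1-D stencil estimate */
  MatMPIAIJSetPreallocation(*B,0,d_nnz,0,o_nnz);       /* d_nz/o_nz are ignored when the arrays are given */
  PetscFree2(d_nnz,o_nnz);
  return(0);
}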
3261: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3262: {
3263: Mat mat;
3264: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3268: *newmat = 0;
3269: MatCreate(((PetscObject)matin)->comm,&mat);
3270: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
3271: MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);
3272: MatSetType(mat,((PetscObject)matin)->type_name);
3273: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
3274: a = (Mat_MPIAIJ*)mat->data;
3275:
3276: mat->factortype = matin->factortype;
3277: mat->rmap->bs = matin->rmap->bs;
3278: mat->cmap->bs = matin->cmap->bs;
3279: mat->assembled = PETSC_TRUE;
3280: mat->insertmode = NOT_SET_VALUES;
3281: mat->preallocated = PETSC_TRUE;
3283: a->size = oldmat->size;
3284: a->rank = oldmat->rank;
3285: a->donotstash = oldmat->donotstash;
3286: a->roworiented = oldmat->roworiented;
3287: a->rowindices = 0;
3288: a->rowvalues = 0;
3289: a->getrowactive = PETSC_FALSE;
3291: PetscLayoutReference(matin->rmap,&mat->rmap);
3292: PetscLayoutReference(matin->cmap,&mat->cmap);
3294: if (oldmat->colmap) {
3295: #if defined (PETSC_USE_CTABLE)
3296: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
3297: #else
3298: PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);
3299: PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));
3300: PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));
3301: #endif
3302: } else a->colmap = 0;
3303: if (oldmat->garray) {
3304: PetscInt len;
3305: len = oldmat->B->cmap->n;
3306: PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);
3307: PetscLogObjectMemory(mat,len*sizeof(PetscInt));
3308: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt)); }
3309: } else a->garray = 0;
3310:
3311: VecDuplicate(oldmat->lvec,&a->lvec);
3312: PetscLogObjectParent(mat,a->lvec);
3313: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
3314: PetscLogObjectParent(mat,a->Mvctx);
3315: MatDuplicate(oldmat->A,cpvalues,&a->A);
3316: PetscLogObjectParent(mat,a->A);
3317: MatDuplicate(oldmat->B,cpvalues,&a->B);
3318: PetscLogObjectParent(mat,a->B);
3319: PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
3320: *newmat = mat;
3321: return(0);
3322: }
3328: PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3329: {
3330: PetscScalar *vals,*svals;
3331: MPI_Comm comm = ((PetscObject)viewer)->comm;
3333: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
3334: PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3335: PetscInt header[4],*rowlengths = 0,M,N,m,*cols;
3336: PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3337: PetscInt cend,cstart,n,*rowners,sizesset=1;
3338: int fd;
3341: MPI_Comm_size(comm,&size);
3342: MPI_Comm_rank(comm,&rank);
3343: if (!rank) {
3344: PetscViewerBinaryGetDescriptor(viewer,&fd);
3345: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
3346: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3347: }
3349: if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3351: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
3352: M = header[1]; N = header[2];
3353: /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
3354: if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3355: if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3356:
3357: /* If global sizes are set, check if they are consistent with that given in the file */
3358: if (sizesset) {
3359: MatGetSize(newMat,&grows,&gcols);
3360: }
3361: if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3362: if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3364: /* determine ownership of all rows */
3365: if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3366: else m = newMat->rmap->n; /* Set by user */
3367:
3368: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
3369: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
3371: /* First process needs enough room for process with most rows */
3372: if (!rank) {
3373: mmax = rowners[1];
3374: for(i=2; i<=size; i++) {
3375: mmax = PetscMax(mmax, rowners[i]);
3376: }
3377: }
3379: rowners[0] = 0;
3380: for (i=2; i<=size; i++) {
3381: rowners[i] += rowners[i-1];
3382: }
3383: rstart = rowners[rank];
3384: rend = rowners[rank+1];
3386: /* distribute row lengths to all processors */
3387: PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);
3388: if (!rank) {
3389: PetscBinaryRead(fd,ourlens,m,PETSC_INT);
3390: PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);
3391: PetscMalloc(size*sizeof(PetscInt),&procsnz);
3392: PetscMemzero(procsnz,size*sizeof(PetscInt));
3393: for (j=0; j<m; j++) {
3394: procsnz[0] += ourlens[j];
3395: }
3396: for (i=1; i<size; i++) {
3397: PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);
3398: /* calculate the number of nonzeros on each processor */
3399: for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3400: procsnz[i] += rowlengths[j];
3401: }
3402: MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
3403: }
3404: PetscFree(rowlengths);
3405: } else {
3406: MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);
3407: }
3409: if (!rank) {
3410: /* determine max buffer needed and allocate it */
3411: maxnz = 0;
3412: for (i=0; i<size; i++) {
3413: maxnz = PetscMax(maxnz,procsnz[i]);
3414: }
3415: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
3417: /* read in my part of the matrix column indices */
3418: nz = procsnz[0];
3419: PetscMalloc(nz*sizeof(PetscInt),&mycols);
3420: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
3422: /* read in everyone else's and ship them off */
3423: for (i=1; i<size; i++) {
3424: nz = procsnz[i];
3425: PetscBinaryRead(fd,cols,nz,PETSC_INT);
3426: MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);
3427: }
3428: PetscFree(cols);
3429: } else {
3430: /* determine buffer space needed for message */
3431: nz = 0;
3432: for (i=0; i<m; i++) {
3433: nz += ourlens[i];
3434: }
3435: PetscMalloc(nz*sizeof(PetscInt),&mycols);
3437: /* receive message of column indices*/
3438: MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);
3439: }
3441: /* determine column ownership if matrix is not square */
3442: if (N != M) {
3443: if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3444: else n = newMat->cmap->n;
3445: MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);
3446: cstart = cend - n;
3447: } else {
3448: cstart = rstart;
3449: cend = rend;
3450: n = cend - cstart;
3451: }
3453: /* loop over local rows, determining number of off diagonal entries */
3454: PetscMemzero(offlens,m*sizeof(PetscInt));
3455: jj = 0;
3456: for (i=0; i<m; i++) {
3457: for (j=0; j<ourlens[i]; j++) {
3458: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3459: jj++;
3460: }
3461: }
3463: for (i=0; i<m; i++) {
3464: ourlens[i] -= offlens[i];
3465: }
3466: if (!sizesset) {
3467: MatSetSizes(newMat,m,n,M,N);
3468: }
3469: MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);
3471: for (i=0; i<m; i++) {
3472: ourlens[i] += offlens[i];
3473: }
3475: if (!rank) {
3476: PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);
3478: /* read in my part of the matrix numerical values */
3479: nz = procsnz[0];
3480: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3481:
3482: /* insert into matrix */
3483: jj = rstart;
3484: smycols = mycols;
3485: svals = vals;
3486: for (i=0; i<m; i++) {
3487: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3488: smycols += ourlens[i];
3489: svals += ourlens[i];
3490: jj++;
3491: }
3493: /* read in other processors and ship out */
3494: for (i=1; i<size; i++) {
3495: nz = procsnz[i];
3496: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3497: MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);
3498: }
3499: PetscFree(procsnz);
3500: } else {
3501: /* receive numeric values */
3502: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
3504: /* receive message of values*/
3505: MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);
3507: /* insert into matrix */
3508: jj = rstart;
3509: smycols = mycols;
3510: svals = vals;
3511: for (i=0; i<m; i++) {
3512: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3513: smycols += ourlens[i];
3514: svals += ourlens[i];
3515: jj++;
3516: }
3517: }
3518: PetscFree2(ourlens,offlens);
3519: PetscFree(vals);
3520: PetscFree(mycols);
3521: PetscFree(rowners);
3523: MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);
3524: MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);
3525: return(0);
3526: }
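
/*
   A minimal usage sketch for the parallel loader above. On a communicator with
   more than one process, MatLoad() on an AIJ matrix ends up in this routine.
   The file name "matrix.dat" and the helper name ExampleLoadMPIAIJ are
   illustrative assumptions; the file is assumed to have been written earlier
   with MatView() on a binary viewer, and petscmat.h is assumed to be included.
*/
static PetscErrorCode ExampleLoadMPIAIJ(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscViewer    fd;

  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&fd);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);              /* parallel AIJ on more than one process */
  ierr = MatLoad(*A,fd);CHKERRQ(ierr);                     /* reads sizes, structure, and values from the file */
  ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr);
  return(0);
}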
3530: PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3531: {
3533: IS iscol_local;
3534: PetscInt csize;
3537: ISGetLocalSize(iscol,&csize);
3538: if (call == MAT_REUSE_MATRIX) {
3539: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
3540: if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3541: } else {
3542: PetscInt cbs;
3543: ISGetBlockSize(iscol,&cbs);
3544: ISAllGather(iscol,&iscol_local);
3545: ISSetBlockSize(iscol_local,cbs);
3546: }
3547: MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
3548: if (call == MAT_INITIAL_MATRIX) {
3549: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
3550: ISDestroy(&iscol_local);
3551: }
3552: return(0);
3553: }
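
/*
   A sketch of how the routine above is reached through the public API:
   MatGetSubMatrix() on an MPIAIJ matrix dispatches here. The index sets below
   simply select every locally owned row and column, which is only an
   illustration; petscmat.h is assumed to be included.
*/
static PetscErrorCode ExampleGetSubMatrix(Mat A,Mat *B)
{
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscInt       rstart,rend,cstart,cend;
  IS             isrow,iscol;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
  ierr = ISCreateStride(comm,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(comm,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  ierr = ISDestroy(&isrow);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol);CHKERRQ(ierr);
  return(0);
}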
3555: extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3558: /*
3559: Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3560: process, and then the end result obtained by concatenating the local matrices.
3561: Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3563: Note: This requires a sequential iscol with all indices.
3564: */
3565: PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3566: {
3568: PetscMPIInt rank,size;
3569: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3570: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3571: PetscBool allcolumns, colflag;
3572: Mat M,Mreuse;
3573: MatScalar *vwork,*aa;
3574: MPI_Comm comm = ((PetscObject)mat)->comm;
3575: Mat_SeqAIJ *aij;
3579: MPI_Comm_rank(comm,&rank);
3580: MPI_Comm_size(comm,&size);
3582: ISIdentity(iscol,&colflag);
3583: ISGetLocalSize(iscol,&ncol);
3584: if (colflag && ncol == mat->cmap->N){
3585: allcolumns = PETSC_TRUE;
3586: } else {
3587: allcolumns = PETSC_FALSE;
3588: }
3589: if (call == MAT_REUSE_MATRIX) {
3590: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
3591: if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3592: MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);
3593: } else {
3594: MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);
3595: }
3597: /*
3598: m - number of local rows
3599: n - number of columns (same on all processors)
3600: rstart - first row in new global matrix generated
3601: */
3602: MatGetSize(Mreuse,&m,&n);
3603: MatGetBlockSizes(Mreuse,&bs,&cbs);
3604: if (call == MAT_INITIAL_MATRIX) {
3605: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3606: ii = aij->i;
3607: jj = aij->j;
3609: /*
3610: Determine the number of non-zeros in the diagonal and off-diagonal
3611: portions of the matrix in order to do correct preallocation
3612: */
3614: /* first get start and end of "diagonal" columns */
3615: if (csize == PETSC_DECIDE) {
3616: ISGetSize(isrow,&mglobal);
3617: if (mglobal == n) { /* square matrix */
3618: nlocal = m;
3619: } else {
3620: nlocal = n/size + ((n % size) > rank);
3621: }
3622: } else {
3623: nlocal = csize;
3624: }
3625: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
3626: rstart = rend - nlocal;
3627: if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3629: /* next, compute all the lengths */
3630: PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);
3631: olens = dlens + m;
3632: for (i=0; i<m; i++) {
3633: jend = ii[i+1] - ii[i];
3634: olen = 0;
3635: dlen = 0;
3636: for (j=0; j<jend; j++) {
3637: if (*jj < rstart || *jj >= rend) olen++;
3638: else dlen++;
3639: jj++;
3640: }
3641: olens[i] = olen;
3642: dlens[i] = dlen;
3643: }
3644: MatCreate(comm,&M);
3645: MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);
3646: MatSetBlockSizes(M,bs,cbs);
3647: MatSetType(M,((PetscObject)mat)->type_name);
3648: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
3649: PetscFree(dlens);
3650: } else {
3651: PetscInt ml,nl;
3653: M = *newmat;
3654: MatGetLocalSize(M,&ml,&nl);
3655: if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3656: MatZeroEntries(M);
3657: /*
3658: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3659: rather than the slower MatSetValues().
3660: */
3661: M->was_assembled = PETSC_TRUE;
3662: M->assembled = PETSC_FALSE;
3663: }
3664: MatGetOwnershipRange(M,&rstart,&rend);
3665: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3666: ii = aij->i;
3667: jj = aij->j;
3668: aa = aij->a;
3669: for (i=0; i<m; i++) {
3670: row = rstart + i;
3671: nz = ii[i+1] - ii[i];
3672: cwork = jj; jj += nz;
3673: vwork = aa; aa += nz;
3674: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
3675: }
3677: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
3678: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
3679: *newmat = M;
3681: /* save submatrix used in processor for next request */
3682: if (call == MAT_INITIAL_MATRIX) {
3683: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
3684: MatDestroy(&Mreuse);
3685: }
3687: return(0);
3688: }
3690: EXTERN_C_BEGIN
3693: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3694: {
3695: PetscInt m,cstart, cend,j,nnz,i,d;
3696: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3697: const PetscInt *JJ;
3698: PetscScalar *values;
3702: if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3704: PetscLayoutSetUp(B->rmap);
3705: PetscLayoutSetUp(B->cmap);
3706: m = B->rmap->n;
3707: cstart = B->cmap->rstart;
3708: cend = B->cmap->rend;
3709: rstart = B->rmap->rstart;
3711: PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);
3713: #if defined(PETSC_USE_DEBUG)
3714: for (i=0; i<m; i++) {
3715: nnz = Ii[i+1]- Ii[i];
3716: JJ = J + Ii[i];
3717: if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3718: if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3719: if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3720: }
3721: #endif
3723: for (i=0; i<m; i++) {
3724: nnz = Ii[i+1]- Ii[i];
3725: JJ = J + Ii[i];
3726: nnz_max = PetscMax(nnz_max,nnz);
3727: d = 0;
3728: for (j=0; j<nnz; j++) {
3729: if (cstart <= JJ[j] && JJ[j] < cend) d++;
3730: }
3731: d_nnz[i] = d;
3732: o_nnz[i] = nnz - d;
3733: }
3734: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
3735: PetscFree2(d_nnz,o_nnz);
3737: if (v) values = (PetscScalar*)v;
3738: else {
3739: PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);
3740: PetscMemzero(values,nnz_max*sizeof(PetscScalar));
3741: }
3743: for (i=0; i<m; i++) {
3744: ii = i + rstart;
3745: nnz = Ii[i+1]- Ii[i];
3746: MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);
3747: }
3748: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3749: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3751: if (!v) {
3752: PetscFree(values);
3753: }
3754: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3755: return(0);
3756: }
3757: EXTERN_C_END
3761: /*@
3762: MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3763: (the default parallel PETSc format).
3765: Collective on MPI_Comm
3767: Input Parameters:
3768: + B - the matrix
3769: . i - the indices into j for the start of each local row (starts with zero)
3770: . j - the column indices for each local row (starts with zero)
3771: - v - optional values in the matrix
3773: Level: developer
3775: Notes:
3776: The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3777: thus you CANNOT change the matrix entries by changing the values of v[] after you have
3778: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3780: The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3782: The format used for the sparse matrix input is equivalent to a
3783: row-major ordering, i.e., for the following matrix, the expected input data is
3784: as shown:
3786: 1 0 0
3787: 2 0 3 P0
3788: -------
3789: 4 5 6 P1
3791: Process0 [P0]: rows_owned=[0,1]
3792: i = {0,1,3} [size = nrow+1 = 2+1]
3793: j = {0,0,2} [size = nz = 3]
3794: v = {1,2,3} [size = nz = 3]
3796: Process1 [P1]: rows_owned=[2]
3797: i = {0,3} [size = nrow+1 = 1+1]
3798: j = {0,1,2} [size = nz = 3]
3799: v = {4,5,6} [size = nz = 3]
3801: .keywords: matrix, aij, compressed row, sparse, parallel
3803: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3804: MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3805: @*/
3806: PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3807: {
3811: PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3812: return(0);
3813: }
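
/*
   A sketch of the CSR interface documented above, using the 3x3 two-process
   example from the man page (rank 0 owns rows 0-1, rank 1 owns row 2). The
   literal arrays are purely illustrative; the sketch assumes exactly two MPI
   processes and that petscmat.h is included. The routine copies the data and
   assembles the matrix.
*/
static PetscErrorCode ExamplePreallocationCSR(MPI_Comm comm,Mat *A)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  const PetscInt    i0[] = {0,1,3},  j0[] = {0,0,2};   /* rank 0: rows 0 and 1 */
  const PetscScalar v0[] = {1,2,3};
  const PetscInt    i1[] = {0,3},    j1[] = {0,1,2};   /* rank 1: row 2        */
  const PetscScalar v1[] = {4,5,6};

  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  if (!rank) {
    ierr = MatMPIAIJSetPreallocationCSR(*A,i0,j0,v0);CHKERRQ(ierr);
  } else {
    ierr = MatMPIAIJSetPreallocationCSR(*A,i1,j1,v1);CHKERRQ(ierr);
  }
  return(0);
}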
3817: /*@C
3818: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3819: (the default parallel PETSc format). For good matrix assembly performance
3820: the user should preallocate the matrix storage by setting the parameters
3821: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3822: performance can be increased by more than a factor of 50.
3824: Collective on MPI_Comm
3826: Input Parameters:
3827: + A - the matrix
3828: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3829: (same value is used for all local rows)
3830: . d_nnz - array containing the number of nonzeros in the various rows of the
3831: DIAGONAL portion of the local submatrix (possibly different for each row)
3832: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3833: The size of this array is equal to the number of local rows, i.e 'm'.
3834: For matrices that will be factored, you must leave room for (and set)
3835: the diagonal entry even if it is zero.
3836: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3837: submatrix (same value is used for all local rows).
3838: - o_nnz - array containing the number of nonzeros in the various rows of the
3839: OFF-DIAGONAL portion of the local submatrix (possibly different for
3840: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3841: structure. The size of this array is equal to the number
3842: of local rows, i.e 'm'.
3844: If the *_nnz parameter is given then the *_nz parameter is ignored
3846: The AIJ format (also called the Yale sparse matrix format or
3847: compressed row storage (CSR)), is fully compatible with standard Fortran 77
3848: storage. The stored row and column indices begin with zero.
3849: See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3851: The parallel matrix is partitioned such that the first m0 rows belong to
3852: process 0, the next m1 rows belong to process 1, the next m2 rows belong
3853: to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3855: The DIAGONAL portion of the local submatrix of a processor can be defined
3856: as the submatrix which is obtained by extracting the part corresponding to
3857: the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3858: first row that belongs to the processor, r2 is the last row belonging to
3859: this processor, and c1-c2 is the range of indices of the local part of a
3860: vector suitable for applying the matrix to. This is an m x n matrix. In the
3861: common case of a square matrix, the row and column ranges are the same and
3862: the DIAGONAL part is also square. The remaining portion of the local
3863: submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3865: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3867: You can call MatGetInfo() to get information on how effective the preallocation was;
3868: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3869: You can also run with the option -info and look for messages with the string
3870: malloc in them to see if additional memory allocation was needed.
3872: Example usage:
3873:
3874: Consider the following 8x8 matrix with 34 non-zero values, that is
3875: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3876: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3877: as follows:
3879: .vb
3880: 1 2 0 | 0 3 0 | 0 4
3881: Proc0 0 5 6 | 7 0 0 | 8 0
3882: 9 0 10 | 11 0 0 | 12 0
3883: -------------------------------------
3884: 13 0 14 | 15 16 17 | 0 0
3885: Proc1 0 18 0 | 19 20 21 | 0 0
3886: 0 0 0 | 22 23 0 | 24 0
3887: -------------------------------------
3888: Proc2 25 26 27 | 0 0 28 | 29 0
3889: 30 0 0 | 31 32 33 | 0 34
3890: .ve
3892: This can be represented as a collection of submatrices as:
3894: .vb
3895: A B C
3896: D E F
3897: G H I
3898: .ve
3900: Where the submatrices A,B,C are owned by proc0, D,E,F are
3901: owned by proc1, G,H,I are owned by proc2.
3903: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3904: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3905: The 'M','N' parameters are 8,8, and have the same values on all procs.
3907: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3908: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3909: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3910: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3911: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3912: matrix, and [DF] as another SeqAIJ matrix.
3914: When d_nz, o_nz parameters are specified, d_nz storage elements are
3915: allocated for every row of the local diagonal submatrix, and o_nz
3916: storage locations are allocated for every row of the OFF-DIAGONAL submat.
3917: One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3918: local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3919: In this case, the values of d_nz,o_nz are:
3920: .vb
3921: proc0 : d_nz = 2, o_nz = 2
3922: proc1 : d_nz = 3, o_nz = 2
3923: proc2 : d_nz = 1, o_nz = 4
3924: .ve
3925: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3926: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3927: for proc2, i.e., we are using 12+15+10=37 storage locations to store
3928: 34 values.
3930: When d_nnz, o_nnz parameters are specified, the storage is specified
3931: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3932: In the above case the values for d_nnz,o_nnz are:
3933: .vb
3934: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3935: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3936: proc2: d_nnz = [1,1] and o_nnz = [4,4]
3937: .ve
3938: Here the space allocated is the sum of all the above values, i.e., 34, and
3939: hence the preallocation is perfect.
3941: Level: intermediate
3943: .keywords: matrix, aij, compressed row, sparse, parallel
3945: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3946: MPIAIJ, MatGetInfo()
3947: @*/
3948: PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3949: {
3955: PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
3956: return(0);
3957: }
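
/*
   A minimal preallocation sketch for the routine above: a 1-D, three-point
   stencil matrix has at most 3 nonzeros per row, of which at most 1 falls
   outside the local diagonal block (the coupling to a neighboring process),
   so d_nz=3 and o_nz=1 suffice. The global size N and the stencil are
   illustrative assumptions; petscmat.h is assumed to be included.
*/
static PetscErrorCode ExamplePreallocateTridiagonal(MPI_Comm comm,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       i,rstart,rend,ncols,cols[3];
  PetscScalar    vals[3];

  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ncols = 0;
    if (i > 0)   {cols[ncols] = i-1; vals[ncols++] = -1.0;}
    cols[ncols] = i; vals[ncols++] = 2.0;
    if (i < N-1) {cols[ncols] = i+1; vals[ncols++] = -1.0;}
    ierr = MatSetValues(*A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return(0);
}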
3961: /*@
3962: MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain
3963: the local rows in standard CSR format.
3965: Collective on MPI_Comm
3967: Input Parameters:
3968: + comm - MPI communicator
3969: . m - number of local rows (Cannot be PETSC_DECIDE)
3970: . n - This value should be the same as the local size used in creating the
3971: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3972: calculated if N is given) For square matrices n is almost always m.
3973: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3974: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3975: . i - row indices
3976: . j - column indices
3977: - a - matrix values
3979: Output Parameter:
3980: . mat - the matrix
3982: Level: intermediate
3984: Notes:
3985: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3986: thus you CANNOT change the matrix entries by changing the values of a[] after you have
3987: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3989: The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3991: The format used for the sparse matrix input is equivalent to a
3992: row-major ordering, i.e., for the following matrix, the expected input data is
3993: as shown:
3995: 1 0 0
3996: 2 0 3 P0
3997: -------
3998: 4 5 6 P1
4000: Process0 [P0]: rows_owned=[0,1]
4001: i = {0,1,3} [size = nrow+1 = 2+1]
4002: j = {0,0,2} [size = nz = 3]
4003: v = {1,2,3} [size = nz = 3]
4005: Process1 [P1]: rows_owned=[2]
4006: i = {0,3} [size = nrow+1 = 1+1]
4007: j = {0,1,2} [size = nz = 3]
4008: v = {4,5,6} [size = nz = 3]
4010: .keywords: matrix, aij, compressed row, sparse, parallel
4012: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4013: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4014: @*/
4015: PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4016: {
4020: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4021: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4022: MatCreate(comm,mat);
4023: MatSetSizes(*mat,m,n,M,N);
4024: /* MatSetBlockSizes(M,bs,cbs); */
4025: MatSetType(*mat,MATMPIAIJ);
4026: MatMPIAIJSetPreallocationCSR(*mat,i,j,a);
4027: return(0);
4028: }
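
/*
   A sketch of the convenience wrapper above, again with the two-process 3x3
   example from the man page (rank 0 owns rows 0-1, rank 1 owns row 2). The
   arrays are copied, so stack storage as used here is fine; the sketch assumes
   exactly two MPI processes and that petscmat.h is included.
*/
static PetscErrorCode ExampleCreateWithArrays(MPI_Comm comm,Mat *A)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  const PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};
  const PetscScalar a0[] = {1,2,3};
  const PetscInt    i1[] = {0,3},   j1[] = {0,1,2};
  const PetscScalar a1[] = {4,5,6};

  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i0,j0,a0,A);CHKERRQ(ierr);
  } else {
    ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i1,j1,a1,A);CHKERRQ(ierr);
  }
  return(0);
}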
4032: /*@C
4033: MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4034: (the default parallel PETSc format). For good matrix assembly performance
4035: the user should preallocate the matrix storage by setting the parameters
4036: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4037: performance can be increased by more than a factor of 50.
4039: Collective on MPI_Comm
4041: Input Parameters:
4042: + comm - MPI communicator
4043: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4044: This value should be the same as the local size used in creating the
4045: y vector for the matrix-vector product y = Ax.
4046: . n - This value should be the same as the local size used in creating the
4047: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4048: calculated if N is given) For square matrices n is almost always m.
4049: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4050: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4051: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4052: (same value is used for all local rows)
4053: . d_nnz - array containing the number of nonzeros in the various rows of the
4054: DIAGONAL portion of the local submatrix (possibly different for each row)
4055: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4056: The size of this array is equal to the number of local rows, i.e 'm'.
4057: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4058: submatrix (same value is used for all local rows).
4059: - o_nnz - array containing the number of nonzeros in the various rows of the
4060: OFF-DIAGONAL portion of the local submatrix (possibly different for
4061: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4062: structure. The size of this array is equal to the number
4063: of local rows, i.e 'm'.
4065: Output Parameter:
4066: . A - the matrix
4068: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4069: MatXXXXSetPreallocation() paradigm instead of this routine directly.
4070: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4072: Notes:
4073: If the *_nnz parameter is given then the *_nz parameter is ignored
4075: m,n,M,N parameters specify the size of the matrix, and its partitioning across
4076: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4077: storage requirements for this matrix.
4079: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4080: processor than it must be used on all processors that share the object for
4081: that argument.
4083: The user MUST specify either the local or global matrix dimensions
4084: (possibly both).
4086: The parallel matrix is partitioned across processors such that the
4087: first m0 rows belong to process 0, the next m1 rows belong to
4088: process 1, the next m2 rows belong to process 2 etc.. where
4089: m0,m1,m2,.. are given by the input parameter 'm', i.e., each processor stores
4090: values corresponding to an [m x N] submatrix.
4092: The columns are logically partitioned with the n0 columns belonging
4093: to 0th partition, the next n1 columns belonging to the next
4094: partition, etc., where n0,n1,n2... are given by the input parameter 'n'.
4096: The DIAGONAL portion of the local submatrix on any given processor
4097: is the submatrix corresponding to the rows and columns m,n
4098: owned by the given processor, i.e., the diagonal matrix on
4099: process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4100: etc. The remaining portion of the local submatrix [m x (N-n)]
4101: constitutes the OFF-DIAGONAL portion. The example below better
4102: illustrates this concept.
4104: For a square global matrix we define each processor's diagonal portion
4105: to be its local rows and the corresponding columns (a square submatrix);
4106: each processor's off-diagonal portion encompasses the remainder of the
4107: local matrix (a rectangular submatrix).
4109: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4111: When calling this routine with a single process communicator, a matrix of
4112: type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4113: type of communicator, use the construction mechanism:
4114: MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4115:
4116: By default, this format uses inodes (identical nodes) when possible.
4117: We search for consecutive rows with the same nonzero structure, thereby
4118: reusing matrix information to achieve increased efficiency.
4120: Options Database Keys:
4121: + -mat_no_inode - Do not use inodes
4122: . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4123: - -mat_aij_oneindex - Internally use indexing starting at 1
4124: rather than 0. Note that when calling MatSetValues(),
4125: the user still MUST index entries starting at 0!
4128: Example usage:
4129:
4130: Consider the following 8x8 matrix with 34 non-zero values, that is
4131: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4132: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4133: as follows:
4135: .vb
4136: 1 2 0 | 0 3 0 | 0 4
4137: Proc0 0 5 6 | 7 0 0 | 8 0
4138: 9 0 10 | 11 0 0 | 12 0
4139: -------------------------------------
4140: 13 0 14 | 15 16 17 | 0 0
4141: Proc1 0 18 0 | 19 20 21 | 0 0
4142: 0 0 0 | 22 23 0 | 24 0
4143: -------------------------------------
4144: Proc2 25 26 27 | 0 0 28 | 29 0
4145: 30 0 0 | 31 32 33 | 0 34
4146: .ve
4148: This can be represented as a collection of submatrices as:
4150: .vb
4151: A B C
4152: D E F
4153: G H I
4154: .ve
4156: Where the submatrices A,B,C are owned by proc0, D,E,F are
4157: owned by proc1, G,H,I are owned by proc2.
4159: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4160: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4161: The 'M','N' parameters are 8,8, and have the same values on all procs.
4163: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4164: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4165: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4166: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4167: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4168: matrix, and [DF] as another SeqAIJ matrix.
4170: When d_nz, o_nz parameters are specified, d_nz storage elements are
4171: allocated for every row of the local diagonal submatrix, and o_nz
4172: storage locations are allocated for every row of the OFF-DIAGONAL submat.
4173: One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4174: local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4175: In this case, the values of d_nz,o_nz are:
4176: .vb
4177: proc0 : d_nz = 2, o_nz = 2
4178: proc1 : d_nz = 3, o_nz = 2
4179: proc2 : d_nz = 1, o_nz = 4
4180: .ve
4181: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4182: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4183: for proc2, i.e., we are using 12+15+10=37 storage locations to store
4184: 34 values.
4186: When d_nnz, o_nnz parameters are specified, the storage is specified
4187: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4188: In the above case the values for d_nnz,o_nnz are:
4189: .vb
4190: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4191: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4192: proc2: d_nnz = [1,1] and o_nnz = [4,4]
4193: .ve
4194: Here the space allocated is the sum of all the above values, i.e., 34, and
4195: hence the preallocation is perfect.
4197: Level: intermediate
4199: .keywords: matrix, aij, compressed row, sparse, parallel
4201: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4202: MPIAIJ, MatCreateMPIAIJWithArrays()
4203: @*/
4204: PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4205: {
4207: PetscMPIInt size;
4210: MatCreate(comm,A);
4211: MatSetSizes(*A,m,n,M,N);
4212: MPI_Comm_size(comm,&size);
4213: if (size > 1) {
4214: MatSetType(*A,MATMPIAIJ);
4215: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
4216: } else {
4217: MatSetType(*A,MATSEQAIJ);
4218: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
4219: }
4220: return(0);
4221: }
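
/*
   A brief sketch of the convenience constructor above: an empty N x N AIJ
   matrix created with a rough per-row estimate of 5 on-diagonal and 2
   off-diagonal nonzeros. The estimates and the size N are illustrative
   assumptions; as the notes say, the MatCreate()/MatSetType()/
   MatXXXXSetPreallocation() sequence is the recommended alternative.
*/
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;

  ierr = MatCreateAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,N,N,5,PETSC_NULL,2,PETSC_NULL,A);CHKERRQ(ierr);
  /* ... fill with MatSetValues() and assemble with MatAssemblyBegin()/MatAssemblyEnd() as usual ... */
  return(0);
}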
4225: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4226: {
4227: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4230: *Ad = a->A;
4231: *Ao = a->B;
4232: *colmap = a->garray;
4233: return(0);
4234: }
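
/*
   The accessor above hands back the two local SeqAIJ blocks and the column map
   of the off-diagonal block. A sketch of typical use, printing the locally
   stored nonzero counts; it assumes A is an assembled MPIAIJ matrix and that
   petscmat.h is included.
*/
static PetscErrorCode ExampleInspectBlocks(Mat A)
{
  PetscErrorCode ierr;
  Mat            Ad,Ao;
  PetscInt       *colmap;     /* colmap[j] is the global column of local column j of Ao */
  MatInfo        info;

  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
  ierr = MatGetInfo(Ad,MAT_LOCAL,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_SELF,"diagonal block: %D stored nonzeros\n",(PetscInt)info.nz_used);CHKERRQ(ierr);
  ierr = MatGetInfo(Ao,MAT_LOCAL,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_SELF,"off-diagonal block: %D stored nonzeros\n",(PetscInt)info.nz_used);CHKERRQ(ierr);
  return(0);
}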
4238: PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4239: {
4241: PetscInt i;
4242: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4245: if (coloring->ctype == IS_COLORING_GLOBAL) {
4246: ISColoringValue *allcolors,*colors;
4247: ISColoring ocoloring;
4249: /* set coloring for diagonal portion */
4250: MatSetColoring_SeqAIJ(a->A,coloring);
4252: /* set coloring for off-diagonal portion */
4253: ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
4254: PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);
4255: for (i=0; i<a->B->cmap->n; i++) {
4256: colors[i] = allcolors[a->garray[i]];
4257: }
4258: PetscFree(allcolors);
4259: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4260: MatSetColoring_SeqAIJ(a->B,ocoloring);
4261: ISColoringDestroy(&ocoloring);
4262: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4263: ISColoringValue *colors;
4264: PetscInt *larray;
4265: ISColoring ocoloring;
4267: /* set coloring for diagonal portion */
4268: PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);
4269: for (i=0; i<a->A->cmap->n; i++) {
4270: larray[i] = i + A->cmap->rstart;
4271: }
4272: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);
4273: PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);
4274: for (i=0; i<a->A->cmap->n; i++) {
4275: colors[i] = coloring->colors[larray[i]];
4276: }
4277: PetscFree(larray);
4278: ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);
4279: MatSetColoring_SeqAIJ(a->A,ocoloring);
4280: ISColoringDestroy(&ocoloring);
4282: /* set coloring for off-diagonal portion */
4283: PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);
4284: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);
4285: PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);
4286: for (i=0; i<a->B->cmap->n; i++) {
4287: colors[i] = coloring->colors[larray[i]];
4288: }
4289: PetscFree(larray);
4290: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4291: MatSetColoring_SeqAIJ(a->B,ocoloring);
4292: ISColoringDestroy(&ocoloring);
4293: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4295: return(0);
4296: }
4298: #if defined(PETSC_HAVE_ADIC)
4301: PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4302: {
4303: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4307: MatSetValuesAdic_SeqAIJ(a->A,advalues);
4308: MatSetValuesAdic_SeqAIJ(a->B,advalues);
4309: return(0);
4310: }
4311: #endif
4315: PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4316: {
4317: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4321: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
4322: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
4323: return(0);
4324: }
4328: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4329: {
4331: PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4332: PetscInt *indx;
4335: /* This routine will ONLY return MPIAIJ type matrix */
4336: MatGetSize(inmat,&m,&N);
4337: MatGetBlockSizes(inmat,&bs,&cbs);
4338: if (n == PETSC_DECIDE){
4339: PetscSplitOwnership(comm,&n,&N);
4340: }
4341: /* Check sum(n) = N */
4342: MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
4343: if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4344:
4345: MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);
4346: rstart -= m;
4348: MatPreallocateInitialize(comm,m,n,dnz,onz);
4349: for (i=0;i<m;i++) {
4350: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
4351: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
4352: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
4353: }
4354:
4355: MatCreate(comm,outmat);
4356: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4357: MatSetBlockSizes(*outmat,bs,cbs);
4358: MatSetType(*outmat,MATMPIAIJ);
4359: MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
4360: MatPreallocateFinalize(dnz,onz);
4361: return(0);
4362: }
4366: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4367: {
4369: PetscInt m,N,i,rstart,nnz,Ii;
4370: PetscInt *indx;
4371: PetscScalar *values;
4374: MatGetSize(inmat,&m,&N);
4375: MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);
4376: for (i=0;i<m;i++) {
4377: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4378: Ii = i + rstart;
4379: MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
4380: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4381: }
4382: MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);
4383: MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);
4384: return(0);
4385: }
4389: /*@
4390: MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4391: matrices from each processor
4393: Collective on MPI_Comm
4395: Input Parameters:
4396: + comm - the communicator the parallel matrix will live on
4397: . inmat - the input sequential matrices
4398: . n - number of local columns (or PETSC_DECIDE)
4399: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4401: Output Parameter:
4402: . outmat - the parallel matrix generated
4404: Level: advanced
4406: Notes: The number of columns of the matrix in EACH processor MUST be the same.
4408: @*/
4409: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4410: {
4414: PetscLogEventBegin(MAT_Merge,inmat,0,0,0);
4415: if (scall == MAT_INITIAL_MATRIX){
4416: MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);
4417: }
4418: MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);
4419: PetscLogEventEnd(MAT_Merge,inmat,0,0,0);
4420: return(0);
4421: }
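
/*
   A sketch of the concatenation routine above: every process builds a small
   SeqAIJ block with the same number of columns (a requirement noted above),
   and the blocks are stacked row-wise into one MPIAIJ matrix. The 2x4 local
   block and its entries are illustrative; petscmat.h is assumed to be included.
*/
static PetscErrorCode ExampleConcatenate(MPI_Comm comm,Mat *C)
{
  PetscErrorCode ierr;
  Mat            local;
  PetscInt       i,col;
  PetscScalar    one = 1.0;

  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,2,4,1,PETSC_NULL,&local);CHKERRQ(ierr);
  for (i=0; i<2; i++) {
    col  = i;                                   /* one entry per row, purely for illustration */
    ierr = MatSetValues(local,1,&i,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(local,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(local,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatCreateMPIAIJConcatenateSeqAIJ(comm,local,PETSC_DECIDE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
  ierr = MatDestroy(&local);CHKERRQ(ierr);
  return(0);
}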
4425: PetscErrorCode MatFileSplit(Mat A,char *outfile)
4426: {
4427: PetscErrorCode ierr;
4428: PetscMPIInt rank;
4429: PetscInt m,N,i,rstart,nnz;
4430: size_t len;
4431: const PetscInt *indx;
4432: PetscViewer out;
4433: char *name;
4434: Mat B;
4435: const PetscScalar *values;
4438: MatGetLocalSize(A,&m,0);
4439: MatGetSize(A,0,&N);
4440: /* Should this be the type of the diagonal block of A? */
4441: MatCreate(PETSC_COMM_SELF,&B);
4442: MatSetSizes(B,m,N,m,N);
4443: MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);
4444: MatSetType(B,MATSEQAIJ);
4445: MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
4446: MatGetOwnershipRange(A,&rstart,0);
4447: for (i=0;i<m;i++) {
4448: MatGetRow(A,i+rstart,&nnz,&indx,&values);
4449: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
4450: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
4451: }
4452: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
4453: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
4455: MPI_Comm_rank(((PetscObject)A)->comm,&rank);
4456: PetscStrlen(outfile,&len);
4457: PetscMalloc((len+5)*sizeof(char),&name);
4458: sprintf(name,"%s.%d",outfile,rank);
4459: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);
4460: PetscFree(name);
4461: MatView(B,out);
4462: PetscViewerDestroy(&out);
4463: MatDestroy(&B);
4464: return(0);
4465: }
4467: extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4470: PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4471: {
4472: PetscErrorCode ierr;
4473: Mat_Merge_SeqsToMPI *merge;
4474: PetscContainer container;
4477: PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);
4478: if (container) {
4479: PetscContainerGetPointer(container,(void **)&merge);
4480: PetscFree(merge->id_r);
4481: PetscFree(merge->len_s);
4482: PetscFree(merge->len_r);
4483: PetscFree(merge->bi);
4484: PetscFree(merge->bj);
4485: PetscFree(merge->buf_ri[0]);
4486: PetscFree(merge->buf_ri);
4487: PetscFree(merge->buf_rj[0]);
4488: PetscFree(merge->buf_rj);
4489: PetscFree(merge->coi);
4490: PetscFree(merge->coj);
4491: PetscFree(merge->owners_co);
4492: PetscLayoutDestroy(&merge->rowmap);
4493: PetscFree(merge);
4494: PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);
4495: }
4496: MatDestroy_MPIAIJ(A);
4497: return(0);
4498: }
4500: #include <../src/mat/utils/freespace.h>
4501: #include <petscbt.h>
4505: PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4506: {
4507: PetscErrorCode ierr;
4508: MPI_Comm comm=((PetscObject)mpimat)->comm;
4509: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4510: PetscMPIInt size,rank,taga,*len_s;
4511: PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4512: PetscInt proc,m;
4513: PetscInt **buf_ri,**buf_rj;
4514: PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4515: PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
4516: MPI_Request *s_waits,*r_waits;
4517: MPI_Status *status;
4518: MatScalar *aa=a->a;
4519: MatScalar **abuf_r,*ba_i;
4520: Mat_Merge_SeqsToMPI *merge;
4521: PetscContainer container;
4524: PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);
4526: MPI_Comm_size(comm,&size);
4527: MPI_Comm_rank(comm,&rank);
4529: PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);
4530: PetscContainerGetPointer(container,(void **)&merge);
4532: bi = merge->bi;
4533: bj = merge->bj;
4534: buf_ri = merge->buf_ri;
4535: buf_rj = merge->buf_rj;
4537: PetscMalloc(size*sizeof(MPI_Status),&status);
4538: owners = merge->rowmap->range;
4539: len_s = merge->len_s;
4541: /* send and recv matrix values */
4542: /*-----------------------------*/
4543: PetscObjectGetNewTag((PetscObject)mpimat,&taga);
4544: PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);
4546: PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);
4547: for (proc=0,k=0; proc<size; proc++){
4548: if (!len_s[proc]) continue;
4549: i = owners[proc];
4550: MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
4551: k++;
4552: }
4554: if (merge->nrecv) {MPI_Waitall(merge->nrecv,r_waits,status);}
4555: if (merge->nsend) {MPI_Waitall(merge->nsend,s_waits,status);}
4556: PetscFree(status);
4558: PetscFree(s_waits);
4559: PetscFree(r_waits);
4561: /* insert mat values of mpimat */
4562: /*----------------------------*/
4563: PetscMalloc(N*sizeof(PetscScalar),&ba_i);
4564: PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);
4566: for (k=0; k<merge->nrecv; k++){
4567: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4568: nrows = *(buf_ri_k[k]);
4569: nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */
4570: nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4571: }
4573: /* set values of ba */
4574: m = merge->rowmap->n;
4575: for (i=0; i<m; i++) {
4576: arow = owners[rank] + i;
4577: bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
4578: bnzi = bi[i+1] - bi[i];
4579: PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));
4581: /* add local non-zero vals of this proc's seqmat into ba */
4582: anzi = ai[arow+1] - ai[arow];
4583: aj = a->j + ai[arow];
4584: aa = a->a + ai[arow];
4585: nextaj = 0;
4586: for (j=0; nextaj<anzi; j++){
4587: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4588: ba_i[j] += aa[nextaj++];
4589: }
4590: }
4592: /* add received vals into ba */
4593: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4594: /* i-th row */
4595: if (i == *nextrow[k]) {
4596: anzi = *(nextai[k]+1) - *nextai[k];
4597: aj = buf_rj[k] + *(nextai[k]);
4598: aa = abuf_r[k] + *(nextai[k]);
4599: nextaj = 0;
4600: for (j=0; nextaj<anzi; j++){
4601: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4602: ba_i[j] += aa[nextaj++];
4603: }
4604: }
4605: nextrow[k]++; nextai[k]++;
4606: }
4607: }
4608: MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
4609: }
4610: MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
4611: MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);
4613: PetscFree(abuf_r[0]);
4614: PetscFree(abuf_r);
4615: PetscFree(ba_i);
4616: PetscFree3(buf_ri_k,nextrow,nextai);
4617: PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);
4618: return(0);
4619: }
4621: extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4625: PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4626: {
4627: PetscErrorCode ierr;
4628: Mat B_mpi;
4629: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4630: PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4631: PetscInt **buf_rj,**buf_ri,**buf_ri_k;
4632: PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4633: PetscInt len,proc,*dnz,*onz,bs,cbs;
4634: PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4635: PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4636: MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
4637: MPI_Status *status;
4638: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
4639: PetscBT lnkbt;
4640: Mat_Merge_SeqsToMPI *merge;
4641: PetscContainer container;
4644: PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);
4646: /* make sure it is a PETSc comm */
4647: PetscCommDuplicate(comm,&comm,PETSC_NULL);
4648: MPI_Comm_size(comm,&size);
4649: MPI_Comm_rank(comm,&rank);
4651: PetscNew(Mat_Merge_SeqsToMPI,&merge);
4652: PetscMalloc(size*sizeof(MPI_Status),&status);
4654: /* determine row ownership */
4655: /*---------------------------------------------------------*/
4656: PetscLayoutCreate(comm,&merge->rowmap);
4657: PetscLayoutSetLocalSize(merge->rowmap,m);
4658: PetscLayoutSetSize(merge->rowmap,M);
4659: PetscLayoutSetBlockSize(merge->rowmap,1);
4660: PetscLayoutSetUp(merge->rowmap);
4661: PetscMalloc(size*sizeof(PetscMPIInt),&len_si);
4662: PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);
4664: m = merge->rowmap->n;
4665: M = merge->rowmap->N;
4666: owners = merge->rowmap->range;
4668: /* determine the number of messages to send, their lengths */
4669: /*---------------------------------------------------------*/
4670: len_s = merge->len_s;
4672: len = 0; /* length of buf_si[] */
4673: merge->nsend = 0;
4674: for (proc=0; proc<size; proc++){
4675: len_si[proc] = 0;
4676: if (proc == rank){
4677: len_s[proc] = 0;
4678: } else {
4679: len_si[proc] = owners[proc+1] - owners[proc] + 1;
4680: len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4681: }
4682: if (len_s[proc]) {
4683: merge->nsend++;
4684: nrows = 0;
4685: for (i=owners[proc]; i<owners[proc+1]; i++){
4686: if (ai[i+1] > ai[i]) nrows++;
4687: }
4688: len_si[proc] = 2*(nrows+1);
4689: len += len_si[proc];
4690: }
4691: }
4693: /* determine the number and length of messages to receive for ij-structure */
4694: /*-------------------------------------------------------------------------*/
4695: PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);
4696: PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);
4698: /* post the Irecv of j-structure */
4699: /*-------------------------------*/
4700: PetscCommGetNewTag(comm,&tagj);
4701: PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);
4703: /* post the Isend of j-structure */
4704: /*--------------------------------*/
4705: PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);
4707: for (proc=0, k=0; proc<size; proc++){
4708: if (!len_s[proc]) continue;
4709: i = owners[proc];
4710: MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
4711: k++;
4712: }
4714: /* receives and sends of j-structure are complete */
4715: /*------------------------------------------------*/
4716: if (merge->nrecv) {MPI_Waitall(merge->nrecv,rj_waits,status);}
4717: if (merge->nsend) {MPI_Waitall(merge->nsend,sj_waits,status);}
4719: /* send and recv i-structure */
4720: /*---------------------------*/
4721: PetscCommGetNewTag(comm,&tagi);
4722: PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);
4724: PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);
4725: buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4726: for (proc=0,k=0; proc<size; proc++){
4727: if (!len_s[proc]) continue;
4728: /* form outgoing message for i-structure:
4729: buf_si[0]: nrows to be sent
4730: [1:nrows]: row index (global)
4731: [nrows+1:2*nrows+1]: i-structure index
4732: */
4733: /*-------------------------------------------*/
4734: nrows = len_si[proc]/2 - 1;
4735: buf_si_i = buf_si + nrows+1;
4736: buf_si[0] = nrows;
4737: buf_si_i[0] = 0;
4738: nrows = 0;
4739: for (i=owners[proc]; i<owners[proc+1]; i++){
4740: anzi = ai[i+1] - ai[i];
4741: if (anzi) {
4742: buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4743: buf_si[nrows+1] = i-owners[proc]; /* local row index */
4744: nrows++;
4745: }
4746: }
4747: MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
4748: k++;
4749: buf_si += len_si[proc];
4750: }
4752: if (merge->nrecv) {MPI_Waitall(merge->nrecv,ri_waits,status);}
4753: if (merge->nsend) {MPI_Waitall(merge->nsend,si_waits,status);}
4755: PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);
4756: for (i=0; i<merge->nrecv; i++){
4757: PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);
4758: }
4760: PetscFree(len_si);
4761: PetscFree(len_ri);
4762: PetscFree(rj_waits);
4763: PetscFree2(si_waits,sj_waits);
4764: PetscFree(ri_waits);
4765: PetscFree(buf_s);
4766: PetscFree(status);
4768: /* compute a local seq matrix in each processor */
4769: /*----------------------------------------------*/
4770: /* allocate bi array and free space for accumulating nonzero column info */
4771: PetscMalloc((m+1)*sizeof(PetscInt),&bi);
4772: bi[0] = 0;
4774: /* create and initialize a linked list */
4775: nlnk = N+1;
4776: PetscLLCreate(N,N,nlnk,lnk,lnkbt);
4778: /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4779: len = 0;
4780: len = ai[owners[rank+1]] - ai[owners[rank]];
4781: PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);
4782: current_space = free_space;
4784: /* determine symbolic info for each local row */
4785: PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);
4787: for (k=0; k<merge->nrecv; k++){
4788: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4789: nrows = *buf_ri_k[k];
4790: nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4791: nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4792: }
4794: MatPreallocateInitialize(comm,m,n,dnz,onz);
4795: len = 0;
4796: for (i=0;i<m;i++) {
4797: bnzi = 0;
4798: /* add local non-zero cols of this proc's seqmat into lnk */
4799: arow = owners[rank] + i;
4800: anzi = ai[arow+1] - ai[arow];
4801: aj = a->j + ai[arow];
4802: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
4803: bnzi += nlnk;
4804: /* add received col data into lnk */
4805: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4806: if (i == *nextrow[k]) { /* i-th row */
4807: anzi = *(nextai[k]+1) - *nextai[k];
4808: aj = buf_rj[k] + *nextai[k];
4809: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
4810: bnzi += nlnk;
4811: nextrow[k]++; nextai[k]++;
4812: }
4813: }
4814: if (len < bnzi) len = bnzi; /* =max(bnzi) */
4816: /* if free space is not available, make more free space */
4817: if (current_space->local_remaining<bnzi) {
4818: PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);
4819: nspacedouble++;
4820: }
4821: /* copy data into free space, then initialize lnk */
4822: PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
4823: MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);
4825: current_space->array += bnzi;
4826: current_space->local_used += bnzi;
4827: current_space->local_remaining -= bnzi;
4829: bi[i+1] = bi[i] + bnzi;
4830: }
4832: PetscFree3(buf_ri_k,nextrow,nextai);
4834: PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);
4835: PetscFreeSpaceContiguous(&free_space,bj);
4836: PetscLLDestroy(lnk,lnkbt);
4838: /* create symbolic parallel matrix B_mpi */
4839: /*---------------------------------------*/
4840: MatGetBlockSizes(seqmat,&bs,&cbs);
4841: MatCreate(comm,&B_mpi);
4842: if (n==PETSC_DECIDE) {
4843: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);
4844: } else {
4845: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4846: }
4847: MatSetBlockSizes(B_mpi,bs,cbs);
4848: MatSetType(B_mpi,MATMPIAIJ);
4849: MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
4850: MatPreallocateFinalize(dnz,onz);
4851: MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
4853: /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4854: B_mpi->assembled = PETSC_FALSE;
4855: B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4856: merge->bi = bi;
4857: merge->bj = bj;
4858: merge->buf_ri = buf_ri;
4859: merge->buf_rj = buf_rj;
4860: merge->coi = PETSC_NULL;
4861: merge->coj = PETSC_NULL;
4862: merge->owners_co = PETSC_NULL;
4864: PetscCommDestroy(&comm);
4866: /* attach the supporting struct to B_mpi for reuse */
4867: PetscContainerCreate(PETSC_COMM_SELF,&container);
4868: PetscContainerSetPointer(container,merge);
4869: PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
4870: PetscContainerDestroy(&container);
4871: *mpimat = B_mpi;
4873: PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);
4874: return(0);
4875: }
4879: /*@C
4880: MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4881: matrices from each processor
4883: Collective on MPI_Comm
4885: Input Parameters:
4886: + comm - the communicator the parallel matrix will live on
4887: . seqmat - the input sequential matrices
4888: . m - number of local rows (or PETSC_DECIDE)
4889: . n - number of local columns (or PETSC_DECIDE)
4890: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4892: Output Parameter:
4893: . mpimat - the parallel matrix generated
4895: Level: advanced
4897: Notes:
4898: The dimensions of the sequential matrix in each processor MUST be the same.
4899: The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4900: destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4901: @*/
4902: PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4903: {
4904: PetscErrorCode ierr;
4905: PetscMPIInt size;
4908: MPI_Comm_size(comm,&size);
4909: if (size == 1){
4910: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4911: if (scall == MAT_INITIAL_MATRIX){
4912: MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);
4913: } else {
4914: MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);
4915: }
4916: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4917: return(0);
4918: }
4919: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4920: if (scall == MAT_INITIAL_MATRIX){
4921: MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);
4922: }
4923: MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);
4924: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4925: return(0);
4926: }
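
/*
   A sketch of the summation routine above: every process assembles a full-size
   M x M SeqAIJ holding only its own contribution (here a single diagonal entry
   per rank, purely illustrative), and the contributions are added into one
   MPIAIJ matrix. Assumes petscmat.h is included.
*/
static PetscErrorCode ExampleSumSeqAIJ(MPI_Comm comm,PetscInt M,Mat *C)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  Mat            seqmat;
  PetscInt       row;
  PetscScalar    one = 1.0;

  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,M,1,PETSC_NULL,&seqmat);CHKERRQ(ierr);
  row  = rank % M;                              /* each rank contributes one entry */
  ierr = MatSetValues(seqmat,1,&row,1,&row,&one,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
  /* per the notes above, seqmat's lifetime is tied to the parallel matrix, so it is not destroyed here */
  return(0);
}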
4930: /*@
4931: MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4932: mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4933: with MatGetSize()
4935: Not Collective
4937: Input Parameters:
4938: + A - the matrix
4939: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4941: Output Parameter:
4942: . A_loc - the local sequential matrix generated
4944: Level: developer
4946: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4948: @*/
4949: PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4950: {
4951: PetscErrorCode ierr;
4952: Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
4953: Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4954: PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4955: MatScalar *aa=a->a,*ba=b->a,*cam;
4956: PetscScalar *ca;
4957: PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4958: PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
4959: PetscBool match;
4962: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);
4963: if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4964: PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);
4965: if (scall == MAT_INITIAL_MATRIX){
4966: PetscMalloc((1+am)*sizeof(PetscInt),&ci);
4967: ci[0] = 0;
4968: for (i=0; i<am; i++){
4969: ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4970: }
4971: PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);
4972: PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);
4973: k = 0;
4974: for (i=0; i<am; i++) {
4975: ncols_o = bi[i+1] - bi[i];
4976: ncols_d = ai[i+1] - ai[i];
4977: /* off-diagonal portion of A */
4978: for (jo=0; jo<ncols_o; jo++) {
4979: col = cmap[*bj];
4980: if (col >= cstart) break;
4981: cj[k] = col; bj++;
4982: ca[k++] = *ba++;
4983: }
4984: /* diagonal portion of A */
4985: for (j=0; j<ncols_d; j++) {
4986: cj[k] = cstart + *aj++;
4987: ca[k++] = *aa++;
4988: }
4989: /* off-diagonal portion of A */
4990: for (j=jo; j<ncols_o; j++) {
4991: cj[k] = cmap[*bj++];
4992: ca[k++] = *ba++;
4993: }
4994: }
4995: /* put together the new matrix */
4996: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);
4997: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4998: /* Since these are PETSc arrays, change flags to free them as necessary. */
4999: mat = (Mat_SeqAIJ*)(*A_loc)->data;
5000: mat->free_a = PETSC_TRUE;
5001: mat->free_ij = PETSC_TRUE;
5002: mat->nonew = 0;
5003: } else if (scall == MAT_REUSE_MATRIX){
5004: mat=(Mat_SeqAIJ*)(*A_loc)->data;
5005: ci = mat->i; cj = mat->j; cam = mat->a;
5006: for (i=0; i<am; i++) {
5007: /* off-diagonal portion of A */
5008: ncols_o = bi[i+1] - bi[i];
5009: for (jo=0; jo<ncols_o; jo++) {
5010: col = cmap[*bj];
5011: if (col >= cstart) break;
5012: *cam++ = *ba++; bj++;
5013: }
5014: /* diagonal portion of A */
5015: ncols_d = ai[i+1] - ai[i];
5016: for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5017: /* off-diagonal portion of A */
5018: for (j=jo; j<ncols_o; j++) {
5019: *cam++ = *ba++; bj++;
5020: }
5021: }
5022: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5023: PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);
5024: return(0);
5025: }
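
/*
   A sketch of the routine above: gather the local rows of an MPIAIJ matrix
   into a single SeqAIJ matrix with the full global column width, and refresh
   it in place after the values of A change. Assumes A is assembled and that
   petscmat.h is included.
*/
static PetscErrorCode ExampleGetLocalMat(Mat A)
{
  PetscErrorCode ierr;
  Mat            Aloc;

  ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
  /* ... work with the local rows of A ... */
  ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);  /* copies in the current values only */
  ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
  return(0);
}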
5029: /*@C
5030: MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5032: Not Collective
5034: Input Parameters:
5035: + A - the matrix
5036: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5037: - row, col - index sets of rows and columns to extract (or PETSC_NULL)
5039: Output Parameter:
5040: . A_loc - the local sequential matrix generated
5042: Level: developer
5044: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5046: @*/
5047: PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5048: {
5049: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5050: PetscErrorCode ierr;
5051: PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5052: IS isrowa,iscola;
5053: Mat *aloc;
5054: PetscBool match;
5057: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);
5058: if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5059: PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);
5060: if (!row){
5061: start = A->rmap->rstart; end = A->rmap->rend;
5062: ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
5063: } else {
5064: isrowa = *row;
5065: }
5066: if (!col){
5067: start = A->cmap->rstart;
5068: cmap = a->garray;
5069: nzA = a->A->cmap->n;
5070: nzB = a->B->cmap->n;
5071: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
5072: ncols = 0;
5073: for (i=0; i<nzB; i++) {
5074: if (cmap[i] < start) idx[ncols++] = cmap[i];
5075: else break;
5076: }
5077: imark = i;
5078: for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5079: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5080: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);
5081: } else {
5082: iscola = *col;
5083: }
5084: if (scall != MAT_INITIAL_MATRIX){
5085: PetscMalloc(sizeof(Mat),&aloc);
5086: aloc[0] = *A_loc;
5087: }
5088: MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);
5089: *A_loc = aloc[0];
5090: PetscFree(aloc);
5091: if (!row){
5092: ISDestroy(&isrowa);
5093: }
5094: if (!col){
5095: ISDestroy(&iscola);
5096: }
5097: PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);
5098: return(0);
5099: }
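A hedged usage sketch for MatMPIAIJGetLocalMatCondensed(); passing PETSC_NULL for row and col selects all local rows and the nonzero columns, as documented above:

  Mat            A_loc;
  PetscErrorCode ierr;

  ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
  /* ... use A_loc ... */
  ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_REUSE_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
  ierr = MatDestroy(&A_loc);CHKERRQ(ierr);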
5103: /*@C
5104:    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5106: Collective on Mat
5108: Input Parameters:
5109: + A,B - the matrices in mpiaij format
5110: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5111: - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5113: Output Parameter:
5114: + rowb, colb - index sets of rows and columns of B to extract
5115: - B_seq - the sequential matrix generated
5117: Level: developer
5119: @*/
5120: PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5121: {
5122: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5123: PetscErrorCode ierr;
5124: PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5125: IS isrowb,iscolb;
5126: Mat *bseq=PETSC_NULL;
5127:
5129: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5130: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5131: }
5132: PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);
5133:
5134: if (scall == MAT_INITIAL_MATRIX){
5135: start = A->cmap->rstart;
5136: cmap = a->garray;
5137: nzA = a->A->cmap->n;
5138: nzB = a->B->cmap->n;
5139: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
5140: ncols = 0;
5141:     for (i=0; i<nzB; i++) {  /* rows of B below the local row range */
5142: if (cmap[i] < start) idx[ncols++] = cmap[i];
5143: else break;
5144: }
5145: imark = i;
5146: for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
5147:     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows of B above the local row range */
5148: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);
5149: ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);
5150: } else {
5151: if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5152: isrowb = *rowb; iscolb = *colb;
5153: PetscMalloc(sizeof(Mat),&bseq);
5154: bseq[0] = *B_seq;
5155: }
5156: MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);
5157: *B_seq = bseq[0];
5158: PetscFree(bseq);
5159: if (!rowb){
5160: ISDestroy(&isrowb);
5161: } else {
5162: *rowb = isrowb;
5163: }
5164: if (!colb){
5165: ISDestroy(&iscolb);
5166: } else {
5167: *colb = iscolb;
5168: }
5169: PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);
5170: return(0);
5171: }
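A hedged usage sketch for MatGetBrowsOfAcols(); the initial call creates and returns the index sets, which must be passed back unchanged for MAT_REUSE_MATRIX and destroyed by the caller afterwards:

  Mat            B_seq;
  IS             rowb = PETSC_NULL,colb = PETSC_NULL;
  PetscErrorCode ierr;

  ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
  /* ... after B's numerical values change (same nonzero pattern) ... */
  ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);

  ierr = ISDestroy(&rowb);CHKERRQ(ierr);
  ierr = ISDestroy(&colb);CHKERRQ(ierr);
  ierr = MatDestroy(&B_seq);CHKERRQ(ierr);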
5175: /*
5176:    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5177: of the OFF-DIAGONAL portion of local A
5179: Collective on Mat
5181: Input Parameters:
5182: + A,B - the matrices in mpiaij format
5183: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5185:    Output Parameters:
5186: + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5187: . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5188: . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5189: - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5191: Level: developer
5193: */
5194: PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5195: {
5196: VecScatter_MPI_General *gen_to,*gen_from;
5197: PetscErrorCode ierr;
5198: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5199: Mat_SeqAIJ *b_oth;
5200: VecScatter ctx=a->Mvctx;
5201: MPI_Comm comm=((PetscObject)ctx)->comm;
5202: PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5203: PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5204: PetscScalar *rvalues,*svalues;
5205: MatScalar *b_otha,*bufa,*bufA;
5206: PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5207: MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5208: MPI_Status *sstatus,rstatus;
5209: PetscMPIInt jj;
5210: PetscInt *cols,sbs,rbs;
5211: PetscScalar *vals;
5214: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5215:     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5216: }
5217: PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);
5218: MPI_Comm_rank(comm,&rank);
5220: gen_to = (VecScatter_MPI_General*)ctx->todata;
5221: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5222:   rvalues = gen_from->values; /* buffer reused to receive the incoming row lengths */
5223:   svalues = gen_to->values; /* buffer reused to pack the outgoing row lengths */
5224: nrecvs = gen_from->n;
5225: nsends = gen_to->n;
5227: PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);
5228: srow = gen_to->indices; /* local row index to be sent */
5229: sstarts = gen_to->starts;
5230: sprocs = gen_to->procs;
5231: sstatus = gen_to->sstatus;
5232: sbs = gen_to->bs;
5233: rstarts = gen_from->starts;
5234: rprocs = gen_from->procs;
5235: rbs = gen_from->bs;
5237: if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5238: if (scall == MAT_INITIAL_MATRIX){
5239: /* i-array */
5240: /*---------*/
5241: /* post receives */
5242: for (i=0; i<nrecvs; i++){
5243: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5244: nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5245: MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5246: }
5248: /* pack the outgoing message */
5249: PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);
5250: sstartsj[0] = 0; rstartsj[0] = 0;
5251: len = 0; /* total length of j or a array to be sent */
5252: k = 0;
5253: for (i=0; i<nsends; i++){
5254: rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5255: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5256: for (j=0; j<nrows; j++) {
5257: row = srow[k] + B->rmap->range[rank]; /* global row idx */
5258: for (l=0; l<sbs; l++){
5259: MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL); /* rowlength */
5260: rowlen[j*sbs+l] = ncols;
5261: len += ncols;
5262: MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);
5263: }
5264: k++;
5265: }
5266: MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);
5267: sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5268: }
5269: /* recvs and sends of i-array are completed */
5270: i = nrecvs;
5271: while (i--) {
5272: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5273: }
5274: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5276: /* allocate buffers for sending j and a arrays */
5277: PetscMalloc((len+1)*sizeof(PetscInt),&bufj);
5278: PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);
5280: /* create i-array of B_oth */
5281: PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);
5282: b_othi[0] = 0;
5283: len = 0; /* total length of j or a array to be received */
5284: k = 0;
5285: for (i=0; i<nrecvs; i++){
5286: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5287:       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5288: for (j=0; j<nrows; j++) {
5289: b_othi[k+1] = b_othi[k] + rowlen[j];
5290: len += rowlen[j]; k++;
5291: }
5292: rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5293: }
5295:     /* allocate space for j and a arrays of B_oth */
5296: PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);
5297: PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);
5299: /* j-array */
5300: /*---------*/
5301: /* post receives of j-array */
5302: for (i=0; i<nrecvs; i++){
5303: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5304: MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5305: }
5307: /* pack the outgoing message j-array */
5308: k = 0;
5309: for (i=0; i<nsends; i++){
5310: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5311: bufJ = bufj+sstartsj[i];
5312: for (j=0; j<nrows; j++) {
5313: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5314: for (ll=0; ll<sbs; ll++){
5315: MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);
5316: for (l=0; l<ncols; l++){
5317: *bufJ++ = cols[l];
5318: }
5319: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);
5320: }
5321: }
5322: MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);
5323: }
5325: /* recvs and sends of j-array are completed */
5326: i = nrecvs;
5327: while (i--) {
5328: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5329: }
5330: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5331: } else if (scall == MAT_REUSE_MATRIX){
5332: sstartsj = *startsj_s;
5333: rstartsj = *startsj_r;
5334: bufa = *bufa_ptr;
5335: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5336: b_otha = b_oth->a;
5337: } else {
5338:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5339: }
5341: /* a-array */
5342: /*---------*/
5343: /* post receives of a-array */
5344: for (i=0; i<nrecvs; i++){
5345: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5346: MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
5347: }
5349: /* pack the outgoing message a-array */
5350: k = 0;
5351: for (i=0; i<nsends; i++){
5352: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5353: bufA = bufa+sstartsj[i];
5354: for (j=0; j<nrows; j++) {
5355: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5356: for (ll=0; ll<sbs; ll++){
5357: MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);
5358: for (l=0; l<ncols; l++){
5359: *bufA++ = vals[l];
5360: }
5361: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);
5362: }
5363: }
5364: MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
5365: }
5366: /* recvs and sends of a-array are completed */
5367: i = nrecvs;
5368: while (i--) {
5369: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5370: }
5371: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5372: PetscFree2(rwaits,swaits);
5374: if (scall == MAT_INITIAL_MATRIX){
5375: /* put together the new matrix */
5376: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);
5378: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5379: /* Since these are PETSc arrays, change flags to free them as necessary. */
5380: b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
5381: b_oth->free_a = PETSC_TRUE;
5382: b_oth->free_ij = PETSC_TRUE;
5383: b_oth->nonew = 0;
5385: PetscFree(bufj);
5386: if (!startsj_s || !bufa_ptr){
5387: PetscFree2(sstartsj,rstartsj);
5388: PetscFree(bufa_ptr);
5389: } else {
5390: *startsj_s = sstartsj;
5391: *startsj_r = rstartsj;
5392: *bufa_ptr = bufa;
5393: }
5394: }
5395: PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);
5396: return(0);
5397: }
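A hedged usage sketch for MatGetBrowsOfAoCols_MPIAIJ(); the initial call returns the communication layout (startsj_s, startsj_r) and the send buffer (bufa), which the caller keeps for reuse and eventually frees with PetscFree2()/PetscFree():

  Mat            B_oth;
  PetscInt       *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
  MatScalar      *bufa = PETSC_NULL;
  PetscErrorCode ierr;

  ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
  /* ... after B's values change (same pattern), only the a-array is re-communicated ... */
  ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);

  ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
  ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
  ierr = PetscFree(bufa);CHKERRQ(ierr);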
5401: /*@C
5402: MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5404: Not Collective
5406:   Input Parameter:
5407: . A - The matrix in mpiaij format
5409: Output Parameter:
5410: + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5411: . colmap - A map from global column index to local index into lvec
5412: - multScatter - A scatter from the argument of a matrix-vector product to lvec
5414: Level: developer
5416: @*/
5417: #if defined (PETSC_USE_CTABLE)
5418: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5419: #else
5420: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5421: #endif
5422: {
5423: Mat_MPIAIJ *a;
5430: a = (Mat_MPIAIJ *) A->data;
5431: if (lvec) *lvec = a->lvec;
5432: if (colmap) *colmap = a->colmap;
5433: if (multScatter) *multScatter = a->Mvctx;
5434: return(0);
5435: }
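A minimal sketch for MatGetCommunicationStructs(); colmap is skipped here (pass PETSC_NULL) so the call is the same with or without PETSC_USE_CTABLE, and the returned objects are borrowed references owned by the matrix:

  Vec            lvec;
  VecScatter     Mvctx;
  PetscErrorCode ierr;

  ierr = MatGetCommunicationStructs(A,&lvec,PETSC_NULL,&Mvctx);CHKERRQ(ierr);
  /* do not destroy lvec or Mvctx; they belong to A */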
5437: EXTERN_C_BEGIN
5438: extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5439: extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5440: extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5441: EXTERN_C_END
5445: /*
5446:    Computes C = A*B as (B^T * A^T)^T, since the MPIDense*MPIAIJ product cannot be formed directly here:
5448:                 n                    p                     p
5449:          (            )       (            )        (            )
5450:        m (      A     )  *  n (      B     )   =  m (      C     )
5451:          (            )       (            )        (            )
5453: */
5454: PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5455: {
5456: PetscErrorCode ierr;
5457: Mat At,Bt,Ct;
5460: MatTranspose(A,MAT_INITIAL_MATRIX,&At);
5461: MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);
5462: MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);
5463: MatDestroy(&At);
5464: MatDestroy(&Bt);
5465: MatTranspose(Ct,MAT_REUSE_MATRIX,&C);
5466: MatDestroy(&Ct);
5467: return(0);
5468: }
5472: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5473: {
5475: PetscInt m=A->rmap->n,n=B->cmap->n;
5476: Mat Cmat;
5479:   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5480: MatCreate(((PetscObject)A)->comm,&Cmat);
5481: MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
5482: MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);
5483: MatSetType(Cmat,MATMPIDENSE);
5484: MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);
5485: MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);
5486: MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);
5487: *C = Cmat;
5488: (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5489: return(0);
5490: }
5492: /* ----------------------------------------------------------------*/
5495: PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5496: {
5500: if (scall == MAT_INITIAL_MATRIX){
5501: MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);
5502: }
5503: MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);
5504: return(0);
5505: }
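These three routines back the composed "MatMatMult_mpidense_mpiaij_C" entry registered in MatCreate_MPIAIJ() below; a hedged sketch of the user-level call that ends up here, assuming A is MATMPIDENSE and B is MATMPIAIJ:

  Mat            C;
  PetscErrorCode ierr;

  /* the fill estimate is not used by this combination, so PETSC_DEFAULT is fine */
  ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);  /* C is MPIDense */
  /* ... use C ... */
  ierr = MatDestroy(&C);CHKERRQ(ierr);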
5507: EXTERN_C_BEGIN
5508: #if defined(PETSC_HAVE_MUMPS)
5509: extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5510: #endif
5511: #if defined(PETSC_HAVE_PASTIX)
5512: extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5513: #endif
5514: #if defined(PETSC_HAVE_SUPERLU_DIST)
5515: extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5516: #endif
5517: #if defined(PETSC_HAVE_SPOOLES)
5518: extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5519: #endif
5520: EXTERN_C_END
5522: /*MC
5523: MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5525: Options Database Keys:
5526: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5528: Level: beginner
5530: .seealso: MatCreateAIJ()
5531: M*/
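A minimal creation sketch for a MATMPIAIJ matrix; the per-row preallocation counts (5 diagonal, 2 off-diagonal nonzeros) are illustrative only:

  Mat            A;
  PetscErrorCode ierr;

  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);   /* or use -mat_type mpiaij with MatSetFromOptions() */
  ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
  ierr = MatDestroy(&A);CHKERRQ(ierr);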
5533: EXTERN_C_BEGIN
5536: PetscErrorCode MatCreate_MPIAIJ(Mat B)
5537: {
5538: Mat_MPIAIJ *b;
5540: PetscMPIInt size;
5543: MPI_Comm_size(((PetscObject)B)->comm,&size);
5545: PetscNewLog(B,Mat_MPIAIJ,&b);
5546: B->data = (void*)b;
5547: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
5548: B->assembled = PETSC_FALSE;
5550: B->insertmode = NOT_SET_VALUES;
5551: b->size = size;
5552: MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);
5554: /* build cache for off array entries formed */
5555: MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);
5556: b->donotstash = PETSC_FALSE;
5557: b->colmap = 0;
5558: b->garray = 0;
5559: b->roworiented = PETSC_TRUE;
5561: /* stuff used for matrix vector multiply */
5562: b->lvec = PETSC_NULL;
5563: b->Mvctx = PETSC_NULL;
5565: /* stuff for MatGetRow() */
5566: b->rowindices = 0;
5567: b->rowvalues = 0;
5568: b->getrowactive = PETSC_FALSE;
5570: #if defined(PETSC_HAVE_SPOOLES)
5571: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5572: "MatGetFactor_mpiaij_spooles",
5573: MatGetFactor_mpiaij_spooles);
5574: #endif
5575: #if defined(PETSC_HAVE_MUMPS)
5576: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5577: "MatGetFactor_aij_mumps",
5578: MatGetFactor_aij_mumps);
5579: #endif
5580: #if defined(PETSC_HAVE_PASTIX)
5581: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5582: "MatGetFactor_mpiaij_pastix",
5583: MatGetFactor_mpiaij_pastix);
5584: #endif
5585: #if defined(PETSC_HAVE_SUPERLU_DIST)
5586: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5587: "MatGetFactor_mpiaij_superlu_dist",
5588: MatGetFactor_mpiaij_superlu_dist);
5589: #endif
5590: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5591: "MatStoreValues_MPIAIJ",
5592: MatStoreValues_MPIAIJ);
5593: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5594: "MatRetrieveValues_MPIAIJ",
5595: MatRetrieveValues_MPIAIJ);
5596: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5597: "MatGetDiagonalBlock_MPIAIJ",
5598: MatGetDiagonalBlock_MPIAIJ);
5599: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5600: "MatIsTranspose_MPIAIJ",
5601: MatIsTranspose_MPIAIJ);
5602: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5603: "MatMPIAIJSetPreallocation_MPIAIJ",
5604: MatMPIAIJSetPreallocation_MPIAIJ);
5605: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5606: "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5607: MatMPIAIJSetPreallocationCSR_MPIAIJ);
5608: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5609: "MatDiagonalScaleLocal_MPIAIJ",
5610: MatDiagonalScaleLocal_MPIAIJ);
5611: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5612: "MatConvert_MPIAIJ_MPIAIJPERM",
5613: MatConvert_MPIAIJ_MPIAIJPERM);
5614: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5615: "MatConvert_MPIAIJ_MPIAIJCRL",
5616: MatConvert_MPIAIJ_MPIAIJCRL);
5617: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5618: "MatConvert_MPIAIJ_MPISBAIJ",
5619: MatConvert_MPIAIJ_MPISBAIJ);
5620: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5621: "MatMatMult_MPIDense_MPIAIJ",
5622: MatMatMult_MPIDense_MPIAIJ);
5623: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5624: "MatMatMultSymbolic_MPIDense_MPIAIJ",
5625: MatMatMultSymbolic_MPIDense_MPIAIJ);
5626: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5627: "MatMatMultNumeric_MPIDense_MPIAIJ",
5628: MatMatMultNumeric_MPIDense_MPIAIJ);
5629: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);
5630: return(0);
5631: }
5632: EXTERN_C_END
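The constructor above publishes its type-specific implementations with PetscObjectComposeFunctionDynamic(); generic front ends retrieve them by name. A hedged sketch of that dispatch pattern, assuming the petsc-3.3 calling form of PetscObjectQueryFunction():

  PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]) = PETSC_NULL;
  PetscErrorCode ierr;

  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    /* dispatches to MatMPIAIJSetPreallocation_MPIAIJ() registered above */
    ierr = (*f)(B,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  }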
5636: /*@
5637: MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5638: and "off-diagonal" part of the matrix in CSR format.
5640: Collective on MPI_Comm
5642: Input Parameters:
5643: + comm - MPI communicator
5644: . m - number of local rows (Cannot be PETSC_DECIDE)
5645: .  n - This value should be the same as the local size used in creating the
5646:        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5647:        calculated if N is given). For square matrices n is almost always m.
5648: .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5649: .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5650: . i - row indices for "diagonal" portion of matrix
5651: . j - column indices
5652: . a - matrix values
5653: . oi - row indices for "off-diagonal" portion of matrix
5654: . oj - column indices
5655: - oa - matrix values
5657: Output Parameter:
5658: . mat - the matrix
5660: Level: advanced
5662: Notes:
5663: The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5664: must free the arrays once the matrix has been destroyed and not before.
5666: The i and j indices are 0 based
5667:
5668: See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5670: This sets local rows and cannot be used to set off-processor values.
5672: You cannot later use MatSetValues() to change values in this matrix.
5674: .keywords: matrix, aij, compressed row, sparse, parallel
5676: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5677: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5678: @*/
5679: PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5680: PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5681: {
5683: Mat_MPIAIJ *maij;
5686: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5687: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5688: if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5689: MatCreate(comm,mat);
5690: MatSetSizes(*mat,m,n,M,N);
5691: MatSetType(*mat,MATMPIAIJ);
5692: maij = (Mat_MPIAIJ*) (*mat)->data;
5693: maij->donotstash = PETSC_TRUE;
5694: (*mat)->preallocated = PETSC_TRUE;
5696: PetscLayoutSetUp((*mat)->rmap);
5697: PetscLayoutSetUp((*mat)->cmap);
5699: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);
5700: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);
5702: MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);
5703: MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);
5704: MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);
5705: MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);
5707: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
5708: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
5709: return(0);
5710: }
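A hedged example for MatCreateMPIAIJWithSplitArrays(), assuming exactly two processes, each owning rows and columns 2*rank and 2*rank+1 of a 4x4 matrix; the diagonal-block j indices are local, the off-diagonal oj indices are global, and all arrays must stay valid until the matrix is destroyed:

  Mat            mat;
  PetscMPIInt    rank;
  PetscInt       i[]   = {0,1,2};              /* "diagonal" block: one entry per local row */
  PetscInt       j[]   = {0,1};                /* local column indices within that block    */
  PetscScalar    a[]   = {2.0,2.0};
  PetscInt       oi[]  = {0,1,2};              /* "off-diagonal" block: one entry per row   */
  PetscInt       oj0[] = {2,3},oj1[] = {0,1};  /* global column indices                     */
  PetscScalar    oa[]  = {-1.0,-1.0};
  PetscErrorCode ierr;

  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
  ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,rank ? oj1 : oj0,oa,&mat);CHKERRQ(ierr);
  /* ... use mat; MatSetValues() cannot be used to change it ... */
  ierr = MatDestroy(&mat);CHKERRQ(ierr);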
5712: /*
5713: Special version for direct calls from Fortran
5714: */
5715: #include <petsc-private/fortranimpl.h>
5717: #if defined(PETSC_HAVE_FORTRAN_CAPS)
5718: #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5719: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5720: #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5721: #endif
5723: /* Change these macros so can be used in void function */
5724: #undef CHKERRQ
5725: #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5726: #undef SETERRQ2
5727: #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5728: #undef SETERRQ3
5729: #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5730: #undef SETERRQ
5731: #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5733: EXTERN_C_BEGIN
5736: void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5737: {
5738: Mat mat = *mmat;
5739: PetscInt m = *mm, n = *mn;
5740: InsertMode addv = *maddv;
5741: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
5742: PetscScalar value;
5743: PetscErrorCode ierr;
5745: MatCheckPreallocated(mat,1);
5746: if (mat->insertmode == NOT_SET_VALUES) {
5747: mat->insertmode = addv;
5748: }
5749: #if defined(PETSC_USE_DEBUG)
5750: else if (mat->insertmode != addv) {
5751: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5752: }
5753: #endif
5754: {
5755: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5756: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5757: PetscBool roworiented = aij->roworiented;
5759: /* Some Variables required in the macro */
5760: Mat A = aij->A;
5761: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
5762: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5763: MatScalar *aa = a->a;
5764: PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5765: Mat B = aij->B;
5766: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
5767: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5768: MatScalar *ba = b->a;
5770: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5771: PetscInt nonew = a->nonew;
5772: MatScalar *ap1,*ap2;
5775: for (i=0; i<m; i++) {
5776: if (im[i] < 0) continue;
5777: #if defined(PETSC_USE_DEBUG)
5778: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5779: #endif
5780: if (im[i] >= rstart && im[i] < rend) {
5781: row = im[i] - rstart;
5782: lastcol1 = -1;
5783: rp1 = aj + ai[row];
5784: ap1 = aa + ai[row];
5785: rmax1 = aimax[row];
5786: nrow1 = ailen[row];
5787: low1 = 0;
5788: high1 = nrow1;
5789: lastcol2 = -1;
5790: rp2 = bj + bi[row];
5791: ap2 = ba + bi[row];
5792: rmax2 = bimax[row];
5793: nrow2 = bilen[row];
5794: low2 = 0;
5795: high2 = nrow2;
5797: for (j=0; j<n; j++) {
5798: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5799: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5800: if (in[j] >= cstart && in[j] < cend){
5801: col = in[j] - cstart;
5802: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5803: } else if (in[j] < 0) continue;
5804: #if defined(PETSC_USE_DEBUG)
5805: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5806: #endif
5807: else {
5808: if (mat->was_assembled) {
5809: if (!aij->colmap) {
5810: MatCreateColmap_MPIAIJ_Private(mat);
5811: }
5812: #if defined (PETSC_USE_CTABLE)
5813: PetscTableFind(aij->colmap,in[j]+1,&col);
5814: col--;
5815: #else
5816: col = aij->colmap[in[j]] - 1;
5817: #endif
5818: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5819: MatDisAssemble_MPIAIJ(mat);
5820: col = in[j];
5821: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5822: B = aij->B;
5823: b = (Mat_SeqAIJ*)B->data;
5824: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5825: rp2 = bj + bi[row];
5826: ap2 = ba + bi[row];
5827: rmax2 = bimax[row];
5828: nrow2 = bilen[row];
5829: low2 = 0;
5830: high2 = nrow2;
5831: bm = aij->B->rmap->n;
5832: ba = b->a;
5833: }
5834: } else col = in[j];
5835: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5836: }
5837: }
5838: } else {
5839: if (!aij->donotstash) {
5840: if (roworiented) {
5841: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5842: } else {
5843: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5844: }
5845: }
5846: }
5847: }}
5848: PetscFunctionReturnVoid();
5849: }
5850: EXTERN_C_END