Actual source code: mpiaij.c
petsc-3.3-p1 2012-06-15
2: #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
3: #include <petscblaslapack.h>
5: /*MC
6: MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
8: This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9: and MATMPIAIJ otherwise. As a result, for single process communicators,
10: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11: for communicators controlling multiple processes. It is recommended that you call both of
12: the above preallocation routines for simplicity.
14: Options Database Keys:
15: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
17: Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
18: enough of them exist.
20: Level: beginner
22: .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23: M*/
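/*
   Illustrative sketch, not part of mpiaij.c: creating an AIJ matrix and calling both
   preallocation routines as the manual page above recommends, so the same code works on
   one or many processes. The sizes and per-row nonzero estimates (100, 5, 2) are arbitrary
   example values; error checking (ierr/CHKERRQ) is omitted for brevity.
*/
#include <petscmat.h>
int main(int argc,char **argv)
{
  Mat         A;
  PetscInt    i,rstart,rend,n = 100;
  PetscScalar one = 1.0;

  PetscInitialize(&argc,&argv,0,0);
  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);
  MatSetType(A,MATAIJ);                                   /* SeqAIJ on 1 process, MPIAIJ otherwise */
  MatSeqAIJSetPreallocation(A,5,PETSC_NULL);              /* used only on a 1-process communicator */
  MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL); /* used only on multi-process communicators */
  MatGetOwnershipRange(A,&rstart,&rend);
  for (i=rstart; i<rend; i++) {
    MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);         /* put something on the diagonal */
  }
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  MatDestroy(&A);
  PetscFinalize();
  return 0;
}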
25: /*MC
26: MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
28: This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29: and MATMPIAIJCRL otherwise. As a result, for single process communicators,
30: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31: for communicators controlling multiple processes. It is recommended that you call both of
32: the above preallocation routines for simplicity.
34: Options Database Keys:
35: . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
37: Level: beginner
39: .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40: M*/
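/*
   Illustrative sketch, not part of mpiaij.c: selecting the "aijcrl" format through the
   options database instead of hard-coding a type, as described above. Run with
   -mat_type aijcrl (or leave the option unset to keep the default AIJ format).
   Error checking is omitted for brevity.
*/
#include <petscmat.h>
int main(int argc,char **argv)
{
  Mat A;

  PetscInitialize(&argc,&argv,0,0);
  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,50,50);
  MatSetFromOptions(A);                      /* honors -mat_type aijcrl */
  MatSetUp(A);                               /* default preallocation if none was given */
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  MatDestroy(&A);
  PetscFinalize();
  return 0;
}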
44: PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45: {
46: PetscErrorCode ierr;
47: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
48: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data;
49: Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data;
50: const PetscInt *ia,*ib;
51: const MatScalar *aa,*bb;
52: PetscInt na,nb,i,j,*rows,cnt=0,n0rows;
53: PetscInt m = M->rmap->n,rstart = M->rmap->rstart;
56: *keptrows = 0;
57: ia = a->i;
58: ib = b->i;
59: for (i=0; i<m; i++) {
60: na = ia[i+1] - ia[i];
61: nb = ib[i+1] - ib[i];
62: if (!na && !nb) {
63: cnt++;
64: goto ok1;
65: }
66: aa = a->a + ia[i];
67: for (j=0; j<na; j++) {
68: if (aa[j] != 0.0) goto ok1;
69: }
70: bb = b->a + ib[i];
71: for (j=0; j <nb; j++) {
72: if (bb[j] != 0.0) goto ok1;
73: }
74: cnt++;
75: ok1:;
76: }
77: MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);
78: if (!n0rows) return(0);
79: PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);
80: cnt = 0;
81: for (i=0; i<m; i++) {
82: na = ia[i+1] - ia[i];
83: nb = ib[i+1] - ib[i];
84: if (!na && !nb) continue;
85: aa = a->a + ia[i];
86: for(j=0; j<na;j++) {
87: if (aa[j] != 0.0) {
88: rows[cnt++] = rstart + i;
89: goto ok2;
90: }
91: }
92: bb = b->a + ib[i];
93: for (j=0; j<nb; j++) {
94: if (bb[j] != 0.0) {
95: rows[cnt++] = rstart + i;
96: goto ok2;
97: }
98: }
99: ok2:;
100: }
101: ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);
102: return(0);
103: }
107: PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108: {
110: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
111: PetscInt i,n,*garray = aij->garray;
112: Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data;
113: Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data;
114: PetscReal *work;
117: MatGetSize(A,PETSC_NULL,&n);
118: PetscMalloc(n*sizeof(PetscReal),&work);
119: PetscMemzero(work,n*sizeof(PetscReal));
120: if (type == NORM_2) {
121: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123: }
124: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126: }
127: } else if (type == NORM_1) {
128: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130: }
131: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133: }
134: } else if (type == NORM_INFINITY) {
135: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136: work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137: }
138: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139: work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140: }
142: } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143: if (type == NORM_INFINITY) {
144: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
145: } else {
146: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
147: }
148: PetscFree(work);
149: if (type == NORM_2) {
150: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151: }
152: return(0);
153: }
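/*
   Minimal usage sketch (not part of mpiaij.c) for the public MatGetColumnNorms() interface
   implemented above: the caller supplies an array with one PetscReal per global column.
   The matrix construction is an arbitrary example; error checking is omitted.
*/
#include <petscmat.h>
int main(int argc,char **argv)
{
  Mat         A;
  PetscInt    i,rstart,rend,N = 20;
  PetscReal   *norms;
  PetscScalar v = 2.0;

  PetscInitialize(&argc,&argv,0,0);
  MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,1,PETSC_NULL,1,PETSC_NULL,&A);
  MatGetOwnershipRange(A,&rstart,&rend);
  for (i=rstart; i<rend; i++) MatSetValues(A,1,&i,1,&i,&v,INSERT_VALUES);
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  PetscMalloc(N*sizeof(PetscReal),&norms);
  MatGetColumnNorms(A,NORM_2,norms);          /* also accepts NORM_1 and NORM_INFINITY */
  PetscFree(norms);
  MatDestroy(&A);
  PetscFinalize();
  return 0;
}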
157: /*
158: Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159: MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
161: Only for square matrices
162: */
163: PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164: {
165: PetscMPIInt rank,size;
166: PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
168: Mat mat;
169: Mat_SeqAIJ *gmata;
170: PetscMPIInt tag;
171: MPI_Status status;
172: PetscBool aij;
173: MatScalar *gmataa,*ao,*ad,*gmataarestore=0;
176: CHKMEMQ;
177: MPI_Comm_rank(comm,&rank);
178: MPI_Comm_size(comm,&size);
179: if (!rank) {
180: PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);
181: if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182: }
183: if (reuse == MAT_INITIAL_MATRIX) {
184: MatCreate(comm,&mat);
185: MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
186: MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);
187: MatSetType(mat,MATAIJ);
188: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
189: PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);
190: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
191: rowners[0] = 0;
192: for (i=2; i<=size; i++) {
193: rowners[i] += rowners[i-1];
194: }
195: rstart = rowners[rank];
196: rend = rowners[rank+1];
197: PetscObjectGetNewTag((PetscObject)mat,&tag);
198: if (!rank) {
199: gmata = (Mat_SeqAIJ*) gmat->data;
200: /* send row lengths to all processors */
201: for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
202: for (i=1; i<size; i++) {
203: MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
204: }
205: /* determine the number of diagonal and off-diagonal entries per row */
206: PetscMemzero(olens,m*sizeof(PetscInt));
207: PetscMalloc(m*sizeof(PetscInt),&ld);
208: PetscMemzero(ld,m*sizeof(PetscInt));
209: jj = 0;
210: for (i=0; i<m; i++) {
211: for (j=0; j<dlens[i]; j++) {
212: if (gmata->j[jj] < rstart) ld[i]++;
213: if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
214: jj++;
215: }
216: }
217: /* send column indices to other processes */
218: for (i=1; i<size; i++) {
219: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
220: MPI_Send(&nz,1,MPIU_INT,i,tag,comm);
221: MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);
222: }
224: /* send numerical values to other processes */
225: for (i=1; i<size; i++) {
226: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
227: MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
228: }
229: gmataa = gmata->a;
230: gmataj = gmata->j;
232: } else {
233: /* receive row lengths */
234: MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);
235: /* receive column indices */
236: MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);
237: PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);
238: MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);
239: /* determine the number of diagonal and off-diagonal entries per row */
240: PetscMemzero(olens,m*sizeof(PetscInt));
241: PetscMalloc(m*sizeof(PetscInt),&ld);
242: PetscMemzero(ld,m*sizeof(PetscInt));
243: jj = 0;
244: for (i=0; i<m; i++) {
245: for (j=0; j<dlens[i]; j++) {
246: if (gmataj[jj] < rstart) ld[i]++;
247: if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
248: jj++;
249: }
250: }
251: /* receive numerical values */
252: PetscMemzero(gmataa,nz*sizeof(PetscScalar));
253: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
254: }
255: /* set preallocation */
256: for (i=0; i<m; i++) {
257: dlens[i] -= olens[i];
258: }
259: MatSeqAIJSetPreallocation(mat,0,dlens);
260: MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);
261:
262: for (i=0; i<m; i++) {
263: dlens[i] += olens[i];
264: }
265: cnt = 0;
266: for (i=0; i<m; i++) {
267: row = rstart + i;
268: MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);
269: cnt += dlens[i];
270: }
271: if (rank) {
272: PetscFree2(gmataa,gmataj);
273: }
274: PetscFree2(dlens,olens);
275: PetscFree(rowners);
276: ((Mat_MPIAIJ*)(mat->data))->ld = ld;
277: *inmat = mat;
278: } else { /* column indices are already set; only need to move over numerical values from process 0 */
279: Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
280: Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
281: mat = *inmat;
282: PetscObjectGetNewTag((PetscObject)mat,&tag);
283: if (!rank) {
284: /* send numerical values to other processes */
285: gmata = (Mat_SeqAIJ*) gmat->data;
286: MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);
287: gmataa = gmata->a;
288: for (i=1; i<size; i++) {
289: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
290: MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
291: }
292: nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
293: } else {
294: /* receive numerical values from process 0 */
295: nz = Ad->nz + Ao->nz;
296: PetscMalloc(nz*sizeof(PetscScalar),&gmataa); gmataarestore = gmataa;
297: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
298: }
299: /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
300: ld = ((Mat_MPIAIJ*)(mat->data))->ld;
301: ad = Ad->a;
302: ao = Ao->a;
303: if (mat->rmap->n) {
304: i = 0;
305: nz = ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
306: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
307: }
308: for (i=1; i<mat->rmap->n; i++) {
309: nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
310: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
311: }
312: i--;
313: if (mat->rmap->n) {
314: nz = Ao->i[i+1] - Ao->i[i] - ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
315: }
316: if (rank) {
317: PetscFree(gmataarestore);
318: }
319: }
320: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
321: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
322: CHKMEMQ;
323: return(0);
324: }
326: /*
327: Local utility routine that creates a mapping from the global column
328: number to the local number in the off-diagonal part of the local
329: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
330: a slightly higher hash table cost; without it, it is not scalable (each process
331: stores an order-N integer array) but access is fast.
332: */
335: PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
336: {
337: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
339: PetscInt n = aij->B->cmap->n,i;
342: if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
343: #if defined (PETSC_USE_CTABLE)
344: PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);
345: for (i=0; i<n; i++){
346: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);
347: }
348: #else
349: PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);
350: PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));
351: PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));
352: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
353: #endif
354: return(0);
355: }
357: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
358: { \
359: if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
360: lastcol1 = col;\
361: while (high1-low1 > 5) { \
362: t = (low1+high1)/2; \
363: if (rp1[t] > col) high1 = t; \
364: else low1 = t; \
365: } \
366: for (_i=low1; _i<high1; _i++) { \
367: if (rp1[_i] > col) break; \
368: if (rp1[_i] == col) { \
369: if (addv == ADD_VALUES) ap1[_i] += value; \
370: else ap1[_i] = value; \
371: goto a_noinsert; \
372: } \
373: } \
374: if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
375: if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
376: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
377: MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
378: N = nrow1++ - 1; a->nz++; high1++; \
379: /* shift up all the later entries in this row */ \
380: for (ii=N; ii>=_i; ii--) { \
381: rp1[ii+1] = rp1[ii]; \
382: ap1[ii+1] = ap1[ii]; \
383: } \
384: rp1[_i] = col; \
385: ap1[_i] = value; \
386: a_noinsert: ; \
387: ailen[row] = nrow1; \
388: }
391: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
392: { \
393: if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
394: lastcol2 = col;\
395: while (high2-low2 > 5) { \
396: t = (low2+high2)/2; \
397: if (rp2[t] > col) high2 = t; \
398: else low2 = t; \
399: } \
400: for (_i=low2; _i<high2; _i++) { \
401: if (rp2[_i] > col) break; \
402: if (rp2[_i] == col) { \
403: if (addv == ADD_VALUES) ap2[_i] += value; \
404: else ap2[_i] = value; \
405: goto b_noinsert; \
406: } \
407: } \
408: if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
409: if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
410: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
411: MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
412: N = nrow2++ - 1; b->nz++; high2++; \
413: /* shift up all the later entries in this row */ \
414: for (ii=N; ii>=_i; ii--) { \
415: rp2[ii+1] = rp2[ii]; \
416: ap2[ii+1] = ap2[ii]; \
417: } \
418: rp2[_i] = col; \
419: ap2[_i] = value; \
420: b_noinsert: ; \
421: bilen[row] = nrow2; \
422: }
426: PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
427: {
428: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
429: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
431: PetscInt l,*garray = mat->garray,diag;
434: /* code only works for square matrices A */
436: /* find size of row to the left of the diagonal part */
437: MatGetOwnershipRange(A,&diag,0);
438: row = row - diag;
439: for (l=0; l<b->i[row+1]-b->i[row]; l++) {
440: if (garray[b->j[b->i[row]+l]] > diag) break;
441: }
442: PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));
444: /* diagonal part */
445: PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));
447: /* right of diagonal part */
448: PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));
449: return(0);
450: }
454: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455: {
456: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
457: PetscScalar value;
459: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
460: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
461: PetscBool roworiented = aij->roworiented;
463: /* Some Variables required in the macro */
464: Mat A = aij->A;
465: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
466: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
467: MatScalar *aa = a->a;
468: PetscBool ignorezeroentries = a->ignorezeroentries;
469: Mat B = aij->B;
470: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
471: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
472: MatScalar *ba = b->a;
474: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
475: PetscInt nonew;
476: MatScalar *ap1,*ap2;
480: for (i=0; i<m; i++) {
481: if (im[i] < 0) continue;
482: #if defined(PETSC_USE_DEBUG)
483: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
484: #endif
485: if (im[i] >= rstart && im[i] < rend) {
486: row = im[i] - rstart;
487: lastcol1 = -1;
488: rp1 = aj + ai[row];
489: ap1 = aa + ai[row];
490: rmax1 = aimax[row];
491: nrow1 = ailen[row];
492: low1 = 0;
493: high1 = nrow1;
494: lastcol2 = -1;
495: rp2 = bj + bi[row];
496: ap2 = ba + bi[row];
497: rmax2 = bimax[row];
498: nrow2 = bilen[row];
499: low2 = 0;
500: high2 = nrow2;
502: for (j=0; j<n; j++) {
503: if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
504: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
505: if (in[j] >= cstart && in[j] < cend){
506: col = in[j] - cstart;
507: nonew = a->nonew;
508: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
509: } else if (in[j] < 0) continue;
510: #if defined(PETSC_USE_DEBUG)
511: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
512: #endif
513: else {
514: if (mat->was_assembled) {
515: if (!aij->colmap) {
516: MatCreateColmap_MPIAIJ_Private(mat);
517: }
518: #if defined (PETSC_USE_CTABLE)
519: PetscTableFind(aij->colmap,in[j]+1,&col);
520: col--;
521: #else
522: col = aij->colmap[in[j]] - 1;
523: #endif
524: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
525: MatDisAssemble_MPIAIJ(mat);
526: col = in[j];
527: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
528: B = aij->B;
529: b = (Mat_SeqAIJ*)B->data;
530: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
531: rp2 = bj + bi[row];
532: ap2 = ba + bi[row];
533: rmax2 = bimax[row];
534: nrow2 = bilen[row];
535: low2 = 0;
536: high2 = nrow2;
537: bm = aij->B->rmap->n;
538: ba = b->a;
539: }
540: } else col = in[j];
541: nonew = b->nonew;
542: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
543: }
544: }
545: } else {
546: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
547: if (!aij->donotstash) {
548: mat->assembled = PETSC_FALSE;
549: if (roworiented) {
550: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
551: } else {
552: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
553: }
554: }
555: }
556: }
557: return(0);
558: }
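/*
   Usage sketch (not part of mpiaij.c): rows owned by other processes may be set through the
   public MatSetValues() interface; as implemented above they are stashed locally and moved
   to their owners during MatAssemblyBegin()/MatAssemblyEnd(). The matrix and the particular
   entries chosen here are arbitrary examples; error checking is omitted.
*/
#include <petscmat.h>
int main(int argc,char **argv)
{
  Mat         A;
  PetscInt    row,col,rstart,rend,N = 40;
  PetscScalar v = 3.0;

  PetscInitialize(&argc,&argv,0,0);
  MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,2,PETSC_NULL,2,PETSC_NULL,&A);
  MatGetOwnershipRange(A,&rstart,&rend);
  row = rstart;                            /* a locally owned row: handled immediately */
  col = rstart;
  MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);
  row = (rend < N) ? rend : 0;             /* a row owned by another process: goes into the stash */
  col = row;
  MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);  /* communicates the stashed entries */
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  MatDestroy(&A);
  PetscFinalize();
  return 0;
}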
562: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
563: {
564: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
566: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
567: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
570: for (i=0; i<m; i++) {
571: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
572: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
573: if (idxm[i] >= rstart && idxm[i] < rend) {
574: row = idxm[i] - rstart;
575: for (j=0; j<n; j++) {
576: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
577: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
578: if (idxn[j] >= cstart && idxn[j] < cend){
579: col = idxn[j] - cstart;
580: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
581: } else {
582: if (!aij->colmap) {
583: MatCreateColmap_MPIAIJ_Private(mat);
584: }
585: #if defined (PETSC_USE_CTABLE)
586: PetscTableFind(aij->colmap,idxn[j]+1,&col);
587: col --;
588: #else
589: col = aij->colmap[idxn[j]] - 1;
590: #endif
591: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
592: else {
593: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
594: }
595: }
596: }
597: } else {
598: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
599: }
600: }
601: return(0);
602: }
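/*
   Usage sketch (not part of mpiaij.c) for the public MatGetValues() interface implemented
   above: as the code indicates, only rows owned by the calling process may be queried,
   although the requested columns may lie anywhere. The matrix is an arbitrary example;
   error checking is omitted.
*/
#include <petscmat.h>
int main(int argc,char **argv)
{
  Mat         A;
  PetscInt    i,rstart,rend,N = 20;
  PetscScalar v = 5.0,got;

  PetscInitialize(&argc,&argv,0,0);
  MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,1,PETSC_NULL,0,PETSC_NULL,&A);
  MatGetOwnershipRange(A,&rstart,&rend);
  for (i=rstart; i<rend; i++) MatSetValues(A,1,&i,1,&i,&v,INSERT_VALUES);
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  i = rstart;                               /* a locally owned row */
  MatGetValues(A,1,&i,1,&i,&got);           /* got == 5.0; entries not stored come back as 0 */
  MatDestroy(&A);
  PetscFinalize();
  return 0;
}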
604: extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
608: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
609: {
610: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
612: PetscInt nstash,reallocs;
613: InsertMode addv;
616: if (aij->donotstash || mat->nooffprocentries) {
617: return(0);
618: }
620: /* make sure all processors are either in INSERTMODE or ADDMODE */
621: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);
622: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
623: mat->insertmode = addv; /* in case this processor had no cache */
625: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
626: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
627: PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
628: return(0);
629: }
633: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
634: {
635: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
636: Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data;
638: PetscMPIInt n;
639: PetscInt i,j,rstart,ncols,flg;
640: PetscInt *row,*col;
641: PetscBool other_disassembled;
642: PetscScalar *val;
643: InsertMode addv = mat->insertmode;
645: /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
647: if (!aij->donotstash && !mat->nooffprocentries) {
648: while (1) {
649: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
650: if (!flg) break;
652: for (i=0; i<n;) {
653: /* Now identify the consecutive vals belonging to the same row */
654: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
655: if (j < n) ncols = j-i;
656: else ncols = n-i;
657: /* Now assemble all these values with a single function call */
658: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
659: i = j;
660: }
661: }
662: MatStashScatterEnd_Private(&mat->stash);
663: }
664: MatAssemblyBegin(aij->A,mode);
665: MatAssemblyEnd(aij->A,mode);
667: /* determine if any processor has disassembled; if so we must
668: also disassemble ourselves so that we may reassemble. */
669: /*
670: if nonzero structure of submatrix B cannot change then we know that
671: no processor disassembled thus we can skip this stuff
672: */
673: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
674: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);
675: if (mat->was_assembled && !other_disassembled) {
676: MatDisAssemble_MPIAIJ(mat);
677: }
678: }
679: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
680: MatSetUpMultiply_MPIAIJ(mat);
681: }
682: MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);
683: MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);
684: MatAssemblyBegin(aij->B,mode);
685: MatAssemblyEnd(aij->B,mode);
687: PetscFree2(aij->rowvalues,aij->rowindices);
688: aij->rowvalues = 0;
690: /* used by MatAXPY() */
691: a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
692: a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */
694: VecDestroy(&aij->diag);
695: if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
696: return(0);
697: }
701: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
702: {
703: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
707: MatZeroEntries(l->A);
708: MatZeroEntries(l->B);
709: return(0);
710: }
714: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
715: {
716: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
717: PetscErrorCode ierr;
718: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
719: PetscInt i,*owners = A->rmap->range;
720: PetscInt *nprocs,j,idx,nsends,row;
721: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
722: PetscInt *rvalues,count,base,slen,*source;
723: PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart;
724: MPI_Comm comm = ((PetscObject)A)->comm;
725: MPI_Request *send_waits,*recv_waits;
726: MPI_Status recv_status,*send_status;
727: const PetscScalar *xx;
728: PetscScalar *bb;
729: #if defined(PETSC_DEBUG)
730: PetscBool found = PETSC_FALSE;
731: #endif
734: /* first count number of contributors to each processor */
735: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
736: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
737: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
738: j = 0;
739: for (i=0; i<N; i++) {
740: if (lastidx > (idx = rows[i])) j = 0;
741: lastidx = idx;
742: for (; j<size; j++) {
743: if (idx >= owners[j] && idx < owners[j+1]) {
744: nprocs[2*j]++;
745: nprocs[2*j+1] = 1;
746: owner[i] = j;
747: #if defined(PETSC_DEBUG)
748: found = PETSC_TRUE;
749: #endif
750: break;
751: }
752: }
753: #if defined(PETSC_DEBUG)
754: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
755: found = PETSC_FALSE;
756: #endif
757: }
758: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
760: if (A->nooffproczerorows) {
761: if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
762: nrecvs = nsends;
763: nmax = N;
764: } else {
765: /* inform other processors of number of messages and max length*/
766: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
767: }
769: /* post receives: */
770: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
771: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
772: for (i=0; i<nrecvs; i++) {
773: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
774: }
776: /* do sends:
777: 1) starts[i] gives the starting index in svalues for stuff going to
778: the ith processor
779: */
780: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
781: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
782: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
783: starts[0] = 0;
784: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
785: for (i=0; i<N; i++) {
786: svalues[starts[owner[i]]++] = rows[i];
787: }
789: starts[0] = 0;
790: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
791: count = 0;
792: for (i=0; i<size; i++) {
793: if (nprocs[2*i+1]) {
794: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
795: }
796: }
797: PetscFree(starts);
799: base = owners[rank];
801: /* wait on receives */
802: PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
803: count = nrecvs; slen = 0;
804: while (count) {
805: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
806: /* unpack receives into our local space */
807: MPI_Get_count(&recv_status,MPIU_INT,&n);
808: source[imdex] = recv_status.MPI_SOURCE;
809: lens[imdex] = n;
810: slen += n;
811: count--;
812: }
813: PetscFree(recv_waits);
814:
815: /* move the data into the send scatter */
816: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
817: count = 0;
818: for (i=0; i<nrecvs; i++) {
819: values = rvalues + i*nmax;
820: for (j=0; j<lens[i]; j++) {
821: lrows[count++] = values[j] - base;
822: }
823: }
824: PetscFree(rvalues);
825: PetscFree2(lens,source);
826: PetscFree(owner);
827: PetscFree(nprocs);
828:
829: /* fix right hand side if needed */
830: if (x && b) {
831: VecGetArrayRead(x,&xx);
832: VecGetArray(b,&bb);
833: for (i=0; i<slen; i++) {
834: bb[lrows[i]] = diag*xx[lrows[i]];
835: }
836: VecRestoreArrayRead(x,&xx);
837: VecRestoreArray(b,&bb);
838: }
839: /*
840: Zero the required rows. If the "diagonal block" of the matrix
841: is square and the user wishes to set the diagonal we use separate
842: code so that MatSetValues() is not called for each diagonal entry, which would allocate
843: new memory with many mallocs and slow things down.
845: */
846: /* must zero l->B before l->A because the (diag) case below may put values into l->B */
847: MatZeroRows(l->B,slen,lrows,0.0,0,0);
848: if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
849: MatZeroRows(l->A,slen,lrows,diag,0,0);
850: } else if (diag != 0.0) {
851: MatZeroRows(l->A,slen,lrows,0.0,0,0);
852: if (((Mat_SeqAIJ*)l->A->data)->nonew) {
853: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
854: MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
855: }
856: for (i = 0; i < slen; i++) {
857: row = lrows[i] + rstart;
858: MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
859: }
860: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
861: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
862: } else {
863: MatZeroRows(l->A,slen,lrows,0.0,0,0);
864: }
865: PetscFree(lrows);
867: /* wait on sends */
868: if (nsends) {
869: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
870: MPI_Waitall(nsends,send_waits,send_status);
871: PetscFree(send_status);
872: }
873: PetscFree(send_waits);
874: PetscFree(svalues);
875: return(0);
876: }
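/*
   Usage sketch (not part of mpiaij.c) for the public MatZeroRows() interface implemented
   above: zero a few (possibly off-process) rows, place diag on their diagonals, and let the
   optional x/b arguments fix the right-hand side so that the solution in those rows is
   preserved. The matrix, rows, and vectors are arbitrary examples; error checking is omitted.
*/
#include <petscmat.h>
int main(int argc,char **argv)
{
  Mat         A;
  Vec         x,b;
  PetscInt    i,rstart,rend,rows[1],N = 30;
  PetscScalar one = 1.0,diag = 4.0;

  PetscInitialize(&argc,&argv,0,0);
  MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,1,PETSC_NULL,1,PETSC_NULL,&A);
  MatGetOwnershipRange(A,&rstart,&rend);
  for (i=rstart; i<rend; i++) MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  MatGetVecs(A,&x,&b);
  VecSet(x,1.0);
  VecSet(b,0.0);
  rows[0] = rstart;                            /* each process zeroes its first local row */
  MatZeroRows(A,1,rows,diag,x,b);              /* b[rows] becomes diag*x[rows] */
  VecDestroy(&x); VecDestroy(&b);
  MatDestroy(&A);
  PetscFinalize();
  return 0;
}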
880: PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
881: {
882: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
883: PetscErrorCode ierr;
884: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
885: PetscInt i,*owners = A->rmap->range;
886: PetscInt *nprocs,j,idx,nsends;
887: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
888: PetscInt *rvalues,count,base,slen,*source;
889: PetscInt *lens,*lrows,*values,m;
890: MPI_Comm comm = ((PetscObject)A)->comm;
891: MPI_Request *send_waits,*recv_waits;
892: MPI_Status recv_status,*send_status;
893: const PetscScalar *xx;
894: PetscScalar *bb,*mask;
895: Vec xmask,lmask;
896: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data;
897: const PetscInt *aj, *ii,*ridx;
898: PetscScalar *aa;
899: #if defined(PETSC_DEBUG)
900: PetscBool found = PETSC_FALSE;
901: #endif
904: /* first count number of contributors to each processor */
905: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
906: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
907: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
908: j = 0;
909: for (i=0; i<N; i++) {
910: if (lastidx > (idx = rows[i])) j = 0;
911: lastidx = idx;
912: for (; j<size; j++) {
913: if (idx >= owners[j] && idx < owners[j+1]) {
914: nprocs[2*j]++;
915: nprocs[2*j+1] = 1;
916: owner[i] = j;
917: #if defined(PETSC_DEBUG)
918: found = PETSC_TRUE;
919: #endif
920: break;
921: }
922: }
923: #if defined(PETSC_DEBUG)
924: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
925: found = PETSC_FALSE;
926: #endif
927: }
928: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
930: /* inform other processors of number of messages and max length*/
931: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
933: /* post receives: */
934: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
935: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
936: for (i=0; i<nrecvs; i++) {
937: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
938: }
940: /* do sends:
941: 1) starts[i] gives the starting index in svalues for stuff going to
942: the ith processor
943: */
944: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
945: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
946: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
947: starts[0] = 0;
948: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
949: for (i=0; i<N; i++) {
950: svalues[starts[owner[i]]++] = rows[i];
951: }
953: starts[0] = 0;
954: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
955: count = 0;
956: for (i=0; i<size; i++) {
957: if (nprocs[2*i+1]) {
958: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
959: }
960: }
961: PetscFree(starts);
963: base = owners[rank];
965: /* wait on receives */
966: PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
967: count = nrecvs; slen = 0;
968: while (count) {
969: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
970: /* unpack receives into our local space */
971: MPI_Get_count(&recv_status,MPIU_INT,&n);
972: source[imdex] = recv_status.MPI_SOURCE;
973: lens[imdex] = n;
974: slen += n;
975: count--;
976: }
977: PetscFree(recv_waits);
978:
979: /* move the data into the send scatter */
980: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
981: count = 0;
982: for (i=0; i<nrecvs; i++) {
983: values = rvalues + i*nmax;
984: for (j=0; j<lens[i]; j++) {
985: lrows[count++] = values[j] - base;
986: }
987: }
988: PetscFree(rvalues);
989: PetscFree2(lens,source);
990: PetscFree(owner);
991: PetscFree(nprocs);
992: /* lrows are the local rows to be zeroed, slen is the number of local rows */
994: /* zero diagonal part of matrix */
995: MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);
996:
997: /* handle off diagonal part of matrix */
998: MatGetVecs(A,&xmask,PETSC_NULL);
999: VecDuplicate(l->lvec,&lmask);
1000: VecGetArray(xmask,&bb);
1001: for (i=0; i<slen; i++) {
1002: bb[lrows[i]] = 1;
1003: }
1004: VecRestoreArray(xmask,&bb);
1005: VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1006: VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1007: VecDestroy(&xmask);
1008: if (x) {
1009: VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);
1010: VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);
1011: VecGetArrayRead(l->lvec,&xx);
1012: VecGetArray(b,&bb);
1013: }
1014: VecGetArray(lmask,&mask);
1016: /* remove zeroed rows of off diagonal matrix */
1017: ii = aij->i;
1018: for (i=0; i<slen; i++) {
1019: PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));
1020: }
1022: /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1023: if (aij->compressedrow.use){
1024: m = aij->compressedrow.nrows;
1025: ii = aij->compressedrow.i;
1026: ridx = aij->compressedrow.rindex;
1027: for (i=0; i<m; i++){
1028: n = ii[i+1] - ii[i];
1029: aj = aij->j + ii[i];
1030: aa = aij->a + ii[i];
1032: for (j=0; j<n; j++) {
1033: if (PetscAbsScalar(mask[*aj])) {
1034: if (b) bb[*ridx] -= *aa*xx[*aj];
1035: *aa = 0.0;
1036: }
1037: aa++;
1038: aj++;
1039: }
1040: ridx++;
1041: }
1042: } else { /* do not use compressed row format */
1043: m = l->B->rmap->n;
1044: for (i=0; i<m; i++) {
1045: n = ii[i+1] - ii[i];
1046: aj = aij->j + ii[i];
1047: aa = aij->a + ii[i];
1048: for (j=0; j<n; j++) {
1049: if (PetscAbsScalar(mask[*aj])) {
1050: if (b) bb[i] -= *aa*xx[*aj];
1051: *aa = 0.0;
1052: }
1053: aa++;
1054: aj++;
1055: }
1056: }
1057: }
1058: if (x) {
1059: VecRestoreArray(b,&bb);
1060: VecRestoreArrayRead(l->lvec,&xx);
1061: }
1062: VecRestoreArray(lmask,&mask);
1063: VecDestroy(&lmask);
1064: PetscFree(lrows);
1066: /* wait on sends */
1067: if (nsends) {
1068: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1069: MPI_Waitall(nsends,send_waits,send_status);
1070: PetscFree(send_status);
1071: }
1072: PetscFree(send_waits);
1073: PetscFree(svalues);
1075: return(0);
1076: }
1080: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1081: {
1082: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1084: PetscInt nt;
1087: VecGetLocalSize(xx,&nt);
1088: if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1089: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1090: (*a->A->ops->mult)(a->A,xx,yy);
1091: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1092: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1093: return(0);
1094: }
1098: PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1099: {
1100: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1104: MatMultDiagonalBlock(a->A,bb,xx);
1105: return(0);
1106: }
1110: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1111: {
1112: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1116: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1117: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1118: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1119: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1120: return(0);
1121: }
1125: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1126: {
1127: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1129: PetscBool merged;
1132: VecScatterGetMerged(a->Mvctx,&merged);
1133: /* do nondiagonal part */
1134: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1135: if (!merged) {
1136: /* send it on its way */
1137: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1138: /* do local part */
1139: (*a->A->ops->multtranspose)(a->A,xx,yy);
1140: /* receive remote parts: note this assumes the values are not actually */
1141: /* added in yy until the next line */
1142: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1143: } else {
1144: /* do local part */
1145: (*a->A->ops->multtranspose)(a->A,xx,yy);
1146: /* send it on its way */
1147: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1148: /* values actually were received in the Begin() but we need to call this nop */
1149: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1150: }
1151: return(0);
1152: }
1154: EXTERN_C_BEGIN
1157: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
1158: {
1159: MPI_Comm comm;
1160: Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1161: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1162: IS Me,Notme;
1164: PetscInt M,N,first,last,*notme,i;
1165: PetscMPIInt size;
1169: /* Easy test: symmetric diagonal block */
1170: Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1171: MatIsTranspose(Adia,Bdia,tol,f);
1172: if (!*f) return(0);
1173: PetscObjectGetComm((PetscObject)Amat,&comm);
1174: MPI_Comm_size(comm,&size);
1175: if (size == 1) return(0);
1177: /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1178: MatGetSize(Amat,&M,&N);
1179: MatGetOwnershipRange(Amat,&first,&last);
1180: PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);
1181: for (i=0; i<first; i++) notme[i] = i;
1182: for (i=last; i<M; i++) notme[i-last+first] = i;
1183: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);
1184: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
1185: MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
1186: Aoff = Aoffs[0];
1187: MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
1188: Boff = Boffs[0];
1189: MatIsTranspose(Aoff,Boff,tol,f);
1190: MatDestroyMatrices(1,&Aoffs);
1191: MatDestroyMatrices(1,&Boffs);
1192: ISDestroy(&Me);
1193: ISDestroy(&Notme);
1194: PetscFree(notme);
1195: return(0);
1196: }
1197: EXTERN_C_END
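/*
   Usage sketch (not part of mpiaij.c) for the public MatIsTranspose() interface implemented
   above: compare B against the transpose of A to within a tolerance. Here B is generated
   with MatTranspose() so the answer is PETSC_TRUE by construction; error checking is omitted.
*/
#include <petscmat.h>
int main(int argc,char **argv)
{
  Mat         A,B;
  PetscInt    i,j,rstart,rend,N = 25;
  PetscScalar v = 1.0;
  PetscBool   flg;

  PetscInitialize(&argc,&argv,0,0);
  MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,2,PETSC_NULL,2,PETSC_NULL,&A);
  MatGetOwnershipRange(A,&rstart,&rend);
  for (i=rstart; i<rend; i++) {
    j = (i+1) % N;                               /* one off-diagonal entry per row */
    MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);
  }
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  MatTranspose(A,MAT_INITIAL_MATRIX,&B);
  MatIsTranspose(A,B,1.e-12,&flg);               /* flg is PETSC_TRUE here */
  MatDestroy(&A); MatDestroy(&B);
  PetscFinalize();
  return 0;
}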
1201: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1202: {
1203: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1207: /* do nondiagonal part */
1208: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1209: /* send it on its way */
1210: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1211: /* do local part */
1212: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1213: /* receive remote parts */
1214: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1215: return(0);
1216: }
1218: /*
1219: This only works correctly for square matrices where the subblock A->A is the
1220: diagonal block
1221: */
1224: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1225: {
1227: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1230: if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1231: if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1232: MatGetDiagonal(a->A,v);
1233: return(0);
1234: }
1238: PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1239: {
1240: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1244: MatScale(a->A,aa);
1245: MatScale(a->B,aa);
1246: return(0);
1247: }
1251: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1252: {
1253: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1257: #if defined(PETSC_USE_LOG)
1258: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1259: #endif
1260: MatStashDestroy_Private(&mat->stash);
1261: VecDestroy(&aij->diag);
1262: MatDestroy(&aij->A);
1263: MatDestroy(&aij->B);
1264: #if defined (PETSC_USE_CTABLE)
1265: PetscTableDestroy(&aij->colmap);
1266: #else
1267: PetscFree(aij->colmap);
1268: #endif
1269: PetscFree(aij->garray);
1270: VecDestroy(&aij->lvec);
1271: VecScatterDestroy(&aij->Mvctx);
1272: PetscFree2(aij->rowvalues,aij->rowindices);
1273: PetscFree(aij->ld);
1274: PetscFree(mat->data);
1276: PetscObjectChangeTypeName((PetscObject)mat,0);
1277: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
1278: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
1279: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
1280: PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);
1281: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);
1282: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);
1283: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);
1284: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);
1285: return(0);
1286: }
1290: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1291: {
1292: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1293: Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data;
1294: Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data;
1295: PetscErrorCode ierr;
1296: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
1297: int fd;
1298: PetscInt nz,header[4],*row_lengths,*range=0,rlen,i;
1299: PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1300: PetscScalar *column_values;
1301: PetscInt message_count,flowcontrolcount;
1304: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
1305: MPI_Comm_size(((PetscObject)mat)->comm,&size);
1306: nz = A->nz + B->nz;
1307: if (!rank) {
1308: header[0] = MAT_FILE_CLASSID;
1309: header[1] = mat->rmap->N;
1310: header[2] = mat->cmap->N;
1311: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);
1312: PetscViewerBinaryGetDescriptor(viewer,&fd);
1313: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
1314: /* get largest number of rows any processor has */
1315: rlen = mat->rmap->n;
1316: range = mat->rmap->range;
1317: for (i=1; i<size; i++) {
1318: rlen = PetscMax(rlen,range[i+1] - range[i]);
1319: }
1320: } else {
1321: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);
1322: rlen = mat->rmap->n;
1323: }
1325: /* load up the local row counts */
1326: PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);
1327: for (i=0; i<mat->rmap->n; i++) {
1328: row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1329: }
1331: /* store the row lengths to the file */
1332: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1333: if (!rank) {
1334: PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);
1335: for (i=1; i<size; i++) {
1336: PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1337: rlen = range[i+1] - range[i];
1338: MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);
1339: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);
1340: }
1341: PetscViewerFlowControlEndMaster(viewer,message_count);
1342: } else {
1343: PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1344: MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1345: PetscViewerFlowControlEndWorker(viewer,message_count);
1346: }
1347: PetscFree(row_lengths);
1349: /* load up the local column indices */
1350: nzmax = nz; /* process 0 needs as much space as the largest process needs */
1351: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);
1352: PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);
1353: cnt = 0;
1354: for (i=0; i<mat->rmap->n; i++) {
1355: for (j=B->i[i]; j<B->i[i+1]; j++) {
1356: if ( (col = garray[B->j[j]]) > cstart) break;
1357: column_indices[cnt++] = col;
1358: }
1359: for (k=A->i[i]; k<A->i[i+1]; k++) {
1360: column_indices[cnt++] = A->j[k] + cstart;
1361: }
1362: for (; j<B->i[i+1]; j++) {
1363: column_indices[cnt++] = garray[B->j[j]];
1364: }
1365: }
1366: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1368: /* store the column indices to the file */
1369: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1370: if (!rank) {
1371: MPI_Status status;
1372: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
1373: for (i=1; i<size; i++) {
1374: PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1375: MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
1376: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1377: MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);
1378: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);
1379: }
1380: PetscViewerFlowControlEndMaster(viewer,message_count);
1381: } else {
1382: PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1383: MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1384: MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1385: PetscViewerFlowControlEndWorker(viewer,message_count);
1386: }
1387: PetscFree(column_indices);
1389: /* load up the local column values */
1390: PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
1391: cnt = 0;
1392: for (i=0; i<mat->rmap->n; i++) {
1393: for (j=B->i[i]; j<B->i[i+1]; j++) {
1394: if ( garray[B->j[j]] > cstart) break;
1395: column_values[cnt++] = B->a[j];
1396: }
1397: for (k=A->i[i]; k<A->i[i+1]; k++) {
1398: column_values[cnt++] = A->a[k];
1399: }
1400: for (; j<B->i[i+1]; j++) {
1401: column_values[cnt++] = B->a[j];
1402: }
1403: }
1404: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1406: /* store the column values to the file */
1407: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1408: if (!rank) {
1409: MPI_Status status;
1410: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
1411: for (i=1; i<size; i++) {
1412: PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);
1413: MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);
1414: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1415: MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);
1416: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);
1417: }
1418: PetscViewerFlowControlEndMaster(viewer,message_count);
1419: } else {
1420: PetscViewerFlowControlStepWorker(viewer,rank,message_count);
1421: MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);
1422: MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);
1423: PetscViewerFlowControlEndWorker(viewer,message_count);
1424: }
1425: PetscFree(column_values);
1426: return(0);
1427: }
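/*
   Usage sketch (not part of mpiaij.c): the binary writer above is reached through the
   generic MatView() when the viewer is a binary viewer, and the resulting file can be read
   back with MatLoad(). The file name "matrix.dat" is an arbitrary example; error checking
   is omitted.
*/
#include <petscmat.h>
int main(int argc,char **argv)
{
  Mat         A,B;
  PetscViewer viewer;
  PetscInt    i,rstart,rend,N = 20;
  PetscScalar one = 1.0;

  PetscInitialize(&argc,&argv,0,0);
  MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,1,PETSC_NULL,0,PETSC_NULL,&A);
  MatGetOwnershipRange(A,&rstart,&rend);
  for (i=rstart; i<rend; i++) MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);
  MatView(A,viewer);                                  /* reaches MatView_MPIAIJ_Binary() above */
  PetscViewerDestroy(&viewer);

  MatCreate(PETSC_COMM_WORLD,&B);
  MatSetType(B,MATAIJ);
  PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
  MatLoad(B,viewer);
  PetscViewerDestroy(&viewer);

  MatDestroy(&A); MatDestroy(&B);
  PetscFinalize();
  return 0;
}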
1431: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1432: {
1433: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1434: PetscErrorCode ierr;
1435: PetscMPIInt rank = aij->rank,size = aij->size;
1436: PetscBool isdraw,iascii,isbinary;
1437: PetscViewer sviewer;
1438: PetscViewerFormat format;
1441: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1442: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1443: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1444: if (iascii) {
1445: PetscViewerGetFormat(viewer,&format);
1446: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1447: MatInfo info;
1448: PetscBool inodes;
1450: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
1451: MatGetInfo(mat,MAT_LOCAL,&info);
1452: MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);
1453: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
1454: if (!inodes) {
1455: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1456: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1457: } else {
1458: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1459: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1460: }
1461: MatGetInfo(aij->A,MAT_LOCAL,&info);
1462: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1463: MatGetInfo(aij->B,MAT_LOCAL,&info);
1464: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1465: PetscViewerFlush(viewer);
1466: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
1467: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
1468: VecScatterView(aij->Mvctx,viewer);
1469: return(0);
1470: } else if (format == PETSC_VIEWER_ASCII_INFO) {
1471: PetscInt inodecount,inodelimit,*inodes;
1472: MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);
1473: if (inodes) {
1474: PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);
1475: } else {
1476: PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");
1477: }
1478: return(0);
1479: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1480: return(0);
1481: }
1482: } else if (isbinary) {
1483: if (size == 1) {
1484: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1485: MatView(aij->A,viewer);
1486: } else {
1487: MatView_MPIAIJ_Binary(mat,viewer);
1488: }
1489: return(0);
1490: } else if (isdraw) {
1491: PetscDraw draw;
1492: PetscBool isnull;
1493: PetscViewerDrawGetDraw(viewer,0,&draw);
1494: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1495: }
1497: if (size == 1) {
1498: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1499: MatView(aij->A,viewer);
1500: } else {
1501: /* assemble the entire matrix onto first processor. */
1502: Mat A;
1503: Mat_SeqAIJ *Aloc;
1504: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1505: MatScalar *a;
1507: if (mat->rmap->N > 1024) {
1508: PetscBool flg = PETSC_FALSE;
1510: PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);
1511: if (!flg) {
1512: SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1513: }
1514: }
1516: MatCreate(((PetscObject)mat)->comm,&A);
1517: if (!rank) {
1518: MatSetSizes(A,M,N,M,N);
1519: } else {
1520: MatSetSizes(A,0,0,M,N);
1521: }
1522: /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1523: MatSetType(A,MATMPIAIJ);
1524: MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);
1525: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
1526: PetscLogObjectParent(mat,A);
1528: /* copy over the A part */
1529: Aloc = (Mat_SeqAIJ*)aij->A->data;
1530: m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1531: row = mat->rmap->rstart;
1532: for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1533: for (i=0; i<m; i++) {
1534: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
1535: row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1536: }
1537: aj = Aloc->j;
1538: for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1540: /* copy over the B part */
1541: Aloc = (Mat_SeqAIJ*)aij->B->data;
1542: m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1543: row = mat->rmap->rstart;
1544: PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);
1545: ct = cols;
1546: for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1547: for (i=0; i<m; i++) {
1548: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
1549: row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1550: }
1551: PetscFree(ct);
1552: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1553: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1554: /*
1555: Everyone has to call to draw the matrix since the graphics waits are
1556: synchronized across all processors that share the PetscDraw object
1557: */
1558: PetscViewerGetSingleton(viewer,&sviewer);
1559: if (!rank) {
1560: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);
1561: /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1562: PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1563: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
1564: }
1565: PetscViewerRestoreSingleton(viewer,&sviewer);
1566: MatDestroy(&A);
1567: }
1568: return(0);
1569: }
1573: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1574: {
1576: PetscBool iascii,isdraw,issocket,isbinary;
1577:
1579: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1580: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1581: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1582: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1583: if (iascii || isdraw || isbinary || issocket) {
1584: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1585: } else {
1586: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1587: }
1588: return(0);
1589: }
1593: PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1594: {
1595: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1597: Vec bb1 = 0;
1598: PetscBool hasop;
1601: if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1602: VecDuplicate(bb,&bb1);
1603: }
1605: if (flag == SOR_APPLY_UPPER) {
1606: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1607: return(0);
1608: }
1610: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1611: if (flag & SOR_ZERO_INITIAL_GUESS) {
1612: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1613: its--;
1614: }
1615:
1616: while (its--) {
1617: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1618: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1620: /* update rhs: bb1 = bb - B*x */
1621: VecScale(mat->lvec,-1.0);
1622: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1624: /* local sweep */
1625: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);
1626: }
1627: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1628: if (flag & SOR_ZERO_INITIAL_GUESS) {
1629: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1630: its--;
1631: }
1632: while (its--) {
1633: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1634: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1636: /* update rhs: bb1 = bb - B*x */
1637: VecScale(mat->lvec,-1.0);
1638: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1640: /* local sweep */
1641: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);
1642: }
1643: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1644: if (flag & SOR_ZERO_INITIAL_GUESS) {
1645: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1646: its--;
1647: }
1648: while (its--) {
1649: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1650: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1652: /* update rhs: bb1 = bb - B*x */
1653: VecScale(mat->lvec,-1.0);
1654: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1656: /* local sweep */
1657: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);
1658: }
1659: } else if (flag & SOR_EISENSTAT) {
1660: Vec xx1;
1662: VecDuplicate(bb,&xx1);
1663: (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);
1665: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1666: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1667: if (!mat->diag) {
1668: MatGetVecs(matin,&mat->diag,PETSC_NULL);
1669: MatGetDiagonal(matin,mat->diag);
1670: }
1671: MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);
1672: if (hasop) {
1673: MatMultDiagonalBlock(matin,xx,bb1);
1674: } else {
1675: VecPointwiseMult(bb1,mat->diag,xx);
1676: }
1677: VecAYPX(bb1,(omega-2.0)/omega,bb);
1679: MatMultAdd(mat->B,mat->lvec,bb1,bb1);
1681: /* local sweep */
1682: (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);
1683: VecAXPY(xx,1.0,xx1);
1684: VecDestroy(&xx1);
1685: } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1687: VecDestroy(&bb1);
1688: return(0);
1689: }
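/*
   A sketch of the algorithm above, as read from the code (not part of the original source):
   only the local sweep variants and the Eisenstat variant are supported; a true parallel
   sweep falls through to the SETERRQ above.  Writing the matrix as A = D + B, with D the
   on-process block mat->A and B the off-process block mat->B, each outer iteration freezes
   the off-process values x_off, forms

       bb1 = bb - B*x_off

   and runs the requested SeqAIJ SOR sweep of mat->A on bb1.  A minimal way to exercise this
   path from user code is through PCSOR, e.g. with the usual options -mat_type mpiaij
   -pc_type sor, or programmatically (A, b, x assumed created and assembled by the caller):

     PetscErrorCode ierr;
     KSP            ksp;
     PC             pc;

     ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
     ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
*/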
1693: PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1694: {
1695: MPI_Comm comm;
1696: PetscInt first,local_rowsize,local_colsize;
1697: const PetscInt *rows;
1698: IS crowp,growp,irowp,lrowp,lcolp,icolp;
1702: PetscObjectGetComm((PetscObject)A,&comm);
1703:   /* make a collective version of 'rowp'; this is to be tolerant of users who pass serial index sets */
1704: ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);
1705: /* collect the global row permutation and invert it */
1706: ISAllGather(crowp,&growp);
1707: ISSetPermutation(growp);
1708: ISDestroy(&crowp);
1709: ISInvertPermutation(growp,PETSC_DECIDE,&irowp);
1710: ISDestroy(&growp);
1711: /* get the local target indices */
1712: MatGetOwnershipRange(A,&first,PETSC_NULL);
1713: MatGetLocalSize(A,&local_rowsize,&local_colsize);
1714: ISGetIndices(irowp,&rows);
1715: ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);
1716: ISRestoreIndices(irowp,&rows);
1717: ISDestroy(&irowp);
1718: /* the column permutation is so much easier;
1719: make a local version of 'colp' and invert it */
1720: ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);
1721: ISSetPermutation(lcolp);
1722: ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);
1723: ISDestroy(&lcolp);
1724: /* now we just get the submatrix */
1725: MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);
1726: /* clean up */
1727: ISDestroy(&lrowp);
1728: ISDestroy(&icolp);
1729: return(0);
1730: }
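/*
   A small worked example of the permutation logic above (an illustration, not part of the
   original source).  With three global rows and rowp = {1,2,0}, meaning row i of A is sent to
   row rowp[i] of the result, ISInvertPermutation() yields irowp = {2,0,1}, so the row index
   set handed to MatGetSubMatrix_MPIAIJ_Private() selects

       B row 0 = A row 2,   B row 1 = A row 0,   B row 2 = A row 1,

   i.e. row i of A indeed ends up as row rowp[i] of B.  Each process keeps only its own slice
   rows[first .. first+local_rowsize) of the inverted permutation, and the column permutation
   is inverted the same way, but purely locally.
*/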
1734: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1735: {
1736: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1737: Mat A = mat->A,B = mat->B;
1739: PetscReal isend[5],irecv[5];
1742: info->block_size = 1.0;
1743: MatGetInfo(A,MAT_LOCAL,info);
1744: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1745: isend[3] = info->memory; isend[4] = info->mallocs;
1746: MatGetInfo(B,MAT_LOCAL,info);
1747: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1748: isend[3] += info->memory; isend[4] += info->mallocs;
1749: if (flag == MAT_LOCAL) {
1750: info->nz_used = isend[0];
1751: info->nz_allocated = isend[1];
1752: info->nz_unneeded = isend[2];
1753: info->memory = isend[3];
1754: info->mallocs = isend[4];
1755: } else if (flag == MAT_GLOBAL_MAX) {
1756: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);
1757: info->nz_used = irecv[0];
1758: info->nz_allocated = irecv[1];
1759: info->nz_unneeded = irecv[2];
1760: info->memory = irecv[3];
1761: info->mallocs = irecv[4];
1762: } else if (flag == MAT_GLOBAL_SUM) {
1763: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);
1764: info->nz_used = irecv[0];
1765: info->nz_allocated = irecv[1];
1766: info->nz_unneeded = irecv[2];
1767: info->memory = irecv[3];
1768: info->mallocs = irecv[4];
1769: }
1770: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1771: info->fill_ratio_needed = 0;
1772: info->factor_mallocs = 0;
1774: return(0);
1775: }
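/*
   A minimal usage sketch for the routine above (an illustration, not part of the original
   source): the MAT_LOCAL branch reports the sum over the diagonal (A) and off-diagonal (B)
   blocks on this process, while MAT_GLOBAL_SUM and MAT_GLOBAL_MAX reduce those numbers over
   the communicator.

     PetscErrorCode ierr;
     MatInfo        info;

     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/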
1779: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1780: {
1781: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1785: switch (op) {
1786: case MAT_NEW_NONZERO_LOCATIONS:
1787: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1788: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1789: case MAT_KEEP_NONZERO_PATTERN:
1790: case MAT_NEW_NONZERO_LOCATION_ERR:
1791: case MAT_USE_INODES:
1792: case MAT_IGNORE_ZERO_ENTRIES:
1793: MatSetOption(a->A,op,flg);
1794: MatSetOption(a->B,op,flg);
1795: break;
1796: case MAT_ROW_ORIENTED:
1797: a->roworiented = flg;
1798: MatSetOption(a->A,op,flg);
1799: MatSetOption(a->B,op,flg);
1800: break;
1801: case MAT_NEW_DIAGONALS:
1802: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1803: break;
1804: case MAT_IGNORE_OFF_PROC_ENTRIES:
1805: a->donotstash = flg;
1806: break;
1807: case MAT_SPD:
1808: A->spd_set = PETSC_TRUE;
1809: A->spd = flg;
1810: if (flg) {
1811: A->symmetric = PETSC_TRUE;
1812: A->structurally_symmetric = PETSC_TRUE;
1813: A->symmetric_set = PETSC_TRUE;
1814: A->structurally_symmetric_set = PETSC_TRUE;
1815: }
1816: break;
1817: case MAT_SYMMETRIC:
1818: MatSetOption(a->A,op,flg);
1819: break;
1820: case MAT_STRUCTURALLY_SYMMETRIC:
1821: MatSetOption(a->A,op,flg);
1822: break;
1823: case MAT_HERMITIAN:
1824: MatSetOption(a->A,op,flg);
1825: break;
1826: case MAT_SYMMETRY_ETERNAL:
1827: MatSetOption(a->A,op,flg);
1828: break;
1829: default:
1830: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1831: }
1832: return(0);
1833: }
1837: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1838: {
1839: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1840: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1842: PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1843: PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1844: PetscInt *cmap,*idx_p;
1847: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1848: mat->getrowactive = PETSC_TRUE;
1850: if (!mat->rowvalues && (idx || v)) {
1851: /*
1852: allocate enough space to hold information from the longest row.
1853: */
1854: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1855: PetscInt max = 1,tmp;
1856: for (i=0; i<matin->rmap->n; i++) {
1857: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1858: if (max < tmp) { max = tmp; }
1859: }
1860: PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);
1861: }
1863: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1864: lrow = row - rstart;
1866: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1867: if (!v) {pvA = 0; pvB = 0;}
1868: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1869: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1870: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1871: nztot = nzA + nzB;
1873: cmap = mat->garray;
1874: if (v || idx) {
1875: if (nztot) {
1876: /* Sort by increasing column numbers, assuming A and B already sorted */
1877: PetscInt imark = -1;
1878: if (v) {
1879: *v = v_p = mat->rowvalues;
1880: for (i=0; i<nzB; i++) {
1881: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1882: else break;
1883: }
1884: imark = i;
1885: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1886: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1887: }
1888: if (idx) {
1889: *idx = idx_p = mat->rowindices;
1890: if (imark > -1) {
1891: for (i=0; i<imark; i++) {
1892: idx_p[i] = cmap[cworkB[i]];
1893: }
1894: } else {
1895: for (i=0; i<nzB; i++) {
1896: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1897: else break;
1898: }
1899: imark = i;
1900: }
1901: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1902: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1903: }
1904: } else {
1905: if (idx) *idx = 0;
1906: if (v) *v = 0;
1907: }
1908: }
1909: *nz = nztot;
1910: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1911: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1912: return(0);
1913: }
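/*
   The routine above merges the requested row of the diagonal block (columns owned by this
   process) with the row of the off-diagonal block (columns translated through garray) so that
   the caller sees a single row with globally increasing column indices.  A minimal usage
   sketch (an illustration, not part of the original source); only locally owned rows may be
   requested:

     PetscErrorCode    ierr;
     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use cols[0..ncols-1] and vals[0..ncols-1] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/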
1917: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1918: {
1919: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1922: if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1923: aij->getrowactive = PETSC_FALSE;
1924: return(0);
1925: }
1929: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1930: {
1931: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1932: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1934: PetscInt i,j,cstart = mat->cmap->rstart;
1935: PetscReal sum = 0.0;
1936: MatScalar *v;
1939: if (aij->size == 1) {
1940: MatNorm(aij->A,type,norm);
1941: } else {
1942: if (type == NORM_FROBENIUS) {
1943: v = amat->a;
1944: for (i=0; i<amat->nz; i++) {
1945: #if defined(PETSC_USE_COMPLEX)
1946: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1947: #else
1948: sum += (*v)*(*v); v++;
1949: #endif
1950: }
1951: v = bmat->a;
1952: for (i=0; i<bmat->nz; i++) {
1953: #if defined(PETSC_USE_COMPLEX)
1954: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1955: #else
1956: sum += (*v)*(*v); v++;
1957: #endif
1958: }
1959: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);
1960: *norm = PetscSqrtReal(*norm);
1961: } else if (type == NORM_1) { /* max column norm */
1962: PetscReal *tmp,*tmp2;
1963: PetscInt *jj,*garray = aij->garray;
1964: PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);
1965: PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);
1966: PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));
1967: *norm = 0.0;
1968: v = amat->a; jj = amat->j;
1969: for (j=0; j<amat->nz; j++) {
1970: tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
1971: }
1972: v = bmat->a; jj = bmat->j;
1973: for (j=0; j<bmat->nz; j++) {
1974: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1975: }
1976: MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);
1977: for (j=0; j<mat->cmap->N; j++) {
1978: if (tmp2[j] > *norm) *norm = tmp2[j];
1979: }
1980: PetscFree(tmp);
1981: PetscFree(tmp2);
1982: } else if (type == NORM_INFINITY) { /* max row norm */
1983: PetscReal ntemp = 0.0;
1984: for (j=0; j<aij->A->rmap->n; j++) {
1985: v = amat->a + amat->i[j];
1986: sum = 0.0;
1987: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1988: sum += PetscAbsScalar(*v); v++;
1989: }
1990: v = bmat->a + bmat->i[j];
1991: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1992: sum += PetscAbsScalar(*v); v++;
1993: }
1994: if (sum > ntemp) ntemp = sum;
1995: }
1996: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);
1997: } else {
1998: SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1999: }
2000: }
2001: return(0);
2002: }
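/*
   The norms computed above, written out (an illustration, not part of the original source):

     NORM_FROBENIUS:  ||A||_F   = sqrt( sum_ij |a_ij|^2 )   local sums over A and B, MPI sum, sqrt
     NORM_1:          ||A||_1   = max_j sum_i |a_ij|        column sums reduced with MPI sum, then max
     NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|        local row sums, reduced with MPI max

   A usage sketch:

     PetscErrorCode ierr;
     PetscReal      nrm;

     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/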
2006: PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2007: {
2008: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2009: Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2011: PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2012: PetscInt cstart=A->cmap->rstart,ncol;
2013: Mat B;
2014: MatScalar *array;
2017: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2019: ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2020: ai = Aloc->i; aj = Aloc->j;
2021: bi = Bloc->i; bj = Bloc->j;
2022: if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2023: /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2024: PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);
2025: PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));
2026: for (i=0; i<ai[ma]; i++){
2027: d_nnz[aj[i]] ++;
2028: aj[i] += cstart; /* global col index to be used by MatSetValues() */
2029: }
2031: MatCreate(((PetscObject)A)->comm,&B);
2032: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
2033: MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);
2034: MatSetType(B,((PetscObject)A)->type_name);
2035: MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);
2036: MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
2037: PetscFree(d_nnz);
2038: } else {
2039: B = *matout;
2040: }
2042: /* copy over the A part */
2043: array = Aloc->a;
2044: row = A->rmap->rstart;
2045: for (i=0; i<ma; i++) {
2046: ncol = ai[i+1]-ai[i];
2047: MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);
2048: row++; array += ncol; aj += ncol;
2049: }
2050: aj = Aloc->j;
2051:   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2053: /* copy over the B part */
2054: PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);
2055: PetscMemzero(cols,bi[mb]*sizeof(PetscInt));
2056: array = Bloc->a;
2057: row = A->rmap->rstart;
2058: for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2059: cols_tmp = cols;
2060: for (i=0; i<mb; i++) {
2061: ncol = bi[i+1]-bi[i];
2062: MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);
2063: row++; array += ncol; cols_tmp += ncol;
2064: }
2065: PetscFree(cols);
2066:
2067: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2068: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2069: if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2070: *matout = B;
2071: } else {
2072: MatHeaderMerge(A,B);
2073: }
2074: return(0);
2075: }
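/*
   A minimal usage sketch for the routine above (an illustration, not part of the original
   source).  The preallocation of the transpose is only estimated (o_nnz is approximated by
   d_nnz, see the comment in the code), which is why MAT_NEW_NONZERO_ALLOCATION_ERR is turned
   off on the intermediate matrix.

     PetscErrorCode ierr;
     Mat            At;

     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ... use At ...
     ierr = MatDestroy(&At);CHKERRQ(ierr);

   In-place transposition, MatTranspose(A,MAT_REUSE_MATRIX,&A), is only allowed for square
   matrices, as checked at the top of the routine.
*/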
2079: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2080: {
2081: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2082: Mat a = aij->A,b = aij->B;
2084: PetscInt s1,s2,s3;
2087: MatGetLocalSize(mat,&s2,&s3);
2088: if (rr) {
2089: VecGetLocalSize(rr,&s1);
2090: if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2091: /* Overlap communication with computation. */
2092: VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2093: }
2094: if (ll) {
2095: VecGetLocalSize(ll,&s1);
2096: if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2097: (*b->ops->diagonalscale)(b,ll,0);
2098: }
2099: /* scale the diagonal block */
2100: (*a->ops->diagonalscale)(a,ll,rr);
2102: if (rr) {
2103: /* Do a scatter end and then right scale the off-diagonal block */
2104: VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2105: (*b->ops->diagonalscale)(b,0,aij->lvec);
2106: }
2107:
2108: return(0);
2109: }
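/*
   The routine above computes A <- diag(ll) * A * diag(rr); either vector may be PETSC_NULL.
   Right scaling of the off-diagonal block needs the ghost values of rr, so the scatter of rr
   into lvec is started first, the parts that do not need it are scaled in the meantime, and
   the scatter is completed only when the off-diagonal block is right-scaled.  A usage sketch
   (an illustration, not part of the original source), with l laid out like the rows of A and
   r like its columns:

     PetscErrorCode ierr;

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,PETSC_NULL,r);CHKERRQ(ierr);
*/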
2113: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2114: {
2115: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2119: MatSetUnfactored(a->A);
2120: return(0);
2121: }
2125: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
2126: {
2127: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2128: Mat a,b,c,d;
2129: PetscBool flg;
2133: a = matA->A; b = matA->B;
2134: c = matB->A; d = matB->B;
2136: MatEqual(a,c,&flg);
2137: if (flg) {
2138: MatEqual(b,d,&flg);
2139: }
2140: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);
2141: return(0);
2142: }
2146: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2147: {
2149: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2150: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2153: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2154: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2155:     /* because of the column compression in the off-processor part of the matrix a->B,
2156:        the number of columns in a->B and b->B may differ, so we cannot call
2157:        MatCopy() directly on the two parts. If need be, a copy more efficient than
2158:        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2159:        then copying the submatrices */
2160: MatCopy_Basic(A,B,str);
2161: } else {
2162: MatCopy(a->A,b->A,str);
2163: MatCopy(a->B,b->B,str);
2164: }
2165: return(0);
2166: }
2170: PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2171: {
2175: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
2176: return(0);
2177: }
2181: /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2182: static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz)
2183: {
2184: PetscInt i,m=Y->rmap->N;
2185: Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data;
2186: Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data;
2187: const PetscInt *xi = x->i,*yi = y->i;
2190: /* Set the number of nonzeros in the new matrix */
2191: for(i=0; i<m; i++) {
2192: PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2193: const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2194: nnz[i] = 0;
2195: for (j=0,k=0; j<nzx; j++) { /* Point in X */
2196: for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2197: if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */
2198: nnz[i]++;
2199: }
2200: for (; k<nzy; k++) nnz[i]++;
2201: }
2202: return(0);
2203: }
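/*
   A small worked example of the counting loop above (an illustration, not part of the
   original source).  Suppose that in row i the off-diagonal block of X has global columns
   {3,7,9} (through xltog) and that of Y has {1,7,8} (through yltog).  The two sorted lists
   are merged: columns 1, 3, 7, 8, 9 are each counted once and the duplicate 7 is skipped,
   giving nnz[i] = 5, the size of the union of the two sparsity patterns, which is exactly
   the preallocation needed for Y+X when the patterns differ.
*/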
2207: PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2208: {
2210: PetscInt i;
2211: Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2212: PetscBLASInt bnz,one=1;
2213: Mat_SeqAIJ *x,*y;
2216: if (str == SAME_NONZERO_PATTERN) {
2217: PetscScalar alpha = a;
2218: x = (Mat_SeqAIJ *)xx->A->data;
2219: y = (Mat_SeqAIJ *)yy->A->data;
2220: bnz = PetscBLASIntCast(x->nz);
2221: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2222: x = (Mat_SeqAIJ *)xx->B->data;
2223: y = (Mat_SeqAIJ *)yy->B->data;
2224: bnz = PetscBLASIntCast(x->nz);
2225: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2226: } else if (str == SUBSET_NONZERO_PATTERN) {
2227: MatAXPY_SeqAIJ(yy->A,a,xx->A,str);
2229: x = (Mat_SeqAIJ *)xx->B->data;
2230: y = (Mat_SeqAIJ *)yy->B->data;
2231: if (y->xtoy && y->XtoY != xx->B) {
2232: PetscFree(y->xtoy);
2233: MatDestroy(&y->XtoY);
2234: }
2235: if (!y->xtoy) { /* get xtoy */
2236: MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
2237: y->XtoY = xx->B;
2238: PetscObjectReference((PetscObject)xx->B);
2239: }
2240: for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2241: } else {
2242: Mat B;
2243: PetscInt *nnz_d,*nnz_o;
2244: PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);
2245: PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);
2246: MatCreate(((PetscObject)Y)->comm,&B);
2247: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2248: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2249: MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);
2250: MatSetType(B,MATMPIAIJ);
2251: MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);
2252: MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);
2253: MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);
2254: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2255: MatHeaderReplace(Y,B);
2256: PetscFree(nnz_d);
2257: PetscFree(nnz_o);
2258: }
2259: return(0);
2260: }
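/*
   A minimal usage sketch for MatAXPY on MPIAIJ matrices (an illustration, not part of the
   original source).  The MatStructure argument selects the branch above: SAME_NONZERO_PATTERN
   uses a plain BLAS axpy on the stored values, SUBSET_NONZERO_PATTERN scatters X's values into
   Y through the cached xtoy map, and DIFFERENT_NONZERO_PATTERN rebuilds Y with the merged
   nonzero pattern computed by the preallocation helpers above.

     PetscErrorCode ierr;

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/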
2262: extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2266: PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2267: {
2268: #if defined(PETSC_USE_COMPLEX)
2270: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2273: MatConjugate_SeqAIJ(aij->A);
2274: MatConjugate_SeqAIJ(aij->B);
2275: #else
2277: #endif
2278: return(0);
2279: }
2283: PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2284: {
2285: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2289: MatRealPart(a->A);
2290: MatRealPart(a->B);
2291: return(0);
2292: }
2296: PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2297: {
2298: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2302: MatImaginaryPart(a->A);
2303: MatImaginaryPart(a->B);
2304: return(0);
2305: }
2307: #ifdef PETSC_HAVE_PBGL
2309: #include <boost/parallel/mpi/bsp_process_group.hpp>
2310: #include <boost/graph/distributed/ilu_default_graph.hpp>
2311: #include <boost/graph/distributed/ilu_0_block.hpp>
2312: #include <boost/graph/distributed/ilu_preconditioner.hpp>
2313: #include <boost/graph/distributed/petsc/interface.hpp>
2314: #include <boost/multi_array.hpp>
2315: #include <boost/parallel/distributed_property_map.hpp>
2319: /*
2320: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2321: */
2322: PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2323: {
2324: namespace petsc = boost::distributed::petsc;
2325:
2326: namespace graph_dist = boost::graph::distributed;
2327: using boost::graph::distributed::ilu_default::process_group_type;
2328: using boost::graph::ilu_permuted;
2330: PetscBool row_identity, col_identity;
2331: PetscContainer c;
2332: PetscInt m, n, M, N;
2333: PetscErrorCode ierr;
2336: if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2337: ISIdentity(isrow, &row_identity);
2338: ISIdentity(iscol, &col_identity);
2339: if (!row_identity || !col_identity) {
2340: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2341: }
2343: process_group_type pg;
2344: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2345: lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2346: lgraph_type& level_graph = *lgraph_p;
2347: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2349: petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2350: ilu_permuted(level_graph);
2352: /* put together the new matrix */
2353: MatCreate(((PetscObject)A)->comm, fact);
2354: MatGetLocalSize(A, &m, &n);
2355: MatGetSize(A, &M, &N);
2356: MatSetSizes(fact, m, n, M, N);
2357: MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);
2358: MatSetType(fact, ((PetscObject)A)->type_name);
2359: MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);
2360: MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);
2362: PetscContainerCreate(((PetscObject)A)->comm, &c);
2363: PetscContainerSetPointer(c, lgraph_p);
2364: PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2365: PetscContainerDestroy(&c);
2366: return(0);
2367: }
2371: PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2372: {
2374: return(0);
2375: }
2379: /*
2380: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2381: */
2382: PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2383: {
2384: namespace graph_dist = boost::graph::distributed;
2386: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2387: lgraph_type* lgraph_p;
2388: PetscContainer c;
2392: PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);
2393: PetscContainerGetPointer(c, (void **) &lgraph_p);
2394: VecCopy(b, x);
2396: PetscScalar* array_x;
2397: VecGetArray(x, &array_x);
2398: PetscInt sx;
2399: VecGetSize(x, &sx);
2400:
2401: PetscScalar* array_b;
2402: VecGetArray(b, &array_b);
2403: PetscInt sb;
2404: VecGetSize(b, &sb);
2406: lgraph_type& level_graph = *lgraph_p;
2407: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2409: typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2410: array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
2411: ref_x(array_x, boost::extents[num_vertices(graph)]);
2413: typedef boost::iterator_property_map<array_ref_type::iterator,
2414: boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2415: gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2416: vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2417:
2418: ilu_set_solve(*lgraph_p, vector_b, vector_x);
2420: return(0);
2421: }
2422: #endif
2424: typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2425: PetscInt nzlocal,nsends,nrecvs;
2426: PetscMPIInt *send_rank,*recv_rank;
2427: PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2428: PetscScalar *sbuf_a,**rbuf_a;
2429: PetscErrorCode (*Destroy)(Mat);
2430: } Mat_Redundant;
2434: PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2435: {
2436: PetscErrorCode ierr;
2437: Mat_Redundant *redund=(Mat_Redundant*)ptr;
2438: PetscInt i;
2441: PetscFree2(redund->send_rank,redund->recv_rank);
2442: PetscFree(redund->sbuf_j);
2443: PetscFree(redund->sbuf_a);
2444: for (i=0; i<redund->nrecvs; i++){
2445: PetscFree(redund->rbuf_j[i]);
2446: PetscFree(redund->rbuf_a[i]);
2447: }
2448: PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);
2449: PetscFree(redund);
2450: return(0);
2451: }
2455: PetscErrorCode MatDestroy_MatRedundant(Mat A)
2456: {
2457: PetscErrorCode ierr;
2458: PetscContainer container;
2459: Mat_Redundant *redund=PETSC_NULL;
2462: PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);
2463:   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2464: PetscContainerGetPointer(container,(void **)&redund);
2465: A->ops->destroy = redund->Destroy;
2466: PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);
2467: if (A->ops->destroy) {
2468: (*A->ops->destroy)(A);
2469: }
2470: return(0);
2471: }
2475: PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2476: {
2477: PetscMPIInt rank,size;
2478: MPI_Comm comm=((PetscObject)mat)->comm;
2480: PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
2481: PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2482: PetscInt *rowrange=mat->rmap->range;
2483: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2484: Mat A=aij->A,B=aij->B,C=*matredundant;
2485: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2486: PetscScalar *sbuf_a;
2487: PetscInt nzlocal=a->nz+b->nz;
2488: PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2489: PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2490: PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2491: MatScalar *aworkA,*aworkB;
2492: PetscScalar *vals;
2493: PetscMPIInt tag1,tag2,tag3,imdex;
2494: MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2495: *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2496: MPI_Status recv_status,*send_status;
2497: PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2498: PetscInt **rbuf_j=PETSC_NULL;
2499: PetscScalar **rbuf_a=PETSC_NULL;
2500: Mat_Redundant *redund=PETSC_NULL;
2501: PetscContainer container;
2504: MPI_Comm_rank(comm,&rank);
2505: MPI_Comm_size(comm,&size);
2507: if (reuse == MAT_REUSE_MATRIX) {
2508: MatGetSize(C,&M,&N);
2509: if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2510: MatGetLocalSize(C,&M,&N);
2511: if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2512: PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);
2513:     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2514: PetscContainerGetPointer(container,(void **)&redund);
2515: if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2517: nsends = redund->nsends;
2518: nrecvs = redund->nrecvs;
2519: send_rank = redund->send_rank;
2520: recv_rank = redund->recv_rank;
2521: sbuf_nz = redund->sbuf_nz;
2522: rbuf_nz = redund->rbuf_nz;
2523: sbuf_j = redund->sbuf_j;
2524: sbuf_a = redund->sbuf_a;
2525: rbuf_j = redund->rbuf_j;
2526: rbuf_a = redund->rbuf_a;
2527: }
2529: if (reuse == MAT_INITIAL_MATRIX){
2530: PetscMPIInt subrank,subsize;
2531: PetscInt nleftover,np_subcomm;
2532: /* get the destination processors' id send_rank, nsends and nrecvs */
2533: MPI_Comm_rank(subcomm,&subrank);
2534: MPI_Comm_size(subcomm,&subsize);
2535: PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2536: np_subcomm = size/nsubcomm;
2537: nleftover = size - nsubcomm*np_subcomm;
2538: nsends = 0; nrecvs = 0;
2539: for (i=0; i<size; i++){ /* i=rank*/
2540: if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2541: send_rank[nsends] = i; nsends++;
2542: recv_rank[nrecvs++] = i;
2543: }
2544: }
2545: if (rank >= size - nleftover){/* this proc is a leftover processor */
2546: i = size-nleftover-1;
2547: j = 0;
2548: while (j < nsubcomm - nleftover){
2549: send_rank[nsends++] = i;
2550: i--; j++;
2551: }
2552: }
2554: if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2555: for (i=0; i<nleftover; i++){
2556: recv_rank[nrecvs++] = size-nleftover+i;
2557: }
2558: }
2560: /* allocate sbuf_j, sbuf_a */
2561: i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2562: PetscMalloc(i*sizeof(PetscInt),&sbuf_j);
2563: PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);
2564: } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2566: /* copy mat's local entries into the buffers */
2567: if (reuse == MAT_INITIAL_MATRIX){
2568: rownz_max = 0;
2569: rptr = sbuf_j;
2570: cols = sbuf_j + rend-rstart + 1;
2571: vals = sbuf_a;
2572: rptr[0] = 0;
2573: for (i=0; i<rend-rstart; i++){
2574: row = i + rstart;
2575: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2576: ncols = nzA + nzB;
2577: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2578: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2579: /* load the column indices for this row into cols */
2580: lwrite = 0;
2581: for (l=0; l<nzB; l++) {
2582: if ((ctmp = bmap[cworkB[l]]) < cstart){
2583: vals[lwrite] = aworkB[l];
2584: cols[lwrite++] = ctmp;
2585: }
2586: }
2587: for (l=0; l<nzA; l++){
2588: vals[lwrite] = aworkA[l];
2589: cols[lwrite++] = cstart + cworkA[l];
2590: }
2591: for (l=0; l<nzB; l++) {
2592: if ((ctmp = bmap[cworkB[l]]) >= cend){
2593: vals[lwrite] = aworkB[l];
2594: cols[lwrite++] = ctmp;
2595: }
2596: }
2597: vals += ncols;
2598: cols += ncols;
2599: rptr[i+1] = rptr[i] + ncols;
2600: if (rownz_max < ncols) rownz_max = ncols;
2601: }
2602:     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2603: } else { /* only copy matrix values into sbuf_a */
2604: rptr = sbuf_j;
2605: vals = sbuf_a;
2606: rptr[0] = 0;
2607: for (i=0; i<rend-rstart; i++){
2608: row = i + rstart;
2609: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2610: ncols = nzA + nzB;
2611: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2612: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2613: lwrite = 0;
2614: for (l=0; l<nzB; l++) {
2615: if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2616: }
2617: for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2618: for (l=0; l<nzB; l++) {
2619: if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2620: }
2621: vals += ncols;
2622: rptr[i+1] = rptr[i] + ncols;
2623: }
2624: } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2626: /* send nzlocal to others, and recv other's nzlocal */
2627: /*--------------------------------------------------*/
2628: if (reuse == MAT_INITIAL_MATRIX){
2629: PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2630: s_waits2 = s_waits3 + nsends;
2631: s_waits1 = s_waits2 + nsends;
2632: r_waits1 = s_waits1 + nsends;
2633: r_waits2 = r_waits1 + nrecvs;
2634: r_waits3 = r_waits2 + nrecvs;
2635: } else {
2636: PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2637: r_waits3 = s_waits3 + nsends;
2638: }
2640: PetscObjectGetNewTag((PetscObject)mat,&tag3);
2641: if (reuse == MAT_INITIAL_MATRIX){
2642: /* get new tags to keep the communication clean */
2643: PetscObjectGetNewTag((PetscObject)mat,&tag1);
2644: PetscObjectGetNewTag((PetscObject)mat,&tag2);
2645: PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);
2647: /* post receives of other's nzlocal */
2648: for (i=0; i<nrecvs; i++){
2649: MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);
2650: }
2651: /* send nzlocal to others */
2652: for (i=0; i<nsends; i++){
2653: sbuf_nz[i] = nzlocal;
2654: MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);
2655: }
2656: /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2657: count = nrecvs;
2658: while (count) {
2659: MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);
2660: recv_rank[imdex] = recv_status.MPI_SOURCE;
2661: /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2662: PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);
2664: i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2665: rbuf_nz[imdex] += i + 2;
2666: PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);
2667: MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);
2668: count--;
2669: }
2670: /* wait on sends of nzlocal */
2671: if (nsends) {MPI_Waitall(nsends,s_waits1,send_status);}
2672: /* send mat->i,j to others, and recv from other's */
2673: /*------------------------------------------------*/
2674: for (i=0; i<nsends; i++){
2675: j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2676: MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);
2677: }
2678: /* wait on receives of mat->i,j */
2679: /*------------------------------*/
2680: count = nrecvs;
2681: while (count) {
2682: MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);
2683: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2684: count--;
2685: }
2686: /* wait on sends of mat->i,j */
2687: /*---------------------------*/
2688: if (nsends) {
2689: MPI_Waitall(nsends,s_waits2,send_status);
2690: }
2691: } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2693: /* post receives, send and receive mat->a */
2694: /*----------------------------------------*/
2695: for (imdex=0; imdex<nrecvs; imdex++) {
2696: MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);
2697: }
2698: for (i=0; i<nsends; i++){
2699: MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);
2700: }
2701: count = nrecvs;
2702: while (count) {
2703: MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);
2704: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2705: count--;
2706: }
2707: if (nsends) {
2708: MPI_Waitall(nsends,s_waits3,send_status);
2709: }
2711: PetscFree2(s_waits3,send_status);
2713: /* create redundant matrix */
2714: /*-------------------------*/
2715: if (reuse == MAT_INITIAL_MATRIX){
2716: /* compute rownz_max for preallocation */
2717: for (imdex=0; imdex<nrecvs; imdex++){
2718: j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2719: rptr = rbuf_j[imdex];
2720: for (i=0; i<j; i++){
2721: ncols = rptr[i+1] - rptr[i];
2722: if (rownz_max < ncols) rownz_max = ncols;
2723: }
2724: }
2726: MatCreate(subcomm,&C);
2727: MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);
2728: MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);
2729: MatSetFromOptions(C);
2730: MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);
2731: MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);
2732: } else {
2733: C = *matredundant;
2734: }
2736: /* insert local matrix entries */
2737: rptr = sbuf_j;
2738: cols = sbuf_j + rend-rstart + 1;
2739: vals = sbuf_a;
2740: for (i=0; i<rend-rstart; i++){
2741: row = i + rstart;
2742: ncols = rptr[i+1] - rptr[i];
2743: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2744: vals += ncols;
2745: cols += ncols;
2746: }
2747: /* insert received matrix entries */
2748: for (imdex=0; imdex<nrecvs; imdex++){
2749: rstart = rowrange[recv_rank[imdex]];
2750: rend = rowrange[recv_rank[imdex]+1];
2751: rptr = rbuf_j[imdex];
2752: cols = rbuf_j[imdex] + rend-rstart + 1;
2753: vals = rbuf_a[imdex];
2754: for (i=0; i<rend-rstart; i++){
2755: row = i + rstart;
2756: ncols = rptr[i+1] - rptr[i];
2757: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2758: vals += ncols;
2759: cols += ncols;
2760: }
2761: }
2762: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2763: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2764: MatGetSize(C,&M,&N);
2765: if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2766: if (reuse == MAT_INITIAL_MATRIX) {
2767: PetscContainer container;
2768: *matredundant = C;
2769: /* create a supporting struct and attach it to C for reuse */
2770: PetscNewLog(C,Mat_Redundant,&redund);
2771: PetscContainerCreate(PETSC_COMM_SELF,&container);
2772: PetscContainerSetPointer(container,redund);
2773: PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);
2774: PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);
2775: PetscContainerDestroy(&container);
2777: redund->nzlocal = nzlocal;
2778: redund->nsends = nsends;
2779: redund->nrecvs = nrecvs;
2780: redund->send_rank = send_rank;
2781: redund->recv_rank = recv_rank;
2782: redund->sbuf_nz = sbuf_nz;
2783: redund->rbuf_nz = rbuf_nz;
2784: redund->sbuf_j = sbuf_j;
2785: redund->sbuf_a = sbuf_a;
2786: redund->rbuf_j = rbuf_j;
2787: redund->rbuf_a = rbuf_a;
2789: redund->Destroy = C->ops->destroy;
2790: C->ops->destroy = MatDestroy_MatRedundant;
2791: }
2792: return(0);
2793: }
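/*
   A hedged usage sketch for the routine above (an illustration, not part of the original
   source).  It assumes the caller has already split the matrix communicator into nsubcomm
   sub-communicators (for instance with the PetscSubcomm utilities, as PCREDUNDANT does), that
   subcomm is this process's sub-communicator, and that mlocal_sub is the number of local rows
   the redundant copy should own on subcomm.

     PetscErrorCode ierr;
     Mat            Cred;

     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
     ... every sub-communicator now holds a complete copy of A ...
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);
     ierr = MatDestroy(&Cred);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call only re-sends the numerical values; the communication pattern and
   index buffers are taken from the Mat_Redundant container attached to Cred.
*/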
2797: PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2798: {
2799: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2801: PetscInt i,*idxb = 0;
2802: PetscScalar *va,*vb;
2803: Vec vtmp;
2806: MatGetRowMaxAbs(a->A,v,idx);
2807: VecGetArray(v,&va);
2808: if (idx) {
2809: for (i=0; i<A->rmap->n; i++) {
2810: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2811: }
2812: }
2814: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2815: if (idx) {
2816: PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);
2817: }
2818: MatGetRowMaxAbs(a->B,vtmp,idxb);
2819: VecGetArray(vtmp,&vb);
2821: for (i=0; i<A->rmap->n; i++){
2822: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2823: va[i] = vb[i];
2824: if (idx) idx[i] = a->garray[idxb[i]];
2825: }
2826: }
2828: VecRestoreArray(v,&va);
2829: VecRestoreArray(vtmp,&vb);
2830: PetscFree(idxb);
2831: VecDestroy(&vtmp);
2832: return(0);
2833: }
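/*
   A minimal usage sketch for the routine above (an illustration, not part of the original
   source).  v must be distributed like the rows of A; idx is optional (may be PETSC_NULL) and
   receives the global column index of the winning entry in each local row.

     PetscErrorCode ierr;
     Vec            v;
     PetscInt       *idx,nlocal;

     ierr = MatGetVecs(A,PETSC_NULL,&v);CHKERRQ(ierr);
     ierr = MatGetLocalSize(A,&nlocal,PETSC_NULL);CHKERRQ(ierr);
     ierr = PetscMalloc(nlocal*sizeof(PetscInt),&idx);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
     ierr = PetscFree(idx);CHKERRQ(ierr);
     ierr = VecDestroy(&v);CHKERRQ(ierr);
*/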
2837: PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2838: {
2839: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2841: PetscInt i,*idxb = 0;
2842: PetscScalar *va,*vb;
2843: Vec vtmp;
2846: MatGetRowMinAbs(a->A,v,idx);
2847: VecGetArray(v,&va);
2848: if (idx) {
2849:     for (i=0; i<A->rmap->n; i++) { /* one entry of v and idx per local row */
2850: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2851: }
2852: }
2854: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2855: if (idx) {
2856: PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);
2857: }
2858: MatGetRowMinAbs(a->B,vtmp,idxb);
2859: VecGetArray(vtmp,&vb);
2861: for (i=0; i<A->rmap->n; i++){
2862: if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2863: va[i] = vb[i];
2864: if (idx) idx[i] = a->garray[idxb[i]];
2865: }
2866: }
2868: VecRestoreArray(v,&va);
2869: VecRestoreArray(vtmp,&vb);
2870: PetscFree(idxb);
2871: VecDestroy(&vtmp);
2872: return(0);
2873: }
2877: PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2878: {
2879: Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2880: PetscInt n = A->rmap->n;
2881: PetscInt cstart = A->cmap->rstart;
2882: PetscInt *cmap = mat->garray;
2883: PetscInt *diagIdx, *offdiagIdx;
2884: Vec diagV, offdiagV;
2885: PetscScalar *a, *diagA, *offdiagA;
2886: PetscInt r;
2890: PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);
2891:   VecCreateSeq(PETSC_COMM_SELF, n, &diagV);
2892:   VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);
2893: MatGetRowMin(mat->A, diagV, diagIdx);
2894: MatGetRowMin(mat->B, offdiagV, offdiagIdx);
2895: VecGetArray(v, &a);
2896: VecGetArray(diagV, &diagA);
2897: VecGetArray(offdiagV, &offdiagA);
2898: for(r = 0; r < n; ++r) {
2899: if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2900: a[r] = diagA[r];
2901: idx[r] = cstart + diagIdx[r];
2902: } else {
2903: a[r] = offdiagA[r];
2904: idx[r] = cmap[offdiagIdx[r]];
2905: }
2906: }
2907: VecRestoreArray(v, &a);
2908: VecRestoreArray(diagV, &diagA);
2909: VecRestoreArray(offdiagV, &offdiagA);
2910: VecDestroy(&diagV);
2911: VecDestroy(&offdiagV);
2912: PetscFree2(diagIdx, offdiagIdx);
2913: return(0);
2914: }
2918: PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2919: {
2920: Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
2921: PetscInt n = A->rmap->n;
2922: PetscInt cstart = A->cmap->rstart;
2923: PetscInt *cmap = mat->garray;
2924: PetscInt *diagIdx, *offdiagIdx;
2925: Vec diagV, offdiagV;
2926: PetscScalar *a, *diagA, *offdiagA;
2927: PetscInt r;
2931: PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);
2932:   VecCreateSeq(PETSC_COMM_SELF, n, &diagV);
2933:   VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);
2934: MatGetRowMax(mat->A, diagV, diagIdx);
2935: MatGetRowMax(mat->B, offdiagV, offdiagIdx);
2936: VecGetArray(v, &a);
2937: VecGetArray(diagV, &diagA);
2938: VecGetArray(offdiagV, &offdiagA);
2939: for(r = 0; r < n; ++r) {
2940: if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2941: a[r] = diagA[r];
2942: idx[r] = cstart + diagIdx[r];
2943: } else {
2944: a[r] = offdiagA[r];
2945: idx[r] = cmap[offdiagIdx[r]];
2946: }
2947: }
2948: VecRestoreArray(v, &a);
2949: VecRestoreArray(diagV, &diagA);
2950: VecRestoreArray(offdiagV, &offdiagA);
2951: VecDestroy(&diagV);
2952: VecDestroy(&offdiagV);
2953: PetscFree2(diagIdx, offdiagIdx);
2954: return(0);
2955: }
2959: PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2960: {
2962: Mat *dummy;
2965: MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);
2966: *newmat = *dummy;
2967: PetscFree(dummy);
2968: return(0);
2969: }
2971: extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2975: PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2976: {
2977: Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
2981: MatInvertBlockDiagonal(a->A,values);
2982: return(0);
2983: }
2986: /* -------------------------------------------------------------------*/
2987: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2988: MatGetRow_MPIAIJ,
2989: MatRestoreRow_MPIAIJ,
2990: MatMult_MPIAIJ,
2991: /* 4*/ MatMultAdd_MPIAIJ,
2992: MatMultTranspose_MPIAIJ,
2993: MatMultTransposeAdd_MPIAIJ,
2994: #ifdef PETSC_HAVE_PBGL
2995: MatSolve_MPIAIJ,
2996: #else
2997: 0,
2998: #endif
2999: 0,
3000: 0,
3001: /*10*/ 0,
3002: 0,
3003: 0,
3004: MatSOR_MPIAIJ,
3005: MatTranspose_MPIAIJ,
3006: /*15*/ MatGetInfo_MPIAIJ,
3007: MatEqual_MPIAIJ,
3008: MatGetDiagonal_MPIAIJ,
3009: MatDiagonalScale_MPIAIJ,
3010: MatNorm_MPIAIJ,
3011: /*20*/ MatAssemblyBegin_MPIAIJ,
3012: MatAssemblyEnd_MPIAIJ,
3013: MatSetOption_MPIAIJ,
3014: MatZeroEntries_MPIAIJ,
3015: /*24*/ MatZeroRows_MPIAIJ,
3016: 0,
3017: #ifdef PETSC_HAVE_PBGL
3018: 0,
3019: #else
3020: 0,
3021: #endif
3022: 0,
3023: 0,
3024: /*29*/ MatSetUp_MPIAIJ,
3025: #ifdef PETSC_HAVE_PBGL
3026: 0,
3027: #else
3028: 0,
3029: #endif
3030: 0,
3031: 0,
3032: 0,
3033: /*34*/ MatDuplicate_MPIAIJ,
3034: 0,
3035: 0,
3036: 0,
3037: 0,
3038: /*39*/ MatAXPY_MPIAIJ,
3039: MatGetSubMatrices_MPIAIJ,
3040: MatIncreaseOverlap_MPIAIJ,
3041: MatGetValues_MPIAIJ,
3042: MatCopy_MPIAIJ,
3043: /*44*/ MatGetRowMax_MPIAIJ,
3044: MatScale_MPIAIJ,
3045: 0,
3046: 0,
3047: MatZeroRowsColumns_MPIAIJ,
3048: /*49*/ 0,
3049: 0,
3050: 0,
3051: 0,
3052: 0,
3053: /*54*/ MatFDColoringCreate_MPIAIJ,
3054: 0,
3055: MatSetUnfactored_MPIAIJ,
3056: 0, /* MatPermute_MPIAIJ, impl currently broken */
3057: 0,
3058: /*59*/ MatGetSubMatrix_MPIAIJ,
3059: MatDestroy_MPIAIJ,
3060: MatView_MPIAIJ,
3061: 0,
3062: 0,
3063: /*64*/ 0,
3064: 0,
3065: 0,
3066: 0,
3067: 0,
3068: /*69*/ MatGetRowMaxAbs_MPIAIJ,
3069: MatGetRowMinAbs_MPIAIJ,
3070: 0,
3071: MatSetColoring_MPIAIJ,
3072: #if defined(PETSC_HAVE_ADIC)
3073: MatSetValuesAdic_MPIAIJ,
3074: #else
3075: 0,
3076: #endif
3077: MatSetValuesAdifor_MPIAIJ,
3078: /*75*/ MatFDColoringApply_AIJ,
3079: 0,
3080: 0,
3081: 0,
3082: 0,
3083: /*80*/ 0,
3084: 0,
3085: 0,
3086: /*83*/ MatLoad_MPIAIJ,
3087: 0,
3088: 0,
3089: 0,
3090: 0,
3091: 0,
3092: /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3093: MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3094: MatMatMultNumeric_MPIAIJ_MPIAIJ,
3095: MatPtAP_Basic,
3096: MatPtAPSymbolic_MPIAIJ,
3097: /*94*/ MatPtAPNumeric_MPIAIJ,
3098: 0,
3099: 0,
3100: 0,
3101: 0,
3102: /*99*/ 0,
3103: MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3104: MatPtAPNumeric_MPIAIJ_MPIAIJ,
3105: MatConjugate_MPIAIJ,
3106: 0,
3107: /*104*/MatSetValuesRow_MPIAIJ,
3108: MatRealPart_MPIAIJ,
3109: MatImaginaryPart_MPIAIJ,
3110: 0,
3111: 0,
3112: /*109*/0,
3113: MatGetRedundantMatrix_MPIAIJ,
3114: MatGetRowMin_MPIAIJ,
3115: 0,
3116: 0,
3117: /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3118: 0,
3119: 0,
3120: 0,
3121: 0,
3122: /*119*/0,
3123: 0,
3124: 0,
3125: 0,
3126: MatGetMultiProcBlock_MPIAIJ,
3127: /*124*/MatFindNonZeroRows_MPIAIJ,
3128: MatGetColumnNorms_MPIAIJ,
3129: MatInvertBlockDiagonal_MPIAIJ,
3130: 0,
3131: MatGetSubMatricesParallel_MPIAIJ,
3132: /*129*/0,
3133: MatTransposeMatMult_MPIAIJ_MPIAIJ,
3134: MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3135: MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3136: 0,
3137: /*134*/0,
3138: 0,
3139: 0,
3140: 0,
3141: 0
3142: };
3144: /* ----------------------------------------------------------------------------------------*/
3146: EXTERN_C_BEGIN
3149: PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
3150: {
3151: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
3155: MatStoreValues(aij->A);
3156: MatStoreValues(aij->B);
3157: return(0);
3158: }
3159: EXTERN_C_END
3161: EXTERN_C_BEGIN
3164: PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
3165: {
3166: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
3170: MatRetrieveValues(aij->A);
3171: MatRetrieveValues(aij->B);
3172: return(0);
3173: }
3174: EXTERN_C_END
3176: EXTERN_C_BEGIN
3179: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3180: {
3181: Mat_MPIAIJ *b;
3183: PetscInt i;
3184: PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3187: if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3188: if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3189: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3190: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3191: if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3192: if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3194: PetscLayoutSetUp(B->rmap);
3195: PetscLayoutSetUp(B->cmap);
3196: if (d_nnz) {
3197: for (i=0; i<B->rmap->n; i++) {
3198: if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3199: }
3200: }
3201: if (o_nnz) {
3202: for (i=0; i<B->rmap->n; i++) {
3203: if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3204: }
3205: }
3206: b = (Mat_MPIAIJ*)B->data;
3208: if (!B->preallocated) {
3209: /* Explicitly create 2 MATSEQAIJ matrices. */
3210: MatCreate(PETSC_COMM_SELF,&b->A);
3211: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
3212: MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);
3213: MatSetType(b->A,MATSEQAIJ);
3214: PetscLogObjectParent(B,b->A);
3215: MatCreate(PETSC_COMM_SELF,&b->B);
3216: MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
3217: MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);
3218: MatSetType(b->B,MATSEQAIJ);
3219: PetscLogObjectParent(B,b->B);
3220: }
3222: MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
3223: MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
3224: /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3225: if (!d_realalloc) {MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);}
3226: if (!o_realalloc) {MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);}
3227: B->preallocated = PETSC_TRUE;
3228: return(0);
3229: }
3230: EXTERN_C_END
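/*
   A minimal preallocation sketch for the routine above (an illustration, not part of the
   original source).  d_nz/d_nnz describe the diagonal block (columns owned by this process)
   and o_nz/o_nnz the off-diagonal block; exact per-row counts avoid any mallocs during
   MatSetValues().  M and N below are global sizes assumed defined by the caller.

     PetscErrorCode ierr;
     Mat            A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ... MatSetValues() / MatAssemblyBegin() / MatAssemblyEnd() ...

   Per-row arrays d_nnz[] and o_nnz[], one entry per local row, can be passed in place of the
   constants 5 and 2.
*/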
3234: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3235: {
3236: Mat mat;
3237: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3241: *newmat = 0;
3242: MatCreate(((PetscObject)matin)->comm,&mat);
3243: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
3244: MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);
3245: MatSetType(mat,((PetscObject)matin)->type_name);
3246: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
3247: a = (Mat_MPIAIJ*)mat->data;
3248:
3249: mat->factortype = matin->factortype;
3250: mat->rmap->bs = matin->rmap->bs;
3251: mat->cmap->bs = matin->cmap->bs;
3252: mat->assembled = PETSC_TRUE;
3253: mat->insertmode = NOT_SET_VALUES;
3254: mat->preallocated = PETSC_TRUE;
3256: a->size = oldmat->size;
3257: a->rank = oldmat->rank;
3258: a->donotstash = oldmat->donotstash;
3259: a->roworiented = oldmat->roworiented;
3260: a->rowindices = 0;
3261: a->rowvalues = 0;
3262: a->getrowactive = PETSC_FALSE;
3264: PetscLayoutReference(matin->rmap,&mat->rmap);
3265: PetscLayoutReference(matin->cmap,&mat->cmap);
3267: if (oldmat->colmap) {
3268: #if defined (PETSC_USE_CTABLE)
3269: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
3270: #else
3271: PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);
3272: PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));
3273: PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));
3274: #endif
3275: } else a->colmap = 0;
3276: if (oldmat->garray) {
3277: PetscInt len;
3278: len = oldmat->B->cmap->n;
3279: PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);
3280: PetscLogObjectMemory(mat,len*sizeof(PetscInt));
3281: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt)); }
3282: } else a->garray = 0;
3283:
3284: VecDuplicate(oldmat->lvec,&a->lvec);
3285: PetscLogObjectParent(mat,a->lvec);
3286: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
3287: PetscLogObjectParent(mat,a->Mvctx);
3288: MatDuplicate(oldmat->A,cpvalues,&a->A);
3289: PetscLogObjectParent(mat,a->A);
3290: MatDuplicate(oldmat->B,cpvalues,&a->B);
3291: PetscLogObjectParent(mat,a->B);
3292: PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
3293: *newmat = mat;
3294: return(0);
3295: }
3301: PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3302: {
3303: PetscScalar *vals,*svals;
3304: MPI_Comm comm = ((PetscObject)viewer)->comm;
3306: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
3307: PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3308: PetscInt header[4],*rowlengths = 0,M,N,m,*cols;
3309: PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3310: PetscInt cend,cstart,n,*rowners,sizesset=1;
3311: int fd;
3314: MPI_Comm_size(comm,&size);
3315: MPI_Comm_rank(comm,&rank);
3316: if (!rank) {
3317: PetscViewerBinaryGetDescriptor(viewer,&fd);
3318: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
3319: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3320: }
3322: if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3324: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
3325: M = header[1]; N = header[2];
3326: /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3327: if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3328: if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3329:
3330: /* If global sizes are set, check if they are consistent with that given in the file */
3331: if (sizesset) {
3332: MatGetSize(newMat,&grows,&gcols);
3333: }
3334: if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3335: if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3337: /* determine ownership of all rows */
3338: if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3339: else m = newMat->rmap->n; /* Set by user */
3340:
3341: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
3342: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
3344: /* First process needs enough room for process with most rows */
3345: if (!rank) {
3346: mmax = rowners[1];
3347: for (i=2; i<size; i++) {
3348: mmax = PetscMax(mmax,rowners[i]);
3349: }
3350: } else mmax = m;
3352: rowners[0] = 0;
3353: for (i=2; i<=size; i++) {
3354: rowners[i] += rowners[i-1];
3355: }
3356: rstart = rowners[rank];
3357: rend = rowners[rank+1];
3359: /* distribute row lengths to all processors */
3360: PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);
3361: if (!rank) {
3362: PetscBinaryRead(fd,ourlens,m,PETSC_INT);
3363: PetscMalloc(m*sizeof(PetscInt),&rowlengths);
3364: PetscMalloc(size*sizeof(PetscInt),&procsnz);
3365: PetscMemzero(procsnz,size*sizeof(PetscInt));
3366: for (j=0; j<m; j++) {
3367: procsnz[0] += ourlens[j];
3368: }
3369: for (i=1; i<size; i++) {
3370: PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);
3371: /* calculate the number of nonzeros on each processor */
3372: for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3373: procsnz[i] += rowlengths[j];
3374: }
3375: MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
3376: }
3377: PetscFree(rowlengths);
3378: } else {
3379: MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);
3380: }
3382: if (!rank) {
3383: /* determine max buffer needed and allocate it */
3384: maxnz = 0;
3385: for (i=0; i<size; i++) {
3386: maxnz = PetscMax(maxnz,procsnz[i]);
3387: }
3388: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
3390: /* read in my part of the matrix column indices */
3391: nz = procsnz[0];
3392: PetscMalloc(nz*sizeof(PetscInt),&mycols);
3393: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
3395:     /* read in everyone else's part and ship it off */
3396: for (i=1; i<size; i++) {
3397: nz = procsnz[i];
3398: PetscBinaryRead(fd,cols,nz,PETSC_INT);
3399: MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);
3400: }
3401: PetscFree(cols);
3402: } else {
3403: /* determine buffer space needed for message */
3404: nz = 0;
3405: for (i=0; i<m; i++) {
3406: nz += ourlens[i];
3407: }
3408: PetscMalloc(nz*sizeof(PetscInt),&mycols);
3410: /* receive message of column indices*/
3411: MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);
3412: }
3414: /* determine column ownership if matrix is not square */
3415: if (N != M) {
3416: if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3417: else n = newMat->cmap->n;
3418: MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);
3419: cstart = cend - n;
3420: } else {
3421: cstart = rstart;
3422: cend = rend;
3423: n = cend - cstart;
3424: }
3426: /* loop over local rows, determining number of off diagonal entries */
3427: PetscMemzero(offlens,m*sizeof(PetscInt));
3428: jj = 0;
3429: for (i=0; i<m; i++) {
3430: for (j=0; j<ourlens[i]; j++) {
3431: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3432: jj++;
3433: }
3434: }
3436: for (i=0; i<m; i++) {
3437: ourlens[i] -= offlens[i];
3438: }
3439: if (!sizesset) {
3440: MatSetSizes(newMat,m,n,M,N);
3441: }
3442: MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);
3444: for (i=0; i<m; i++) {
3445: ourlens[i] += offlens[i];
3446: }
3448: if (!rank) {
3449: PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);
3451: /* read in my part of the matrix numerical values */
3452: nz = procsnz[0];
3453: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3454:
3455: /* insert into matrix */
3456: jj = rstart;
3457: smycols = mycols;
3458: svals = vals;
3459: for (i=0; i<m; i++) {
3460: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3461: smycols += ourlens[i];
3462: svals += ourlens[i];
3463: jj++;
3464: }
3466: /* read in other processors and ship out */
3467: for (i=1; i<size; i++) {
3468: nz = procsnz[i];
3469: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3470: MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);
3471: }
3472: PetscFree(procsnz);
3473: } else {
3474: /* receive numeric values */
3475: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
3477: /* receive message of values*/
3478: MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);
3480: /* insert into matrix */
3481: jj = rstart;
3482: smycols = mycols;
3483: svals = vals;
3484: for (i=0; i<m; i++) {
3485: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3486: smycols += ourlens[i];
3487: svals += ourlens[i];
3488: jj++;
3489: }
3490: }
3491: PetscFree2(ourlens,offlens);
3492: PetscFree(vals);
3493: PetscFree(mycols);
3494: PetscFree(rowners);
3496: MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);
3497: MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);
3498: return(0);
3499: }
3503: PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3504: {
3506: IS iscol_local;
3507: PetscInt csize;
3510: ISGetLocalSize(iscol,&csize);
3511: if (call == MAT_REUSE_MATRIX) {
3512: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
3513: if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3514: } else {
3515: PetscInt cbs;
3516: ISGetBlockSize(iscol,&cbs);
3517: ISAllGather(iscol,&iscol_local);
3518: ISSetBlockSize(iscol_local,cbs);
3519: }
3520: MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
3521: if (call == MAT_INITIAL_MATRIX) {
3522: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
3523: ISDestroy(&iscol_local);
3524: }
3525: return(0);
3526: }
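/*
   A minimal usage sketch of the public interface MatGetSubMatrix(), which dispatches to
   MatGetSubMatrix_MPIAIJ() above for MPIAIJ matrices. It assumes an assembled square MPIAIJ
   matrix A on PETSC_COMM_WORLD; each process requests its own rows and columns, so the result
   is simply a parallel copy of A, which is enough to show the call sequence. The second call
   reuses the storage of S after the values of A have changed.

      Mat      A,S;
      IS       isrow,iscol;
      PetscInt rstart,rend;

      MatGetOwnershipRange(A,&rstart,&rend);
      ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);
      ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&iscol);
      MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);
      MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);
      ISDestroy(&isrow);
      ISDestroy(&iscol);
      MatDestroy(&S);
*/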
3528: extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3531: /*
3532: Not great since it makes two copies of the submatrix: first a local SeqAIJ,
3533: and then the end result by concatenating the local matrices.
3534: Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3536: Note: This requires a sequential iscol with all indices.
3537: */
3538: PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3539: {
3541: PetscMPIInt rank,size;
3542: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3543: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3544: PetscBool allcolumns, colflag;
3545: Mat M,Mreuse;
3546: MatScalar *vwork,*aa;
3547: MPI_Comm comm = ((PetscObject)mat)->comm;
3548: Mat_SeqAIJ *aij;
3552: MPI_Comm_rank(comm,&rank);
3553: MPI_Comm_size(comm,&size);
3555: ISIdentity(iscol,&colflag);
3556: ISGetLocalSize(iscol,&ncol);
3557: if (colflag && ncol == mat->cmap->N){
3558: allcolumns = PETSC_TRUE;
3559: } else {
3560: allcolumns = PETSC_FALSE;
3561: }
3562: if (call == MAT_REUSE_MATRIX) {
3563: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
3564: if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3565: MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);
3566: } else {
3567: MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);
3568: }
3570: /*
3571: m - number of local rows
3572: n - number of columns (same on all processors)
3573: rstart - first row in new global matrix generated
3574: */
3575: MatGetSize(Mreuse,&m,&n);
3576: MatGetBlockSizes(Mreuse,&bs,&cbs);
3577: if (call == MAT_INITIAL_MATRIX) {
3578: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3579: ii = aij->i;
3580: jj = aij->j;
3582: /*
3583: Determine the number of non-zeros in the diagonal and off-diagonal
3584: portions of the matrix in order to do correct preallocation
3585: */
3587: /* first get start and end of "diagonal" columns */
3588: if (csize == PETSC_DECIDE) {
3589: ISGetSize(isrow,&mglobal);
3590: if (mglobal == n) { /* square matrix */
3591: nlocal = m;
3592: } else {
3593: nlocal = n/size + ((n % size) > rank);
3594: }
3595: } else {
3596: nlocal = csize;
3597: }
3598: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
3599: rstart = rend - nlocal;
3600: if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3602: /* next, compute all the lengths */
3603: PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);
3604: olens = dlens + m;
3605: for (i=0; i<m; i++) {
3606: jend = ii[i+1] - ii[i];
3607: olen = 0;
3608: dlen = 0;
3609: for (j=0; j<jend; j++) {
3610: if (*jj < rstart || *jj >= rend) olen++;
3611: else dlen++;
3612: jj++;
3613: }
3614: olens[i] = olen;
3615: dlens[i] = dlen;
3616: }
3617: MatCreate(comm,&M);
3618: MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);
3619: MatSetBlockSizes(M,bs,cbs);
3620: MatSetType(M,((PetscObject)mat)->type_name);
3621: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
3622: PetscFree(dlens);
3623: } else {
3624: PetscInt ml,nl;
3626: M = *newmat;
3627: MatGetLocalSize(M,&ml,&nl);
3628: if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3629: MatZeroEntries(M);
3630: /*
3631: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3632: rather than the slower MatSetValues().
3633: */
3634: M->was_assembled = PETSC_TRUE;
3635: M->assembled = PETSC_FALSE;
3636: }
3637: MatGetOwnershipRange(M,&rstart,&rend);
3638: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3639: ii = aij->i;
3640: jj = aij->j;
3641: aa = aij->a;
3642: for (i=0; i<m; i++) {
3643: row = rstart + i;
3644: nz = ii[i+1] - ii[i];
3645: cwork = jj; jj += nz;
3646: vwork = aa; aa += nz;
3647: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
3648: }
3650: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
3651: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
3652: *newmat = M;
3654: /* save submatrix used in processor for next request */
3655: if (call == MAT_INITIAL_MATRIX) {
3656: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
3657: MatDestroy(&Mreuse);
3658: }
3660: return(0);
3661: }
3663: EXTERN_C_BEGIN
3666: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3667: {
3668: PetscInt m,cstart, cend,j,nnz,i,d;
3669: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3670: const PetscInt *JJ;
3671: PetscScalar *values;
3675: if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3677: PetscLayoutSetUp(B->rmap);
3678: PetscLayoutSetUp(B->cmap);
3679: m = B->rmap->n;
3680: cstart = B->cmap->rstart;
3681: cend = B->cmap->rend;
3682: rstart = B->rmap->rstart;
3684: PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);
3686: #if defined(PETSC_USE_DEBUG)
3687: for (i=0; i<m; i++) {
3688: nnz = Ii[i+1]- Ii[i];
3689: JJ = J + Ii[i];
3690: if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3691: if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3692: if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3693: }
3694: #endif
3696: for (i=0; i<m; i++) {
3697: nnz = Ii[i+1]- Ii[i];
3698: JJ = J + Ii[i];
3699: nnz_max = PetscMax(nnz_max,nnz);
3700: d = 0;
3701: for (j=0; j<nnz; j++) {
3702: if (cstart <= JJ[j] && JJ[j] < cend) d++;
3703: }
3704: d_nnz[i] = d;
3705: o_nnz[i] = nnz - d;
3706: }
3707: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
3708: PetscFree2(d_nnz,o_nnz);
3710: if (v) values = (PetscScalar*)v;
3711: else {
3712: PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);
3713: PetscMemzero(values,nnz_max*sizeof(PetscScalar));
3714: }
3716: for (i=0; i<m; i++) {
3717: ii = i + rstart;
3718: nnz = Ii[i+1]- Ii[i];
3719: MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);
3720: }
3721: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3722: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3724: if (!v) {
3725: PetscFree(values);
3726: }
3727: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3728: return(0);
3729: }
3730: EXTERN_C_END
3734: /*@
3735: MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3736: (the default parallel PETSc format).
3738: Collective on MPI_Comm
3740: Input Parameters:
3741: + B - the matrix
3742: . i - the indices into j for the start of each local row (starts with zero)
3743: . j - the column indices for each local row (starts with zero)
3744: - v - optional values in the matrix
3746: Level: developer
3748: Notes:
3749: The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3750: thus you CANNOT change the matrix entries by changing the values of v[] after you have
3751: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3753: The i and j indices are 0 based, and the i indices are offsets into the local j array.
3755: The format used for the sparse matrix input is equivalent to a
3756: row-major ordering, i.e., for the following matrix, the input data expected is
3757: as shown:
3759: 1 0 0
3760: 2 0 3 P0
3761: -------
3762: 4 5 6 P1
3764: Process0 [P0]: rows_owned=[0,1]
3765: i = {0,1,3} [size = nrow+1 = 2+1]
3766: j = {0,0,2} [size = nz = 3]
3767: v = {1,2,3} [size = nz = 3]
3769: Process1 [P1]: rows_owned=[2]
3770: i = {0,3} [size = nrow+1 = 1+1]
3771: j = {0,1,2} [size = nz = 3]
3772: v = {4,5,6} [size = nz = 3]
3774: .keywords: matrix, aij, compressed row, sparse, parallel
3776: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3777: MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3778: @*/
3779: PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3780: {
3784: PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3785: return(0);
3786: }
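/*
   A minimal usage sketch of MatMPIAIJSetPreallocationCSR(), assuming exactly two MPI processes
   and the 3x3 example from the manual page above; rank is assumed to come from MPI_Comm_rank()
   on PETSC_COMM_WORLD. The routine copies and assembles the values, so the matrix is ready for
   use when it returns.

      Mat         B;
      PetscInt    m    = !rank ? 2 : 1;
      PetscInt    i0[] = {0,1,3},       i1[] = {0,3};
      PetscInt    j0[] = {0,0,2},       j1[] = {0,1,2};
      PetscScalar v0[] = {1.0,2.0,3.0}, v1[] = {4.0,5.0,6.0};

      MatCreate(PETSC_COMM_WORLD,&B);
      MatSetSizes(B,m,PETSC_DECIDE,3,3);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,!rank ? i0 : i1,!rank ? j0 : j1,!rank ? v0 : v1);
*/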
3790: /*@C
3791: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3792: (the default parallel PETSc format). For good matrix assembly performance
3793: the user should preallocate the matrix storage by setting the parameters
3794: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3795: performance can be increased by more than a factor of 50.
3797: Collective on MPI_Comm
3799: Input Parameters:
3800: + A - the matrix
3801: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3802: (same value is used for all local rows)
3803: . d_nnz - array containing the number of nonzeros in the various rows of the
3804: DIAGONAL portion of the local submatrix (possibly different for each row)
3805: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3806: The size of this array is equal to the number of local rows, i.e 'm'.
3807: For matrices that will be factored, you must leave room for (and set)
3808: the diagonal entry even if it is zero.
3809: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3810: submatrix (same value is used for all local rows).
3811: - o_nnz - array containing the number of nonzeros in the various rows of the
3812: OFF-DIAGONAL portion of the local submatrix (possibly different for
3813: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3814: structure. The size of this array is equal to the number
3815: of local rows, i.e 'm'.
3817: If the *_nnz parameter is given then the *_nz parameter is ignored
3819: The AIJ format (also called the Yale sparse matrix format or
3820: compressed row storage (CSR)), is fully compatible with standard Fortran 77
3821: storage. The stored row and column indices begin with zero.
3822: See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3824: The parallel matrix is partitioned such that the first m0 rows belong to
3825: process 0, the next m1 rows belong to process 1, the next m2 rows belong
3826: to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3828: The DIAGONAL portion of the local submatrix of a processor can be defined
3829: as the submatrix which is obtained by extracting the part corresponding to
3830: the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3831: first row that belongs to the processor, r2 is the last row belonging to
3832: this processor, and c1-c2 is the range of indices of the local part of a
3833: vector suitable for applying the matrix to. This is an mxn matrix. In the
3834: common case of a square matrix, the row and column ranges are the same and
3835: the DIAGONAL part is also square. The remaining portion of the local
3836: submatrix (of size m x (N-n)) constitutes the OFF-DIAGONAL portion.
3838: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3840: You can call MatGetInfo() to get information on how effective the preallocation was;
3841: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3842: You can also run with the option -info and look for messages with the string
3843: malloc in them to see if additional memory allocation was needed.
3845: Example usage:
3846:
3847: Consider the following 8x8 matrix with 34 non-zero values, that is
3848: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3849: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3850: as follows:
3852: .vb
3853: 1 2 0 | 0 3 0 | 0 4
3854: Proc0 0 5 6 | 7 0 0 | 8 0
3855: 9 0 10 | 11 0 0 | 12 0
3856: -------------------------------------
3857: 13 0 14 | 15 16 17 | 0 0
3858: Proc1 0 18 0 | 19 20 21 | 0 0
3859: 0 0 0 | 22 23 0 | 24 0
3860: -------------------------------------
3861: Proc2 25 26 27 | 0 0 28 | 29 0
3862: 30 0 0 | 31 32 33 | 0 34
3863: .ve
3865: This can be represented as a collection of submatrices as:
3867: .vb
3868: A B C
3869: D E F
3870: G H I
3871: .ve
3873: Where the submatrices A,B,C are owned by proc0, D,E,F are
3874: owned by proc1, G,H,I are owned by proc2.
3876: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3877: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3878: The 'M','N' parameters are 8,8, and have the same values on all procs.
3880: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3881: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3882: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3883: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3884: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3885: matrix, and [DF] as another SeqAIJ matrix.
3887: When d_nz, o_nz parameters are specified, d_nz storage elements are
3888: allocated for every row of the local diagonal submatrix, and o_nz
3889: storage locations are allocated for every row of the OFF-DIAGONAL submat.
3890: One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3891: row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3892: In this case, the values of d_nz,o_nz are:
3893: .vb
3894: proc0 : d_nz = 2, o_nz = 2
3895: proc1 : d_nz = 3, o_nz = 2
3896: proc2 : d_nz = 1, o_nz = 4
3897: .ve
3898: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3899: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3900: for proc2, i.e., we are using 12+15+10=37 storage locations to store
3901: 34 values.
3903: When d_nnz, o_nnz parameters are specified, the storage is specified
3904: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3905: In the above case the values for d_nnz,o_nnz are:
3906: .vb
3907: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3908: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3909: proc2: d_nnz = [1,1] and o_nnz = [4,4]
3910: .ve
3911: Here the space allocated is the sum of all the above values, i.e., 34, and
3912: hence the preallocation is perfect.
3914: Level: intermediate
3916: .keywords: matrix, aij, compressed row, sparse, parallel
3918: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3919: MPIAIJ, MatGetInfo()
3920: @*/
3921: PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3922: {
3928: PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
3929: return(0);
3930: }
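/*
   A minimal usage sketch of MatMPIAIJSetPreallocation() for the 8x8 example in the manual page
   above, written from the point of view of proc0 (which owns 3 rows); every process makes the
   same calls with its own local sizes and d_nnz/o_nnz arrays. The matrix is then filled with
   MatSetValues() on the locally owned rows and assembled with MatAssemblyBegin()/MatAssemblyEnd().

      Mat      A;
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,3,3,8,8);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
*/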
3934: /*@
3935: MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3936: CSR format the local rows.
3938: Collective on MPI_Comm
3940: Input Parameters:
3941: + comm - MPI communicator
3942: . m - number of local rows (Cannot be PETSC_DECIDE)
3943: . n - This value should be the same as the local size used in creating the
3944: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3945: calculated if N is given) For square matrices n is almost always m.
3946: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3947: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3948: . i - row indices
3949: . j - column indices
3950: - a - matrix values
3952: Output Parameter:
3953: . mat - the matrix
3955: Level: intermediate
3957: Notes:
3958: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3959: thus you CANNOT change the matrix entries by changing the values of a[] after you have
3960: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3962: The i and j indices are 0 based, and the i indices are offsets into the local j array.
3964: The format used for the sparse matrix input is equivalent to a
3965: row-major ordering, i.e., for the following matrix, the input data expected is
3966: as shown:
3968: 1 0 0
3969: 2 0 3 P0
3970: -------
3971: 4 5 6 P1
3973: Process0 [P0]: rows_owned=[0,1]
3974: i = {0,1,3} [size = nrow+1 = 2+1]
3975: j = {0,0,2} [size = nz = 3]
3976: v = {1,2,3} [size = nz = 3]
3978: Process1 [P1]: rows_owned=[2]
3979: i = {0,3} [size = nrow+1 = 1+1]
3980: j = {0,1,2} [size = nz = 3]
3981: v = {4,5,6} [size = nz = 3]
3983: .keywords: matrix, aij, compressed row, sparse, parallel
3985: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3986: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3987: @*/
3988: PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3989: {
3993: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3994: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3995: MatCreate(comm,mat);
3996: MatSetSizes(*mat,m,n,M,N);
3997: /* MatSetBlockSizes(M,bs,cbs); */
3998: MatSetType(*mat,MATMPIAIJ);
3999: MatMPIAIJSetPreallocationCSR(*mat,i,j,a);
4000: return(0);
4001: }
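/*
   A minimal usage sketch of MatCreateMPIAIJWithArrays(), assuming exactly two MPI processes and
   the 3x3 example from the manual page above (rank assumed to come from MPI_Comm_rank()). The
   arrays are copied, so they may be freed or reused immediately afterwards.

      Mat         mat;
      PetscInt    m    = !rank ? 2 : 1;
      PetscInt    i0[] = {0,1,3},       i1[] = {0,3};
      PetscInt    j0[] = {0,0,2},       j1[] = {0,1,2};
      PetscScalar a0[] = {1.0,2.0,3.0}, a1[] = {4.0,5.0,6.0};

      MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DETERMINE,3,
                                !rank ? i0 : i1,!rank ? j0 : j1,!rank ? a0 : a1,&mat);
*/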
4005: /*@C
4006: MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4007: (the default parallel PETSc format). For good matrix assembly performance
4008: the user should preallocate the matrix storage by setting the parameters
4009: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4010: performance can be increased by more than a factor of 50.
4012: Collective on MPI_Comm
4014: Input Parameters:
4015: + comm - MPI communicator
4016: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4017: This value should be the same as the local size used in creating the
4018: y vector for the matrix-vector product y = Ax.
4019: . n - This value should be the same as the local size used in creating the
4020: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4021: calculated if N is given) For square matrices n is almost always m.
4022: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4023: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4024: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4025: (same value is used for all local rows)
4026: . d_nnz - array containing the number of nonzeros in the various rows of the
4027: DIAGONAL portion of the local submatrix (possibly different for each row)
4028: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4029: The size of this array is equal to the number of local rows, i.e 'm'.
4030: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4031: submatrix (same value is used for all local rows).
4032: - o_nnz - array containing the number of nonzeros in the various rows of the
4033: OFF-DIAGONAL portion of the local submatrix (possibly different for
4034: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4035: structure. The size of this array is equal to the number
4036: of local rows, i.e 'm'.
4038: Output Parameter:
4039: . A - the matrix
4041: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4042: MatXXXXSetPreallocation() paradigm instead of this routine directly.
4043: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4045: Notes:
4046: If the *_nnz parameter is given then the *_nz parameter is ignored
4048: m,n,M,N parameters specify the size of the matrix, and its partitioning across
4049: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4050: storage requirements for this matrix.
4052: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4053: processor than it must be used on all processors that share the object for
4054: that argument.
4056: The user MUST specify either the local or global matrix dimensions
4057: (possibly both).
4059: The parallel matrix is partitioned across processors such that the
4060: first m0 rows belong to process 0, the next m1 rows belong to
4061: process 1, the next m2 rows belong to process 2 etc.. where
4062: m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4063: values corresponding to an [m x N] submatrix.
4065: The columns are logically partitioned with the n0 columns belonging
4066: to 0th partition, the next n1 columns belonging to the next
4067: partition, etc., where n0,n1,n2... are the input parameter 'n'.
4069: The DIAGONAL portion of the local submatrix on any given processor
4070: is the submatrix formed by the m rows and n columns associated
4071: with the given processor, i.e., the diagonal matrix on
4072: process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4073: etc. The remaining portion of the local submatrix [m x (N-n)]
4074: constitutes the OFF-DIAGONAL portion. The example below better
4075: illustrates this concept.
4077: For a square global matrix we define each processor's diagonal portion
4078: to be its local rows and the corresponding columns (a square submatrix);
4079: each processor's off-diagonal portion encompasses the remainder of the
4080: local matrix (a rectangular submatrix).
4082: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4084: When calling this routine with a single process communicator, a matrix of
4085: type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4086: type of communicator, use the construction mechanism:
4087: MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4088:
4089: By default, this format uses inodes (identical nodes) when possible.
4090: We search for consecutive rows with the same nonzero structure, thereby
4091: reusing matrix information to achieve increased efficiency.
4093: Options Database Keys:
4094: + -mat_no_inode - Do not use inodes
4095: . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4096: - -mat_aij_oneindex - Internally use indexing starting at 1
4097: rather than 0. Note that when calling MatSetValues(),
4098: the user still MUST index entries starting at 0!
4101: Example usage:
4102:
4103: Consider the following 8x8 matrix with 34 non-zero values, that is
4104: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4105: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4106: as follows:
4108: .vb
4109: 1 2 0 | 0 3 0 | 0 4
4110: Proc0 0 5 6 | 7 0 0 | 8 0
4111: 9 0 10 | 11 0 0 | 12 0
4112: -------------------------------------
4113: 13 0 14 | 15 16 17 | 0 0
4114: Proc1 0 18 0 | 19 20 21 | 0 0
4115: 0 0 0 | 22 23 0 | 24 0
4116: -------------------------------------
4117: Proc2 25 26 27 | 0 0 28 | 29 0
4118: 30 0 0 | 31 32 33 | 0 34
4119: .ve
4121: This can be represented as a collection of submatrices as:
4123: .vb
4124: A B C
4125: D E F
4126: G H I
4127: .ve
4129: Where the submatrices A,B,C are owned by proc0, D,E,F are
4130: owned by proc1, G,H,I are owned by proc2.
4132: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4133: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4134: The 'M','N' parameters are 8,8, and have the same values on all procs.
4136: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4137: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4138: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4139: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4140: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4141: matrix, and [DF] as another SeqAIJ matrix.
4143: When d_nz, o_nz parameters are specified, d_nz storage elements are
4144: allocated for every row of the local diagonal submatrix, and o_nz
4145: storage locations are allocated for every row of the OFF-DIAGONAL submat.
4146: One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4147: row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4148: In this case, the values of d_nz,o_nz are:
4149: .vb
4150: proc0 : d_nz = 2, o_nz = 2
4151: proc1 : d_nz = 3, o_nz = 2
4152: proc2 : d_nz = 1, o_nz = 4
4153: .ve
4154: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4155: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4156: for proc2, i.e., we are using 12+15+10=37 storage locations to store
4157: 34 values.
4159: When d_nnz, o_nnz parameters are specified, the storage is specified
4160: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4161: In the above case the values for d_nnz,o_nnz are:
4162: .vb
4163: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4164: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4165: proc2: d_nnz = [1,1] and o_nnz = [4,4]
4166: .ve
4167: Here the space allocated is the sum of all the above values, i.e., 34, and
4168: hence the preallocation is perfect.
4170: Level: intermediate
4172: .keywords: matrix, aij, compressed row, sparse, parallel
4174: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4175: MPIAIJ, MatCreateMPIAIJWithArrays()
4176: @*/
4177: PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4178: {
4180: PetscMPIInt size;
4183: MatCreate(comm,A);
4184: MatSetSizes(*A,m,n,M,N);
4185: MPI_Comm_size(comm,&size);
4186: if (size > 1) {
4187: MatSetType(*A,MATMPIAIJ);
4188: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
4189: } else {
4190: MatSetType(*A,MATSEQAIJ);
4191: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
4192: }
4193: return(0);
4194: }
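/*
   A minimal usage sketch of MatCreateAIJ(): a tridiagonal matrix of global size 8x8 with at most
   3 nonzeros per row in the diagonal block and at most 2 in the off-diagonal block, letting PETSc
   choose the row distribution.

      Mat         A;
      PetscInt    rstart,rend,row,ncols,cols[3];
      PetscScalar vals[3];

      MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,3,PETSC_NULL,2,PETSC_NULL,&A);
      MatGetOwnershipRange(A,&rstart,&rend);
      for (row=rstart; row<rend; row++) {
        ncols = 0;
        if (row > 0) {cols[ncols] = row-1; vals[ncols++] = -1.0;}
        cols[ncols] = row; vals[ncols++] = 2.0;
        if (row < 7) {cols[ncols] = row+1; vals[ncols++] = -1.0;}
        MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);
      }
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/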
4198: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4199: {
4200: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4203: *Ad = a->A;
4204: *Ao = a->B;
4205: *colmap = a->garray;
4206: return(0);
4207: }
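/*
   A minimal usage sketch of MatMPIAIJGetSeqAIJ(), assuming an assembled MPIAIJ matrix A: Ad and
   Ao are borrowed references to the diagonal and off-diagonal SeqAIJ blocks and must not be
   destroyed by the caller; colmap[k] is the global column index of local column k of Ao.

      Mat      Ad,Ao;
      PetscInt *colmap,nAo;

      MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
      MatGetSize(Ao,PETSC_NULL,&nAo);
*/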
4211: PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4212: {
4214: PetscInt i;
4215: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4218: if (coloring->ctype == IS_COLORING_GLOBAL) {
4219: ISColoringValue *allcolors,*colors;
4220: ISColoring ocoloring;
4222: /* set coloring for diagonal portion */
4223: MatSetColoring_SeqAIJ(a->A,coloring);
4225: /* set coloring for off-diagonal portion */
4226: ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
4227: PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);
4228: for (i=0; i<a->B->cmap->n; i++) {
4229: colors[i] = allcolors[a->garray[i]];
4230: }
4231: PetscFree(allcolors);
4232: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4233: MatSetColoring_SeqAIJ(a->B,ocoloring);
4234: ISColoringDestroy(&ocoloring);
4235: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4236: ISColoringValue *colors;
4237: PetscInt *larray;
4238: ISColoring ocoloring;
4240: /* set coloring for diagonal portion */
4241: PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);
4242: for (i=0; i<a->A->cmap->n; i++) {
4243: larray[i] = i + A->cmap->rstart;
4244: }
4245: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);
4246: PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);
4247: for (i=0; i<a->A->cmap->n; i++) {
4248: colors[i] = coloring->colors[larray[i]];
4249: }
4250: PetscFree(larray);
4251: ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);
4252: MatSetColoring_SeqAIJ(a->A,ocoloring);
4253: ISColoringDestroy(&ocoloring);
4255: /* set coloring for off-diagonal portion */
4256: PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);
4257: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);
4258: PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);
4259: for (i=0; i<a->B->cmap->n; i++) {
4260: colors[i] = coloring->colors[larray[i]];
4261: }
4262: PetscFree(larray);
4263: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4264: MatSetColoring_SeqAIJ(a->B,ocoloring);
4265: ISColoringDestroy(&ocoloring);
4266: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4268: return(0);
4269: }
4271: #if defined(PETSC_HAVE_ADIC)
4274: PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4275: {
4276: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4280: MatSetValuesAdic_SeqAIJ(a->A,advalues);
4281: MatSetValuesAdic_SeqAIJ(a->B,advalues);
4282: return(0);
4283: }
4284: #endif
4288: PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4289: {
4290: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4294: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
4295: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
4296: return(0);
4297: }
4301: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4302: {
4304: PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4305: PetscInt *indx;
4308: /* This routine will ONLY return MPIAIJ type matrix */
4309: MatGetSize(inmat,&m,&N);
4310: MatGetBlockSizes(inmat,&bs,&cbs);
4311: if (n == PETSC_DECIDE){
4312: PetscSplitOwnership(comm,&n,&N);
4313: }
4314: /* Check sum(n) = N */
4315: MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
4316: if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
4317:
4318: MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);
4319: rstart -= m;
4321: MatPreallocateInitialize(comm,m,n,dnz,onz);
4322: for (i=0;i<m;i++) {
4323: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
4324: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
4325: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
4326: }
4327:
4328: MatCreate(comm,outmat);
4329: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4330: MatSetBlockSizes(*outmat,bs,cbs);
4331: MatSetType(*outmat,MATMPIAIJ);
4332: MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
4333: MatPreallocateFinalize(dnz,onz);
4334: return(0);
4335: }
4339: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4340: {
4342: PetscInt m,N,i,rstart,nnz,Ii;
4343: PetscInt *indx;
4344: PetscScalar *values;
4347: MatGetSize(inmat,&m,&N);
4348: MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);
4349: for (i=0;i<m;i++) {
4350: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4351: Ii = i + rstart;
4352: MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
4353: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4354: }
4355: MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);
4356: MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);
4357: return(0);
4358: }
4362: /*@
4363: MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4364: matrices from each processor
4366: Collective on MPI_Comm
4368: Input Parameters:
4369: + comm - the communicators the parallel matrix will live on
4370: . inmat - the input sequential matrices
4371: . n - number of local columns (or PETSC_DECIDE)
4372: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4374: Output Parameter:
4375: . outmat - the parallel matrix generated
4377: Level: advanced
4379: Notes: The number of columns of the matrix in EACH processor MUST be the same.
4381: @*/
4382: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4383: {
4387: PetscLogEventBegin(MAT_Merge,inmat,0,0,0);
4388: if (scall == MAT_INITIAL_MATRIX){
4389: MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);
4390: }
4391: MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);
4392: PetscLogEventEnd(MAT_Merge,inmat,0,0,0);
4393: return(0);
4394: }
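/*
   A minimal usage sketch of MatCreateMPIAIJConcatenateSeqAIJ(), assuming each process holds an
   assembled SeqAIJ matrix seqA whose column count is the same on every process; the local
   matrices are stacked row-wise into one parallel MPIAIJ matrix, and the second call refills C
   after the values (but not the nonzero pattern) of seqA have changed.

      Mat C;

      MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
      MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
      MatDestroy(&C);
*/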
4398: PetscErrorCode MatFileSplit(Mat A,char *outfile)
4399: {
4400: PetscErrorCode ierr;
4401: PetscMPIInt rank;
4402: PetscInt m,N,i,rstart,nnz;
4403: size_t len;
4404: const PetscInt *indx;
4405: PetscViewer out;
4406: char *name;
4407: Mat B;
4408: const PetscScalar *values;
4411: MatGetLocalSize(A,&m,0);
4412: MatGetSize(A,0,&N);
4413: /* Should this be the type of the diagonal block of A? */
4414: MatCreate(PETSC_COMM_SELF,&B);
4415: MatSetSizes(B,m,N,m,N);
4416: MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);
4417: MatSetType(B,MATSEQAIJ);
4418: MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
4419: MatGetOwnershipRange(A,&rstart,0);
4420: for (i=0;i<m;i++) {
4421: MatGetRow(A,i+rstart,&nnz,&indx,&values);
4422: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
4423: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
4424: }
4425: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
4426: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
4428: MPI_Comm_rank(((PetscObject)A)->comm,&rank);
4429: PetscStrlen(outfile,&len);
4430: PetscMalloc((len+5)*sizeof(char),&name);
4431: sprintf(name,"%s.%d",outfile,rank);
4432: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);
4433: PetscFree(name);
4434: MatView(B,out);
4435: PetscViewerDestroy(&out);
4436: MatDestroy(&B);
4437: return(0);
4438: }
4440: extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4443: PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4444: {
4445: PetscErrorCode ierr;
4446: Mat_Merge_SeqsToMPI *merge;
4447: PetscContainer container;
4450: PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);
4451: if (container) {
4452: PetscContainerGetPointer(container,(void **)&merge);
4453: PetscFree(merge->id_r);
4454: PetscFree(merge->len_s);
4455: PetscFree(merge->len_r);
4456: PetscFree(merge->bi);
4457: PetscFree(merge->bj);
4458: PetscFree(merge->buf_ri[0]);
4459: PetscFree(merge->buf_ri);
4460: PetscFree(merge->buf_rj[0]);
4461: PetscFree(merge->buf_rj);
4462: PetscFree(merge->coi);
4463: PetscFree(merge->coj);
4464: PetscFree(merge->owners_co);
4465: PetscLayoutDestroy(&merge->rowmap);
4466: PetscFree(merge);
4467: PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);
4468: }
4469: MatDestroy_MPIAIJ(A);
4470: return(0);
4471: }
4473: #include <../src/mat/utils/freespace.h>
4474: #include <petscbt.h>
4478: PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4479: {
4480: PetscErrorCode ierr;
4481: MPI_Comm comm=((PetscObject)mpimat)->comm;
4482: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4483: PetscMPIInt size,rank,taga,*len_s;
4484: PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4485: PetscInt proc,m;
4486: PetscInt **buf_ri,**buf_rj;
4487: PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4488: PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
4489: MPI_Request *s_waits,*r_waits;
4490: MPI_Status *status;
4491: MatScalar *aa=a->a;
4492: MatScalar **abuf_r,*ba_i;
4493: Mat_Merge_SeqsToMPI *merge;
4494: PetscContainer container;
4497: PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);
4499: MPI_Comm_size(comm,&size);
4500: MPI_Comm_rank(comm,&rank);
4502: PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);
4503: PetscContainerGetPointer(container,(void **)&merge);
4505: bi = merge->bi;
4506: bj = merge->bj;
4507: buf_ri = merge->buf_ri;
4508: buf_rj = merge->buf_rj;
4510: PetscMalloc(size*sizeof(MPI_Status),&status);
4511: owners = merge->rowmap->range;
4512: len_s = merge->len_s;
4514: /* send and recv matrix values */
4515: /*-----------------------------*/
4516: PetscObjectGetNewTag((PetscObject)mpimat,&taga);
4517: PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);
4519: PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);
4520: for (proc=0,k=0; proc<size; proc++){
4521: if (!len_s[proc]) continue;
4522: i = owners[proc];
4523: MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
4524: k++;
4525: }
4527: if (merge->nrecv) {MPI_Waitall(merge->nrecv,r_waits,status);}
4528: if (merge->nsend) {MPI_Waitall(merge->nsend,s_waits,status);}
4529: PetscFree(status);
4531: PetscFree(s_waits);
4532: PetscFree(r_waits);
4534: /* insert mat values of mpimat */
4535: /*----------------------------*/
4536: PetscMalloc(N*sizeof(PetscScalar),&ba_i);
4537: PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);
4539: for (k=0; k<merge->nrecv; k++){
4540: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4541: nrows = *(buf_ri_k[k]);
4542: nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */
4543: nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */
4544: }
4546: /* set values of ba */
4547: m = merge->rowmap->n;
4548: for (i=0; i<m; i++) {
4549: arow = owners[rank] + i;
4550: bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
4551: bnzi = bi[i+1] - bi[i];
4552: PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));
4554: /* add local non-zero vals of this proc's seqmat into ba */
4555: anzi = ai[arow+1] - ai[arow];
4556: aj = a->j + ai[arow];
4557: aa = a->a + ai[arow];
4558: nextaj = 0;
4559: for (j=0; nextaj<anzi; j++){
4560: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4561: ba_i[j] += aa[nextaj++];
4562: }
4563: }
4565: /* add received vals into ba */
4566: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4567: /* i-th row */
4568: if (i == *nextrow[k]) {
4569: anzi = *(nextai[k]+1) - *nextai[k];
4570: aj = buf_rj[k] + *(nextai[k]);
4571: aa = abuf_r[k] + *(nextai[k]);
4572: nextaj = 0;
4573: for (j=0; nextaj<anzi; j++){
4574: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4575: ba_i[j] += aa[nextaj++];
4576: }
4577: }
4578: nextrow[k]++; nextai[k]++;
4579: }
4580: }
4581: MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
4582: }
4583: MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
4584: MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);
4586: PetscFree(abuf_r[0]);
4587: PetscFree(abuf_r);
4588: PetscFree(ba_i);
4589: PetscFree3(buf_ri_k,nextrow,nextai);
4590: PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);
4591: return(0);
4592: }
4594: extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4598: PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4599: {
4600: PetscErrorCode ierr;
4601: Mat B_mpi;
4602: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4603: PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4604: PetscInt **buf_rj,**buf_ri,**buf_ri_k;
4605: PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4606: PetscInt len,proc,*dnz,*onz,bs,cbs;
4607: PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4608: PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4609: MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
4610: MPI_Status *status;
4611: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
4612: PetscBT lnkbt;
4613: Mat_Merge_SeqsToMPI *merge;
4614: PetscContainer container;
4617: PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);
4619: /* make sure it is a PETSc comm */
4620: PetscCommDuplicate(comm,&comm,PETSC_NULL);
4621: MPI_Comm_size(comm,&size);
4622: MPI_Comm_rank(comm,&rank);
4624: PetscNew(Mat_Merge_SeqsToMPI,&merge);
4625: PetscMalloc(size*sizeof(MPI_Status),&status);
4627: /* determine row ownership */
4628: /*---------------------------------------------------------*/
4629: PetscLayoutCreate(comm,&merge->rowmap);
4630: PetscLayoutSetLocalSize(merge->rowmap,m);
4631: PetscLayoutSetSize(merge->rowmap,M);
4632: PetscLayoutSetBlockSize(merge->rowmap,1);
4633: PetscLayoutSetUp(merge->rowmap);
4634: PetscMalloc(size*sizeof(PetscMPIInt),&len_si);
4635: PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);
4637: m = merge->rowmap->n;
4638: M = merge->rowmap->N;
4639: owners = merge->rowmap->range;
4641: /* determine the number of messages to send, their lengths */
4642: /*---------------------------------------------------------*/
4643: len_s = merge->len_s;
4645: len = 0; /* length of buf_si[] */
4646: merge->nsend = 0;
4647: for (proc=0; proc<size; proc++){
4648: len_si[proc] = 0;
4649: if (proc == rank){
4650: len_s[proc] = 0;
4651: } else {
4652: len_si[proc] = owners[proc+1] - owners[proc] + 1;
4653: len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4654: }
4655: if (len_s[proc]) {
4656: merge->nsend++;
4657: nrows = 0;
4658: for (i=owners[proc]; i<owners[proc+1]; i++){
4659: if (ai[i+1] > ai[i]) nrows++;
4660: }
4661: len_si[proc] = 2*(nrows+1);
4662: len += len_si[proc];
4663: }
4664: }
4666: /* determine the number and length of messages to receive for ij-structure */
4667: /*-------------------------------------------------------------------------*/
4668: PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);
4669: PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);
4671: /* post the Irecv of j-structure */
4672: /*-------------------------------*/
4673: PetscCommGetNewTag(comm,&tagj);
4674: PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);
4676: /* post the Isend of j-structure */
4677: /*--------------------------------*/
4678: PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);
4680: for (proc=0, k=0; proc<size; proc++){
4681: if (!len_s[proc]) continue;
4682: i = owners[proc];
4683: MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
4684: k++;
4685: }
4687: /* receives and sends of j-structure are complete */
4688: /*------------------------------------------------*/
4689: if (merge->nrecv) {MPI_Waitall(merge->nrecv,rj_waits,status);}
4690: if (merge->nsend) {MPI_Waitall(merge->nsend,sj_waits,status);}
4692: /* send and recv i-structure */
4693: /*---------------------------*/
4694: PetscCommGetNewTag(comm,&tagi);
4695: PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);
4697: PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);
4698: buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4699: for (proc=0,k=0; proc<size; proc++){
4700: if (!len_s[proc]) continue;
4701: /* form outgoing message for i-structure:
4702: buf_si[0]: nrows to be sent
4703: [1:nrows]: row index (global)
4704: [nrows+1:2*nrows+1]: i-structure index
4705: */
4706: /*-------------------------------------------*/
4707: nrows = len_si[proc]/2 - 1;
4708: buf_si_i = buf_si + nrows+1;
4709: buf_si[0] = nrows;
4710: buf_si_i[0] = 0;
4711: nrows = 0;
4712: for (i=owners[proc]; i<owners[proc+1]; i++){
4713: anzi = ai[i+1] - ai[i];
4714: if (anzi) {
4715: buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4716: buf_si[nrows+1] = i-owners[proc]; /* local row index */
4717: nrows++;
4718: }
4719: }
4720: MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
4721: k++;
4722: buf_si += len_si[proc];
4723: }
4725: if (merge->nrecv) {MPI_Waitall(merge->nrecv,ri_waits,status);}
4726: if (merge->nsend) {MPI_Waitall(merge->nsend,si_waits,status);}
4728: PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);
4729: for (i=0; i<merge->nrecv; i++){
4730: PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);
4731: }
4733: PetscFree(len_si);
4734: PetscFree(len_ri);
4735: PetscFree(rj_waits);
4736: PetscFree2(si_waits,sj_waits);
4737: PetscFree(ri_waits);
4738: PetscFree(buf_s);
4739: PetscFree(status);
4741: /* compute a local seq matrix in each processor */
4742: /*----------------------------------------------*/
4743: /* allocate bi array and free space for accumulating nonzero column info */
4744: PetscMalloc((m+1)*sizeof(PetscInt),&bi);
4745: bi[0] = 0;
4747: /* create and initialize a linked list */
4748: nlnk = N+1;
4749: PetscLLCreate(N,N,nlnk,lnk,lnkbt);
4751: /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4752: len = 0;
4753: len = ai[owners[rank+1]] - ai[owners[rank]];
4754: PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);
4755: current_space = free_space;
4757: /* determine symbolic info for each local row */
4758: PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);
4760: for (k=0; k<merge->nrecv; k++){
4761: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4762: nrows = *buf_ri_k[k];
4763: nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4764: nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */
4765: }
4767: MatPreallocateInitialize(comm,m,n,dnz,onz);
4768: len = 0;
4769: for (i=0;i<m;i++) {
4770: bnzi = 0;
4771: /* add local non-zero cols of this proc's seqmat into lnk */
4772: arow = owners[rank] + i;
4773: anzi = ai[arow+1] - ai[arow];
4774: aj = a->j + ai[arow];
4775: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
4776: bnzi += nlnk;
4777: /* add received col data into lnk */
4778: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4779: if (i == *nextrow[k]) { /* i-th row */
4780: anzi = *(nextai[k]+1) - *nextai[k];
4781: aj = buf_rj[k] + *nextai[k];
4782: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
4783: bnzi += nlnk;
4784: nextrow[k]++; nextai[k]++;
4785: }
4786: }
4787: if (len < bnzi) len = bnzi; /* =max(bnzi) */
4789: /* if free space is not available, make more free space */
4790: if (current_space->local_remaining<bnzi) {
4791: PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);
4792: nspacedouble++;
4793: }
4794: /* copy data into free space, then initialize lnk */
4795: PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
4796: MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);
4798: current_space->array += bnzi;
4799: current_space->local_used += bnzi;
4800: current_space->local_remaining -= bnzi;
4802: bi[i+1] = bi[i] + bnzi;
4803: }
4805: PetscFree3(buf_ri_k,nextrow,nextai);
4807: PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);
4808: PetscFreeSpaceContiguous(&free_space,bj);
4809: PetscLLDestroy(lnk,lnkbt);
4811: /* create symbolic parallel matrix B_mpi */
4812: /*---------------------------------------*/
4813: MatGetBlockSizes(seqmat,&bs,&cbs);
4814: MatCreate(comm,&B_mpi);
4815: if (n==PETSC_DECIDE) {
4816: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);
4817: } else {
4818: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4819: }
4820: MatSetBlockSizes(B_mpi,bs,cbs);
4821: MatSetType(B_mpi,MATMPIAIJ);
4822: MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
4823: MatPreallocateFinalize(dnz,onz);
4824: MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
4826: /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4827: B_mpi->assembled = PETSC_FALSE;
4828: B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4829: merge->bi = bi;
4830: merge->bj = bj;
4831: merge->buf_ri = buf_ri;
4832: merge->buf_rj = buf_rj;
4833: merge->coi = PETSC_NULL;
4834: merge->coj = PETSC_NULL;
4835: merge->owners_co = PETSC_NULL;
4837: PetscCommDestroy(&comm);
4839: /* attach the supporting struct to B_mpi for reuse */
4840: PetscContainerCreate(PETSC_COMM_SELF,&container);
4841: PetscContainerSetPointer(container,merge);
4842: PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
4843: PetscContainerDestroy(&container);
4844: *mpimat = B_mpi;
4846: PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);
4847: return(0);
4848: }
4852: /*@C
4853: MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4854: matrices from each processor
4856: Collective on MPI_Comm
4858: Input Parameters:
4859: + comm - the communicators the parallel matrix will live on
4860: . seqmat - the input sequential matrices
4861: . m - number of local rows (or PETSC_DECIDE)
4862: . n - number of local columns (or PETSC_DECIDE)
4863: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4865: Output Parameter:
4866: . mpimat - the parallel matrix generated
4868: Level: advanced
4870: Notes:
4871: The dimensions of the sequential matrix in each processor MUST be the same.
4872: The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4873: destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4874: @*/
4875: PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4876: {
4877: PetscErrorCode ierr;
4878: PetscMPIInt size;
4881: MPI_Comm_size(comm,&size);
4882: if (size == 1){
4883: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4884: if (scall == MAT_INITIAL_MATRIX){
4885: MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);
4886: } else {
4887: MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);
4888: }
4889: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4890: return(0);
4891: }
4892: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4893: if (scall == MAT_INITIAL_MATRIX){
4894: MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);
4895: }
4896: MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);
4897: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4898: return(0);
4899: }
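/*
   A minimal usage sketch of MatCreateMPIAIJSumSeqAIJ(), assuming each process holds an assembled
   SeqAIJ matrix seqA of the same global dimensions; the parallel matrix C is the sum of the
   per-process contributions, and the MAT_REUSE_MATRIX call refills it after only the values of
   seqA have changed.

      Mat C;

      MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
      MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
      MatDestroy(&C);
*/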
4903: /*@
4904: MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4905: mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4906: with MatGetSize().
4908: Not Collective
4910: Input Parameters:
4911: + A - the matrix
4912: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4914: Output Parameter:
4915: . A_loc - the local sequential matrix generated
4917: Level: developer
4919: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4921: @*/
4922: PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4923: {
4924: PetscErrorCode ierr;
4925: Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
4926: Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4927: PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4928: MatScalar *aa=a->a,*ba=b->a,*cam;
4929: PetscScalar *ca;
4930: PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4931: PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
4932: PetscBool match;
4935: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);
4936: if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4937: PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);
4938: if (scall == MAT_INITIAL_MATRIX){
4939: PetscMalloc((1+am)*sizeof(PetscInt),&ci);
4940: ci[0] = 0;
4941: for (i=0; i<am; i++){
4942: ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4943: }
4944: PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);
4945: PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);
4946: k = 0;
4947: for (i=0; i<am; i++) {
4948: ncols_o = bi[i+1] - bi[i];
4949: ncols_d = ai[i+1] - ai[i];
4950: /* off-diagonal portion of A */
4951: for (jo=0; jo<ncols_o; jo++) {
4952: col = cmap[*bj];
4953: if (col >= cstart) break;
4954: cj[k] = col; bj++;
4955: ca[k++] = *ba++;
4956: }
4957: /* diagonal portion of A */
4958: for (j=0; j<ncols_d; j++) {
4959: cj[k] = cstart + *aj++;
4960: ca[k++] = *aa++;
4961: }
4962: /* off-diagonal portion of A */
4963: for (j=jo; j<ncols_o; j++) {
4964: cj[k] = cmap[*bj++];
4965: ca[k++] = *ba++;
4966: }
4967: }
4968: /* put together the new matrix */
4969: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);
4970: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4971: /* Since these are PETSc arrays, change flags to free them as necessary. */
4972: mat = (Mat_SeqAIJ*)(*A_loc)->data;
4973: mat->free_a = PETSC_TRUE;
4974: mat->free_ij = PETSC_TRUE;
4975: mat->nonew = 0;
4976: } else if (scall == MAT_REUSE_MATRIX){
4977: mat=(Mat_SeqAIJ*)(*A_loc)->data;
4978: ci = mat->i; cj = mat->j; cam = mat->a;
4979: for (i=0; i<am; i++) {
4980: /* off-diagonal portion of A */
4981: ncols_o = bi[i+1] - bi[i];
4982: for (jo=0; jo<ncols_o; jo++) {
4983: col = cmap[*bj];
4984: if (col >= cstart) break;
4985: *cam++ = *ba++; bj++;
4986: }
4987: /* diagonal portion of A */
4988: ncols_d = ai[i+1] - ai[i];
4989: for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4990: /* off-diagonal portion of A */
4991: for (j=jo; j<ncols_o; j++) {
4992: *cam++ = *ba++; bj++;
4993: }
4994: }
4995: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4996: PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);
4997: return(0);
4998: }
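/*
   A minimal usage sketch of MatMPIAIJGetLocalMat(), assuming an assembled MPIAIJ matrix A: the
   first call builds the mlocal x N sequential matrix A_loc, and the second refreshes its values
   after A has changed.

      Mat A_loc;

      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
      MatDestroy(&A_loc);
*/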
5002: /*@C
5003: MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5005: Not Collective
5007: Input Parameters:
5008: + A - the matrix
5009: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5010: - row, col - index sets of rows and columns to extract (or PETSC_NULL)
5012: Output Parameter:
5013: . A_loc - the local sequential matrix generated
5015: Level: developer
5017: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5019: @*/
5020: PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5021: {
5022: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5023: PetscErrorCode ierr;
5024: PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5025: IS isrowa,iscola;
5026: Mat *aloc;
5027: PetscBool match;
5030: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);
5031: if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5032: PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);
5033: if (!row){
5034: start = A->rmap->rstart; end = A->rmap->rend;
5035: ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
5036: } else {
5037: isrowa = *row;
5038: }
5039: if (!col){
5040: start = A->cmap->rstart;
5041: cmap = a->garray;
5042: nzA = a->A->cmap->n;
5043: nzB = a->B->cmap->n;
5044: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
5045: ncols = 0;
5046: for (i=0; i<nzB; i++) {
5047: if (cmap[i] < start) idx[ncols++] = cmap[i];
5048: else break;
5049: }
5050: imark = i;
5051: for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5052: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5053: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);
5054: } else {
5055: iscola = *col;
5056: }
5057: if (scall != MAT_INITIAL_MATRIX){
5058: PetscMalloc(sizeof(Mat),&aloc);
5059: aloc[0] = *A_loc;
5060: }
5061: MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);
5062: *A_loc = aloc[0];
5063: PetscFree(aloc);
5064: if (!row){
5065: ISDestroy(&isrowa);
5066: }
5067: if (!col){
5068: ISDestroy(&iscola);
5069: }
5070: PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);
5071: return(0);
5072: }
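/*
   A minimal usage sketch of MatMPIAIJGetLocalMatCondensed(), assuming an assembled MPIAIJ matrix
   A; passing PETSC_NULL for row and col takes all local rows and all locally nonzero columns.

      Mat A_loc;

      MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
      MatDestroy(&A_loc);
*/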
5076: /*@C
5077: MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5079: Collective on Mat
5081: Input Parameters:
5082: + A,B - the matrices in mpiaij format
5083: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5084: - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5086:    Output Parameters:
5087: + rowb, colb - index sets of rows and columns of B to extract
5088: - B_seq - the sequential matrix generated
5090: Level: developer
5092: @*/
5093: PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5094: {
5095: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5096: PetscErrorCode ierr;
5097: PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5098: IS isrowb,iscolb;
5099: Mat *bseq=PETSC_NULL;
5100:
5102: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5103: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5104: }
5105: PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);
5106:
5107: if (scall == MAT_INITIAL_MATRIX){
5108: start = A->cmap->rstart;
5109: cmap = a->garray;
5110: nzA = a->A->cmap->n;
5111: nzB = a->B->cmap->n;
5112: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
5113: ncols = 0;
5114: for (i=0; i<nzB; i++) { /* row < local row index */
5115: if (cmap[i] < start) idx[ncols++] = cmap[i];
5116: else break;
5117: }
5118: imark = i;
5119: for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
5120: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5121: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);
5122: ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);
5123: } else {
5124: if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5125: isrowb = *rowb; iscolb = *colb;
5126: PetscMalloc(sizeof(Mat),&bseq);
5127: bseq[0] = *B_seq;
5128: }
5129: MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);
5130: *B_seq = bseq[0];
5131: PetscFree(bseq);
5132: if (!rowb){
5133: ISDestroy(&isrowb);
5134: } else {
5135: *rowb = isrowb;
5136: }
5137: if (!colb){
5138: ISDestroy(&iscolb);
5139: } else {
5140: *colb = iscolb;
5141: }
5142: PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);
5143: return(0);
5144: }
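/*
   Usage sketch (added for illustration; not part of the original source): gather the rows of B
   matching the nonzero columns of the local part of A, keep the index sets, and reuse them when
   only the numerical values of B change. A and B are assumed to be compatible MPIAIJ matrices.

      IS  rowb,colb;
      Mat B_seq;
      MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
          ... B receives new numerical values, same nonzero pattern ...
      MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
      ISDestroy(&rowb); ISDestroy(&colb); MatDestroy(&B_seq);
*/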
5148: /*
5149:     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5150:     of the OFF-DIAGONAL portion of the local part of A
5152: Collective on Mat
5154: Input Parameters:
5155: + A,B - the matrices in mpiaij format
5156: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5158:     Output Parameters:
5159: + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5160: . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5161: . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5162: - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5164: Level: developer
5166: */
5167: PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5168: {
5169: VecScatter_MPI_General *gen_to,*gen_from;
5170: PetscErrorCode ierr;
5171: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5172: Mat_SeqAIJ *b_oth;
5173: VecScatter ctx=a->Mvctx;
5174: MPI_Comm comm=((PetscObject)ctx)->comm;
5175: PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5176: PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5177: PetscScalar *rvalues,*svalues;
5178: MatScalar *b_otha,*bufa,*bufA;
5179: PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5180: MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5181: MPI_Status *sstatus,rstatus;
5182: PetscMPIInt jj;
5183: PetscInt *cols,sbs,rbs;
5184: PetscScalar *vals;
5187: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5188:     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5189: }
5190: PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);
5191: MPI_Comm_rank(comm,&rank);
5193: gen_to = (VecScatter_MPI_General*)ctx->todata;
5194: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5195:   rvalues = gen_from->values; /* holds the lengths of the rows to be received */
5196:   svalues = gen_to->values;   /* holds the lengths of the rows to be sent */
5197: nrecvs = gen_from->n;
5198: nsends = gen_to->n;
5200: PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);
5201: srow = gen_to->indices; /* local row index to be sent */
5202: sstarts = gen_to->starts;
5203: sprocs = gen_to->procs;
5204: sstatus = gen_to->sstatus;
5205: sbs = gen_to->bs;
5206: rstarts = gen_from->starts;
5207: rprocs = gen_from->procs;
5208: rbs = gen_from->bs;
5210: if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5211: if (scall == MAT_INITIAL_MATRIX){
5212: /* i-array */
5213: /*---------*/
5214: /* post receives */
5215: for (i=0; i<nrecvs; i++){
5216: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5217: nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5218: MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5219: }
5221: /* pack the outgoing message */
5222: PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);
5223: sstartsj[0] = 0; rstartsj[0] = 0;
5224: len = 0; /* total length of j or a array to be sent */
5225: k = 0;
5226: for (i=0; i<nsends; i++){
5227: rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5228: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5229: for (j=0; j<nrows; j++) {
5230: row = srow[k] + B->rmap->range[rank]; /* global row idx */
5231: for (l=0; l<sbs; l++){
5232: MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL); /* rowlength */
5233: rowlen[j*sbs+l] = ncols;
5234: len += ncols;
5235: MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);
5236: }
5237: k++;
5238: }
5239: MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);
5240: sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5241: }
5242: /* recvs and sends of i-array are completed */
5243: i = nrecvs;
5244: while (i--) {
5245: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5246: }
5247: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5249: /* allocate buffers for sending j and a arrays */
5250: PetscMalloc((len+1)*sizeof(PetscInt),&bufj);
5251: PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);
5253: /* create i-array of B_oth */
5254: PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);
5255: b_othi[0] = 0;
5256: len = 0; /* total length of j or a array to be received */
5257: k = 0;
5258: for (i=0; i<nrecvs; i++){
5259: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5260:       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5261: for (j=0; j<nrows; j++) {
5262: b_othi[k+1] = b_othi[k] + rowlen[j];
5263: len += rowlen[j]; k++;
5264: }
5265: rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5266: }
5268:     /* allocate space for j and a arrays of B_oth */
5269: PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);
5270: PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);
5272: /* j-array */
5273: /*---------*/
5274: /* post receives of j-array */
5275: for (i=0; i<nrecvs; i++){
5276: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5277: MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5278: }
5280: /* pack the outgoing message j-array */
5281: k = 0;
5282: for (i=0; i<nsends; i++){
5283: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5284: bufJ = bufj+sstartsj[i];
5285: for (j=0; j<nrows; j++) {
5286: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5287: for (ll=0; ll<sbs; ll++){
5288: MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);
5289: for (l=0; l<ncols; l++){
5290: *bufJ++ = cols[l];
5291: }
5292: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);
5293: }
5294: }
5295: MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);
5296: }
5298: /* recvs and sends of j-array are completed */
5299: i = nrecvs;
5300: while (i--) {
5301: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5302: }
5303: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5304: } else if (scall == MAT_REUSE_MATRIX){
5305: sstartsj = *startsj_s;
5306: rstartsj = *startsj_r;
5307: bufa = *bufa_ptr;
5308: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5309: b_otha = b_oth->a;
5310: } else {
5311:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5312: }
5314: /* a-array */
5315: /*---------*/
5316: /* post receives of a-array */
5317: for (i=0; i<nrecvs; i++){
5318: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5319: MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
5320: }
5322: /* pack the outgoing message a-array */
5323: k = 0;
5324: for (i=0; i<nsends; i++){
5325: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5326: bufA = bufa+sstartsj[i];
5327: for (j=0; j<nrows; j++) {
5328: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5329: for (ll=0; ll<sbs; ll++){
5330: MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);
5331: for (l=0; l<ncols; l++){
5332: *bufA++ = vals[l];
5333: }
5334: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);
5335: }
5336: }
5337: MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
5338: }
5339: /* recvs and sends of a-array are completed */
5340: i = nrecvs;
5341: while (i--) {
5342: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5343: }
5344: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5345: PetscFree2(rwaits,swaits);
5347: if (scall == MAT_INITIAL_MATRIX){
5348: /* put together the new matrix */
5349: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);
5351: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5352: /* Since these are PETSc arrays, change flags to free them as necessary. */
5353: b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
5354: b_oth->free_a = PETSC_TRUE;
5355: b_oth->free_ij = PETSC_TRUE;
5356: b_oth->nonew = 0;
5358: PetscFree(bufj);
5359: if (!startsj_s || !bufa_ptr){
5360: PetscFree2(sstartsj,rstartsj);
5361: PetscFree(bufa_ptr);
5362: } else {
5363: *startsj_s = sstartsj;
5364: *startsj_r = rstartsj;
5365: *bufa_ptr = bufa;
5366: }
5367: }
5368: PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);
5369: return(0);
5370: }
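/*
   Usage sketch (added for illustration; not part of the original source). A typical caller,
   e.g. a parallel matrix-matrix product, creates B_oth once with MAT_INITIAL_MATRIX, keeps the
   communication buffers, and later refreshes only the numerical values. A and B are assumed to
   be compatible MPIAIJ matrices; the freeing calls mirror the PetscMalloc2()/PetscMalloc()
   allocations above.

      PetscInt  *startsj_s = PETSC_NULL,*startsj_r = PETSC_NULL;
      MatScalar *bufa      = PETSC_NULL;
      Mat       B_oth;
      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
          ... B receives new numerical values, same nonzero pattern ...
      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
      MatDestroy(&B_oth);
      PetscFree2(startsj_s,startsj_r);
      PetscFree(bufa);
*/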
5374: /*@C
5375: MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5377: Not Collective
5379:   Input Parameter:
5380: . A - The matrix in mpiaij format
5382:   Output Parameters:
5383: + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5384: . colmap - A map from global column index to local index into lvec
5385: - multScatter - A scatter from the argument of a matrix-vector product to lvec
5387: Level: developer
5389: @*/
5390: #if defined (PETSC_USE_CTABLE)
5391: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5392: #else
5393: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5394: #endif
5395: {
5396: Mat_MPIAIJ *a;
5403: a = (Mat_MPIAIJ *) A->data;
5404: if (lvec) *lvec = a->lvec;
5405: if (colmap) *colmap = a->colmap;
5406: if (multScatter) *multScatter = a->Mvctx;
5407: return(0);
5408: }
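/*
   Usage sketch (added for illustration; not part of the original source): fetch the scatter and
   local work vector and perform the same communication MatMult uses to bring in the off-process
   entries of an input vector x. Passing PETSC_NULL skips the colmap output (whose type depends
   on PETSC_USE_CTABLE). A is an assembled MPIAIJ matrix and x a vector compatible with A's
   column layout; lvec and Mvctx remain owned by the matrix and must not be destroyed here.

      Vec        lvec;
      VecScatter Mvctx;
      MatGetCommunicationStructs(A,&lvec,PETSC_NULL,&Mvctx);
      VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);
          ... lvec now holds the off-process entries of x needed by this process ...
*/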
5410: EXTERN_C_BEGIN
5411: extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5412: extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5413: extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5414: EXTERN_C_END
5418: /*
5419:     Computes C = A*B as (B'*A')', since forming the dense-times-sparse product A*B directly is untenable
5421:                n                p              p
5422:             (     )          (     )        (     )
5423:           m (  A  )   *    n (  B  )   =  m (  C  )
5424:             (     )          (     )        (     )
5426: */
5427: PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5428: {
5429: PetscErrorCode ierr;
5430: Mat At,Bt,Ct;
5433: MatTranspose(A,MAT_INITIAL_MATRIX,&At);
5434: MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);
5435: MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);
5436: MatDestroy(&At);
5437: MatDestroy(&Bt);
5438: MatTranspose(Ct,MAT_REUSE_MATRIX,&C);
5439: MatDestroy(&Ct);
5440: return(0);
5441: }
5445: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5446: {
5448: PetscInt m=A->rmap->n,n=B->cmap->n;
5449: Mat Cmat;
5452:   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5453: MatCreate(((PetscObject)A)->comm,&Cmat);
5454: MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
5455: MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);
5456: MatSetType(Cmat,MATMPIDENSE);
5457: MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);
5458: MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);
5459: MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);
5460: *C = Cmat;
5461: (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5462: return(0);
5463: }
5465: /* ----------------------------------------------------------------*/
5468: PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5469: {
5473: if (scall == MAT_INITIAL_MATRIX){
5474: MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);
5475: }
5476: MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);
5477: return(0);
5478: }
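/*
   Usage sketch (added for illustration; not part of the original source): with the symbolic and
   numeric routines above registered, the product of an MPIDENSE matrix A and an MPIAIJ matrix B
   is obtained through the generic interface; the transpose-based path is chosen internally.
   A and B are assumed to have compatible layouts (A->cmap->n == B->rmap->n).

      Mat C;
      MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);
          ... use C = A*B ...
      MatDestroy(&C);
*/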
5480: EXTERN_C_BEGIN
5481: #if defined(PETSC_HAVE_MUMPS)
5482: extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5483: #endif
5484: #if defined(PETSC_HAVE_PASTIX)
5485: extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5486: #endif
5487: #if defined(PETSC_HAVE_SUPERLU_DIST)
5488: extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5489: #endif
5490: #if defined(PETSC_HAVE_SPOOLES)
5491: extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5492: #endif
5493: EXTERN_C_END
5495: /*MC
5496: MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5498: Options Database Keys:
5499: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5501: Level: beginner
5503: .seealso: MatCreateAIJ()
5504: M*/
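/*
   Creation sketch (added for illustration; not part of the original source): the usual way to
   obtain an MPIAIJ matrix is through the generic Mat interface, with -mat_type mpiaij (or aij)
   selecting this implementation on a parallel communicator. The global sizes and preallocation
   numbers below are placeholders.

      Mat A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
      MatSetFromOptions(A);                                   honors -mat_type mpiaij
      MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
          ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd(), use, MatDestroy(&A) ...
*/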
5506: EXTERN_C_BEGIN
5509: PetscErrorCode MatCreate_MPIAIJ(Mat B)
5510: {
5511: Mat_MPIAIJ *b;
5513: PetscMPIInt size;
5516: MPI_Comm_size(((PetscObject)B)->comm,&size);
5518: PetscNewLog(B,Mat_MPIAIJ,&b);
5519: B->data = (void*)b;
5520: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
5521: B->assembled = PETSC_FALSE;
5523: B->insertmode = NOT_SET_VALUES;
5524: b->size = size;
5525: MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);
5527: /* build cache for off array entries formed */
5528: MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);
5529: b->donotstash = PETSC_FALSE;
5530: b->colmap = 0;
5531: b->garray = 0;
5532: b->roworiented = PETSC_TRUE;
5534: /* stuff used for matrix vector multiply */
5535: b->lvec = PETSC_NULL;
5536: b->Mvctx = PETSC_NULL;
5538: /* stuff for MatGetRow() */
5539: b->rowindices = 0;
5540: b->rowvalues = 0;
5541: b->getrowactive = PETSC_FALSE;
5543: #if defined(PETSC_HAVE_SPOOLES)
5544: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5545: "MatGetFactor_mpiaij_spooles",
5546: MatGetFactor_mpiaij_spooles);
5547: #endif
5548: #if defined(PETSC_HAVE_MUMPS)
5549: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5550: "MatGetFactor_aij_mumps",
5551: MatGetFactor_aij_mumps);
5552: #endif
5553: #if defined(PETSC_HAVE_PASTIX)
5554: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5555: "MatGetFactor_mpiaij_pastix",
5556: MatGetFactor_mpiaij_pastix);
5557: #endif
5558: #if defined(PETSC_HAVE_SUPERLU_DIST)
5559: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5560: "MatGetFactor_mpiaij_superlu_dist",
5561: MatGetFactor_mpiaij_superlu_dist);
5562: #endif
5563: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5564: "MatStoreValues_MPIAIJ",
5565: MatStoreValues_MPIAIJ);
5566: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5567: "MatRetrieveValues_MPIAIJ",
5568: MatRetrieveValues_MPIAIJ);
5569: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5570: "MatGetDiagonalBlock_MPIAIJ",
5571: MatGetDiagonalBlock_MPIAIJ);
5572: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5573: "MatIsTranspose_MPIAIJ",
5574: MatIsTranspose_MPIAIJ);
5575: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5576: "MatMPIAIJSetPreallocation_MPIAIJ",
5577: MatMPIAIJSetPreallocation_MPIAIJ);
5578: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5579: "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5580: MatMPIAIJSetPreallocationCSR_MPIAIJ);
5581: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5582: "MatDiagonalScaleLocal_MPIAIJ",
5583: MatDiagonalScaleLocal_MPIAIJ);
5584: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5585: "MatConvert_MPIAIJ_MPIAIJPERM",
5586: MatConvert_MPIAIJ_MPIAIJPERM);
5587: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5588: "MatConvert_MPIAIJ_MPIAIJCRL",
5589: MatConvert_MPIAIJ_MPIAIJCRL);
5590: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5591: "MatConvert_MPIAIJ_MPISBAIJ",
5592: MatConvert_MPIAIJ_MPISBAIJ);
5593: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5594: "MatMatMult_MPIDense_MPIAIJ",
5595: MatMatMult_MPIDense_MPIAIJ);
5596: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5597: "MatMatMultSymbolic_MPIDense_MPIAIJ",
5598: MatMatMultSymbolic_MPIDense_MPIAIJ);
5599: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5600: "MatMatMultNumeric_MPIDense_MPIAIJ",
5601: MatMatMultNumeric_MPIDense_MPIAIJ);
5602: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);
5603: return(0);
5604: }
5605: EXTERN_C_END
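/*
   Sketch (added for illustration; not part of the original source): the PetscObjectComposeFunctionDynamic()
   registrations above are what make conversions of an assembled MPIAIJ matrix reachable through
   the generic interface, e.g. for a symmetric matrix A:

      Mat S;
      MatConvert(A,MATMPISBAIJ,MAT_INITIAL_MATRIX,&S);        dispatches to MatConvert_MPIAIJ_MPISBAIJ
      MatDestroy(&S);
*/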
5609: /*@
5610: MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5611: and "off-diagonal" part of the matrix in CSR format.
5613: Collective on MPI_Comm
5615: Input Parameters:
5616: + comm - MPI communicator
5617: . m - number of local rows (Cannot be PETSC_DECIDE)
5618: .  n - This value should be the same as the local size used in creating the
5619:        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5620:        calculated if N is given). For square matrices n is almost always m.
5621: .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5622: .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5623: . i - row indices for "diagonal" portion of matrix
5624: . j - column indices
5625: . a - matrix values
5626: . oi - row indices for "off-diagonal" portion of matrix
5627: . oj - column indices
5628: - oa - matrix values
5630: Output Parameter:
5631: . mat - the matrix
5633: Level: advanced
5635: Notes:
5636: The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5637: must free the arrays once the matrix has been destroyed and not before.
5639: The i and j indices are 0 based
5640:
5641: See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5643: This sets local rows and cannot be used to set off-processor values.
5645: You cannot later use MatSetValues() to change values in this matrix.
5647: .keywords: matrix, aij, compressed row, sparse, parallel
5649: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5650: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5651: @*/
5652: PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5653: PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5654: {
5656: Mat_MPIAIJ *maij;
5659: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5660: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5661: if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5662: MatCreate(comm,mat);
5663: MatSetSizes(*mat,m,n,M,N);
5664: MatSetType(*mat,MATMPIAIJ);
5665: maij = (Mat_MPIAIJ*) (*mat)->data;
5666: maij->donotstash = PETSC_TRUE;
5667: (*mat)->preallocated = PETSC_TRUE;
5669: PetscLayoutSetUp((*mat)->rmap);
5670: PetscLayoutSetUp((*mat)->cmap);
5672: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);
5673: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);
5675: MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);
5676: MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);
5677: MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);
5678: MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);
5680: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
5681: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
5682: return(0);
5683: }
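/*
   Construction sketch (added for illustration; not part of the original source): a two-process
   assembly of the 2x2 matrix [1 2; 4 3], one row per process (m = n = 1 locally). Following the
   MatCreateSeqAIJWithArrays() calls above, the diagonal-block column indices j are read here as
   local to the diagonal block and the off-diagonal column indices oj as global; treat that
   reading, and the concrete numbers, as assumptions of this sketch.

      rank 0 (row 0 = [1 2]):
         PetscInt    i[]  = {0,1}, j[]  = {0};   PetscScalar a[]  = {1.0};    diagonal entry (0,0)
         PetscInt    oi[] = {0,1}, oj[] = {1};   PetscScalar oa[] = {2.0};    off-diagonal entry (0,1)
      rank 1 (row 1 = [4 3]):
         PetscInt    i[]  = {0,1}, j[]  = {0};   PetscScalar a[]  = {3.0};    diagonal entry (1,1), local column 0
         PetscInt    oi[] = {0,1}, oj[] = {0};   PetscScalar oa[] = {4.0};    off-diagonal entry (1,0), global column 0
      both ranks:
         Mat A;
         MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,
                                        i,j,a,oi,oj,oa,&A);
             ... the arrays must remain valid until after MatDestroy(&A) ...
*/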
5685: /*
5686: Special version for direct calls from Fortran
5687: */
5688: #include <petsc-private/fortranimpl.h>
5690: #if defined(PETSC_HAVE_FORTRAN_CAPS)
5691: #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5692: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5693: #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5694: #endif
5696: /* Change these macros so they can be used in a void function */
5697: #undef CHKERRQ
5698: #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5699: #undef SETERRQ2
5700: #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5701: #undef SETERRQ3
5702: #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5703: #undef SETERRQ
5704: #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5706: EXTERN_C_BEGIN
5709: void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5710: {
5711: Mat mat = *mmat;
5712: PetscInt m = *mm, n = *mn;
5713: InsertMode addv = *maddv;
5714: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
5715: PetscScalar value;
5716: PetscErrorCode ierr;
5718: MatCheckPreallocated(mat,1);
5719: if (mat->insertmode == NOT_SET_VALUES) {
5720: mat->insertmode = addv;
5721: }
5722: #if defined(PETSC_USE_DEBUG)
5723: else if (mat->insertmode != addv) {
5724: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5725: }
5726: #endif
5727: {
5728: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5729: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5730: PetscBool roworiented = aij->roworiented;
5732: /* Some Variables required in the macro */
5733: Mat A = aij->A;
5734: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
5735: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5736: MatScalar *aa = a->a;
5737: PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5738: Mat B = aij->B;
5739: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
5740: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5741: MatScalar *ba = b->a;
5743: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5744: PetscInt nonew = a->nonew;
5745: MatScalar *ap1,*ap2;
5748: for (i=0; i<m; i++) {
5749: if (im[i] < 0) continue;
5750: #if defined(PETSC_USE_DEBUG)
5751: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5752: #endif
5753: if (im[i] >= rstart && im[i] < rend) {
5754: row = im[i] - rstart;
5755: lastcol1 = -1;
5756: rp1 = aj + ai[row];
5757: ap1 = aa + ai[row];
5758: rmax1 = aimax[row];
5759: nrow1 = ailen[row];
5760: low1 = 0;
5761: high1 = nrow1;
5762: lastcol2 = -1;
5763: rp2 = bj + bi[row];
5764: ap2 = ba + bi[row];
5765: rmax2 = bimax[row];
5766: nrow2 = bilen[row];
5767: low2 = 0;
5768: high2 = nrow2;
5770: for (j=0; j<n; j++) {
5771: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5772: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5773: if (in[j] >= cstart && in[j] < cend){
5774: col = in[j] - cstart;
5775: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5776: } else if (in[j] < 0) continue;
5777: #if defined(PETSC_USE_DEBUG)
5778: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5779: #endif
5780: else {
5781: if (mat->was_assembled) {
5782: if (!aij->colmap) {
5783: MatCreateColmap_MPIAIJ_Private(mat);
5784: }
5785: #if defined (PETSC_USE_CTABLE)
5786: PetscTableFind(aij->colmap,in[j]+1,&col);
5787: col--;
5788: #else
5789: col = aij->colmap[in[j]] - 1;
5790: #endif
5791: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5792: MatDisAssemble_MPIAIJ(mat);
5793: col = in[j];
5794: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5795: B = aij->B;
5796: b = (Mat_SeqAIJ*)B->data;
5797: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5798: rp2 = bj + bi[row];
5799: ap2 = ba + bi[row];
5800: rmax2 = bimax[row];
5801: nrow2 = bilen[row];
5802: low2 = 0;
5803: high2 = nrow2;
5804: bm = aij->B->rmap->n;
5805: ba = b->a;
5806: }
5807: } else col = in[j];
5808: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5809: }
5810: }
5811: } else {
5812: if (!aij->donotstash) {
5813: if (roworiented) {
5814: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5815: } else {
5816: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5817: }
5818: }
5819: }
5820: }}
5821: PetscFunctionReturnVoid();
5822: }
5823: EXTERN_C_END