Actual source code: mpiaij.c
petsc-3.5.2 2014-09-08
2: #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
3: #include <petsc-private/vecimpl.h>
4: #include <petscblaslapack.h>
5: #include <petscsf.h>
7: /*MC
8: MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10: This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11: and MATMPIAIJ otherwise. As a result, for single process communicators,
12: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13: for communicators controlling multiple processes. It is recommended that you call both of
14: the above preallocation routines for simplicity.
16: Options Database Keys:
17: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19: Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
20: enough of them exist.
22: Level: beginner
24: .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25: M*/
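/*
   Usage sketch (illustration only, not part of mpiaij.c): creating an AIJ matrix and calling
   both preallocation routines as recommended above. The helper name, sizes, and nonzero
   estimates are placeholders; error checking is omitted to match this rendered listing.
*/
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,Mat *newmat)
{
  Mat A;

  MatCreate(comm,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);  /* 100x100 global, PETSc chooses the local split */
  MatSetType(A,MATAIJ);
  MatSetFromOptions(A);                              /* honors -mat_type aij and friends */
  /* call both; whichever matches the communicator size takes effect, the other is a no-op */
  MatSeqAIJSetPreallocation(A,5,NULL);
  MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
  *newmat = A;
  return(0);
}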
27: /*MC
28: MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30: This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31: and MATMPIAIJCRL otherwise. As a result, for single process communicators,
32: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33: for communicators controlling multiple processes. It is recommended that you call both of
34: the above preallocation routines for simplicity.
36: Options Database Keys:
37: . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39: Level: beginner
41: .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42: M*/
46: PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47: {
48: PetscErrorCode ierr;
49: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
50: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data;
51: Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data;
52: const PetscInt *ia,*ib;
53: const MatScalar *aa,*bb;
54: PetscInt na,nb,i,j,*rows,cnt=0,n0rows;
55: PetscInt m = M->rmap->n,rstart = M->rmap->rstart;
58: *keptrows = 0;
59: ia = a->i;
60: ib = b->i;
61: for (i=0; i<m; i++) {
62: na = ia[i+1] - ia[i];
63: nb = ib[i+1] - ib[i];
64: if (!na && !nb) {
65: cnt++;
66: goto ok1;
67: }
68: aa = a->a + ia[i];
69: for (j=0; j<na; j++) {
70: if (aa[j] != 0.0) goto ok1;
71: }
72: bb = b->a + ib[i];
73: for (j=0; j <nb; j++) {
74: if (bb[j] != 0.0) goto ok1;
75: }
76: cnt++;
77: ok1:;
78: }
79: MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));
80: if (!n0rows) return(0);
81: PetscMalloc1((M->rmap->n-cnt),&rows);
82: cnt = 0;
83: for (i=0; i<m; i++) {
84: na = ia[i+1] - ia[i];
85: nb = ib[i+1] - ib[i];
86: if (!na && !nb) continue;
87: aa = a->a + ia[i];
88: for (j=0; j<na;j++) {
89: if (aa[j] != 0.0) {
90: rows[cnt++] = rstart + i;
91: goto ok2;
92: }
93: }
94: bb = b->a + ib[i];
95: for (j=0; j<nb; j++) {
96: if (bb[j] != 0.0) {
97: rows[cnt++] = rstart + i;
98: goto ok2;
99: }
100: }
101: ok2:;
102: }
103: ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);
104: return(0);
105: }
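/*
   Usage sketch (hypothetical helper, not part of mpiaij.c): the public MatFindNonzeroRows()
   dispatches to the routine above for MPIAIJ. In this implementation keptrows comes back NULL
   when every row already contains a nonzero, so callers must handle that case.
*/
static PetscErrorCode ExampleKeepNonzeroRows(Mat A)
{
  IS keptrows;

  MatFindNonzeroRows(A,&keptrows);
  if (keptrows) {                                    /* some rows were entirely zero */
    ISView(keptrows,PETSC_VIEWER_STDOUT_WORLD);
    ISDestroy(&keptrows);
  }
  return(0);
}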
109: PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110: {
111: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data;
113: PetscInt i,rstart,nrows,*rows;
116: *zrows = NULL;
117: MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);
118: MatGetOwnershipRange(M,&rstart,NULL);
119: for (i=0; i<nrows; i++) rows[i] += rstart;
120: ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);
121: return(0);
122: }
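/*
   Usage sketch (hypothetical helper, not part of mpiaij.c): MatFindZeroDiagonals() reaches the
   routine above for MPIAIJ and returns the global indices of locally owned rows whose diagonal
   entry is missing or zero.
*/
static PetscErrorCode ExampleFindZeroDiagonals(Mat A)
{
  IS zrows;

  MatFindZeroDiagonals(A,&zrows);
  ISView(zrows,PETSC_VIEWER_STDOUT_WORLD);
  ISDestroy(&zrows);
  return(0);
}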
126: PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127: {
129: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
130: PetscInt i,n,*garray = aij->garray;
131: Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data;
132: Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data;
133: PetscReal *work;
136: MatGetSize(A,NULL,&n);
137: PetscCalloc1(n,&work);
138: if (type == NORM_2) {
139: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141: }
142: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144: }
145: } else if (type == NORM_1) {
146: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148: }
149: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151: }
152: } else if (type == NORM_INFINITY) {
153: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154: work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155: }
156: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157: work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158: }
160: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161: if (type == NORM_INFINITY) {
162: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
163: } else {
164: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
165: }
166: PetscFree(work);
167: if (type == NORM_2) {
168: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169: }
170: return(0);
171: }
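/*
   Usage sketch (hypothetical helper, not part of mpiaij.c): MatGetColumnNorms() ends up in the
   routine above for MPIAIJ. The norms array must hold one entry per *global* column and, because
   of the Allreduce above, every rank receives the full result.
*/
static PetscErrorCode ExampleColumnNorms(Mat A)
{
  PetscInt   N;
  PetscReal *norms;

  MatGetSize(A,NULL,&N);
  PetscMalloc1(N,&norms);
  MatGetColumnNorms(A,NORM_2,norms);
  /* ... use norms[0..N-1] ... */
  PetscFree(norms);
  return(0);
}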
175: /*
176: Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177: MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179: Only for square matrices
181: Used by a preconditioner, hence PETSC_EXTERN
182: */
183: PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184: {
185: PetscMPIInt rank,size;
186: PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
188: Mat mat;
189: Mat_SeqAIJ *gmata;
190: PetscMPIInt tag;
191: MPI_Status status;
192: PetscBool aij;
193: MatScalar *gmataa,*ao,*ad,*gmataarestore=0;
196: MPI_Comm_rank(comm,&rank);
197: MPI_Comm_size(comm,&size);
198: if (!rank) {
199: PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);
200: if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201: }
202: if (reuse == MAT_INITIAL_MATRIX) {
203: MatCreate(comm,&mat);
204: MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
205: MatGetBlockSizes(gmat,&bses[0],&bses[1]);
206: MPI_Bcast(bses,2,MPIU_INT,0,comm);
207: MatSetBlockSizes(mat,bses[0],bses[1]);
208: MatSetType(mat,MATAIJ);
209: PetscMalloc1((size+1),&rowners);
210: PetscMalloc2(m,&dlens,m,&olens);
211: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
213: rowners[0] = 0;
214: for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
215: rstart = rowners[rank];
216: rend = rowners[rank+1];
217: PetscObjectGetNewTag((PetscObject)mat,&tag);
218: if (!rank) {
219: gmata = (Mat_SeqAIJ*) gmat->data;
220: /* send row lengths to all processors */
221: for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
222: for (i=1; i<size; i++) {
223: MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
224: }
225: /* determine the diagonal and off-diagonal nonzero counts for each row */
226: PetscMemzero(olens,m*sizeof(PetscInt));
227: PetscCalloc1(m,&ld);
228: jj = 0;
229: for (i=0; i<m; i++) {
230: for (j=0; j<dlens[i]; j++) {
231: if (gmata->j[jj] < rstart) ld[i]++;
232: if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
233: jj++;
234: }
235: }
236: /* send column indices to other processes */
237: for (i=1; i<size; i++) {
238: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
239: MPI_Send(&nz,1,MPIU_INT,i,tag,comm);
240: MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);
241: }
243: /* send numerical values to other processes */
244: for (i=1; i<size; i++) {
245: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
246: MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
247: }
248: gmataa = gmata->a;
249: gmataj = gmata->j;
251: } else {
252: /* receive row lengths */
253: MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);
254: /* receive column indices */
255: MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);
256: PetscMalloc2(nz,&gmataa,nz,&gmataj);
257: MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);
258: /* determine the diagonal and off-diagonal nonzero counts for each row */
259: PetscMemzero(olens,m*sizeof(PetscInt));
260: PetscCalloc1(m,&ld);
261: jj = 0;
262: for (i=0; i<m; i++) {
263: for (j=0; j<dlens[i]; j++) {
264: if (gmataj[jj] < rstart) ld[i]++;
265: if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
266: jj++;
267: }
268: }
269: /* receive numerical values */
270: PetscMemzero(gmataa,nz*sizeof(PetscScalar));
271: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
272: }
273: /* set preallocation */
274: for (i=0; i<m; i++) {
275: dlens[i] -= olens[i];
276: }
277: MatSeqAIJSetPreallocation(mat,0,dlens);
278: MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);
280: for (i=0; i<m; i++) {
281: dlens[i] += olens[i];
282: }
283: cnt = 0;
284: for (i=0; i<m; i++) {
285: row = rstart + i;
286: MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);
287: cnt += dlens[i];
288: }
289: if (rank) {
290: PetscFree2(gmataa,gmataj);
291: }
292: PetscFree2(dlens,olens);
293: PetscFree(rowners);
295: ((Mat_MPIAIJ*)(mat->data))->ld = ld;
297: *inmat = mat;
298: } else { /* column indices are already set; only need to move over numerical values from process 0 */
299: Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
300: Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
301: mat = *inmat;
302: PetscObjectGetNewTag((PetscObject)mat,&tag);
303: if (!rank) {
304: /* send numerical values to other processes */
305: gmata = (Mat_SeqAIJ*) gmat->data;
306: MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);
307: gmataa = gmata->a;
308: for (i=1; i<size; i++) {
309: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
310: MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
311: }
312: nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
313: } else {
314: /* receive numerical values from process 0 */
315: nz = Ad->nz + Ao->nz;
316: PetscMalloc1(nz,&gmataa); gmataarestore = gmataa;
317: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
318: }
319: /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
320: ld = ((Mat_MPIAIJ*)(mat->data))->ld;
321: ad = Ad->a;
322: ao = Ao->a;
323: if (mat->rmap->n) {
324: i = 0;
325: nz = ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
326: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
327: }
328: for (i=1; i<mat->rmap->n; i++) {
329: nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
330: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
331: }
332: i--;
333: if (mat->rmap->n) {
334: nz = Ao->i[i+1] - Ao->i[i] - ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));
335: }
336: if (rank) {
337: PetscFree(gmataarestore);
338: }
339: }
340: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
341: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
342: return(0);
343: }
345: /*
346: Local utility routine that creates a mapping from the global column
347: number to the local number in the off-diagonal part of the local
348: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
349: a slightly higher hash table cost; without it, it is not scalable (each processor
350: has an order N integer array but it is fast to access).
351: */
354: PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
355: {
356: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
358: PetscInt n = aij->B->cmap->n,i;
361: if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
362: #if defined(PETSC_USE_CTABLE)
363: PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);
364: for (i=0; i<n; i++) {
365: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);
366: }
367: #else
368: PetscCalloc1((mat->cmap->N+1),&aij->colmap);
369: PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));
370: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
371: #endif
372: return(0);
373: }
375: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
376: { \
377: if (col <= lastcol1) low1 = 0; \
378: else high1 = nrow1; \
379: lastcol1 = col;\
380: while (high1-low1 > 5) { \
381: t = (low1+high1)/2; \
382: if (rp1[t] > col) high1 = t; \
383: else low1 = t; \
384: } \
385: for (_i=low1; _i<high1; _i++) { \
386: if (rp1[_i] > col) break; \
387: if (rp1[_i] == col) { \
388: if (addv == ADD_VALUES) ap1[_i] += value; \
389: else ap1[_i] = value; \
390: goto a_noinsert; \
391: } \
392: } \
393: if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
394: if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
395: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
396: MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
397: N = nrow1++ - 1; a->nz++; high1++; \
398: /* shift up all the later entries in this row */ \
399: for (ii=N; ii>=_i; ii--) { \
400: rp1[ii+1] = rp1[ii]; \
401: ap1[ii+1] = ap1[ii]; \
402: } \
403: rp1[_i] = col; \
404: ap1[_i] = value; \
405: A->nonzerostate++;\
406: a_noinsert: ; \
407: ailen[row] = nrow1; \
408: }
411: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
412: { \
413: if (col <= lastcol2) low2 = 0; \
414: else high2 = nrow2; \
415: lastcol2 = col; \
416: while (high2-low2 > 5) { \
417: t = (low2+high2)/2; \
418: if (rp2[t] > col) high2 = t; \
419: else low2 = t; \
420: } \
421: for (_i=low2; _i<high2; _i++) { \
422: if (rp2[_i] > col) break; \
423: if (rp2[_i] == col) { \
424: if (addv == ADD_VALUES) ap2[_i] += value; \
425: else ap2[_i] = value; \
426: goto b_noinsert; \
427: } \
428: } \
429: if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
430: if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
431: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
432: MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
433: N = nrow2++ - 1; b->nz++; high2++; \
434: /* shift up all the later entries in this row */ \
435: for (ii=N; ii>=_i; ii--) { \
436: rp2[ii+1] = rp2[ii]; \
437: ap2[ii+1] = ap2[ii]; \
438: } \
439: rp2[_i] = col; \
440: ap2[_i] = value; \
441: B->nonzerostate++; \
442: b_noinsert: ; \
443: bilen[row] = nrow2; \
444: }
448: PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449: {
450: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
451: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
453: PetscInt l,*garray = mat->garray,diag;
456: /* code only works for square matrices A */
458: /* find the number of entries in this row that lie to the left of the diagonal block */
459: MatGetOwnershipRange(A,&diag,0);
460: row = row - diag;
461: for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462: if (garray[b->j[b->i[row]+l]] > diag) break;
463: }
464: PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));
466: /* diagonal part */
467: PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));
469: /* right of diagonal part */
470: PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));
471: return(0);
472: }
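/*
   Usage sketch (hypothetical helper, not part of mpiaij.c): the public MatSetValuesRow() reaches
   the routine above for MPIAIJ. grow is a global row number owned by this process (the code above
   subtracts the ownership start itself), and vrow must supply one value per stored nonzero of that
   row, ordered by increasing global column; the nonzero pattern is assumed to be set already.
*/
static PetscErrorCode ExampleReplaceRowValues(Mat A,PetscInt grow,const PetscScalar vrow[])
{
  MatSetValuesRow(A,grow,vrow);
  return(0);
}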
476: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477: {
478: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
479: PetscScalar value;
481: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
482: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483: PetscBool roworiented = aij->roworiented;
485: /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
486: Mat A = aij->A;
487: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
488: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489: MatScalar *aa = a->a;
490: PetscBool ignorezeroentries = a->ignorezeroentries;
491: Mat B = aij->B;
492: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
493: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494: MatScalar *ba = b->a;
496: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497: PetscInt nonew;
498: MatScalar *ap1,*ap2;
501: for (i=0; i<m; i++) {
502: if (im[i] < 0) continue;
503: #if defined(PETSC_USE_DEBUG)
504: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
505: #endif
506: if (im[i] >= rstart && im[i] < rend) {
507: row = im[i] - rstart;
508: lastcol1 = -1;
509: rp1 = aj + ai[row];
510: ap1 = aa + ai[row];
511: rmax1 = aimax[row];
512: nrow1 = ailen[row];
513: low1 = 0;
514: high1 = nrow1;
515: lastcol2 = -1;
516: rp2 = bj + bi[row];
517: ap2 = ba + bi[row];
518: rmax2 = bimax[row];
519: nrow2 = bilen[row];
520: low2 = 0;
521: high2 = nrow2;
523: for (j=0; j<n; j++) {
524: if (roworiented) value = v[i*n+j];
525: else value = v[i+j*m];
526: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527: if (in[j] >= cstart && in[j] < cend) {
528: col = in[j] - cstart;
529: nonew = a->nonew;
530: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531: } else if (in[j] < 0) continue;
532: #if defined(PETSC_USE_DEBUG)
533: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534: #endif
535: else {
536: if (mat->was_assembled) {
537: if (!aij->colmap) {
538: MatCreateColmap_MPIAIJ_Private(mat);
539: }
540: #if defined(PETSC_USE_CTABLE)
541: PetscTableFind(aij->colmap,in[j]+1,&col);
542: col--;
543: #else
544: col = aij->colmap[in[j]] - 1;
545: #endif
546: if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547: MatDisAssemble_MPIAIJ(mat);
548: col = in[j];
549: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550: B = aij->B;
551: b = (Mat_SeqAIJ*)B->data;
552: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553: rp2 = bj + bi[row];
554: ap2 = ba + bi[row];
555: rmax2 = bimax[row];
556: nrow2 = bilen[row];
557: low2 = 0;
558: high2 = nrow2;
559: bm = aij->B->rmap->n;
560: ba = b->a;
561: } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562: } else col = in[j];
563: nonew = b->nonew;
564: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565: }
566: }
567: } else {
568: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569: if (!aij->donotstash) {
570: mat->assembled = PETSC_FALSE;
571: if (roworiented) {
572: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
573: } else {
574: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
575: }
576: }
577: }
578: }
579: return(0);
580: }
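/*
   Usage sketch (hypothetical helper, not part of mpiaij.c): a typical MatSetValues() loop that
   ends up in the routine above. Assumes a square matrix already preallocated for a tridiagonal
   pattern; error checking is omitted to match this rendered listing.
*/
static PetscErrorCode ExampleInsertTridiagonal(Mat A)
{
  PetscInt    rstart,rend,N,i,cols[3];
  PetscScalar vals[3] = {-1.0,2.0,-1.0};

  MatGetSize(A,&N,NULL);
  MatGetOwnershipRange(A,&rstart,&rend);             /* each rank inserts only its own rows */
  for (i=rstart; i<rend; i++) {
    cols[0] = i-1; cols[1] = i; cols[2] = i+1;
    if (i == 0)        {MatSetValues(A,1,&i,2,cols+1,vals+1,INSERT_VALUES);}
    else if (i == N-1) {MatSetValues(A,1,&i,2,cols,vals,INSERT_VALUES);}
    else               {MatSetValues(A,1,&i,3,cols,vals,INSERT_VALUES);}
  }
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  return(0);
}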
584: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585: {
586: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
588: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
592: for (i=0; i<m; i++) {
593: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595: if (idxm[i] >= rstart && idxm[i] < rend) {
596: row = idxm[i] - rstart;
597: for (j=0; j<n; j++) {
598: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600: if (idxn[j] >= cstart && idxn[j] < cend) {
601: col = idxn[j] - cstart;
602: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
603: } else {
604: if (!aij->colmap) {
605: MatCreateColmap_MPIAIJ_Private(mat);
606: }
607: #if defined(PETSC_USE_CTABLE)
608: PetscTableFind(aij->colmap,idxn[j]+1,&col);
609: col--;
610: #else
611: col = aij->colmap[idxn[j]] - 1;
612: #endif
613: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614: else {
615: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
616: }
617: }
618: }
619: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620: }
621: return(0);
622: }
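/*
   Usage sketch (hypothetical helper, not part of mpiaij.c): as the SETERRQ above enforces,
   MatGetValues() on an MPIAIJ matrix can only return entries from locally owned rows, so callers
   should restrict queries to their own ownership range.
*/
static PetscErrorCode ExampleGetLocalEntry(Mat A,PetscInt grow,PetscInt gcol,PetscScalar *val)
{
  PetscInt rstart,rend;

  MatGetOwnershipRange(A,&rstart,&rend);
  if (grow < rstart || grow >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only locally owned rows can be queried");
  MatGetValues(A,1,&grow,1,&gcol,val);
  return(0);
}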
624: extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
628: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
629: {
630: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
632: PetscInt nstash,reallocs;
633: InsertMode addv;
636: if (aij->donotstash || mat->nooffprocentries) return(0);
638: /* make sure all processors are either in INSERTMODE or ADDMODE */
639: MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
640: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
641: mat->insertmode = addv; /* in case this processor had no cache */
643: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
644: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
645: PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
646: return(0);
647: }
651: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
652: {
653: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
654: Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data;
656: PetscMPIInt n;
657: PetscInt i,j,rstart,ncols,flg;
658: PetscInt *row,*col;
659: PetscBool other_disassembled;
660: PetscScalar *val;
661: InsertMode addv = mat->insertmode;
663: /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
666: if (!aij->donotstash && !mat->nooffprocentries) {
667: while (1) {
668: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
669: if (!flg) break;
671: for (i=0; i<n; ) {
672: /* Now identify the consecutive vals belonging to the same row */
673: for (j=i,rstart=row[j]; j<n; j++) {
674: if (row[j] != rstart) break;
675: }
676: if (j < n) ncols = j-i;
677: else ncols = n-i;
678: /* Now assemble all these values with a single function call */
679: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
681: i = j;
682: }
683: }
684: MatStashScatterEnd_Private(&mat->stash);
685: }
686: MatAssemblyBegin(aij->A,mode);
687: MatAssemblyEnd(aij->A,mode);
689: /* determine if any processor has disassembled; if so, we must
690: also disassemble ourselves so that we may reassemble. */
691: /*
692: if the nonzero structure of submatrix B cannot change then we know that
693: no processor disassembled, and thus we can skip this stuff
694: */
695: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
696: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));
697: if (mat->was_assembled && !other_disassembled) {
698: MatDisAssemble_MPIAIJ(mat);
699: }
700: }
701: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702: MatSetUpMultiply_MPIAIJ(mat);
703: }
704: MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);
705: MatAssemblyBegin(aij->B,mode);
706: MatAssemblyEnd(aij->B,mode);
708: PetscFree2(aij->rowvalues,aij->rowindices);
710: aij->rowvalues = 0;
712: /* used by MatAXPY() */
713: a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
714: a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */
716: VecDestroy(&aij->diag);
717: if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
719: /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
720: if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
721: PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
722: MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));
723: }
724: return(0);
725: }
729: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
730: {
731: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
735: MatZeroEntries(l->A);
736: MatZeroEntries(l->B);
737: return(0);
738: }
742: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
743: {
744: Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
745: PetscInt *owners = A->rmap->range;
746: PetscInt n = A->rmap->n;
747: PetscSF sf;
748: PetscInt *lrows;
749: PetscSFNode *rrows;
750: PetscInt r, p = 0, len = 0;
754: /* Create SF where leaves are input rows and roots are owned rows */
755: PetscMalloc1(n, &lrows);
756: for (r = 0; r < n; ++r) lrows[r] = -1;
757: if (!A->nooffproczerorows) {PetscMalloc1(N, &rrows);}
758: for (r = 0; r < N; ++r) {
759: const PetscInt idx = rows[r];
760: if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
761: if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
762: PetscLayoutFindOwner(A->rmap,idx,&p);
763: }
764: if (A->nooffproczerorows) {
765: if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
766: lrows[len++] = idx - owners[p];
767: } else {
768: rrows[r].rank = p;
769: rrows[r].index = rows[r] - owners[p];
770: }
771: }
772: if (!A->nooffproczerorows) {
773: PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);
774: PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);
775: /* Collect flags for rows to be zeroed */
776: PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);
777: PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);
778: PetscSFDestroy(&sf);
779: /* Compress and put in row numbers */
780: for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
781: }
782: /* fix right hand side if needed */
783: if (x && b) {
784: const PetscScalar *xx;
785: PetscScalar *bb;
787: VecGetArrayRead(x, &xx);
788: VecGetArray(b, &bb);
789: for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
790: VecRestoreArrayRead(x, &xx);
791: VecRestoreArray(b, &bb);
792: }
793: /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
794: MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);
795: if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
796: MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);
797: } else if (diag != 0.0) {
798: MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);
799: if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
800: for (r = 0; r < len; ++r) {
801: const PetscInt row = lrows[r] + A->rmap->rstart;
802: MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);
803: }
804: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
805: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
806: } else {
807: MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);
808: }
809: PetscFree(lrows);
811: /* only change matrix nonzero state if pattern was allowed to be changed */
812: if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
813: PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
814: MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));
815: }
816: return(0);
817: }
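/*
   Usage sketch (hypothetical helper, not part of mpiaij.c): applying Dirichlet-type boundary
   conditions with MatZeroRows(), which reaches the routine above for MPIAIJ. With diag = 1.0 and
   the optional x,b vectors, the zeroed rows get 1.0 on the diagonal and b[row] is set to x[row].
   The choice of "boundary" rows here is a placeholder.
*/
static PetscErrorCode ExampleZeroBoundaryRows(Mat A,Vec x,Vec b)
{
  PetscInt rows[2],rstart,rend,N,n = 0;

  MatGetSize(A,&N,NULL);
  MatGetOwnershipRange(A,&rstart,&rend);
  if (rstart == 0) rows[n++] = 0;                    /* first global row, if owned */
  if (rend == N)   rows[n++] = N-1;                  /* last global row, if owned */
  MatZeroRows(A,n,rows,1.0,x,b);                     /* collective; n may be 0 on some ranks */
  return(0);
}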
821: PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
822: {
823: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
824: PetscErrorCode ierr;
825: PetscMPIInt n = A->rmap->n;
826: PetscInt i,j,r,m,p = 0,len = 0;
827: PetscInt *lrows,*owners = A->rmap->range;
828: PetscSFNode *rrows;
829: PetscSF sf;
830: const PetscScalar *xx;
831: PetscScalar *bb,*mask;
832: Vec xmask,lmask;
833: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data;
834: const PetscInt *aj, *ii,*ridx;
835: PetscScalar *aa;
838: /* Create SF where leaves are input rows and roots are owned rows */
839: PetscMalloc1(n, &lrows);
840: for (r = 0; r < n; ++r) lrows[r] = -1;
841: PetscMalloc1(N, &rrows);
842: for (r = 0; r < N; ++r) {
843: const PetscInt idx = rows[r];
844: if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
845: if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
846: PetscLayoutFindOwner(A->rmap,idx,&p);
847: }
848: rrows[r].rank = p;
849: rrows[r].index = rows[r] - owners[p];
850: }
851: PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);
852: PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);
853: /* Collect flags for rows to be zeroed */
854: PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
855: PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
856: PetscSFDestroy(&sf);
857: /* Compress and put in row numbers */
858: for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
859: /* zero diagonal part of matrix */
860: MatZeroRowsColumns(l->A,len,lrows,diag,x,b);
861: /* handle off diagonal part of matrix */
862: MatGetVecs(A,&xmask,NULL);
863: VecDuplicate(l->lvec,&lmask);
864: VecGetArray(xmask,&bb);
865: for (i=0; i<len; i++) bb[lrows[i]] = 1;
866: VecRestoreArray(xmask,&bb);
867: VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
868: VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
869: VecDestroy(&xmask);
870: if (x) {
871: VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);
872: VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);
873: VecGetArrayRead(l->lvec,&xx);
874: VecGetArray(b,&bb);
875: }
876: VecGetArray(lmask,&mask);
877: /* remove zeroed rows of off diagonal matrix */
878: ii = aij->i;
879: for (i=0; i<len; i++) {
880: PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));
881: }
882: /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
883: if (aij->compressedrow.use) {
884: m = aij->compressedrow.nrows;
885: ii = aij->compressedrow.i;
886: ridx = aij->compressedrow.rindex;
887: for (i=0; i<m; i++) {
888: n = ii[i+1] - ii[i];
889: aj = aij->j + ii[i];
890: aa = aij->a + ii[i];
892: for (j=0; j<n; j++) {
893: if (PetscAbsScalar(mask[*aj])) {
894: if (b) bb[*ridx] -= *aa*xx[*aj];
895: *aa = 0.0;
896: }
897: aa++;
898: aj++;
899: }
900: ridx++;
901: }
902: } else { /* do not use compressed row format */
903: m = l->B->rmap->n;
904: for (i=0; i<m; i++) {
905: n = ii[i+1] - ii[i];
906: aj = aij->j + ii[i];
907: aa = aij->a + ii[i];
908: for (j=0; j<n; j++) {
909: if (PetscAbsScalar(mask[*aj])) {
910: if (b) bb[i] -= *aa*xx[*aj];
911: *aa = 0.0;
912: }
913: aa++;
914: aj++;
915: }
916: }
917: }
918: if (x) {
919: VecRestoreArray(b,&bb);
920: VecRestoreArrayRead(l->lvec,&xx);
921: }
922: VecRestoreArray(lmask,&mask);
923: VecDestroy(&lmask);
924: PetscFree(lrows);
926: /* only change matrix nonzero state if pattern was allowed to be changed */
927: if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
928: PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
929: MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));
930: }
931: return(0);
932: }
936: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
937: {
938: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
940: PetscInt nt;
943: VecGetLocalSize(xx,&nt);
944: if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
945: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
946: (*a->A->ops->mult)(a->A,xx,yy);
947: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
948: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
949: return(0);
950: }
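/*
   Usage sketch (hypothetical helper, not part of mpiaij.c): a plain MatMult() dispatches to the
   routine above for MPIAIJ, overlapping the scatter of off-process vector entries with the
   multiply by the diagonal block A before applying the off-diagonal block B.
*/
static PetscErrorCode ExampleMatMult(Mat A)
{
  Vec x,y;

  MatGetVecs(A,&x,&y);                               /* x matches the columns, y the rows */
  VecSet(x,1.0);
  MatMult(A,x,y);
  VecDestroy(&x);
  VecDestroy(&y);
  return(0);
}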
954: PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
955: {
956: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
960: MatMultDiagonalBlock(a->A,bb,xx);
961: return(0);
962: }
966: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
967: {
968: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
972: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
973: (*a->A->ops->multadd)(a->A,xx,yy,zz);
974: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
975: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
976: return(0);
977: }
981: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
982: {
983: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
985: PetscBool merged;
988: VecScatterGetMerged(a->Mvctx,&merged);
989: /* do nondiagonal part */
990: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
991: if (!merged) {
992: /* send it on its way */
993: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
994: /* do local part */
995: (*a->A->ops->multtranspose)(a->A,xx,yy);
996: /* receive remote parts: note this assumes the values are not actually */
997: /* added into yy until the next line */
998: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
999: } else {
1000: /* do local part */
1001: (*a->A->ops->multtranspose)(a->A,xx,yy);
1002: /* send it on its way */
1003: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1004: /* values actually were received in the Begin() but we need to call this nop */
1005: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1006: }
1007: return(0);
1008: }
1012: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
1013: {
1014: MPI_Comm comm;
1015: Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1016: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1017: IS Me,Notme;
1019: PetscInt M,N,first,last,*notme,i;
1020: PetscMPIInt size;
1023: /* Easy test: symmetric diagonal block */
1024: Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1025: MatIsTranspose(Adia,Bdia,tol,f);
1026: if (!*f) return(0);
1027: PetscObjectGetComm((PetscObject)Amat,&comm);
1028: MPI_Comm_size(comm,&size);
1029: if (size == 1) return(0);
1031: /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1032: MatGetSize(Amat,&M,&N);
1033: MatGetOwnershipRange(Amat,&first,&last);
1034: PetscMalloc1((N-last+first),&notme);
1035: for (i=0; i<first; i++) notme[i] = i;
1036: for (i=last; i<M; i++) notme[i-last+first] = i;
1037: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);
1038: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
1039: MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
1040: Aoff = Aoffs[0];
1041: MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
1042: Boff = Boffs[0];
1043: MatIsTranspose(Aoff,Boff,tol,f);
1044: MatDestroyMatrices(1,&Aoffs);
1045: MatDestroyMatrices(1,&Boffs);
1046: ISDestroy(&Me);
1047: ISDestroy(&Notme);
1048: PetscFree(notme);
1049: return(0);
1050: }
1054: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1055: {
1056: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1060: /* do nondiagonal part */
1061: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1062: /* send it on its way */
1063: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1064: /* do local part */
1065: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1066: /* receive remote parts */
1067: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1068: return(0);
1069: }
1071: /*
1072: This only works correctly for square matrices where the subblock A->A is the
1073: diagonal block
1074: */
1077: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1078: {
1080: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1083: if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1084: if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1085: MatGetDiagonal(a->A,v);
1086: return(0);
1087: }
1091: PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1092: {
1093: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1097: MatScale(a->A,aa);
1098: MatScale(a->B,aa);
1099: return(0);
1100: }
1104: PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1105: {
1107: Mat_Redundant *redund = *redundant;
1108: PetscInt i;
1111: *redundant = NULL;
1112: if (redund){
1113: if (redund->matseq) { /* via MatGetSubMatrices() */
1114: ISDestroy(&redund->isrow);
1115: ISDestroy(&redund->iscol);
1116: MatDestroy(&redund->matseq[0]);
1117: PetscFree(redund->matseq);
1118: } else {
1119: PetscFree2(redund->send_rank,redund->recv_rank);
1120: PetscFree(redund->sbuf_j);
1121: PetscFree(redund->sbuf_a);
1122: for (i=0; i<redund->nrecvs; i++) {
1123: PetscFree(redund->rbuf_j[i]);
1124: PetscFree(redund->rbuf_a[i]);
1125: }
1126: PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);
1127: }
1129: if (redund->psubcomm) {
1130: PetscSubcommDestroy(&redund->psubcomm);
1131: }
1132: PetscFree(redund);
1133: }
1134: return(0);
1135: }
1139: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1140: {
1141: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1145: #if defined(PETSC_USE_LOG)
1146: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1147: #endif
1148: MatDestroy_Redundant(&aij->redundant);
1149: MatStashDestroy_Private(&mat->stash);
1150: VecDestroy(&aij->diag);
1151: MatDestroy(&aij->A);
1152: MatDestroy(&aij->B);
1153: #if defined(PETSC_USE_CTABLE)
1154: PetscTableDestroy(&aij->colmap);
1155: #else
1156: PetscFree(aij->colmap);
1157: #endif
1158: PetscFree(aij->garray);
1159: VecDestroy(&aij->lvec);
1160: VecScatterDestroy(&aij->Mvctx);
1161: PetscFree2(aij->rowvalues,aij->rowindices);
1162: PetscFree(aij->ld);
1163: PetscFree(mat->data);
1165: PetscObjectChangeTypeName((PetscObject)mat,0);
1166: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);
1167: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);
1168: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);
1169: PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);
1170: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);
1171: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);
1172: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);
1173: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);
1174: return(0);
1175: }
1179: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1180: {
1181: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1182: Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data;
1183: Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data;
1185: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
1186: int fd;
1187: PetscInt nz,header[4],*row_lengths,*range=0,rlen,i;
1188: PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1189: PetscScalar *column_values;
1190: PetscInt message_count,flowcontrolcount;
1191: FILE *file;
1194: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
1195: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
1196: nz = A->nz + B->nz;
1197: if (!rank) {
1198: header[0] = MAT_FILE_CLASSID;
1199: header[1] = mat->rmap->N;
1200: header[2] = mat->cmap->N;
1202: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1203: PetscViewerBinaryGetDescriptor(viewer,&fd);
1204: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
1205: /* get largest number of rows any processor has */
1206: rlen = mat->rmap->n;
1207: range = mat->rmap->range;
1208: for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1209: } else {
1210: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1211: rlen = mat->rmap->n;
1212: }
1214: /* load up the local row counts */
1215: PetscMalloc1((rlen+1),&row_lengths);
1216: for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1218: /* store the row lengths to the file */
1219: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1220: if (!rank) {
1221: PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);
1222: for (i=1; i<size; i++) {
1223: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1224: rlen = range[i+1] - range[i];
1225: MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));
1226: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);
1227: }
1228: PetscViewerFlowControlEndMaster(viewer,&message_count);
1229: } else {
1230: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1231: MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1232: PetscViewerFlowControlEndWorker(viewer,&message_count);
1233: }
1234: PetscFree(row_lengths);
1236: /* load up the local column indices */
1237: nzmax = nz; /* process 0 needs a buffer as large as the largest number of nonzeros on any process */
1238: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));
1239: PetscMalloc1((nzmax+1),&column_indices);
1240: cnt = 0;
1241: for (i=0; i<mat->rmap->n; i++) {
1242: for (j=B->i[i]; j<B->i[i+1]; j++) {
1243: if ((col = garray[B->j[j]]) > cstart) break;
1244: column_indices[cnt++] = col;
1245: }
1246: for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1247: for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1248: }
1249: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1251: /* store the column indices to the file */
1252: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1253: if (!rank) {
1254: MPI_Status status;
1255: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
1256: for (i=1; i<size; i++) {
1257: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1258: MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1259: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1260: MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));
1261: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);
1262: }
1263: PetscViewerFlowControlEndMaster(viewer,&message_count);
1264: } else {
1265: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1266: MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1267: MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1268: PetscViewerFlowControlEndWorker(viewer,&message_count);
1269: }
1270: PetscFree(column_indices);
1272: /* load up the local column values */
1273: PetscMalloc1((nzmax+1),&column_values);
1274: cnt = 0;
1275: for (i=0; i<mat->rmap->n; i++) {
1276: for (j=B->i[i]; j<B->i[i+1]; j++) {
1277: if (garray[B->j[j]] > cstart) break;
1278: column_values[cnt++] = B->a[j];
1279: }
1280: for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1281: for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1282: }
1283: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1285: /* store the column values to the file */
1286: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1287: if (!rank) {
1288: MPI_Status status;
1289: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
1290: for (i=1; i<size; i++) {
1291: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1292: MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1293: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1294: MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));
1295: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);
1296: }
1297: PetscViewerFlowControlEndMaster(viewer,&message_count);
1298: } else {
1299: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1300: MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1301: MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
1302: PetscViewerFlowControlEndWorker(viewer,&message_count);
1303: }
1304: PetscFree(column_values);
1306: PetscViewerBinaryGetInfoPointer(viewer,&file);
1307: if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1308: return(0);
1309: }
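/*
   Usage sketch (hypothetical helper, not part of mpiaij.c): saving an MPIAIJ matrix with a binary
   viewer; in parallel MatView() ends up in MatView_MPIAIJ_Binary() above. The file name is a
   placeholder supplied by the caller.
*/
static PetscErrorCode ExampleSaveMatrixBinary(Mat A,const char filename[])
{
  PetscViewer viewer;

  PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);
  MatView(A,viewer);
  PetscViewerDestroy(&viewer);
  return(0);
}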
1311: #include <petscdraw.h>
1314: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1315: {
1316: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1317: PetscErrorCode ierr;
1318: PetscMPIInt rank = aij->rank,size = aij->size;
1319: PetscBool isdraw,iascii,isbinary;
1320: PetscViewer sviewer;
1321: PetscViewerFormat format;
1324: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1325: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1326: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1327: if (iascii) {
1328: PetscViewerGetFormat(viewer,&format);
1329: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1330: MatInfo info;
1331: PetscBool inodes;
1333: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
1334: MatGetInfo(mat,MAT_LOCAL,&info);
1335: MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);
1336: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
1337: if (!inodes) {
1338: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1339: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1340: } else {
1341: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1342: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1343: }
1344: MatGetInfo(aij->A,MAT_LOCAL,&info);
1345: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1346: MatGetInfo(aij->B,MAT_LOCAL,&info);
1347: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1348: PetscViewerFlush(viewer);
1349: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
1350: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
1351: VecScatterView(aij->Mvctx,viewer);
1352: return(0);
1353: } else if (format == PETSC_VIEWER_ASCII_INFO) {
1354: PetscInt inodecount,inodelimit,*inodes;
1355: MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);
1356: if (inodes) {
1357: PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);
1358: } else {
1359: PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");
1360: }
1361: return(0);
1362: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1363: return(0);
1364: }
1365: } else if (isbinary) {
1366: if (size == 1) {
1367: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1368: MatView(aij->A,viewer);
1369: } else {
1370: MatView_MPIAIJ_Binary(mat,viewer);
1371: }
1372: return(0);
1373: } else if (isdraw) {
1374: PetscDraw draw;
1375: PetscBool isnull;
1376: PetscViewerDrawGetDraw(viewer,0,&draw);
1377: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1378: }
1380: {
1381: /* assemble the entire matrix onto first processor. */
1382: Mat A;
1383: Mat_SeqAIJ *Aloc;
1384: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1385: MatScalar *a;
1387: MatCreate(PetscObjectComm((PetscObject)mat),&A);
1388: if (!rank) {
1389: MatSetSizes(A,M,N,M,N);
1390: } else {
1391: MatSetSizes(A,0,0,M,N);
1392: }
1393: /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1394: MatSetType(A,MATMPIAIJ);
1395: MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);
1396: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
1397: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
1399: /* copy over the A part */
1400: Aloc = (Mat_SeqAIJ*)aij->A->data;
1401: m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1402: row = mat->rmap->rstart;
1403: for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1404: for (i=0; i<m; i++) {
1405: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
1406: row++;
1407: a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1408: }
1409: aj = Aloc->j;
1410: for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1412: /* copy over the B part */
1413: Aloc = (Mat_SeqAIJ*)aij->B->data;
1414: m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1415: row = mat->rmap->rstart;
1416: PetscMalloc1((ai[m]+1),&cols);
1417: ct = cols;
1418: for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1419: for (i=0; i<m; i++) {
1420: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
1421: row++;
1422: a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1423: }
1424: PetscFree(ct);
1425: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1426: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1427: /*
1428: Everyone has to call this routine to draw the matrix since the graphics waits are
1429: synchronized across all processors that share the PetscDraw object
1430: */
1431: PetscViewerGetSingleton(viewer,&sviewer);
1432: if (!rank) {
1433: MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);
1434: }
1435: PetscViewerRestoreSingleton(viewer,&sviewer);
1436: MatDestroy(&A);
1437: }
1438: return(0);
1439: }
1443: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1444: {
1446: PetscBool iascii,isdraw,issocket,isbinary;
1449: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1450: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1451: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1452: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1453: if (iascii || isdraw || isbinary || issocket) {
1454: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1455: }
1456: return(0);
1457: }
1461: PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1462: {
1463: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1465: Vec bb1 = 0;
1466: PetscBool hasop;
1469: if (flag == SOR_APPLY_UPPER) {
1470: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1471: return(0);
1472: }
1474: if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1475: VecDuplicate(bb,&bb1);
1476: }
1478: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1479: if (flag & SOR_ZERO_INITIAL_GUESS) {
1480: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1481: its--;
1482: }
1484: while (its--) {
1485: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1486: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1488: /* update rhs: bb1 = bb - B*x */
1489: VecScale(mat->lvec,-1.0);
1490: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1492: /* local sweep */
1493: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);
1494: }
1495: } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1496: if (flag & SOR_ZERO_INITIAL_GUESS) {
1497: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1498: its--;
1499: }
1500: while (its--) {
1501: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1502: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1504: /* update rhs: bb1 = bb - B*x */
1505: VecScale(mat->lvec,-1.0);
1506: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1508: /* local sweep */
1509: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);
1510: }
1511: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1512: if (flag & SOR_ZERO_INITIAL_GUESS) {
1513: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1514: its--;
1515: }
1516: while (its--) {
1517: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1518: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1520: /* update rhs: bb1 = bb - B*x */
1521: VecScale(mat->lvec,-1.0);
1522: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1524: /* local sweep */
1525: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);
1526: }
1527: } else if (flag & SOR_EISENSTAT) {
1528: Vec xx1;
1530: VecDuplicate(bb,&xx1);
1531: (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);
1533: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1534: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1535: if (!mat->diag) {
1536: MatGetVecs(matin,&mat->diag,NULL);
1537: MatGetDiagonal(matin,mat->diag);
1538: }
1539: MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);
1540: if (hasop) {
1541: MatMultDiagonalBlock(matin,xx,bb1);
1542: } else {
1543: VecPointwiseMult(bb1,mat->diag,xx);
1544: }
1545: VecAYPX(bb1,(omega-2.0)/omega,bb);
1547: MatMultAdd(mat->B,mat->lvec,bb1,bb1);
1549: /* local sweep */
1550: (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);
1551: VecAXPY(xx,1.0,xx1);
1552: VecDestroy(&xx1);
1553: } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1555: VecDestroy(&bb1);
1556: return(0);
1557: }
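/*
   Usage sketch (hypothetical helper, not part of mpiaij.c): MatSOR() is normally reached through
   PCSOR (-pc_type sor), but can be called directly. As the routine above shows, only the
   SOR_LOCAL_* variants, SOR_EISENSTAT, and SOR_APPLY_UPPER are supported in parallel; true
   parallel SOR errors out.
*/
static PetscErrorCode ExampleLocalSOR(Mat A,Vec b,Vec x)
{
  /* one block-Jacobi iteration with a symmetric SOR sweep on each local diagonal block */
  MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);
  return(0);
}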
1561: PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1562: {
1563: Mat aA,aB,Aperm;
1564: const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1565: PetscScalar *aa,*ba;
1566: PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1567: PetscSF rowsf,sf;
1568: IS parcolp = NULL;
1569: PetscBool done;
1573: MatGetLocalSize(A,&m,&n);
1574: ISGetIndices(rowp,&rwant);
1575: ISGetIndices(colp,&cwant);
1576: PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);
1578: /* Invert row permutation to find out where my rows should go */
1579: PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);
1580: PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);
1581: PetscSFSetFromOptions(rowsf);
1582: for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1583: PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);
1584: PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);
1586: /* Invert column permutation to find out where my columns should go */
1587: PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1588: PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);
1589: PetscSFSetFromOptions(sf);
1590: for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1591: PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);
1592: PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);
1593: PetscSFDestroy(&sf);
1595: ISRestoreIndices(rowp,&rwant);
1596: ISRestoreIndices(colp,&cwant);
1597: MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);
1599: /* Find out where my gcols should go */
1600: MatGetSize(aB,NULL,&ng);
1601: PetscMalloc1(ng,&gcdest);
1602: PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1603: PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);
1604: PetscSFSetFromOptions(sf);
1605: PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);
1606: PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);
1607: PetscSFDestroy(&sf);
1609: PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);
1610: MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);
1611: MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);
1612: for (i=0; i<m; i++) {
1613: PetscInt row = rdest[i],rowner;
1614: PetscLayoutFindOwner(A->rmap,row,&rowner);
1615: for (j=ai[i]; j<ai[i+1]; j++) {
1616: PetscInt cowner,col = cdest[aj[j]];
1617: PetscLayoutFindOwner(A->cmap,col,&cowner); /* Could build an index for the columns to eliminate this search */
1618: if (rowner == cowner) dnnz[i]++;
1619: else onnz[i]++;
1620: }
1621: for (j=bi[i]; j<bi[i+1]; j++) {
1622: PetscInt cowner,col = gcdest[bj[j]];
1623: PetscLayoutFindOwner(A->cmap,col,&cowner);
1624: if (rowner == cowner) dnnz[i]++;
1625: else onnz[i]++;
1626: }
1627: }
1628: PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);
1629: PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);
1630: PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);
1631: PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);
1632: PetscSFDestroy(&rowsf);
1634: MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);
1635: MatSeqAIJGetArray(aA,&aa);
1636: MatSeqAIJGetArray(aB,&ba);
1637: for (i=0; i<m; i++) {
1638: PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1639: PetscInt j0,rowlen;
1640: rowlen = ai[i+1] - ai[i];
1641: for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the work array length m, so insert in batches of at most m */
1642: for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1643: MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);
1644: }
1645: rowlen = bi[i+1] - bi[i];
1646: for (j0=j=0; j<rowlen; j0=j) {
1647: for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1648: MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);
1649: }
1650: }
1651: MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);
1652: MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);
1653: MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);
1654: MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);
1655: MatSeqAIJRestoreArray(aA,&aa);
1656: MatSeqAIJRestoreArray(aB,&ba);
1657: PetscFree4(dnnz,onnz,tdnnz,tonnz);
1658: PetscFree3(work,rdest,cdest);
1659: PetscFree(gcdest);
1660: if (parcolp) {ISDestroy(&colp);}
1661: *B = Aperm;
1662: return(0);
1663: }
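/* Editor's note: illustrative usage sketch, not part of the original source.  MatPermute_MPIAIJ is
   reached through MatPermute(); rowp and colp are parallel index sets giving, for each local
   row/column, its destination in the permuted matrix.  The identity permutation below is only for
   brevity; A is assumed to be an assembled MPIAIJ matrix. */
#if 0
  IS       rowp,colp;
  Mat      Aperm;
  PetscInt rstart,rend,cstart,cend;

  MatGetOwnershipRange(A,&rstart,&rend);
  MatGetOwnershipRangeColumn(A,&cstart,&cend);
  ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&rowp);   /* identity row permutation */
  ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&colp);   /* identity column permutation */
  MatPermute(A,rowp,colp,&Aperm);
  ISDestroy(&rowp);
  ISDestroy(&colp);
  MatDestroy(&Aperm);
#endif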
1667: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1668: {
1669: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1670: Mat A = mat->A,B = mat->B;
1672: PetscReal isend[5],irecv[5];
1675: info->block_size = 1.0;
1676: MatGetInfo(A,MAT_LOCAL,info);
1678: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1679: isend[3] = info->memory; isend[4] = info->mallocs;
1681: MatGetInfo(B,MAT_LOCAL,info);
1683: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1684: isend[3] += info->memory; isend[4] += info->mallocs;
1685: if (flag == MAT_LOCAL) {
1686: info->nz_used = isend[0];
1687: info->nz_allocated = isend[1];
1688: info->nz_unneeded = isend[2];
1689: info->memory = isend[3];
1690: info->mallocs = isend[4];
1691: } else if (flag == MAT_GLOBAL_MAX) {
1692: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));
1694: info->nz_used = irecv[0];
1695: info->nz_allocated = irecv[1];
1696: info->nz_unneeded = irecv[2];
1697: info->memory = irecv[3];
1698: info->mallocs = irecv[4];
1699: } else if (flag == MAT_GLOBAL_SUM) {
1700: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));
1702: info->nz_used = irecv[0];
1703: info->nz_allocated = irecv[1];
1704: info->nz_unneeded = irecv[2];
1705: info->memory = irecv[3];
1706: info->mallocs = irecv[4];
1707: }
1708: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1709: info->fill_ratio_needed = 0;
1710: info->factor_mallocs = 0;
1711: return(0);
1712: }
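/* Editor's note: illustrative usage sketch, not part of the original source.  MatGetInfo_MPIAIJ
   sums (or maxes) the statistics of the diagonal and off-diagonal blocks; the MatInfo fields are
   PetscLogDouble, hence the %g formats below. */
#if 0
  MatInfo info;

  MatGetInfo(A,MAT_GLOBAL_SUM,&info);
  PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",
              info.nz_used,info.nz_allocated,info.mallocs);
#endif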
1716: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1717: {
1718: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1722: switch (op) {
1723: case MAT_NEW_NONZERO_LOCATIONS:
1724: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1725: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1726: case MAT_KEEP_NONZERO_PATTERN:
1727: case MAT_NEW_NONZERO_LOCATION_ERR:
1728: case MAT_USE_INODES:
1729: case MAT_IGNORE_ZERO_ENTRIES:
1730: MatCheckPreallocated(A,1);
1731: MatSetOption(a->A,op,flg);
1732: MatSetOption(a->B,op,flg);
1733: break;
1734: case MAT_ROW_ORIENTED:
1735: a->roworiented = flg;
1737: MatSetOption(a->A,op,flg);
1738: MatSetOption(a->B,op,flg);
1739: break;
1740: case MAT_NEW_DIAGONALS:
1741: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1742: break;
1743: case MAT_IGNORE_OFF_PROC_ENTRIES:
1744: a->donotstash = flg;
1745: break;
1746: case MAT_SPD:
1747: A->spd_set = PETSC_TRUE;
1748: A->spd = flg;
1749: if (flg) {
1750: A->symmetric = PETSC_TRUE;
1751: A->structurally_symmetric = PETSC_TRUE;
1752: A->symmetric_set = PETSC_TRUE;
1753: A->structurally_symmetric_set = PETSC_TRUE;
1754: }
1755: break;
1756: case MAT_SYMMETRIC:
1757: MatSetOption(a->A,op,flg);
1758: break;
1759: case MAT_STRUCTURALLY_SYMMETRIC:
1760: MatSetOption(a->A,op,flg);
1761: break;
1762: case MAT_HERMITIAN:
1763: MatSetOption(a->A,op,flg);
1764: break;
1765: case MAT_SYMMETRY_ETERNAL:
1766: MatSetOption(a->A,op,flg);
1767: break;
1768: default:
1769: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1770: }
1771: return(0);
1772: }
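/* Editor's note: illustrative usage sketch, not part of the original source.  The options below
   are among those handled by the switch above; note that MAT_SPD also marks the matrix as
   symmetric and structurally symmetric, as the SPD case shows. */
#if 0
  MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);  /* drop entries destined for other processes */
  MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE); /* error if the preallocation is exceeded */
  MatSetOption(A,MAT_SPD,PETSC_TRUE);
#endif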
1776: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1777: {
1778: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1779: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1781: PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1782: PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1783: PetscInt *cmap,*idx_p;
1786: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1787: mat->getrowactive = PETSC_TRUE;
1789: if (!mat->rowvalues && (idx || v)) {
1790: /*
1791: allocate enough space to hold information from the longest row.
1792: */
1793: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1794: PetscInt max = 1,tmp;
1795: for (i=0; i<matin->rmap->n; i++) {
1796: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1797: if (max < tmp) max = tmp;
1798: }
1799: PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);
1800: }
1802: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1803: lrow = row - rstart;
1805: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1806: if (!v) {pvA = 0; pvB = 0;}
1807: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1808: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1809: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1810: nztot = nzA + nzB;
1812: cmap = mat->garray;
1813: if (v || idx) {
1814: if (nztot) {
1815: /* Sort by increasing column numbers, assuming A and B already sorted */
1816: PetscInt imark = -1;
1817: if (v) {
1818: *v = v_p = mat->rowvalues;
1819: for (i=0; i<nzB; i++) {
1820: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1821: else break;
1822: }
1823: imark = i;
1824: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1825: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1826: }
1827: if (idx) {
1828: *idx = idx_p = mat->rowindices;
1829: if (imark > -1) {
1830: for (i=0; i<imark; i++) {
1831: idx_p[i] = cmap[cworkB[i]];
1832: }
1833: } else {
1834: for (i=0; i<nzB; i++) {
1835: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1836: else break;
1837: }
1838: imark = i;
1839: }
1840: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1841: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1842: }
1843: } else {
1844: if (idx) *idx = 0;
1845: if (v) *v = 0;
1846: }
1847: }
1848: *nz = nztot;
1849: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1850: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1851: return(0);
1852: }
1856: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1857: {
1858: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1861: if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1862: aij->getrowactive = PETSC_FALSE;
1863: return(0);
1864: }
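/* Editor's note: illustrative usage sketch, not part of the original source.  MatGetRow() only
   returns locally owned rows (see the range check above) and must be paired with MatRestoreRow()
   before the next row is requested. */
#if 0
  PetscInt          row,rstart,rend,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  MatGetOwnershipRange(A,&rstart,&rend);
  for (row=rstart; row<rend; row++) {
    MatGetRow(A,row,&ncols,&cols,&vals);
    /* ... inspect the ncols global column indices and values of this row ... */
    MatRestoreRow(A,row,&ncols,&cols,&vals);
  }
#endif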
1868: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1869: {
1870: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1871: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1873: PetscInt i,j,cstart = mat->cmap->rstart;
1874: PetscReal sum = 0.0;
1875: MatScalar *v;
1878: if (aij->size == 1) {
1879: MatNorm(aij->A,type,norm);
1880: } else {
1881: if (type == NORM_FROBENIUS) {
1882: v = amat->a;
1883: for (i=0; i<amat->nz; i++) {
1884: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1885: }
1886: v = bmat->a;
1887: for (i=0; i<bmat->nz; i++) {
1888: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1889: }
1890: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
1891: *norm = PetscSqrtReal(*norm);
1892: } else if (type == NORM_1) { /* max column norm */
1893: PetscReal *tmp,*tmp2;
1894: PetscInt *jj,*garray = aij->garray;
1895: PetscCalloc1((mat->cmap->N+1),&tmp);
1896: PetscMalloc1((mat->cmap->N+1),&tmp2);
1897: *norm = 0.0;
1898: v = amat->a; jj = amat->j;
1899: for (j=0; j<amat->nz; j++) {
1900: tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
1901: }
1902: v = bmat->a; jj = bmat->j;
1903: for (j=0; j<bmat->nz; j++) {
1904: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1905: }
1906: MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
1907: for (j=0; j<mat->cmap->N; j++) {
1908: if (tmp2[j] > *norm) *norm = tmp2[j];
1909: }
1910: PetscFree(tmp);
1911: PetscFree(tmp2);
1912: } else if (type == NORM_INFINITY) { /* max row norm */
1913: PetscReal ntemp = 0.0;
1914: for (j=0; j<aij->A->rmap->n; j++) {
1915: v = amat->a + amat->i[j];
1916: sum = 0.0;
1917: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1918: sum += PetscAbsScalar(*v); v++;
1919: }
1920: v = bmat->a + bmat->i[j];
1921: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1922: sum += PetscAbsScalar(*v); v++;
1923: }
1924: if (sum > ntemp) ntemp = sum;
1925: }
1926: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));
1927: } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1928: }
1929: return(0);
1930: }
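/* Editor's note: illustrative usage sketch, not part of the original source.  Only the Frobenius,
   one and infinity norms are supported in parallel, as the error branch above indicates. */
#if 0
  PetscReal nrm1,nrmf,nrminf;

  MatNorm(A,NORM_1,&nrm1);
  MatNorm(A,NORM_FROBENIUS,&nrmf);
  MatNorm(A,NORM_INFINITY,&nrminf);
#endif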
1934: PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1935: {
1936: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1937: Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1939: PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1940: PetscInt cstart = A->cmap->rstart,ncol;
1941: Mat B;
1942: MatScalar *array;
1945: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1947: ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1948: ai = Aloc->i; aj = Aloc->j;
1949: bi = Bloc->i; bj = Bloc->j;
1950: if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1951: PetscInt *d_nnz,*g_nnz,*o_nnz;
1952: PetscSFNode *oloc;
1953: PETSC_UNUSED PetscSF sf;
1955: PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);
1956: /* compute d_nnz for preallocation */
1957: PetscMemzero(d_nnz,na*sizeof(PetscInt));
1958: for (i=0; i<ai[ma]; i++) {
1959: d_nnz[aj[i]]++;
1960: aj[i] += cstart; /* global col index to be used by MatSetValues() */
1961: }
1962: /* compute local off-diagonal contributions */
1963: PetscMemzero(g_nnz,nb*sizeof(PetscInt));
1964: for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1965: /* map those to global */
1966: PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1967: PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);
1968: PetscSFSetFromOptions(sf);
1969: PetscMemzero(o_nnz,na*sizeof(PetscInt));
1970: PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);
1971: PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);
1972: PetscSFDestroy(&sf);
1974: MatCreate(PetscObjectComm((PetscObject)A),&B);
1975: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
1976: MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));
1977: MatSetType(B,((PetscObject)A)->type_name);
1978: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
1979: PetscFree4(d_nnz,o_nnz,g_nnz,oloc);
1980: } else {
1981: B = *matout;
1982: MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
1983: for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1984: }
1986: /* copy over the A part */
1987: array = Aloc->a;
1988: row = A->rmap->rstart;
1989: for (i=0; i<ma; i++) {
1990: ncol = ai[i+1]-ai[i];
1991: MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);
1992: row++;
1993: array += ncol; aj += ncol;
1994: }
1995: aj = Aloc->j;
1996: for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1998: /* copy over the B part */
1999: PetscCalloc1(bi[mb],&cols);
2000: array = Bloc->a;
2001: row = A->rmap->rstart;
2002: for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2003: cols_tmp = cols;
2004: for (i=0; i<mb; i++) {
2005: ncol = bi[i+1]-bi[i];
2006: MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);
2007: row++;
2008: array += ncol; cols_tmp += ncol;
2009: }
2010: PetscFree(cols);
2012: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2013: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2014: if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2015: *matout = B;
2016: } else {
2017: MatHeaderMerge(A,B);
2018: }
2019: return(0);
2020: }
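/* Editor's note: illustrative usage sketch, not part of the original source.  With
   MAT_INITIAL_MATRIX a new matrix is created; MAT_REUSE_MATRIX with *matout == A performs the
   in-place transpose, which (per the check above) requires a square matrix. */
#if 0
  Mat At;

  MatTranspose(A,MAT_INITIAL_MATRIX,&At);
  MatDestroy(&At);
#endif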
2024: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2025: {
2026: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2027: Mat a = aij->A,b = aij->B;
2029: PetscInt s1,s2,s3;
2032: MatGetLocalSize(mat,&s2,&s3);
2033: if (rr) {
2034: VecGetLocalSize(rr,&s1);
2035: if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2036: /* Overlap communication with computation. */
2037: VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2038: }
2039: if (ll) {
2040: VecGetLocalSize(ll,&s1);
2041: if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2042: (*b->ops->diagonalscale)(b,ll,0);
2043: }
2044: /* scale the diagonal block */
2045: (*a->ops->diagonalscale)(a,ll,rr);
2047: if (rr) {
2048: /* Do a scatter end and then right scale the off-diagonal block */
2049: VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2050: (*b->ops->diagonalscale)(b,0,aij->lvec);
2051: }
2052: return(0);
2053: }
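/* Editor's note: illustrative usage sketch, not part of the original source.  The left vector
   conforms to the row layout and the right vector to the column layout; either may be NULL. */
#if 0
  Vec l,r;

  MatGetVecs(A,&r,&l);
  VecSet(l,2.0);
  VecSet(r,0.5);
  MatDiagonalScale(A,l,r);   /* A <- diag(l) * A * diag(r) */
  VecDestroy(&l);
  VecDestroy(&r);
#endif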
2057: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2058: {
2059: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2063: MatSetUnfactored(a->A);
2064: return(0);
2065: }
2069: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
2070: {
2071: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2072: Mat a,b,c,d;
2073: PetscBool flg;
2077: a = matA->A; b = matA->B;
2078: c = matB->A; d = matB->B;
2080: MatEqual(a,c,&flg);
2081: if (flg) {
2082: MatEqual(b,d,&flg);
2083: }
2084: MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
2085: return(0);
2086: }
2090: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2091: {
2093: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2094: Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data;
2097: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2098: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2099: /* Because of the column compression in the off-process part of the matrix a->B,
2100: the number of columns in a->B and b->B may differ, hence we cannot call
2101: MatCopy() directly on the two parts. If need be, we could provide a copy more
2102: efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
2103: then copying the submatrices */
2104: MatCopy_Basic(A,B,str);
2105: } else {
2106: MatCopy(a->A,b->A,str);
2107: MatCopy(a->B,b->B,str);
2108: }
2109: return(0);
2110: }
2114: PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2115: {
2119: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
2120: return(0);
2121: }
2123: /*
2124: Computes the number of nonzeros per row needed for preallocation when X and Y
2125: have different nonzero structure.
2126: */
2129: PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2130: {
2131: PetscInt i,j,k,nzx,nzy;
2134: /* Set the number of nonzeros in the new matrix */
2135: for (i=0; i<m; i++) {
2136: const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2137: nzx = xi[i+1] - xi[i];
2138: nzy = yi[i+1] - yi[i];
2139: nnz[i] = 0;
2140: for (j=0,k=0; j<nzx; j++) { /* Point in X */
2141: for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2142: if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */
2143: nnz[i]++;
2144: }
2145: for (; k<nzy; k++) nnz[i]++;
2146: }
2147: return(0);
2148: }
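/* Editor's note (illustrative example, not part of the original source): for a row where X has
   global columns {1,4,7} and Y has global columns {2,4,9}, the merge above counts the union
   {1,2,4,7,9}, so nnz[i] = 5; the shared column 4 is counted only once. */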
2150: /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2153: static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2154: {
2156: PetscInt m = Y->rmap->N;
2157: Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data;
2158: Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data;
2161: MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);
2162: return(0);
2163: }
2167: PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2168: {
2170: PetscInt i;
2171: Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2172: PetscBLASInt bnz,one=1;
2173: Mat_SeqAIJ *x,*y;
2176: if (str == SAME_NONZERO_PATTERN) {
2177: PetscScalar alpha = a;
2178: x = (Mat_SeqAIJ*)xx->A->data;
2179: PetscBLASIntCast(x->nz,&bnz);
2180: y = (Mat_SeqAIJ*)yy->A->data;
2181: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2182: x = (Mat_SeqAIJ*)xx->B->data;
2183: y = (Mat_SeqAIJ*)yy->B->data;
2184: PetscBLASIntCast(x->nz,&bnz);
2185: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2186: PetscObjectStateIncrease((PetscObject)Y);
2187: } else if (str == SUBSET_NONZERO_PATTERN) {
2188: MatAXPY_SeqAIJ(yy->A,a,xx->A,str);
2190: x = (Mat_SeqAIJ*)xx->B->data;
2191: y = (Mat_SeqAIJ*)yy->B->data;
2192: if (y->xtoy && y->XtoY != xx->B) {
2193: PetscFree(y->xtoy);
2194: MatDestroy(&y->XtoY);
2195: }
2196: if (!y->xtoy) { /* get xtoy */
2197: MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
2198: y->XtoY = xx->B;
2199: PetscObjectReference((PetscObject)xx->B);
2200: }
2201: for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2202: PetscObjectStateIncrease((PetscObject)Y);
2203: } else {
2204: Mat B;
2205: PetscInt *nnz_d,*nnz_o;
2206: PetscMalloc1(yy->A->rmap->N,&nnz_d);
2207: PetscMalloc1(yy->B->rmap->N,&nnz_o);
2208: MatCreate(PetscObjectComm((PetscObject)Y),&B);
2209: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2210: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2211: MatSetBlockSizesFromMats(B,Y,Y);
2212: MatSetType(B,MATMPIAIJ);
2213: MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);
2214: MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);
2215: MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);
2216: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2217: MatHeaderReplace(Y,B);
2218: PetscFree(nnz_d);
2219: PetscFree(nnz_o);
2220: }
2221: return(0);
2222: }
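/* Editor's note: illustrative usage sketch, not part of the original source.  The structure flag
   selects the branch above: SAME_NONZERO_PATTERN uses BLAS axpy on the stored values,
   SUBSET_NONZERO_PATTERN reuses a cached x-to-y index map, and DIFFERENT_NONZERO_PATTERN builds a
   freshly preallocated matrix. */
#if 0
  MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);   /* Y <- Y + 2*X */
#endif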
2224: extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2228: PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2229: {
2230: #if defined(PETSC_USE_COMPLEX)
2232: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2235: MatConjugate_SeqAIJ(aij->A);
2236: MatConjugate_SeqAIJ(aij->B);
2237: #else
2239: #endif
2240: return(0);
2241: }
2245: PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2246: {
2247: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2251: MatRealPart(a->A);
2252: MatRealPart(a->B);
2253: return(0);
2254: }
2258: PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2259: {
2260: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2264: MatImaginaryPart(a->A);
2265: MatImaginaryPart(a->B);
2266: return(0);
2267: }
2269: #if defined(PETSC_HAVE_PBGL)
2271: #include <boost/parallel/mpi/bsp_process_group.hpp>
2272: #include <boost/graph/distributed/ilu_default_graph.hpp>
2273: #include <boost/graph/distributed/ilu_0_block.hpp>
2274: #include <boost/graph/distributed/ilu_preconditioner.hpp>
2275: #include <boost/graph/distributed/petsc/interface.hpp>
2276: #include <boost/multi_array.hpp>
2277: #include <boost/parallel/distributed_property_map.hpp>
2281: /*
2282: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2283: */
2284: PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2285: {
2286: namespace petsc = boost::distributed::petsc;
2288: namespace graph_dist = boost::graph::distributed;
2289: using boost::graph::distributed::ilu_default::process_group_type;
2290: using boost::graph::ilu_permuted;
2292: PetscBool row_identity, col_identity;
2293: PetscContainer c;
2294: PetscInt m, n, M, N;
2298: if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2299: ISIdentity(isrow, &row_identity);
2300: ISIdentity(iscol, &col_identity);
2301: if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2303: process_group_type pg;
2304: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2305: lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2306: lgraph_type& level_graph = *lgraph_p;
2307: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2309: petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2310: ilu_permuted(level_graph);
2312: /* put together the new matrix */
2313: MatCreate(PetscObjectComm((PetscObject)A), fact);
2314: MatGetLocalSize(A, &m, &n);
2315: MatGetSize(A, &M, &N);
2316: MatSetSizes(fact, m, n, M, N);
2317: MatSetBlockSizesFromMats(fact,A,A);
2318: MatSetType(fact, ((PetscObject)A)->type_name);
2319: MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);
2320: MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);
2322: PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2323: PetscContainerSetPointer(c, lgraph_p);
2324: PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2325: PetscContainerDestroy(&c);
2326: return(0);
2327: }
2331: PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2332: {
2334: return(0);
2335: }
2339: /*
2340: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2341: */
2342: PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2343: {
2344: namespace graph_dist = boost::graph::distributed;
2346: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2347: lgraph_type *lgraph_p;
2348: PetscContainer c;
2352: PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);
2353: PetscContainerGetPointer(c, (void**) &lgraph_p);
2354: VecCopy(b, x);
2356: PetscScalar *array_x;
2357: VecGetArray(x, &array_x);
2358: PetscInt sx;
2359: VecGetSize(x, &sx);
2361: PetscScalar *array_b;
2362: VecGetArray(b, &array_b);
2363: PetscInt sb;
2364: VecGetSize(b, &sb);
2366: lgraph_type& level_graph = *lgraph_p;
2367: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2369: typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2370: array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]);
2371: array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]);
2373: typedef boost::iterator_property_map<array_ref_type::iterator,
2374: boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2375: gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2376: gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2378: ilu_set_solve(*lgraph_p, vector_b, vector_x);
2379: return(0);
2380: }
2381: #endif
2386: PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2387: {
2388: PetscMPIInt rank,size;
2389: MPI_Comm comm;
2391: PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2392: PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2393: PetscInt *rowrange = mat->rmap->range;
2394: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2395: Mat A = aij->A,B=aij->B,C=*matredundant;
2396: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2397: PetscScalar *sbuf_a;
2398: PetscInt nzlocal=a->nz+b->nz;
2399: PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2400: PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2401: PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2402: MatScalar *aworkA,*aworkB;
2403: PetscScalar *vals;
2404: PetscMPIInt tag1,tag2,tag3,imdex;
2405: MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2406: MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2407: MPI_Status recv_status,*send_status;
2408: PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2409: PetscInt **rbuf_j=NULL;
2410: PetscScalar **rbuf_a=NULL;
2411: Mat_Redundant *redund =NULL;
2412:
2414: PetscObjectGetComm((PetscObject)mat,&comm);
2415: MPI_Comm_rank(comm,&rank);
2416: MPI_Comm_size(comm,&size);
2417: MPI_Comm_rank(subcomm,&subrank);
2418: MPI_Comm_size(subcomm,&subsize);
2420: if (reuse == MAT_REUSE_MATRIX) {
2421: if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2422: if (subsize == 1) {
2423: Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2424: redund = c->redundant;
2425: } else {
2426: Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2427: redund = c->redundant;
2428: }
2429: if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2431: nsends = redund->nsends;
2432: nrecvs = redund->nrecvs;
2433: send_rank = redund->send_rank;
2434: recv_rank = redund->recv_rank;
2435: sbuf_nz = redund->sbuf_nz;
2436: rbuf_nz = redund->rbuf_nz;
2437: sbuf_j = redund->sbuf_j;
2438: sbuf_a = redund->sbuf_a;
2439: rbuf_j = redund->rbuf_j;
2440: rbuf_a = redund->rbuf_a;
2441: }
2443: if (reuse == MAT_INITIAL_MATRIX) {
2444: PetscInt nleftover,np_subcomm;
2446: /* get the destination processors' id send_rank, nsends and nrecvs */
2447: PetscMalloc2(size,&send_rank,size,&recv_rank);
2449: np_subcomm = size/nsubcomm;
2450: nleftover = size - nsubcomm*np_subcomm;
2452: /* the block of code below is specific to INTERLACED */
2453: /* ------------------------------------------------*/
2454: nsends = 0; nrecvs = 0;
2455: for (i=0; i<size; i++) {
2456: if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2457: send_rank[nsends++] = i;
2458: recv_rank[nrecvs++] = i;
2459: }
2460: }
2461: if (rank >= size - nleftover) { /* this proc is a leftover processor */
2462: i = size-nleftover-1;
2463: j = 0;
2464: while (j < nsubcomm - nleftover) {
2465: send_rank[nsends++] = i;
2466: i--; j++;
2467: }
2468: }
2470: if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2471: for (i=0; i<nleftover; i++) {
2472: recv_rank[nrecvs++] = size-nleftover+i;
2473: }
2474: }
2475: /*----------------------------------------------*/
2477: /* allocate sbuf_j, sbuf_a */
2478: i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2479: PetscMalloc1(i,&sbuf_j);
2480: PetscMalloc1((nzlocal+1),&sbuf_a);
2481: /*
2482: PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);
2483: PetscSynchronizedFlush(comm,PETSC_STDOUT);
2484: */
2485: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2487: /* copy mat's local entries into the buffers */
2488: if (reuse == MAT_INITIAL_MATRIX) {
2489: rownz_max = 0;
2490: rptr = sbuf_j;
2491: cols = sbuf_j + rend-rstart + 1;
2492: vals = sbuf_a;
2493: rptr[0] = 0;
2494: for (i=0; i<rend-rstart; i++) {
2495: row = i + rstart;
2496: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2497: ncols = nzA + nzB;
2498: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2499: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2500: /* load the column indices for this row into cols */
2501: lwrite = 0;
2502: for (l=0; l<nzB; l++) {
2503: if ((ctmp = bmap[cworkB[l]]) < cstart) {
2504: vals[lwrite] = aworkB[l];
2505: cols[lwrite++] = ctmp;
2506: }
2507: }
2508: for (l=0; l<nzA; l++) {
2509: vals[lwrite] = aworkA[l];
2510: cols[lwrite++] = cstart + cworkA[l];
2511: }
2512: for (l=0; l<nzB; l++) {
2513: if ((ctmp = bmap[cworkB[l]]) >= cend) {
2514: vals[lwrite] = aworkB[l];
2515: cols[lwrite++] = ctmp;
2516: }
2517: }
2518: vals += ncols;
2519: cols += ncols;
2520: rptr[i+1] = rptr[i] + ncols;
2521: if (rownz_max < ncols) rownz_max = ncols;
2522: }
2523: if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2524: } else { /* only copy matrix values into sbuf_a */
2525: rptr = sbuf_j;
2526: vals = sbuf_a;
2527: rptr[0] = 0;
2528: for (i=0; i<rend-rstart; i++) {
2529: row = i + rstart;
2530: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2531: ncols = nzA + nzB;
2532: cworkB = b->j + b->i[i];
2533: aworkA = a->a + a->i[i];
2534: aworkB = b->a + b->i[i];
2535: lwrite = 0;
2536: for (l=0; l<nzB; l++) {
2537: if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2538: }
2539: for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2540: for (l=0; l<nzB; l++) {
2541: if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2542: }
2543: vals += ncols;
2544: rptr[i+1] = rptr[i] + ncols;
2545: }
2546: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2548: /* send nzlocal to others, and recv other's nzlocal */
2549: /*--------------------------------------------------*/
2550: if (reuse == MAT_INITIAL_MATRIX) {
2551: PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);
2553: s_waits2 = s_waits3 + nsends;
2554: s_waits1 = s_waits2 + nsends;
2555: r_waits1 = s_waits1 + nsends;
2556: r_waits2 = r_waits1 + nrecvs;
2557: r_waits3 = r_waits2 + nrecvs;
2558: } else {
2559: PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);
2561: r_waits3 = s_waits3 + nsends;
2562: }
2564: PetscObjectGetNewTag((PetscObject)mat,&tag3);
2565: if (reuse == MAT_INITIAL_MATRIX) {
2566: /* get new tags to keep the communication clean */
2567: PetscObjectGetNewTag((PetscObject)mat,&tag1);
2568: PetscObjectGetNewTag((PetscObject)mat,&tag2);
2569: PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);
2571: /* post receives of other's nzlocal */
2572: for (i=0; i<nrecvs; i++) {
2573: MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);
2574: }
2575: /* send nzlocal to others */
2576: for (i=0; i<nsends; i++) {
2577: sbuf_nz[i] = nzlocal;
2578: MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);
2579: }
2580: /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2581: count = nrecvs;
2582: while (count) {
2583: MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);
2585: recv_rank[imdex] = recv_status.MPI_SOURCE;
2586: /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2587: PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);
2589: i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2591: rbuf_nz[imdex] += i + 2;
2593: PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);
2594: MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);
2595: count--;
2596: }
2597: /* wait on sends of nzlocal */
2598: if (nsends) {MPI_Waitall(nsends,s_waits1,send_status);}
2599: /* send mat->i,j to others, and recv from other's */
2600: /*------------------------------------------------*/
2601: for (i=0; i<nsends; i++) {
2602: j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2603: MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);
2604: }
2605: /* wait on receives of mat->i,j */
2606: /*------------------------------*/
2607: count = nrecvs;
2608: while (count) {
2609: MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);
2610: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2611: count--;
2612: }
2613: /* wait on sends of mat->i,j */
2614: /*---------------------------*/
2615: if (nsends) {
2616: MPI_Waitall(nsends,s_waits2,send_status);
2617: }
2618: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2620: /* post receives, send and receive mat->a */
2621: /*----------------------------------------*/
2622: for (imdex=0; imdex<nrecvs; imdex++) {
2623: MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);
2624: }
2625: for (i=0; i<nsends; i++) {
2626: MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);
2627: }
2628: count = nrecvs;
2629: while (count) {
2630: MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);
2631: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2632: count--;
2633: }
2634: if (nsends) {
2635: MPI_Waitall(nsends,s_waits3,send_status);
2636: }
2638: PetscFree2(s_waits3,send_status);
2640: /* create redundant matrix */
2641: /*-------------------------*/
2642: if (reuse == MAT_INITIAL_MATRIX) {
2643: const PetscInt *range;
2644: PetscInt rstart_sub,rend_sub,mloc_sub;
2646: /* compute rownz_max for preallocation */
2647: for (imdex=0; imdex<nrecvs; imdex++) {
2648: j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2649: rptr = rbuf_j[imdex];
2650: for (i=0; i<j; i++) {
2651: ncols = rptr[i+1] - rptr[i];
2652: if (rownz_max < ncols) rownz_max = ncols;
2653: }
2654: }
2656: MatCreate(subcomm,&C);
2658: /* get local size of redundant matrix
2659: - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2660: MatGetOwnershipRanges(mat,&range);
2661: rstart_sub = range[nsubcomm*subrank];
2662: if (subrank+1 < subsize) { /* not the last proc in subcomm */
2663: rend_sub = range[nsubcomm*(subrank+1)];
2664: } else {
2665: rend_sub = mat->rmap->N;
2666: }
2667: mloc_sub = rend_sub - rstart_sub;
2669: if (M == N) {
2670: MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);
2671: } else { /* non-square matrix */
2672: MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);
2673: }
2674: MatSetBlockSizesFromMats(C,mat,mat);
2675: MatSetFromOptions(C);
2676: MatSeqAIJSetPreallocation(C,rownz_max,NULL);
2677: MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);
2678: } else {
2679: C = *matredundant;
2680: }
2682: /* insert local matrix entries */
2683: rptr = sbuf_j;
2684: cols = sbuf_j + rend-rstart + 1;
2685: vals = sbuf_a;
2686: for (i=0; i<rend-rstart; i++) {
2687: row = i + rstart;
2688: ncols = rptr[i+1] - rptr[i];
2689: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2690: vals += ncols;
2691: cols += ncols;
2692: }
2693: /* insert received matrix entries */
2694: for (imdex=0; imdex<nrecvs; imdex++) {
2695: rstart = rowrange[recv_rank[imdex]];
2696: rend = rowrange[recv_rank[imdex]+1];
2697: /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2698: rptr = rbuf_j[imdex];
2699: cols = rbuf_j[imdex] + rend-rstart + 1;
2700: vals = rbuf_a[imdex];
2701: for (i=0; i<rend-rstart; i++) {
2702: row = i + rstart;
2703: ncols = rptr[i+1] - rptr[i];
2704: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2705: vals += ncols;
2706: cols += ncols;
2707: }
2708: }
2709: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2710: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2712: if (reuse == MAT_INITIAL_MATRIX) {
2713: *matredundant = C;
2715: /* create a supporting struct and attach it to C for reuse */
2716: PetscNewLog(C,&redund);
2717: if (subsize == 1) {
2718: Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2719: c->redundant = redund;
2720: } else {
2721: Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2722: c->redundant = redund;
2723: }
2725: redund->nzlocal = nzlocal;
2726: redund->nsends = nsends;
2727: redund->nrecvs = nrecvs;
2728: redund->send_rank = send_rank;
2729: redund->recv_rank = recv_rank;
2730: redund->sbuf_nz = sbuf_nz;
2731: redund->rbuf_nz = rbuf_nz;
2732: redund->sbuf_j = sbuf_j;
2733: redund->sbuf_a = sbuf_a;
2734: redund->rbuf_j = rbuf_j;
2735: redund->rbuf_a = rbuf_a;
2736: redund->psubcomm = NULL;
2737: }
2738: return(0);
2739: }
2743: PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2744: {
2746: MPI_Comm comm;
2747: PetscMPIInt size,subsize;
2748: PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2749: Mat_Redundant *redund=NULL;
2750: PetscSubcomm psubcomm=NULL;
2751: MPI_Comm subcomm_in=subcomm;
2752: Mat *matseq;
2753: IS isrow,iscol;
2756: if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2757: if (reuse == MAT_INITIAL_MATRIX) {
2758: /* create psubcomm, then get subcomm */
2759: PetscObjectGetComm((PetscObject)mat,&comm);
2760: MPI_Comm_size(comm,&size);
2761: if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2763: PetscSubcommCreate(comm,&psubcomm);
2764: PetscSubcommSetNumber(psubcomm,nsubcomm);
2765: PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);
2766: PetscSubcommSetFromOptions(psubcomm);
2767: subcomm = psubcomm->comm;
2768: } else { /* retrieve psubcomm and subcomm */
2769: PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);
2770: MPI_Comm_size(subcomm,&subsize);
2771: if (subsize == 1) {
2772: Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2773: redund = c->redundant;
2774: } else {
2775: Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2776: redund = c->redundant;
2777: }
2778: psubcomm = redund->psubcomm;
2779: }
2780: if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2781: MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);
2782: if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
2783: MPI_Comm_size(psubcomm->comm,&subsize);
2784: if (subsize == 1) {
2785: Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2786: c->redundant->psubcomm = psubcomm;
2787: } else {
2788: Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2789: c->redundant->psubcomm = psubcomm;
2790: }
2791: }
2792: return(0);
2793: }
2794: }
2796: /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2797: MPI_Comm_size(subcomm,&subsize);
2798: if (reuse == MAT_INITIAL_MATRIX) {
2799: /* create a local sequential matrix matseq[0] */
2800: mloc_sub = PETSC_DECIDE;
2801: PetscSplitOwnership(subcomm,&mloc_sub,&M);
2802: MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);
2803: rstart = rend - mloc_sub;
2804: ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);
2805: ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);
2806: } else { /* reuse == MAT_REUSE_MATRIX */
2807: if (subsize == 1) {
2808: Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2809: redund = c->redundant;
2810: } else {
2811: Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2812: redund = c->redundant;
2813: }
2815: isrow = redund->isrow;
2816: iscol = redund->iscol;
2817: matseq = redund->matseq;
2818: }
2819: MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);
2820: MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);
2822: if (reuse == MAT_INITIAL_MATRIX) {
2823: /* create a supporting struct and attach it to C for reuse */
2824: PetscNewLog(*matredundant,&redund);
2825: if (subsize == 1) {
2826: Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2827: c->redundant = redund;
2828: } else {
2829: Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2830: c->redundant = redund;
2831: }
2832: redund->isrow = isrow;
2833: redund->iscol = iscol;
2834: redund->matseq = matseq;
2835: redund->psubcomm = psubcomm;
2836: }
2837: return(0);
2838: }
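/* Editor's note: illustrative usage sketch, not part of the original source.  Passing
   MPI_COMM_NULL lets the routine create the PetscSubcomm itself, as in the first branch above;
   nsubcomm copies of the whole matrix are then distributed over the sub-communicators. */
#if 0
  Mat Ared;

  MatGetRedundantMatrix(A,2,MPI_COMM_NULL,MAT_INITIAL_MATRIX,&Ared);
  /* ... after the values of A change (same nonzero pattern) ... */
  MatGetRedundantMatrix(A,2,MPI_COMM_NULL,MAT_REUSE_MATRIX,&Ared);
  MatDestroy(&Ared);
#endif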
2842: PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2843: {
2844: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2846: PetscInt i,*idxb = 0;
2847: PetscScalar *va,*vb;
2848: Vec vtmp;
2851: MatGetRowMaxAbs(a->A,v,idx);
2852: VecGetArray(v,&va);
2853: if (idx) {
2854: for (i=0; i<A->rmap->n; i++) {
2855: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2856: }
2857: }
2859: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2860: if (idx) {
2861: PetscMalloc1(A->rmap->n,&idxb);
2862: }
2863: MatGetRowMaxAbs(a->B,vtmp,idxb);
2864: VecGetArray(vtmp,&vb);
2866: for (i=0; i<A->rmap->n; i++) {
2867: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2868: va[i] = vb[i];
2869: if (idx) idx[i] = a->garray[idxb[i]];
2870: }
2871: }
2873: VecRestoreArray(v,&va);
2874: VecRestoreArray(vtmp,&vb);
2875: PetscFree(idxb);
2876: VecDestroy(&vtmp);
2877: return(0);
2878: }
2882: PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2883: {
2884: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2886: PetscInt i,*idxb = 0;
2887: PetscScalar *va,*vb;
2888: Vec vtmp;
2891: MatGetRowMinAbs(a->A,v,idx);
2892: VecGetArray(v,&va);
2893: if (idx) {
2894: for (i=0; i<A->rmap->n; i++) {
2895: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2896: }
2897: }
2899: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2900: if (idx) {
2901: PetscMalloc1(A->rmap->n,&idxb);
2902: }
2903: MatGetRowMinAbs(a->B,vtmp,idxb);
2904: VecGetArray(vtmp,&vb);
2906: for (i=0; i<A->rmap->n; i++) {
2907: if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2908: va[i] = vb[i];
2909: if (idx) idx[i] = a->garray[idxb[i]];
2910: }
2911: }
2913: VecRestoreArray(v,&va);
2914: VecRestoreArray(vtmp,&vb);
2915: PetscFree(idxb);
2916: VecDestroy(&vtmp);
2917: return(0);
2918: }
2922: PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2923: {
2924: Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2925: PetscInt n = A->rmap->n;
2926: PetscInt cstart = A->cmap->rstart;
2927: PetscInt *cmap = mat->garray;
2928: PetscInt *diagIdx, *offdiagIdx;
2929: Vec diagV, offdiagV;
2930: PetscScalar *a, *diagA, *offdiagA;
2931: PetscInt r;
2935: PetscMalloc2(n,&diagIdx,n,&offdiagIdx);
2936: VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);
2937: VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);
2938: MatGetRowMin(mat->A, diagV, diagIdx);
2939: MatGetRowMin(mat->B, offdiagV, offdiagIdx);
2940: VecGetArray(v, &a);
2941: VecGetArray(diagV, &diagA);
2942: VecGetArray(offdiagV, &offdiagA);
2943: for (r = 0; r < n; ++r) {
2944: if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2945: a[r] = diagA[r];
2946: idx[r] = cstart + diagIdx[r];
2947: } else {
2948: a[r] = offdiagA[r];
2949: idx[r] = cmap[offdiagIdx[r]];
2950: }
2951: }
2952: VecRestoreArray(v, &a);
2953: VecRestoreArray(diagV, &diagA);
2954: VecRestoreArray(offdiagV, &offdiagA);
2955: VecDestroy(&diagV);
2956: VecDestroy(&offdiagV);
2957: PetscFree2(diagIdx, offdiagIdx);
2958: return(0);
2959: }
2963: PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2964: {
2965: Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2966: PetscInt n = A->rmap->n;
2967: PetscInt cstart = A->cmap->rstart;
2968: PetscInt *cmap = mat->garray;
2969: PetscInt *diagIdx, *offdiagIdx;
2970: Vec diagV, offdiagV;
2971: PetscScalar *a, *diagA, *offdiagA;
2972: PetscInt r;
2976: PetscMalloc2(n,&diagIdx,n,&offdiagIdx);
2977: VecCreateSeq(PETSC_COMM_SELF, n, &diagV);
2978: VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);
2979: MatGetRowMax(mat->A, diagV, diagIdx);
2980: MatGetRowMax(mat->B, offdiagV, offdiagIdx);
2981: VecGetArray(v, &a);
2982: VecGetArray(diagV, &diagA);
2983: VecGetArray(offdiagV, &offdiagA);
2984: for (r = 0; r < n; ++r) {
2985: if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2986: a[r] = diagA[r];
2987: idx[r] = cstart + diagIdx[r];
2988: } else {
2989: a[r] = offdiagA[r];
2990: idx[r] = cmap[offdiagIdx[r]];
2991: }
2992: }
2993: VecRestoreArray(v, &a);
2994: VecRestoreArray(diagV, &diagA);
2995: VecRestoreArray(offdiagV, &offdiagA);
2996: VecDestroy(&diagV);
2997: VecDestroy(&offdiagV);
2998: PetscFree2(diagIdx, offdiagIdx);
2999: return(0);
3000: }
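/* Editor's note: illustrative usage sketch, not part of the original source.  The same calling
   sequence applies to MatGetRowMax(), MatGetRowMin(), MatGetRowMaxAbs() and MatGetRowMinAbs():
   v must conform to the row layout of A, and idx, if given, receives global column indices. */
#if 0
  Vec      rowmax;
  PetscInt m,n,*idx;

  MatGetLocalSize(A,&m,&n);
  MatGetVecs(A,NULL,&rowmax);          /* left vector: row layout of A */
  PetscMalloc1(m,&idx);
  MatGetRowMaxAbs(A,rowmax,idx);
  PetscFree(idx);
  VecDestroy(&rowmax);
#endif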
3004: PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3005: {
3007: Mat *dummy;
3010: MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);
3011: *newmat = *dummy;
3012: PetscFree(dummy);
3013: return(0);
3014: }
3018: PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3019: {
3020: Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
3024: MatInvertBlockDiagonal(a->A,values);
3025: return(0);
3026: }
3030: static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3031: {
3033: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data;
3036: MatSetRandom(aij->A,rctx);
3037: MatSetRandom(aij->B,rctx);
3038: MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);
3039: MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);
3040: return(0);
3041: }
3043: /* -------------------------------------------------------------------*/
3044: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3045: MatGetRow_MPIAIJ,
3046: MatRestoreRow_MPIAIJ,
3047: MatMult_MPIAIJ,
3048: /* 4*/ MatMultAdd_MPIAIJ,
3049: MatMultTranspose_MPIAIJ,
3050: MatMultTransposeAdd_MPIAIJ,
3051: #if defined(PETSC_HAVE_PBGL)
3052: MatSolve_MPIAIJ,
3053: #else
3054: 0,
3055: #endif
3056: 0,
3057: 0,
3058: /*10*/ 0,
3059: 0,
3060: 0,
3061: MatSOR_MPIAIJ,
3062: MatTranspose_MPIAIJ,
3063: /*15*/ MatGetInfo_MPIAIJ,
3064: MatEqual_MPIAIJ,
3065: MatGetDiagonal_MPIAIJ,
3066: MatDiagonalScale_MPIAIJ,
3067: MatNorm_MPIAIJ,
3068: /*20*/ MatAssemblyBegin_MPIAIJ,
3069: MatAssemblyEnd_MPIAIJ,
3070: MatSetOption_MPIAIJ,
3071: MatZeroEntries_MPIAIJ,
3072: /*24*/ MatZeroRows_MPIAIJ,
3073: 0,
3074: #if defined(PETSC_HAVE_PBGL)
3075: 0,
3076: #else
3077: 0,
3078: #endif
3079: 0,
3080: 0,
3081: /*29*/ MatSetUp_MPIAIJ,
3082: #if defined(PETSC_HAVE_PBGL)
3083: 0,
3084: #else
3085: 0,
3086: #endif
3087: 0,
3088: 0,
3089: 0,
3090: /*34*/ MatDuplicate_MPIAIJ,
3091: 0,
3092: 0,
3093: 0,
3094: 0,
3095: /*39*/ MatAXPY_MPIAIJ,
3096: MatGetSubMatrices_MPIAIJ,
3097: MatIncreaseOverlap_MPIAIJ,
3098: MatGetValues_MPIAIJ,
3099: MatCopy_MPIAIJ,
3100: /*44*/ MatGetRowMax_MPIAIJ,
3101: MatScale_MPIAIJ,
3102: 0,
3103: 0,
3104: MatZeroRowsColumns_MPIAIJ,
3105: /*49*/ MatSetRandom_MPIAIJ,
3106: 0,
3107: 0,
3108: 0,
3109: 0,
3110: /*54*/ MatFDColoringCreate_MPIXAIJ,
3111: 0,
3112: MatSetUnfactored_MPIAIJ,
3113: MatPermute_MPIAIJ,
3114: 0,
3115: /*59*/ MatGetSubMatrix_MPIAIJ,
3116: MatDestroy_MPIAIJ,
3117: MatView_MPIAIJ,
3118: 0,
3119: MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3120: /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3121: MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3122: 0,
3123: 0,
3124: 0,
3125: /*69*/ MatGetRowMaxAbs_MPIAIJ,
3126: MatGetRowMinAbs_MPIAIJ,
3127: 0,
3128: MatSetColoring_MPIAIJ,
3129: 0,
3130: MatSetValuesAdifor_MPIAIJ,
3131: /*75*/ MatFDColoringApply_AIJ,
3132: 0,
3133: 0,
3134: 0,
3135: MatFindZeroDiagonals_MPIAIJ,
3136: /*80*/ 0,
3137: 0,
3138: 0,
3139: /*83*/ MatLoad_MPIAIJ,
3140: 0,
3141: 0,
3142: 0,
3143: 0,
3144: 0,
3145: /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3146: MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3147: MatMatMultNumeric_MPIAIJ_MPIAIJ,
3148: MatPtAP_MPIAIJ_MPIAIJ,
3149: MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3150: /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3151: 0,
3152: 0,
3153: 0,
3154: 0,
3155: /*99*/ 0,
3156: 0,
3157: 0,
3158: MatConjugate_MPIAIJ,
3159: 0,
3160: /*104*/MatSetValuesRow_MPIAIJ,
3161: MatRealPart_MPIAIJ,
3162: MatImaginaryPart_MPIAIJ,
3163: 0,
3164: 0,
3165: /*109*/0,
3166: MatGetRedundantMatrix_MPIAIJ,
3167: MatGetRowMin_MPIAIJ,
3168: 0,
3169: 0,
3170: /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3171: 0,
3172: 0,
3173: 0,
3174: 0,
3175: /*119*/0,
3176: 0,
3177: 0,
3178: 0,
3179: MatGetMultiProcBlock_MPIAIJ,
3180: /*124*/MatFindNonzeroRows_MPIAIJ,
3181: MatGetColumnNorms_MPIAIJ,
3182: MatInvertBlockDiagonal_MPIAIJ,
3183: 0,
3184: MatGetSubMatricesParallel_MPIAIJ,
3185: /*129*/0,
3186: MatTransposeMatMult_MPIAIJ_MPIAIJ,
3187: MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3188: MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3189: 0,
3190: /*134*/0,
3191: 0,
3192: 0,
3193: 0,
3194: 0,
3195: /*139*/0,
3196: 0,
3197: 0,
3198: MatFDColoringSetUp_MPIXAIJ
3199: };
3201: /* ----------------------------------------------------------------------------------------*/
3205: PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
3206: {
3207: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
3211: MatStoreValues(aij->A);
3212: MatStoreValues(aij->B);
3213: return(0);
3214: }
3218: PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
3219: {
3220: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
3224: MatRetrieveValues(aij->A);
3225: MatRetrieveValues(aij->B);
3226: return(0);
3227: }
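/* Editor's note: illustrative usage sketch, not part of the original source.  MatStoreValues()
   requires that no new nonzero locations be introduced, so the option must be set first; the
   typical use is caching the values before a modification and restoring them afterwards. */
#if 0
  MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);
  MatStoreValues(A);
  /* ... modify the values of A without changing its nonzero pattern ... */
  MatRetrieveValues(A);
#endif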
3231: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3232: {
3233: Mat_MPIAIJ *b;
3237: PetscLayoutSetUp(B->rmap);
3238: PetscLayoutSetUp(B->cmap);
3239: b = (Mat_MPIAIJ*)B->data;
3241: if (!B->preallocated) {
3242: /* Explicitly create 2 MATSEQAIJ matrices. */
3243: MatCreate(PETSC_COMM_SELF,&b->A);
3244: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
3245: MatSetBlockSizesFromMats(b->A,B,B);
3246: MatSetType(b->A,MATSEQAIJ);
3247: PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);
3248: MatCreate(PETSC_COMM_SELF,&b->B);
3249: MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
3250: MatSetBlockSizesFromMats(b->B,B,B);
3251: MatSetType(b->B,MATSEQAIJ);
3252: PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);
3253: }
3255: MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
3256: MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
3257: B->preallocated = PETSC_TRUE;
3258: return(0);
3259: }
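/* Editor's note: illustrative usage sketch, not part of the original source.  The first pair of
   arguments preallocates the diagonal block b->A and the second pair the off-diagonal block b->B;
   the per-row counts 5 and 2 and the global size 100 below are only an example. */
#if 0
  Mat A;

  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
  MatSetType(A,MATMPIAIJ);
  MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
#endif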
3263: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3264: {
3265: Mat mat;
3266: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3270: *newmat = 0;
3271: MatCreate(PetscObjectComm((PetscObject)matin),&mat);
3272: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
3273: MatSetBlockSizesFromMats(mat,matin,matin);
3274: MatSetType(mat,((PetscObject)matin)->type_name);
3275: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
3276: a = (Mat_MPIAIJ*)mat->data;
3278: mat->factortype = matin->factortype;
3279: mat->assembled = PETSC_TRUE;
3280: mat->insertmode = NOT_SET_VALUES;
3281: mat->preallocated = PETSC_TRUE;
3283: a->size = oldmat->size;
3284: a->rank = oldmat->rank;
3285: a->donotstash = oldmat->donotstash;
3286: a->roworiented = oldmat->roworiented;
3287: a->rowindices = 0;
3288: a->rowvalues = 0;
3289: a->getrowactive = PETSC_FALSE;
3291: PetscLayoutReference(matin->rmap,&mat->rmap);
3292: PetscLayoutReference(matin->cmap,&mat->cmap);
3294: if (oldmat->colmap) {
3295: #if defined(PETSC_USE_CTABLE)
3296: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
3297: #else
3298: PetscMalloc1((mat->cmap->N),&a->colmap);
3299: PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));
3300: PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));
3301: #endif
3302: } else a->colmap = 0;
3303: if (oldmat->garray) {
3304: PetscInt len;
3305: len = oldmat->B->cmap->n;
3306: PetscMalloc1((len+1),&a->garray);
3307: PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));
3308: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt)); }
3309: } else a->garray = 0;
3311: VecDuplicate(oldmat->lvec,&a->lvec);
3312: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);
3313: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
3314: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);
3315: MatDuplicate(oldmat->A,cpvalues,&a->A);
3316: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
3317: MatDuplicate(oldmat->B,cpvalues,&a->B);
3318: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);
3319: PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
3320: *newmat = mat;
3321: return(0);
3322: }
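/* Editor's note: illustrative usage sketch, not part of the original source.  MAT_COPY_VALUES
   copies the numerical values as well; MAT_DO_NOT_COPY_VALUES duplicates only the layout and
   nonzero structure. */
#if 0
  Mat B;

  MatDuplicate(A,MAT_COPY_VALUES,&B);
  MatDestroy(&B);
#endif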
3328: PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3329: {
3330: PetscScalar *vals,*svals;
3331: MPI_Comm comm;
3333: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
3334: PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3335: PetscInt header[4],*rowlengths = 0,M,N,m,*cols;
3336: PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3337: PetscInt cend,cstart,n,*rowners,sizesset=1;
3338: int fd;
3339: PetscInt bs = 1;
3342: PetscObjectGetComm((PetscObject)viewer,&comm);
3343: MPI_Comm_size(comm,&size);
3344: MPI_Comm_rank(comm,&rank);
3345: if (!rank) {
3346: PetscViewerBinaryGetDescriptor(viewer,&fd);
3347: PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
3348: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3349: }
3351: PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");
3352: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
3353: PetscOptionsEnd();
3355: if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3357: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
3358: M = header[1]; N = header[2];
3359: /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3360: if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3361: if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3363: /* If global sizes are set, check if they are consistent with that given in the file */
3364: if (sizesset) {
3365: MatGetSize(newMat,&grows,&gcols);
3366: }
3367: if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3368: if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3370: /* determine ownership of all (block) rows */
3371: if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3372: if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3373: else m = newMat->rmap->n; /* Set by user */
3375: PetscMalloc1((size+1),&rowners);
3376: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
3378: /* First process needs enough room for process with most rows */
3379: if (!rank) {
3380: mmax = rowners[1];
3381: for (i=2; i<=size; i++) {
3382: mmax = PetscMax(mmax, rowners[i]);
3383: }
3384: } else mmax = -1; /* unused, but compilers complain */
3386: rowners[0] = 0;
3387: for (i=2; i<=size; i++) {
3388: rowners[i] += rowners[i-1];
3389: }
3390: rstart = rowners[rank];
3391: rend = rowners[rank+1];
3393: /* distribute row lengths to all processors */
3394: PetscMalloc2(m,&ourlens,m,&offlens);
3395: if (!rank) {
3396: PetscBinaryRead(fd,ourlens,m,PETSC_INT);
3397: PetscMalloc1(mmax,&rowlengths);
3398: PetscCalloc1(size,&procsnz);
3399: for (j=0; j<m; j++) {
3400: procsnz[0] += ourlens[j];
3401: }
3402: for (i=1; i<size; i++) {
3403: PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);
3404: /* calculate the number of nonzeros on each processor */
3405: for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3406: procsnz[i] += rowlengths[j];
3407: }
3408: MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
3409: }
3410: PetscFree(rowlengths);
3411: } else {
3412: MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);
3413: }
3415: if (!rank) {
3416: /* determine max buffer needed and allocate it */
3417: maxnz = 0;
3418: for (i=0; i<size; i++) {
3419: maxnz = PetscMax(maxnz,procsnz[i]);
3420: }
3421: PetscMalloc1(maxnz,&cols);
3423: /* read in my part of the matrix column indices */
3424: nz = procsnz[0];
3425: PetscMalloc1(nz,&mycols);
3426: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
3428: /* read in everyone else's and ship off */
3429: for (i=1; i<size; i++) {
3430: nz = procsnz[i];
3431: PetscBinaryRead(fd,cols,nz,PETSC_INT);
3432: MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);
3433: }
3434: PetscFree(cols);
3435: } else {
3436: /* determine buffer space needed for message */
3437: nz = 0;
3438: for (i=0; i<m; i++) {
3439: nz += ourlens[i];
3440: }
3441: PetscMalloc1(nz,&mycols);
3443: /* receive message of column indices */
3444: MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);
3445: }
3447: /* determine column ownership if matrix is not square */
3448: if (N != M) {
3449: if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3450: else n = newMat->cmap->n;
3451: MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);
3452: cstart = cend - n;
3453: } else {
3454: cstart = rstart;
3455: cend = rend;
3456: n = cend - cstart;
3457: }
3459: /* loop over local rows, determining number of off diagonal entries */
3460: PetscMemzero(offlens,m*sizeof(PetscInt));
3461: jj = 0;
3462: for (i=0; i<m; i++) {
3463: for (j=0; j<ourlens[i]; j++) {
3464: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3465: jj++;
3466: }
3467: }
3469: for (i=0; i<m; i++) {
3470: ourlens[i] -= offlens[i];
3471: }
3472: if (!sizesset) {
3473: MatSetSizes(newMat,m,n,M,N);
3474: }
3476: if (bs > 1) {MatSetBlockSize(newMat,bs);}
3478: MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);
3480: for (i=0; i<m; i++) {
3481: ourlens[i] += offlens[i];
3482: }
3484: if (!rank) {
3485: PetscMalloc1((maxnz+1),&vals);
3487: /* read in my part of the matrix numerical values */
3488: nz = procsnz[0];
3489: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3491: /* insert into matrix */
3492: jj = rstart;
3493: smycols = mycols;
3494: svals = vals;
3495: for (i=0; i<m; i++) {
3496: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3497: smycols += ourlens[i];
3498: svals += ourlens[i];
3499: jj++;
3500: }
3502: /* read in other processors and ship out */
3503: for (i=1; i<size; i++) {
3504: nz = procsnz[i];
3505: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3506: MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);
3507: }
3508: PetscFree(procsnz);
3509: } else {
3510: /* receive numeric values */
3511: PetscMalloc1((nz+1),&vals);
3513: /* receive message of values*/
3514: MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);
3516: /* insert into matrix */
3517: jj = rstart;
3518: smycols = mycols;
3519: svals = vals;
3520: for (i=0; i<m; i++) {
3521: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3522: smycols += ourlens[i];
3523: svals += ourlens[i];
3524: jj++;
3525: }
3526: }
3527: PetscFree2(ourlens,offlens);
3528: PetscFree(vals);
3529: PetscFree(mycols);
3530: PetscFree(rowners);
3531: MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);
3532: MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);
3533: return(0);
3534: }
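/*
   An illustrative usage sketch (not part of the PETSc source) of how the binary load path above
   is typically driven from user code. The helper name and file argument are hypothetical, and the
   ierr/CHKERRQ error checking is elided to match the style of this listing.
*/
static PetscErrorCode ExampleLoadMPIAIJ(MPI_Comm comm,const char file[],Mat *A)
{
  PetscViewer viewer;

  PetscViewerBinaryOpen(comm,file,FILE_MODE_READ,&viewer);
  MatCreate(comm,A);
  MatSetType(*A,MATMPIAIJ);      /* or MatSetFromOptions(*A) together with -mat_type aij */
  MatLoad(*A,viewer);            /* -matload_block_size <bs> is honored during the load  */
  PetscViewerDestroy(&viewer);
  return(0);
}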
3538: PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3539: {
3541: IS iscol_local;
3542: PetscInt csize;
3545: ISGetLocalSize(iscol,&csize);
3546: if (call == MAT_REUSE_MATRIX) {
3547: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
3548: if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3549: } else {
3550: PetscInt cbs;
3551: ISGetBlockSize(iscol,&cbs);
3552: ISAllGather(iscol,&iscol_local);
3553: ISSetBlockSize(iscol_local,cbs);
3554: }
3555: MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
3556: if (call == MAT_INITIAL_MATRIX) {
3557: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
3558: ISDestroy(&iscol_local);
3559: }
3560: return(0);
3561: }
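/*
   An illustrative usage sketch (not part of the PETSc source) of the public MatGetSubMatrix()
   interface that reaches the routine above for MPIAIJ matrices. Here each process simply keeps
   the rows and "diagonal" columns it already owns; the helper name is hypothetical and error
   checking is elided to match this listing.
*/
static PetscErrorCode ExampleGetSubMatrix(Mat A,Mat *Asub)
{
  IS       isrow,iscol;
  PetscInt rstart,rend,cstart,cend;

  MatGetOwnershipRange(A,&rstart,&rend);
  MatGetOwnershipRangeColumn(A,&cstart,&cend);
  ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);
  ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&iscol);
  MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,Asub);
  ISDestroy(&isrow);
  ISDestroy(&iscol);
  return(0);
}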
3563: extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3566: /*
3567: Not great since it makes two copies of the submatrix: first a SeqAIJ
3568: on each process, and then the end result by concatenating the local matrices.
3569: Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3571: Note: This requires a sequential iscol with all indices.
3572: */
3573: PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3574: {
3576: PetscMPIInt rank,size;
3577: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3578: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3579: PetscBool allcolumns, colflag;
3580: Mat M,Mreuse;
3581: MatScalar *vwork,*aa;
3582: MPI_Comm comm;
3583: Mat_SeqAIJ *aij;
3586: PetscObjectGetComm((PetscObject)mat,&comm);
3587: MPI_Comm_rank(comm,&rank);
3588: MPI_Comm_size(comm,&size);
3590: ISIdentity(iscol,&colflag);
3591: ISGetLocalSize(iscol,&ncol);
3592: if (colflag && ncol == mat->cmap->N) {
3593: allcolumns = PETSC_TRUE;
3594: } else {
3595: allcolumns = PETSC_FALSE;
3596: }
3597: if (call == MAT_REUSE_MATRIX) {
3598: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);
3599: if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3600: MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);
3601: } else {
3602: MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);
3603: }
3605: /*
3606: m - number of local rows
3607: n - number of columns (same on all processors)
3608: rstart - first row in new global matrix generated
3609: */
3610: MatGetSize(Mreuse,&m,&n);
3611: MatGetBlockSizes(Mreuse,&bs,&cbs);
3612: if (call == MAT_INITIAL_MATRIX) {
3613: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3614: ii = aij->i;
3615: jj = aij->j;
3617: /*
3618: Determine the number of non-zeros in the diagonal and off-diagonal
3619: portions of the matrix in order to do correct preallocation
3620: */
3622: /* first get start and end of "diagonal" columns */
3623: if (csize == PETSC_DECIDE) {
3624: ISGetSize(isrow,&mglobal);
3625: if (mglobal == n) { /* square matrix */
3626: nlocal = m;
3627: } else {
3628: nlocal = n/size + ((n % size) > rank);
3629: }
3630: } else {
3631: nlocal = csize;
3632: }
3633: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
3634: rstart = rend - nlocal;
3635: if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3637: /* next, compute all the lengths */
3638: PetscMalloc1((2*m+1),&dlens);
3639: olens = dlens + m;
3640: for (i=0; i<m; i++) {
3641: jend = ii[i+1] - ii[i];
3642: olen = 0;
3643: dlen = 0;
3644: for (j=0; j<jend; j++) {
3645: if (*jj < rstart || *jj >= rend) olen++;
3646: else dlen++;
3647: jj++;
3648: }
3649: olens[i] = olen;
3650: dlens[i] = dlen;
3651: }
3652: MatCreate(comm,&M);
3653: MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);
3654: MatSetBlockSizes(M,bs,cbs);
3655: MatSetType(M,((PetscObject)mat)->type_name);
3656: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
3657: PetscFree(dlens);
3658: } else {
3659: PetscInt ml,nl;
3661: M = *newmat;
3662: MatGetLocalSize(M,&ml,&nl);
3663: if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3664: MatZeroEntries(M);
3665: /*
3666: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3667: rather than the slower MatSetValues().
3668: */
3669: M->was_assembled = PETSC_TRUE;
3670: M->assembled = PETSC_FALSE;
3671: }
3672: MatGetOwnershipRange(M,&rstart,&rend);
3673: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3674: ii = aij->i;
3675: jj = aij->j;
3676: aa = aij->a;
3677: for (i=0; i<m; i++) {
3678: row = rstart + i;
3679: nz = ii[i+1] - ii[i];
3680: cwork = jj; jj += nz;
3681: vwork = aa; aa += nz;
3682: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
3683: }
3685: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
3686: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
3687: *newmat = M;
3689: /* save submatrix used in processor for next request */
3690: if (call == MAT_INITIAL_MATRIX) {
3691: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
3692: MatDestroy(&Mreuse);
3693: }
3694: return(0);
3695: }
3699: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3700: {
3701: PetscInt m,cstart, cend,j,nnz,i,d;
3702: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3703: const PetscInt *JJ;
3704: PetscScalar *values;
3708: if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3710: PetscLayoutSetUp(B->rmap);
3711: PetscLayoutSetUp(B->cmap);
3712: m = B->rmap->n;
3713: cstart = B->cmap->rstart;
3714: cend = B->cmap->rend;
3715: rstart = B->rmap->rstart;
3717: PetscMalloc2(m,&d_nnz,m,&o_nnz);
3719: #if defined(PETSC_USE_DEBUG)
3720: for (i=0; i<m; i++) {
3721: nnz = Ii[i+1]- Ii[i];
3722: JJ = J + Ii[i];
3723: if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3724: if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3725: if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3726: }
3727: #endif
3729: for (i=0; i<m; i++) {
3730: nnz = Ii[i+1]- Ii[i];
3731: JJ = J + Ii[i];
3732: nnz_max = PetscMax(nnz_max,nnz);
3733: d = 0;
3734: for (j=0; j<nnz; j++) {
3735: if (cstart <= JJ[j] && JJ[j] < cend) d++;
3736: }
3737: d_nnz[i] = d;
3738: o_nnz[i] = nnz - d;
3739: }
3740: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
3741: PetscFree2(d_nnz,o_nnz);
3743: if (v) values = (PetscScalar*)v;
3744: else {
3745: PetscCalloc1((nnz_max+1),&values);
3746: }
3748: for (i=0; i<m; i++) {
3749: ii = i + rstart;
3750: nnz = Ii[i+1]- Ii[i];
3751: MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);
3752: }
3753: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3754: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3756: if (!v) {
3757: PetscFree(values);
3758: }
3759: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3760: return(0);
3761: }
3765: /*@
3766: MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3767: (the default parallel PETSc format).
3769: Collective on MPI_Comm
3771: Input Parameters:
3772: + B - the matrix
3773: . i - the indices into j for the start of each local row (starts with zero)
3774: . j - the column indices for each local row (starts with zero)
3775: - v - optional values in the matrix
3777: Level: developer
3779: Notes:
3780: The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3781: thus you CANNOT change the matrix entries by changing the values of v[] after you have
3782: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3784: The i and j indices are 0 based, and the i indices are offsets into the local j array.
3786: The format used for the sparse matrix input is equivalent to a
3787: row-major ordering, i.e. for the following matrix, the input data expected is
3788: as shown:
3790: 1 0 0
3791: 2 0 3 P0
3792: -------
3793: 4 5 6 P1
3795: Process0 [P0]: rows_owned=[0,1]
3796: i = {0,1,3} [size = nrow+1 = 2+1]
3797: j = {0,0,2} [size = nz = 3]
3798: v = {1,2,3} [size = nz = 3]
3800: Process1 [P1]: rows_owned=[2]
3801: i = {0,3} [size = nrow+1 = 1+1]
3802: j = {0,1,2} [size = nz = 3]
3803: v = {4,5,6} [size = nz = 3]
3805: .keywords: matrix, aij, compressed row, sparse, parallel
3807: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3808: MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3809: @*/
3810: PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3811: {
3815: PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3816: return(0);
3817: }
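/*
   An illustrative usage sketch (not part of the PETSc source) that fills in the two-process 3x3
   example from the manual page above. Only the process-0 arrays are shown; process 1 would pass
   i = {0,3}, j = {0,1,2}, v = {4,5,6} with one local row. The helper name is hypothetical and
   error checking is elided to match this listing.
*/
static PetscErrorCode ExamplePreallocationCSR_P0(MPI_Comm comm,Mat *B)
{
  PetscInt    i[] = {0,1,3};   /* offsets into j[]/v[] for the 2 local rows (nrow+1 entries) */
  PetscInt    j[] = {0,0,2};   /* global column indices of the 3 local nonzeros              */
  PetscScalar v[] = {1,2,3};   /* the corresponding values                                   */

  MatCreate(comm,B);
  MatSetSizes(*B,2,PETSC_DECIDE,3,3);
  MatSetType(*B,MATMPIAIJ);
  MatMPIAIJSetPreallocationCSR(*B,i,j,v);   /* preallocates and inserts the values */
  return(0);
}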
3821: /*@C
3822: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3823: (the default parallel PETSc format). For good matrix assembly performance
3824: the user should preallocate the matrix storage by setting the parameters
3825: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3826: performance can be increased by more than a factor of 50.
3828: Collective on MPI_Comm
3830: Input Parameters:
3831: + B - the matrix
3832: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3833: (same value is used for all local rows)
3834: . d_nnz - array containing the number of nonzeros in the various rows of the
3835: DIAGONAL portion of the local submatrix (possibly different for each row)
3836: or NULL, if d_nz is used to specify the nonzero structure.
3837: The size of this array is equal to the number of local rows, i.e 'm'.
3838: For matrices that will be factored, you must leave room for (and set)
3839: the diagonal entry even if it is zero.
3840: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3841: submatrix (same value is used for all local rows).
3842: - o_nnz - array containing the number of nonzeros in the various rows of the
3843: OFF-DIAGONAL portion of the local submatrix (possibly different for
3844: each row) or NULL, if o_nz is used to specify the nonzero
3845: structure. The size of this array is equal to the number
3846: of local rows, i.e 'm'.
3848: If the *_nnz parameter is given then the *_nz parameter is ignored
3850: The AIJ format (also called the Yale sparse matrix format or
3851: compressed row storage (CSR)) is fully compatible with standard Fortran 77
3852: storage. The stored row and column indices begin with zero.
3853: See Users-Manual: ch_mat for details.
3855: The parallel matrix is partitioned such that the first m0 rows belong to
3856: process 0, the next m1 rows belong to process 1, the next m2 rows belong
3857: to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3859: The DIAGONAL portion of the local submatrix of a processor can be defined
3860: as the submatrix which is obtained by extracting the part corresponding to
3861: the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3862: first row that belongs to the processor, r2 is the last row belonging to
3863: this processor, and c1-c2 is the range of indices of the local part of a
3864: vector suitable for applying the matrix to. This is an mxn matrix. In the
3865: common case of a square matrix, the row and column ranges are the same and
3866: the DIAGONAL part is also square. The remaining portion of the local
3867: submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3869: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3871: You can call MatGetInfo() to get information on how effective the preallocation was;
3872: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3873: You can also run with the option -info and look for messages with the string
3874: malloc in them to see if additional memory allocation was needed.
3876: Example usage:
3878: Consider the following 8x8 matrix with 34 non-zero values, that is
3879: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3880: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3881: as follows:
3883: .vb
3884: 1 2 0 | 0 3 0 | 0 4
3885: Proc0 0 5 6 | 7 0 0 | 8 0
3886: 9 0 10 | 11 0 0 | 12 0
3887: -------------------------------------
3888: 13 0 14 | 15 16 17 | 0 0
3889: Proc1 0 18 0 | 19 20 21 | 0 0
3890: 0 0 0 | 22 23 0 | 24 0
3891: -------------------------------------
3892: Proc2 25 26 27 | 0 0 28 | 29 0
3893: 30 0 0 | 31 32 33 | 0 34
3894: .ve
3896: This can be represented as a collection of submatrices as:
3898: .vb
3899: A B C
3900: D E F
3901: G H I
3902: .ve
3904: Where the submatrices A,B,C are owned by proc0, D,E,F are
3905: owned by proc1, G,H,I are owned by proc2.
3907: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3908: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3909: The 'M','N' parameters are 8,8, and have the same values on all procs.
3911: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3912: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3913: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3914: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3915: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3916: matrix, and [DF] as another SeqAIJ matrix.
3918: When d_nz, o_nz parameters are specified, d_nz storage elements are
3919: allocated for every row of the local diagonal submatrix, and o_nz
3920: storage locations are allocated for every row of the OFF-DIAGONAL submat.
3921: One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3922: row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3923: In this case, the values of d_nz,o_nz are:
3924: .vb
3925: proc0 : dnz = 2, o_nz = 2
3926: proc1 : dnz = 3, o_nz = 2
3927: proc2 : dnz = 1, o_nz = 4
3928: .ve
3929: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3930: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3931: for proc2, i.e. we are using 12+15+10=37 storage locations to store
3932: 34 values.
3934: When d_nnz, o_nnz parameters are specified, the storage is specified
3935: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3936: In the above case the values for d_nnz,o_nnz are:
3937: .vb
3938: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3939: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3940: proc2: d_nnz = [1,1] and o_nnz = [4,4]
3941: .ve
3942: Here the space allocated is the sum of all the above values, i.e. 34, and
3943: hence the preallocation is exact.
3945: Level: intermediate
3947: .keywords: matrix, aij, compressed row, sparse, parallel
3949: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3950: MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3951: @*/
3952: PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3953: {
3959: PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
3960: return(0);
3961: }
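/*
   An illustrative usage sketch (not part of the PETSc source) mirroring proc0 of the 3-process
   8x8 example above: 3 local rows with d_nnz = [2,2,2] and o_nnz = [2,2,2]. The helper name is
   hypothetical and error checking is elided to match this listing.
*/
static PetscErrorCode ExamplePreallocateMPIAIJ_P0(MPI_Comm comm,Mat *B)
{
  PetscInt d_nnz[] = {2,2,2};   /* nonzeros per local row in the DIAGONAL block     */
  PetscInt o_nnz[] = {2,2,2};   /* nonzeros per local row in the OFF-DIAGONAL block */

  MatCreate(comm,B);
  MatSetSizes(*B,3,3,8,8);
  MatSetType(*B,MATMPIAIJ);
  MatMPIAIJSetPreallocation(*B,0,d_nnz,0,o_nnz);   /* d_nz/o_nz are ignored since the arrays are given */
  return(0);
}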
3965: /*@
3966: MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3967: CSR format the local rows.
3969: Collective on MPI_Comm
3971: Input Parameters:
3972: + comm - MPI communicator
3973: . m - number of local rows (Cannot be PETSC_DECIDE)
3974: . n - This value should be the same as the local size used in creating the
3975: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3976: calculated if N is given) For square matrices n is almost always m.
3977: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3978: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3979: . i - row indices
3980: . j - column indices
3981: - a - matrix values
3983: Output Parameter:
3984: . mat - the matrix
3986: Level: intermediate
3988: Notes:
3989: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3990: thus you CANNOT change the matrix entries by changing the values of a[] after you have
3991: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3993: The i and j indices are 0 based, and the i indices are offsets into the local j array.
3995: The format used for the sparse matrix input is equivalent to a
3996: row-major ordering, i.e. for the following matrix, the input data expected is
3997: as shown:
3999: 1 0 0
4000: 2 0 3 P0
4001: -------
4002: 4 5 6 P1
4004: Process0 [P0]: rows_owned=[0,1]
4005: i = {0,1,3} [size = nrow+1 = 2+1]
4006: j = {0,0,2} [size = nz = 3]
4007: v = {1,2,3} [size = nz = 3]
4009: Process1 [P1]: rows_owned=[2]
4010: i = {0,3} [size = nrow+1 = 1+1]
4011: j = {0,1,2} [size = nz = 3]
4012: v = {4,5,6} [size = nz = 3]
4014: .keywords: matrix, aij, compressed row, sparse, parallel
4016: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4017: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4018: @*/
4019: PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4020: {
4024: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4025: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4026: MatCreate(comm,mat);
4027: MatSetSizes(*mat,m,n,M,N);
4028: /* MatSetBlockSizes(M,bs,cbs); */
4029: MatSetType(*mat,MATMPIAIJ);
4030: MatMPIAIJSetPreallocationCSR(*mat,i,j,a);
4031: return(0);
4032: }
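/*
   An illustrative usage sketch (not part of the PETSc source) building the same process-0 CSR
   data as in the manual page above with a single call; the arrays are copied, so they may be
   modified or freed afterwards. The helper name is hypothetical and error checking is elided.
*/
static PetscErrorCode ExampleCreateWithArrays_P0(MPI_Comm comm,Mat *A)
{
  PetscInt    i[] = {0,1,3},j[] = {0,0,2};
  PetscScalar a[] = {1,2,3};

  /* 2 local rows, 3x3 global size; the local column size is derived from N */
  MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,A);
  return(0);
}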
4036: /*@C
4037: MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4038: (the default parallel PETSc format). For good matrix assembly performance
4039: the user should preallocate the matrix storage by setting the parameters
4040: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4041: performance can be increased by more than a factor of 50.
4043: Collective on MPI_Comm
4045: Input Parameters:
4046: + comm - MPI communicator
4047: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4048: This value should be the same as the local size used in creating the
4049: y vector for the matrix-vector product y = Ax.
4050: . n - This value should be the same as the local size used in creating the
4051: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4052: calculated if N is given) For square matrices n is almost always m.
4053: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4054: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4055: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4056: (same value is used for all local rows)
4057: . d_nnz - array containing the number of nonzeros in the various rows of the
4058: DIAGONAL portion of the local submatrix (possibly different for each row)
4059: or NULL, if d_nz is used to specify the nonzero structure.
4060: The size of this array is equal to the number of local rows, i.e 'm'.
4061: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4062: submatrix (same value is used for all local rows).
4063: - o_nnz - array containing the number of nonzeros in the various rows of the
4064: OFF-DIAGONAL portion of the local submatrix (possibly different for
4065: each row) or NULL, if o_nz is used to specify the nonzero
4066: structure. The size of this array is equal to the number
4067: of local rows, i.e 'm'.
4069: Output Parameter:
4070: . A - the matrix
4072: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4073: MatXXXXSetPreallocation() paradigm instead of this routine directly.
4074: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4076: Notes:
4077: If the *_nnz parameter is given then the *_nz parameter is ignored
4079: m,n,M,N parameters specify the size of the matrix, and its partitioning across
4080: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4081: storage requirements for this matrix.
4083: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4084: processor then it must be used on all processors that share the object for
4085: that argument.
4087: The user MUST specify either the local or global matrix dimensions
4088: (possibly both).
4090: The parallel matrix is partitioned across processors such that the
4091: first m0 rows belong to process 0, the next m1 rows belong to
4092: process 1, the next m2 rows belong to process 2, etc., where
4093: m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4094: values corresponding to an [m x N] submatrix.
4096: The columns are logically partitioned with the n0 columns belonging
4097: to 0th partition, the next n1 columns belonging to the next
4098: partition etc.. where n0,n1,n2... are the input parameter 'n'.
4100: The DIAGONAL portion of the local submatrix on any given processor
4101: is the submatrix corresponding to the rows and columns m,n
4102: owned by the given processor, i.e. the diagonal matrix on
4103: process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4104: etc. The remaining portion of the local submatrix [m x (N-n)]
4105: constitute the OFF-DIAGONAL portion. The example below better
4106: illustrates this concept.
4108: For a square global matrix we define each processor's diagonal portion
4109: to be its local rows and the corresponding columns (a square submatrix);
4110: each processor's off-diagonal portion encompasses the remainder of the
4111: local matrix (a rectangular submatrix).
4113: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4115: When calling this routine with a single process communicator, a matrix of
4116: type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4117: type of communicator, use the construction mechanism:
4118: MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4120: By default, this format uses inodes (identical nodes) when possible.
4121: We search for consecutive rows with the same nonzero structure, thereby
4122: reusing matrix information to achieve increased efficiency.
4124: Options Database Keys:
4125: + -mat_no_inode - Do not use inodes
4126: . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4127: - -mat_aij_oneindex - Internally use indexing starting at 1
4128: rather than 0. Note that when calling MatSetValues(),
4129: the user still MUST index entries starting at 0!
4132: Example usage:
4134: Consider the following 8x8 matrix with 34 non-zero values, that is
4135: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4136: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4137: as follows:
4139: .vb
4140: 1 2 0 | 0 3 0 | 0 4
4141: Proc0 0 5 6 | 7 0 0 | 8 0
4142: 9 0 10 | 11 0 0 | 12 0
4143: -------------------------------------
4144: 13 0 14 | 15 16 17 | 0 0
4145: Proc1 0 18 0 | 19 20 21 | 0 0
4146: 0 0 0 | 22 23 0 | 24 0
4147: -------------------------------------
4148: Proc2 25 26 27 | 0 0 28 | 29 0
4149: 30 0 0 | 31 32 33 | 0 34
4150: .ve
4152: This can be represented as a collection of submatrices as:
4154: .vb
4155: A B C
4156: D E F
4157: G H I
4158: .ve
4160: Where the submatrices A,B,C are owned by proc0, D,E,F are
4161: owned by proc1, G,H,I are owned by proc2.
4163: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4164: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4165: The 'M','N' parameters are 8,8, and have the same values on all procs.
4167: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4168: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4169: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4170: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4171: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4172: matrix, and [DF] as another SeqAIJ matrix.
4174: When d_nz, o_nz parameters are specified, d_nz storage elements are
4175: allocated for every row of the local diagonal submatrix, and o_nz
4176: storage locations are allocated for every row of the OFF-DIAGONAL submat.
4177: One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4178: row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4179: In this case, the values of d_nz,o_nz are:
4180: .vb
4181: proc0 : dnz = 2, o_nz = 2
4182: proc1 : dnz = 3, o_nz = 2
4183: proc2 : dnz = 1, o_nz = 4
4184: .ve
4185: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4186: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4187: for proc2, i.e. we are using 12+15+10=37 storage locations to store
4188: 34 values.
4190: When d_nnz, o_nnz parameters are specified, the storage is specified
4191: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4192: In the above case the values for d_nnz,o_nnz are:
4193: .vb
4194: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4195: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4196: proc2: d_nnz = [1,1] and o_nnz = [4,4]
4197: .ve
4198: Here the space allocated is the sum of all the above values, i.e. 34, and
4199: hence the preallocation is exact.
4201: Level: intermediate
4203: .keywords: matrix, aij, compressed row, sparse, parallel
4205: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4206: MPIAIJ, MatCreateMPIAIJWithArrays()
4207: @*/
4208: PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4209: {
4211: PetscMPIInt size;
4214: MatCreate(comm,A);
4215: MatSetSizes(*A,m,n,M,N);
4216: MPI_Comm_size(comm,&size);
4217: if (size > 1) {
4218: MatSetType(*A,MATMPIAIJ);
4219: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
4220: } else {
4221: MatSetType(*A,MATSEQAIJ);
4222: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
4223: }
4224: return(0);
4225: }
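/*
   An illustrative usage sketch (not part of the PETSc source) of the single-call creation path;
   the per-row estimates of 5 diagonal and 2 off-diagonal nonzeros are made up for illustration,
   and the helper name is hypothetical. Error checking is elided to match this listing.
*/
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,PetscInt mlocal,Mat *A)
{
  /* global sizes are determined by summing the local sizes across the communicator */
  MatCreateAIJ(comm,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE,5,NULL,2,NULL,A);
  return(0);
}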
4229: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4230: {
4231: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4234: if (Ad) *Ad = a->A;
4235: if (Ao) *Ao = a->B;
4236: if (colmap) *colmap = a->garray;
4237: return(0);
4238: }
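/*
   An illustrative usage sketch (not part of the PETSc source) of inspecting the two sequential
   blocks returned above; Ad, Ao, and colmap are borrowed references owned by A and must not be
   destroyed. The helper name is hypothetical and error checking is elided.
*/
static PetscErrorCode ExampleInspectBlocks(Mat A)
{
  Mat            Ad,Ao;
  const PetscInt *colmap;
  PetscInt       md,nd,mo,no;

  MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);   /* colmap maps local off-diagonal columns to global columns */
  MatGetLocalSize(Ad,&md,&nd);
  MatGetLocalSize(Ao,&mo,&no);
  PetscPrintf(PETSC_COMM_SELF,"diagonal block %D x %D, off-diagonal block %D x %D\n",md,nd,mo,no);
  return(0);
}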
4242: PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4243: {
4245: PetscInt i;
4246: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4249: if (coloring->ctype == IS_COLORING_GLOBAL) {
4250: ISColoringValue *allcolors,*colors;
4251: ISColoring ocoloring;
4253: /* set coloring for diagonal portion */
4254: MatSetColoring_SeqAIJ(a->A,coloring);
4256: /* set coloring for off-diagonal portion */
4257: ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);
4258: PetscMalloc1((a->B->cmap->n+1),&colors);
4259: for (i=0; i<a->B->cmap->n; i++) {
4260: colors[i] = allcolors[a->garray[i]];
4261: }
4262: PetscFree(allcolors);
4263: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4264: MatSetColoring_SeqAIJ(a->B,ocoloring);
4265: ISColoringDestroy(&ocoloring);
4266: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4267: ISColoringValue *colors;
4268: PetscInt *larray;
4269: ISColoring ocoloring;
4271: /* set coloring for diagonal portion */
4272: PetscMalloc1((a->A->cmap->n+1),&larray);
4273: for (i=0; i<a->A->cmap->n; i++) {
4274: larray[i] = i + A->cmap->rstart;
4275: }
4276: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);
4277: PetscMalloc1((a->A->cmap->n+1),&colors);
4278: for (i=0; i<a->A->cmap->n; i++) {
4279: colors[i] = coloring->colors[larray[i]];
4280: }
4281: PetscFree(larray);
4282: ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);
4283: MatSetColoring_SeqAIJ(a->A,ocoloring);
4284: ISColoringDestroy(&ocoloring);
4286: /* set coloring for off-diagonal portion */
4287: PetscMalloc1((a->B->cmap->n+1),&larray);
4288: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);
4289: PetscMalloc1((a->B->cmap->n+1),&colors);
4290: for (i=0; i<a->B->cmap->n; i++) {
4291: colors[i] = coloring->colors[larray[i]];
4292: }
4293: PetscFree(larray);
4294: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4295: MatSetColoring_SeqAIJ(a->B,ocoloring);
4296: ISColoringDestroy(&ocoloring);
4297: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4298: return(0);
4299: }
4303: PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4304: {
4305: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4309: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
4310: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
4311: return(0);
4312: }
4316: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4317: {
4319: PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4320: PetscInt *indx;
4323: /* This routine will ONLY return MPIAIJ type matrix */
4324: MatGetSize(inmat,&m,&N);
4325: MatGetBlockSizes(inmat,&bs,&cbs);
4326: if (n == PETSC_DECIDE) {
4327: PetscSplitOwnership(comm,&n,&N);
4328: }
4329: /* Check sum(n) = N */
4330: MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
4331: if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4333: MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);
4334: rstart -= m;
4336: MatPreallocateInitialize(comm,m,n,dnz,onz);
4337: for (i=0; i<m; i++) {
4338: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);
4339: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
4340: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);
4341: }
4343: MatCreate(comm,outmat);
4344: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4345: MatSetBlockSizes(*outmat,bs,cbs);
4346: MatSetType(*outmat,MATMPIAIJ);
4347: MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
4348: MatPreallocateFinalize(dnz,onz);
4349: return(0);
4350: }
4354: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4355: {
4357: PetscInt m,N,i,rstart,nnz,Ii;
4358: PetscInt *indx;
4359: PetscScalar *values;
4362: MatGetSize(inmat,&m,&N);
4363: MatGetOwnershipRange(outmat,&rstart,NULL);
4364: for (i=0; i<m; i++) {
4365: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4366: Ii = i + rstart;
4367: MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
4368: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4369: }
4370: MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);
4371: MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);
4372: return(0);
4373: }
4377: /*@
4378: MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4379: matrices from each processor
4381: Collective on MPI_Comm
4383: Input Parameters:
4384: + comm - the communicators the parallel matrix will live on
4385: . inmat - the input sequential matrices
4386: . n - number of local columns (or PETSC_DECIDE)
4387: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4389: Output Parameter:
4390: . outmat - the parallel matrix generated
4392: Level: advanced
4394: Notes: The number of columns of the matrix in EACH processor MUST be the same.
4396: @*/
4397: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4398: {
4400: PetscMPIInt size;
4403: MPI_Comm_size(comm,&size);
4404: PetscLogEventBegin(MAT_Merge,inmat,0,0,0);
4405: if (size == 1) {
4406: if (scall == MAT_INITIAL_MATRIX) {
4407: MatDuplicate(inmat,MAT_COPY_VALUES,outmat);
4408: } else {
4409: MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);
4410: }
4411: } else {
4412: if (scall == MAT_INITIAL_MATRIX) {
4413: MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);
4414: }
4415: MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);
4416: }
4417: PetscLogEventEnd(MAT_Merge,inmat,0,0,0);
4418: return(0);
4419: }
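/*
   An illustrative usage sketch (not part of the PETSc source) in which every process contributes
   one 1x3 sequential row block; the column count must be the same on every process, as the note
   above requires. The helper name is hypothetical and error checking is elided.
*/
static PetscErrorCode ExampleConcatenate(MPI_Comm comm,Mat *C)
{
  Mat         local;
  PetscInt    row = 0,col[] = {0,1,2};
  PetscScalar val[] = {1.0,2.0,3.0};

  MatCreateSeqAIJ(PETSC_COMM_SELF,1,3,3,NULL,&local);
  MatSetValues(local,1,&row,3,col,val,INSERT_VALUES);
  MatAssemblyBegin(local,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(local,MAT_FINAL_ASSEMBLY);
  MatCreateMPIAIJConcatenateSeqAIJ(comm,local,PETSC_DECIDE,MAT_INITIAL_MATRIX,C);
  MatDestroy(&local);
  return(0);
}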
4423: PetscErrorCode MatFileSplit(Mat A,char *outfile)
4424: {
4425: PetscErrorCode ierr;
4426: PetscMPIInt rank;
4427: PetscInt m,N,i,rstart,nnz;
4428: size_t len;
4429: const PetscInt *indx;
4430: PetscViewer out;
4431: char *name;
4432: Mat B;
4433: const PetscScalar *values;
4436: MatGetLocalSize(A,&m,0);
4437: MatGetSize(A,0,&N);
4438: /* Should this be the type of the diagonal block of A? */
4439: MatCreate(PETSC_COMM_SELF,&B);
4440: MatSetSizes(B,m,N,m,N);
4441: MatSetBlockSizesFromMats(B,A,A);
4442: MatSetType(B,MATSEQAIJ);
4443: MatSeqAIJSetPreallocation(B,0,NULL);
4444: MatGetOwnershipRange(A,&rstart,0);
4445: for (i=0; i<m; i++) {
4446: MatGetRow(A,i+rstart,&nnz,&indx,&values);
4447: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
4448: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
4449: }
4450: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
4451: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
4453: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
4454: PetscStrlen(outfile,&len);
4455: PetscMalloc1((len+5),&name);
4456: sprintf(name,"%s.%d",outfile,rank);
4457: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);
4458: PetscFree(name);
4459: MatView(B,out);
4460: PetscViewerDestroy(&out);
4461: MatDestroy(&B);
4462: return(0);
4463: }
4465: extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4468: PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4469: {
4470: PetscErrorCode ierr;
4471: Mat_Merge_SeqsToMPI *merge;
4472: PetscContainer container;
4475: PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);
4476: if (container) {
4477: PetscContainerGetPointer(container,(void**)&merge);
4478: PetscFree(merge->id_r);
4479: PetscFree(merge->len_s);
4480: PetscFree(merge->len_r);
4481: PetscFree(merge->bi);
4482: PetscFree(merge->bj);
4483: PetscFree(merge->buf_ri[0]);
4484: PetscFree(merge->buf_ri);
4485: PetscFree(merge->buf_rj[0]);
4486: PetscFree(merge->buf_rj);
4487: PetscFree(merge->coi);
4488: PetscFree(merge->coj);
4489: PetscFree(merge->owners_co);
4490: PetscLayoutDestroy(&merge->rowmap);
4491: PetscFree(merge);
4492: PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);
4493: }
4494: MatDestroy_MPIAIJ(A);
4495: return(0);
4496: }
4498: #include <../src/mat/utils/freespace.h>
4499: #include <petscbt.h>
4503: PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4504: {
4505: PetscErrorCode ierr;
4506: MPI_Comm comm;
4507: Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data;
4508: PetscMPIInt size,rank,taga,*len_s;
4509: PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4510: PetscInt proc,m;
4511: PetscInt **buf_ri,**buf_rj;
4512: PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4513: PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
4514: MPI_Request *s_waits,*r_waits;
4515: MPI_Status *status;
4516: MatScalar *aa=a->a;
4517: MatScalar **abuf_r,*ba_i;
4518: Mat_Merge_SeqsToMPI *merge;
4519: PetscContainer container;
4522: PetscObjectGetComm((PetscObject)mpimat,&comm);
4523: PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);
4525: MPI_Comm_size(comm,&size);
4526: MPI_Comm_rank(comm,&rank);
4528: PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);
4529: PetscContainerGetPointer(container,(void**)&merge);
4531: bi = merge->bi;
4532: bj = merge->bj;
4533: buf_ri = merge->buf_ri;
4534: buf_rj = merge->buf_rj;
4536: PetscMalloc1(size,&status);
4537: owners = merge->rowmap->range;
4538: len_s = merge->len_s;
4540: /* send and recv matrix values */
4541: /*-----------------------------*/
4542: PetscObjectGetNewTag((PetscObject)mpimat,&taga);
4543: PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);
4545: PetscMalloc1((merge->nsend+1),&s_waits);
4546: for (proc=0,k=0; proc<size; proc++) {
4547: if (!len_s[proc]) continue;
4548: i = owners[proc];
4549: MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
4550: k++;
4551: }
4553: if (merge->nrecv) {MPI_Waitall(merge->nrecv,r_waits,status);}
4554: if (merge->nsend) {MPI_Waitall(merge->nsend,s_waits,status);}
4555: PetscFree(status);
4557: PetscFree(s_waits);
4558: PetscFree(r_waits);
4560: /* insert mat values of mpimat */
4561: /*----------------------------*/
4562: PetscMalloc1(N,&ba_i);
4563: PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);
4565: for (k=0; k<merge->nrecv; k++) {
4566: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4567: nrows = *(buf_ri_k[k]);
4568: nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */
4569: nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th recved i-structure */
4570: }
4572: /* set values of ba */
4573: m = merge->rowmap->n;
4574: for (i=0; i<m; i++) {
4575: arow = owners[rank] + i;
4576: bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
4577: bnzi = bi[i+1] - bi[i];
4578: PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));
4580: /* add local non-zero vals of this proc's seqmat into ba */
4581: anzi = ai[arow+1] - ai[arow];
4582: aj = a->j + ai[arow];
4583: aa = a->a + ai[arow];
4584: nextaj = 0;
4585: for (j=0; nextaj<anzi; j++) {
4586: if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4587: ba_i[j] += aa[nextaj++];
4588: }
4589: }
4591: /* add received vals into ba */
4592: for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4593: /* i-th row */
4594: if (i == *nextrow[k]) {
4595: anzi = *(nextai[k]+1) - *nextai[k];
4596: aj = buf_rj[k] + *(nextai[k]);
4597: aa = abuf_r[k] + *(nextai[k]);
4598: nextaj = 0;
4599: for (j=0; nextaj<anzi; j++) {
4600: if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4601: ba_i[j] += aa[nextaj++];
4602: }
4603: }
4604: nextrow[k]++; nextai[k]++;
4605: }
4606: }
4607: MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
4608: }
4609: MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
4610: MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);
4612: PetscFree(abuf_r[0]);
4613: PetscFree(abuf_r);
4614: PetscFree(ba_i);
4615: PetscFree3(buf_ri_k,nextrow,nextai);
4616: PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);
4617: return(0);
4618: }
4620: extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4624: PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4625: {
4626: PetscErrorCode ierr;
4627: Mat B_mpi;
4628: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4629: PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4630: PetscInt **buf_rj,**buf_ri,**buf_ri_k;
4631: PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4632: PetscInt len,proc,*dnz,*onz,bs,cbs;
4633: PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4634: PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4635: MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
4636: MPI_Status *status;
4637: PetscFreeSpaceList free_space=NULL,current_space=NULL;
4638: PetscBT lnkbt;
4639: Mat_Merge_SeqsToMPI *merge;
4640: PetscContainer container;
4643: PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);
4645: /* make sure it is a PETSc comm */
4646: PetscCommDuplicate(comm,&comm,NULL);
4647: MPI_Comm_size(comm,&size);
4648: MPI_Comm_rank(comm,&rank);
4650: PetscNew(&merge);
4651: PetscMalloc1(size,&status);
4653: /* determine row ownership */
4654: /*---------------------------------------------------------*/
4655: PetscLayoutCreate(comm,&merge->rowmap);
4656: PetscLayoutSetLocalSize(merge->rowmap,m);
4657: PetscLayoutSetSize(merge->rowmap,M);
4658: PetscLayoutSetBlockSize(merge->rowmap,1);
4659: PetscLayoutSetUp(merge->rowmap);
4660: PetscMalloc1(size,&len_si);
4661: PetscMalloc1(size,&merge->len_s);
4663: m = merge->rowmap->n;
4664: owners = merge->rowmap->range;
4666: /* determine the number of messages to send, their lengths */
4667: /*---------------------------------------------------------*/
4668: len_s = merge->len_s;
4670: len = 0; /* length of buf_si[] */
4671: merge->nsend = 0;
4672: for (proc=0; proc<size; proc++) {
4673: len_si[proc] = 0;
4674: if (proc == rank) {
4675: len_s[proc] = 0;
4676: } else {
4677: len_si[proc] = owners[proc+1] - owners[proc] + 1;
4678: len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4679: }
4680: if (len_s[proc]) {
4681: merge->nsend++;
4682: nrows = 0;
4683: for (i=owners[proc]; i<owners[proc+1]; i++) {
4684: if (ai[i+1] > ai[i]) nrows++;
4685: }
4686: len_si[proc] = 2*(nrows+1);
4687: len += len_si[proc];
4688: }
4689: }
4691: /* determine the number and length of messages to receive for ij-structure */
4692: /*-------------------------------------------------------------------------*/
4693: PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);
4694: PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);
4696: /* post the Irecv of j-structure */
4697: /*-------------------------------*/
4698: PetscCommGetNewTag(comm,&tagj);
4699: PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);
4701: /* post the Isend of j-structure */
4702: /*--------------------------------*/
4703: PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);
4705: for (proc=0, k=0; proc<size; proc++) {
4706: if (!len_s[proc]) continue;
4707: i = owners[proc];
4708: MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
4709: k++;
4710: }
4712: /* receives and sends of j-structure are complete */
4713: /*------------------------------------------------*/
4714: if (merge->nrecv) {MPI_Waitall(merge->nrecv,rj_waits,status);}
4715: if (merge->nsend) {MPI_Waitall(merge->nsend,sj_waits,status);}
4717: /* send and recv i-structure */
4718: /*---------------------------*/
4719: PetscCommGetNewTag(comm,&tagi);
4720: PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);
4722: PetscMalloc1((len+1),&buf_s);
4723: buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4724: for (proc=0,k=0; proc<size; proc++) {
4725: if (!len_s[proc]) continue;
4726: /* form outgoing message for i-structure:
4727: buf_si[0]: nrows to be sent
4728: [1:nrows]: row index (global)
4729: [nrows+1:2*nrows+1]: i-structure index
4730: */
4731: /*-------------------------------------------*/
4732: nrows = len_si[proc]/2 - 1;
4733: buf_si_i = buf_si + nrows+1;
4734: buf_si[0] = nrows;
4735: buf_si_i[0] = 0;
4736: nrows = 0;
4737: for (i=owners[proc]; i<owners[proc+1]; i++) {
4738: anzi = ai[i+1] - ai[i];
4739: if (anzi) {
4740: buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4741: buf_si[nrows+1] = i-owners[proc]; /* local row index */
4742: nrows++;
4743: }
4744: }
4745: MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
4746: k++;
4747: buf_si += len_si[proc];
4748: }
4750: if (merge->nrecv) {MPI_Waitall(merge->nrecv,ri_waits,status);}
4751: if (merge->nsend) {MPI_Waitall(merge->nsend,si_waits,status);}
4753: PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);
4754: for (i=0; i<merge->nrecv; i++) {
4755: PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);
4756: }
4758: PetscFree(len_si);
4759: PetscFree(len_ri);
4760: PetscFree(rj_waits);
4761: PetscFree2(si_waits,sj_waits);
4762: PetscFree(ri_waits);
4763: PetscFree(buf_s);
4764: PetscFree(status);
4766: /* compute a local seq matrix in each processor */
4767: /*----------------------------------------------*/
4768: /* allocate bi array and free space for accumulating nonzero column info */
4769: PetscMalloc1((m+1),&bi);
4770: bi[0] = 0;
4772: /* create and initialize a linked list */
4773: nlnk = N+1;
4774: PetscLLCreate(N,N,nlnk,lnk,lnkbt);
4776: /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4777: len = ai[owners[rank+1]] - ai[owners[rank]];
4778: PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);
4780: current_space = free_space;
4782: /* determine symbolic info for each local row */
4783: PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);
4785: for (k=0; k<merge->nrecv; k++) {
4786: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4787: nrows = *buf_ri_k[k];
4788: nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4789: nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th recved i-structure */
4790: }
4792: MatPreallocateInitialize(comm,m,n,dnz,onz);
4793: len = 0;
4794: for (i=0; i<m; i++) {
4795: bnzi = 0;
4796: /* add local non-zero cols of this proc's seqmat into lnk */
4797: arow = owners[rank] + i;
4798: anzi = ai[arow+1] - ai[arow];
4799: aj = a->j + ai[arow];
4800: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
4801: bnzi += nlnk;
4802: /* add received col data into lnk */
4803: for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4804: if (i == *nextrow[k]) { /* i-th row */
4805: anzi = *(nextai[k]+1) - *nextai[k];
4806: aj = buf_rj[k] + *nextai[k];
4807: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
4808: bnzi += nlnk;
4809: nextrow[k]++; nextai[k]++;
4810: }
4811: }
4812: if (len < bnzi) len = bnzi; /* =max(bnzi) */
4814: /* if free space is not available, make more free space */
4815: if (current_space->local_remaining<bnzi) {
4816: PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);
4817: nspacedouble++;
4818: }
4819: /* copy data into free space, then initialize lnk */
4820: PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
4821: MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);
4823: current_space->array += bnzi;
4824: current_space->local_used += bnzi;
4825: current_space->local_remaining -= bnzi;
4827: bi[i+1] = bi[i] + bnzi;
4828: }
4830: PetscFree3(buf_ri_k,nextrow,nextai);
4832: PetscMalloc1((bi[m]+1),&bj);
4833: PetscFreeSpaceContiguous(&free_space,bj);
4834: PetscLLDestroy(lnk,lnkbt);
4836: /* create symbolic parallel matrix B_mpi */
4837: /*---------------------------------------*/
4838: MatGetBlockSizes(seqmat,&bs,&cbs);
4839: MatCreate(comm,&B_mpi);
4840: if (n==PETSC_DECIDE) {
4841: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);
4842: } else {
4843: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4844: }
4845: MatSetBlockSizes(B_mpi,bs,cbs);
4846: MatSetType(B_mpi,MATMPIAIJ);
4847: MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
4848: MatPreallocateFinalize(dnz,onz);
4849: MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
4851: /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4852: B_mpi->assembled = PETSC_FALSE;
4853: B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4854: merge->bi = bi;
4855: merge->bj = bj;
4856: merge->buf_ri = buf_ri;
4857: merge->buf_rj = buf_rj;
4858: merge->coi = NULL;
4859: merge->coj = NULL;
4860: merge->owners_co = NULL;
4862: PetscCommDestroy(&comm);
4864: /* attach the supporting struct to B_mpi for reuse */
4865: PetscContainerCreate(PETSC_COMM_SELF,&container);
4866: PetscContainerSetPointer(container,merge);
4867: PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
4868: PetscContainerDestroy(&container);
4869: *mpimat = B_mpi;
4871: PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);
4872: return(0);
4873: }
4877: /*@C
4878: MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4879: matrices from each processor
4881: Collective on MPI_Comm
4883: Input Parameters:
4884: + comm - the communicators the parallel matrix will live on
4885: . seqmat - the input sequential matrices
4886: . m - number of local rows (or PETSC_DECIDE)
4887: . n - number of local columns (or PETSC_DECIDE)
4888: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4890: Output Parameter:
4891: . mpimat - the parallel matrix generated
4893: Level: advanced
4895: Notes:
4896: The dimensions of the sequential matrix in each processor MUST be the same.
4897: The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4898: destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4899: @*/
4900: PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4901: {
4903: PetscMPIInt size;
4906: MPI_Comm_size(comm,&size);
4907: if (size == 1) {
4908: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4909: if (scall == MAT_INITIAL_MATRIX) {
4910: MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);
4911: } else {
4912: MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);
4913: }
4914: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4915: return(0);
4916: }
4917: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4918: if (scall == MAT_INITIAL_MATRIX) {
4919: MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);
4920: }
4921: MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);
4922: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4923: return(0);
4924: }
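/*
   An illustrative usage sketch (not part of the PETSc source) in which every process holds a 4x4
   sequential matrix and entries at the same (row,col) are summed into the parallel result; see
   the Notes above regarding the lifetime of seq. The helper name is hypothetical and error
   checking is elided to match this listing.
*/
static PetscErrorCode ExampleSumSeqAIJ(MPI_Comm comm,Mat *C)
{
  Mat         seq;
  PetscInt    row = 0,col = 0;
  PetscScalar one = 1.0;

  MatCreateSeqAIJ(PETSC_COMM_SELF,4,4,1,NULL,&seq);
  MatSetValues(seq,1,&row,1,&col,&one,INSERT_VALUES);
  MatAssemblyBegin(seq,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(seq,MAT_FINAL_ASSEMBLY);
  MatCreateMPIAIJSumSeqAIJ(comm,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,C);
  return(0);
}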
4928: /*@
4929: MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4930: mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4931: with MatGetSize().
4933: Not Collective
4935: Input Parameters:
4936: + A - the matrix
4937: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4939: Output Parameter:
4940: . A_loc - the local sequential matrix generated
4942: Level: developer
4944: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4946: @*/
4947: PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4948: {
4950: Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
4951: Mat_SeqAIJ *mat,*a,*b;
4952: PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4953: MatScalar *aa,*ba,*cam;
4954: PetscScalar *ca;
4955: PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4956: PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
4957: PetscBool match;
4960: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);
4961: if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4962: PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);
4963: a = (Mat_SeqAIJ*)(mpimat->A)->data;
4964: b = (Mat_SeqAIJ*)(mpimat->B)->data;
4965: ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4966: aa = a->a; ba = b->a;
4967: if (scall == MAT_INITIAL_MATRIX) {
4968: PetscMalloc1((1+am),&ci);
4969: ci[0] = 0;
4970: for (i=0; i<am; i++) {
4971: ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4972: }
4973: PetscMalloc1((1+ci[am]),&cj);
4974: PetscMalloc1((1+ci[am]),&ca);
4975: k = 0;
4976: for (i=0; i<am; i++) {
4977: ncols_o = bi[i+1] - bi[i];
4978: ncols_d = ai[i+1] - ai[i];
4979: /* off-diagonal portion of A */
4980: for (jo=0; jo<ncols_o; jo++) {
4981: col = cmap[*bj];
4982: if (col >= cstart) break;
4983: cj[k] = col; bj++;
4984: ca[k++] = *ba++;
4985: }
4986: /* diagonal portion of A */
4987: for (j=0; j<ncols_d; j++) {
4988: cj[k] = cstart + *aj++;
4989: ca[k++] = *aa++;
4990: }
4991: /* off-diagonal portion of A */
4992: for (j=jo; j<ncols_o; j++) {
4993: cj[k] = cmap[*bj++];
4994: ca[k++] = *ba++;
4995: }
4996: }
4997: /* put together the new matrix */
4998: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);
4999: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5000: /* Since these are PETSc arrays, change flags to free them as necessary. */
5001: mat = (Mat_SeqAIJ*)(*A_loc)->data;
5002: mat->free_a = PETSC_TRUE;
5003: mat->free_ij = PETSC_TRUE;
5004: mat->nonew = 0;
5005: } else if (scall == MAT_REUSE_MATRIX) {
5006: mat=(Mat_SeqAIJ*)(*A_loc)->data;
5007: ci = mat->i; cj = mat->j; cam = mat->a;
5008: for (i=0; i<am; i++) {
5009: /* off-diagonal portion of A */
5010: ncols_o = bi[i+1] - bi[i];
5011: for (jo=0; jo<ncols_o; jo++) {
5012: col = cmap[*bj];
5013: if (col >= cstart) break;
5014: *cam++ = *ba++; bj++;
5015: }
5016: /* diagonal portion of A */
5017: ncols_d = ai[i+1] - ai[i];
5018: for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5019: /* off-diagonal portion of A */
5020: for (j=jo; j<ncols_o; j++) {
5021: *cam++ = *ba++; bj++;
5022: }
5023: }
5024: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5025: PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);
5026: return(0);
5027: }
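/*
   An illustrative usage sketch (not part of the PETSc source); the returned A_loc is a new
   sequential matrix with mlocal rows and N columns and is owned by the caller. The helper name
   is hypothetical and error checking is elided.
*/
static PetscErrorCode ExampleGetLocalMat(Mat A)
{
  Mat A_loc;

  MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
  /* ... use A_loc sequentially; a later call with MAT_REUSE_MATRIX refreshes its values ... */
  MatDestroy(&A_loc);
  return(0);
}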
5031: /*@C
5032: MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5034: Not Collective
5036: Input Parameters:
5037: + A - the matrix
5038: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5039: - row, col - index sets of rows and columns to extract (or NULL)
5041: Output Parameter:
5042: . A_loc - the local sequential matrix generated
5044: Level: developer
5046: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5048: @*/
5049: PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5050: {
5051: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5053: PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5054: IS isrowa,iscola;
5055: Mat *aloc;
5056: PetscBool match;
5059: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);
5060: if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5061: PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);
5062: if (!row) {
5063: start = A->rmap->rstart; end = A->rmap->rend;
5064: ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
5065: } else {
5066: isrowa = *row;
5067: }
5068: if (!col) {
5069: start = A->cmap->rstart;
5070: cmap = a->garray;
5071: nzA = a->A->cmap->n;
5072: nzB = a->B->cmap->n;
5073: PetscMalloc1((nzA+nzB), &idx);
5074: ncols = 0;
5075: for (i=0; i<nzB; i++) {
5076: if (cmap[i] < start) idx[ncols++] = cmap[i];
5077: else break;
5078: }
5079: imark = i;
5080: for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5081: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5082: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);
5083: } else {
5084: iscola = *col;
5085: }
5086: if (scall != MAT_INITIAL_MATRIX) {
5087: PetscMalloc(sizeof(Mat),&aloc);
5088: aloc[0] = *A_loc;
5089: }
5090: MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);
5091: *A_loc = aloc[0];
5092: PetscFree(aloc);
5093: if (!row) {
5094: ISDestroy(&isrowa);
5095: }
5096: if (!col) {
5097: ISDestroy(&iscola);
5098: }
5099: PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);
5100: return(0);
5101: }
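/*
   A minimal usage sketch for MatMPIAIJGetLocalMatCondensed(); A is assumed to be an assembled
   MATMPIAIJ matrix, other names are illustrative, and error checking is omitted:

     Mat Aloc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);   NULL row/col: take all local rows and all nonzero columns
     ...                                                                    change values of A, keep its nonzero pattern
     MatMPIAIJGetLocalMatCondensed(A,MAT_REUSE_MATRIX,NULL,NULL,&Aloc);
     MatDestroy(&Aloc);
*/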
5105: /*@C
5106: MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5108: Collective on Mat
5110: Input Parameters:
5111: + A,B - the matrices in mpiaij format
5112: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5113: - rowb, colb - index sets of rows and columns of B to extract (or NULL)
5115: Output Parameters:
5116: + rowb, colb - index sets of rows and columns of B to extract
5117: - B_seq - the sequential matrix generated
5119: Level: developer
5121: @*/
5122: PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5123: {
5124: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5126: PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5127: IS isrowb,iscolb;
5128: Mat *bseq=NULL;
5131: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5132: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5133: }
5134: PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);
5136: if (scall == MAT_INITIAL_MATRIX) {
5137: start = A->cmap->rstart;
5138: cmap = a->garray;
5139: nzA = a->A->cmap->n;
5140: nzB = a->B->cmap->n;
5141: PetscMalloc1((nzA+nzB), &idx);
5142: ncols = 0;
5143: for (i=0; i<nzB; i++) { /* row < local row index */
5144: if (cmap[i] < start) idx[ncols++] = cmap[i];
5145: else break;
5146: }
5147: imark = i;
5148: for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
5149: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5150: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);
5151: ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);
5152: } else {
5153: if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5154: isrowb = *rowb; iscolb = *colb;
5155: PetscMalloc(sizeof(Mat),&bseq);
5156: bseq[0] = *B_seq;
5157: }
5158: MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);
5159: *B_seq = bseq[0];
5160: PetscFree(bseq);
5161: if (!rowb) {
5162: ISDestroy(&isrowb);
5163: } else {
5164: *rowb = isrowb;
5165: }
5166: if (!colb) {
5167: ISDestroy(&iscolb);
5168: } else {
5169: *colb = iscolb;
5170: }
5171: PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);
5172: return(0);
5173: }
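/*
   A minimal usage sketch for MatGetBrowsOfAcols(); A and B are assumed to be assembled MATMPIAIJ
   matrices with compatible layouts, other names are illustrative, and error checking is omitted:

     IS  rowb = NULL,colb = NULL;
     Mat Bseq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);   rowb/colb are created and returned for later reuse
     ...                                                             change values of B, keep its nonzero pattern
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);
     ISDestroy(&rowb); ISDestroy(&colb); MatDestroy(&Bseq);
*/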
5177: /*
5178: MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5179: of the OFF-DIAGONAL portion of local A
5181: Collective on Mat
5183: Input Parameters:
5184: + A,B - the matrices in mpiaij format
5185: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5187: Output Parameters:
5188: + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5189: . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5190: . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5191: - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5193: Level: developer
5195: */
5196: PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5197: {
5198: VecScatter_MPI_General *gen_to,*gen_from;
5199: PetscErrorCode ierr;
5200: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5201: Mat_SeqAIJ *b_oth;
5202: VecScatter ctx =a->Mvctx;
5203: MPI_Comm comm;
5204: PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5205: PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5206: PetscScalar *rvalues,*svalues;
5207: MatScalar *b_otha,*bufa,*bufA;
5208: PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5209: MPI_Request *rwaits = NULL,*swaits = NULL;
5210: MPI_Status *sstatus,rstatus;
5211: PetscMPIInt jj;
5212: PetscInt *cols,sbs,rbs;
5213: PetscScalar *vals;
5216: PetscObjectGetComm((PetscObject)A,&comm);
5217: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5218: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5219: }
5220: PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);
5221: MPI_Comm_rank(comm,&rank);
5223: gen_to = (VecScatter_MPI_General*)ctx->todata;
5224: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5225: rvalues = gen_from->values; /* holds the length of receiving row */
5226: svalues = gen_to->values; /* holds the length of sending row */
5227: nrecvs = gen_from->n;
5228: nsends = gen_to->n;
5230: PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);
5231: srow = gen_to->indices; /* local row index to be sent */
5232: sstarts = gen_to->starts;
5233: sprocs = gen_to->procs;
5234: sstatus = gen_to->sstatus;
5235: sbs = gen_to->bs;
5236: rstarts = gen_from->starts;
5237: rprocs = gen_from->procs;
5238: rbs = gen_from->bs;
5240: if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5241: if (scall == MAT_INITIAL_MATRIX) {
5242: /* i-array */
5243: /*---------*/
5244: /* post receives */
5245: for (i=0; i<nrecvs; i++) {
5246: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5247: nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5248: MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5249: }
5251: /* pack the outgoing message */
5252: PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);
5254: sstartsj[0] = 0;
5255: rstartsj[0] = 0;
5256: len = 0; /* total length of j or a array to be sent */
5257: k = 0;
5258: for (i=0; i<nsends; i++) {
5259: rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5260: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5261: for (j=0; j<nrows; j++) {
5262: row = srow[k] + B->rmap->range[rank]; /* global row idx */
5263: for (l=0; l<sbs; l++) {
5264: MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL); /* rowlength */
5266: rowlen[j*sbs+l] = ncols;
5268: len += ncols;
5269: MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);
5270: }
5271: k++;
5272: }
5273: MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);
5275: sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5276: }
5277: /* recvs and sends of i-array are completed */
5278: i = nrecvs;
5279: while (i--) {
5280: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5281: }
5282: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5284: /* allocate buffers for sending j and a arrays */
5285: PetscMalloc1((len+1),&bufj);
5286: PetscMalloc1((len+1),&bufa);
5288: /* create i-array of B_oth */
5289: PetscMalloc1((aBn+2),&b_othi);
5291: b_othi[0] = 0;
5292: len = 0; /* total length of j or a array to be received */
5293: k = 0;
5294: for (i=0; i<nrecvs; i++) {
5295: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5296: nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5297: for (j=0; j<nrows; j++) {
5298: b_othi[k+1] = b_othi[k] + rowlen[j];
5299: len += rowlen[j]; k++;
5300: }
5301: rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5302: }
5304: /* allocate space for j and a arrays of B_oth */
5305: PetscMalloc1((b_othi[aBn]+1),&b_othj);
5306: PetscMalloc1((b_othi[aBn]+1),&b_otha);
5308: /* j-array */
5309: /*---------*/
5310: /* post receives of j-array */
5311: for (i=0; i<nrecvs; i++) {
5312: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5313: MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5314: }
5316: /* pack the outgoing message j-array */
5317: k = 0;
5318: for (i=0; i<nsends; i++) {
5319: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5320: bufJ = bufj+sstartsj[i];
5321: for (j=0; j<nrows; j++) {
5322: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5323: for (ll=0; ll<sbs; ll++) {
5324: MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);
5325: for (l=0; l<ncols; l++) {
5326: *bufJ++ = cols[l];
5327: }
5328: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);
5329: }
5330: }
5331: MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);
5332: }
5334: /* recvs and sends of j-array are completed */
5335: i = nrecvs;
5336: while (i--) {
5337: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5338: }
5339: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5340: } else if (scall == MAT_REUSE_MATRIX) {
5341: sstartsj = *startsj_s;
5342: rstartsj = *startsj_r;
5343: bufa = *bufa_ptr;
5344: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5345: b_otha = b_oth->a;
5346: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5348: /* a-array */
5349: /*---------*/
5350: /* post receives of a-array */
5351: for (i=0; i<nrecvs; i++) {
5352: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5353: MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
5354: }
5356: /* pack the outgoing message a-array */
5357: k = 0;
5358: for (i=0; i<nsends; i++) {
5359: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5360: bufA = bufa+sstartsj[i];
5361: for (j=0; j<nrows; j++) {
5362: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5363: for (ll=0; ll<sbs; ll++) {
5364: MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);
5365: for (l=0; l<ncols; l++) {
5366: *bufA++ = vals[l];
5367: }
5368: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);
5369: }
5370: }
5371: MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
5372: }
5373: /* recvs and sends of a-array are completed */
5374: i = nrecvs;
5375: while (i--) {
5376: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5377: }
5378: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5379: PetscFree2(rwaits,swaits);
5381: if (scall == MAT_INITIAL_MATRIX) {
5382: /* put together the new matrix */
5383: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);
5385: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5386: /* Since these are PETSc arrays, change flags to free them as necessary. */
5387: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5388: b_oth->free_a = PETSC_TRUE;
5389: b_oth->free_ij = PETSC_TRUE;
5390: b_oth->nonew = 0;
5392: PetscFree(bufj);
5393: if (!startsj_s || !bufa_ptr) {
5394: PetscFree2(sstartsj,rstartsj);
5395: PetscFree(bufa_ptr);
5396: } else {
5397: *startsj_s = sstartsj;
5398: *startsj_r = rstartsj;
5399: *bufa_ptr = bufa;
5400: }
5401: }
5402: PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);
5403: return(0);
5404: }
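/*
   A sketch of the call pattern for the internal helper MatGetBrowsOfAoCols_MPIAIJ(): the startsj/bufa
   buffers produced by the MAT_INITIAL_MATRIX call are handed back for the MAT_REUSE_MATRIX call and are
   freed by the caller. Names are illustrative and error checking is omitted:

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat        Both;
     MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&Both);
     ...                                                              change values of B, keep its nonzero pattern
     MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&Both);
     MatDestroy(&Both);
     PetscFree2(startsj_s,startsj_r);
     PetscFree(bufa);
*/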
5408: /*@C
5409: MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5411: Not Collective
5413: Input Parameter:
5414: . A - The matrix in mpiaij format
5416: Output Parameters:
5417: + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5418: . colmap - A map from global column index to local index into lvec
5419: - multScatter - A scatter from the argument of a matrix-vector product to lvec
5421: Level: developer
5423: @*/
5424: #if defined(PETSC_USE_CTABLE)
5425: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5426: #else
5427: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5428: #endif
5429: {
5430: Mat_MPIAIJ *a;
5437: a = (Mat_MPIAIJ*) A->data;
5438: if (lvec) *lvec = a->lvec;
5439: if (colmap) *colmap = a->colmap;
5440: if (multScatter) *multScatter = a->Mvctx;
5441: return(0);
5442: }
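/*
   A minimal usage sketch for MatGetCommunicationStructs(); A is assumed to be an assembled MATMPIAIJ
   matrix, and any output pointer that is not needed may be passed as NULL (here colmap, whose type
   depends on PETSC_USE_CTABLE). Error checking is omitted:

     Vec        lvec;
     VecScatter mvctx;
     MatGetCommunicationStructs(A,&lvec,NULL,&mvctx);
*/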
5444: PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5445: PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5446: PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5450: /*
5451: Computes C = A*B as (B'*A')', since the dense*sparse product A*B cannot be formed directly; the sparse*dense product B'*A' is computed and then transposed
5453:                  n                       p                        p
5454:           (              )         (              )          (              )
5455:         m (       A      )   *   n (       B      )   =    m (       C      )
5456:           (              )         (              )          (              )
5458: */
5459: PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5460: {
5462: Mat At,Bt,Ct;
5465: MatTranspose(A,MAT_INITIAL_MATRIX,&At);
5466: MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);
5467: MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);
5468: MatDestroy(&At);
5469: MatDestroy(&Bt);
5470: MatTranspose(Ct,MAT_REUSE_MATRIX,&C);
5471: MatDestroy(&Ct);
5472: return(0);
5473: }
5477: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5478: {
5480: PetscInt m=A->rmap->n,n=B->cmap->n;
5481: Mat Cmat;
5484: if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5485: MatCreate(PetscObjectComm((PetscObject)A),&Cmat);
5486: MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
5487: MatSetBlockSizesFromMats(Cmat,A,B);
5488: MatSetType(Cmat,MATMPIDENSE);
5489: MatMPIDenseSetPreallocation(Cmat,NULL);
5490: MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);
5491: MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);
5493: Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5495: *C = Cmat;
5496: return(0);
5497: }
5499: /* ----------------------------------------------------------------*/
5502: PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5503: {
5507: if (scall == MAT_INITIAL_MATRIX) {
5508: PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);
5509: MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);
5510: PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);
5511: }
5512: PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);
5513: MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);
5514: PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);
5515: return(0);
5516: }
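/*
   A minimal usage sketch of the public entry point that dispatches to the routine above: A is assumed
   to be an assembled MATMPIDENSE matrix and B an assembled MATMPIAIJ matrix; names are illustrative and
   error checking is omitted:

     Mat C;
     MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);   symbolic + numeric product C = A*B
     ...                                                    change values of A and/or B
     MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);     numeric product only, reusing C
     MatDestroy(&C);
*/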
5518: #if defined(PETSC_HAVE_MUMPS)
5519: PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5520: #endif
5521: #if defined(PETSC_HAVE_PASTIX)
5522: PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5523: #endif
5524: #if defined(PETSC_HAVE_SUPERLU_DIST)
5525: PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5526: #endif
5527: #if defined(PETSC_HAVE_CLIQUE)
5528: PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5529: #endif
5531: /*MC
5532: MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5534: Options Database Keys:
5535: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5537: Level: beginner
5539: .seealso: MatCreateAIJ()
5540: M*/
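/*
   A minimal creation sketch for a MATMPIAIJ matrix; the global sizes and preallocation figures are
   illustrative, and error checking is omitted:

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);       at most 5 nonzeros per row in the diagonal block, 2 in the off-diagonal block
     then fill with MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd()
*/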
5544: PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5545: {
5546: Mat_MPIAIJ *b;
5548: PetscMPIInt size;
5551: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
5553: PetscNewLog(B,&b);
5554: B->data = (void*)b;
5555: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
5556: B->assembled = PETSC_FALSE;
5557: B->insertmode = NOT_SET_VALUES;
5558: b->size = size;
5560: MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);
5562: /* build cache for off array entries formed */
5563: MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);
5565: b->donotstash = PETSC_FALSE;
5566: b->colmap = 0;
5567: b->garray = 0;
5568: b->roworiented = PETSC_TRUE;
5570: /* stuff used for matrix vector multiply */
5571: b->lvec = NULL;
5572: b->Mvctx = NULL;
5574: /* stuff for MatGetRow() */
5575: b->rowindices = 0;
5576: b->rowvalues = 0;
5577: b->getrowactive = PETSC_FALSE;
5579: /* flexible pointer used in CUSP/CUSPARSE classes */
5580: b->spptr = NULL;
5582: #if defined(PETSC_HAVE_MUMPS)
5583: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);
5584: #endif
5585: #if defined(PETSC_HAVE_PASTIX)
5586: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);
5587: #endif
5588: #if defined(PETSC_HAVE_SUPERLU_DIST)
5589: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);
5590: #endif
5591: #if defined(PETSC_HAVE_CLIQUE)
5592: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);
5593: #endif
5594: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);
5595: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);
5596: PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);
5597: PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);
5598: PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);
5599: PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);
5600: PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);
5601: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);
5602: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);
5603: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);
5604: PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);
5605: PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);
5606: PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);
5607: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);
5608: return(0);
5609: }
5613: /*@
5614: MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5615: and "off-diagonal" part of the matrix in CSR format.
5617: Collective on MPI_Comm
5619: Input Parameters:
5620: + comm - MPI communicator
5621: . m - number of local rows (Cannot be PETSC_DECIDE)
5622: . n - This value should be the same as the local size used in creating the
5623: x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5624: calculated if N is given). For square matrices n is almost always m.
5625: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5626: . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5627: . i - row indices for "diagonal" portion of matrix
5628: . j - column indices
5629: . a - matrix values
5630: . oi - row indices for "off-diagonal" portion of matrix
5631: . oj - column indices
5632: - oa - matrix values
5634: Output Parameter:
5635: . mat - the matrix
5637: Level: advanced
5639: Notes:
5640: The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5641: must free the arrays once the matrix has been destroyed and not before.
5643: The i and j indices are 0 based
5645: See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5647: This sets local rows and cannot be used to set off-processor values.
5649: Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5650: legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5651: not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5652: the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5653: keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5654: communication if it is known that only local entries will be set.
5656: .keywords: matrix, aij, compressed row, sparse, parallel
5658: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5659: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5660: @*/
5661: PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5662: {
5664: Mat_MPIAIJ *maij;
5667: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5668: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5669: if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5670: MatCreate(comm,mat);
5671: MatSetSizes(*mat,m,n,M,N);
5672: MatSetType(*mat,MATMPIAIJ);
5673: maij = (Mat_MPIAIJ*) (*mat)->data;
5675: (*mat)->preallocated = PETSC_TRUE;
5677: PetscLayoutSetUp((*mat)->rmap);
5678: PetscLayoutSetUp((*mat)->cmap);
5680: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);
5681: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);
5683: MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);
5684: MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);
5685: MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);
5686: MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);
5688: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
5689: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
5690: MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
5691: return(0);
5692: }
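/*
   A uniprocess sketch for MatCreateMPIAIJWithSplitArrays() building the 2x2 matrix [1 2; 0 3]; on one
   process the off-diagonal part is empty, so oi is all zeros and oj/oa are dummy arrays. The arrays are
   not copied and must outlive the matrix. Names and values are illustrative, error checking omitted:

     PetscInt    i[]  = {0,2,3},   j[]  = {0,1,1};
     PetscScalar a[]  = {1.0,2.0,3.0};
     PetscInt    oi[] = {0,0,0},   oj[] = {0};
     PetscScalar oa[] = {0.0};
     Mat         A;
     MatCreateMPIAIJWithSplitArrays(PETSC_COMM_SELF,2,2,2,2,i,j,a,oi,oj,oa,&A);
     ...
     MatDestroy(&A);
*/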
5694: /*
5695: Special version for direct calls from Fortran
5696: */
5697: #include <petsc-private/fortranimpl.h>
5699: #if defined(PETSC_HAVE_FORTRAN_CAPS)
5700: #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5701: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5702: #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5703: #endif
5705: /* Change these macros so they can be used in a void function */
5706: #undef CHKERRQ
5707: #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5708: #undef SETERRQ2
5709: #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5710: #undef SETERRQ3
5711: #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5712: #undef SETERRQ
5713: #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5717: PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5718: {
5719: Mat mat = *mmat;
5720: PetscInt m = *mm, n = *mn;
5721: InsertMode addv = *maddv;
5722: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
5723: PetscScalar value;
5726: MatCheckPreallocated(mat,1);
5727: if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5729: #if defined(PETSC_USE_DEBUG)
5730: else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5731: #endif
5732: {
5733: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5734: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5735: PetscBool roworiented = aij->roworiented;
5737: /* Some Variables required in the macro */
5738: Mat A = aij->A;
5739: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
5740: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5741: MatScalar *aa = a->a;
5742: PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5743: Mat B = aij->B;
5744: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
5745: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5746: MatScalar *ba = b->a;
5748: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5749: PetscInt nonew = a->nonew;
5750: MatScalar *ap1,*ap2;
5753: for (i=0; i<m; i++) {
5754: if (im[i] < 0) continue;
5755: #if defined(PETSC_USE_DEBUG)
5756: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5757: #endif
5758: if (im[i] >= rstart && im[i] < rend) {
5759: row = im[i] - rstart;
5760: lastcol1 = -1;
5761: rp1 = aj + ai[row];
5762: ap1 = aa + ai[row];
5763: rmax1 = aimax[row];
5764: nrow1 = ailen[row];
5765: low1 = 0;
5766: high1 = nrow1;
5767: lastcol2 = -1;
5768: rp2 = bj + bi[row];
5769: ap2 = ba + bi[row];
5770: rmax2 = bimax[row];
5771: nrow2 = bilen[row];
5772: low2 = 0;
5773: high2 = nrow2;
5775: for (j=0; j<n; j++) {
5776: if (roworiented) value = v[i*n+j];
5777: else value = v[i+j*m];
5778: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5779: if (in[j] >= cstart && in[j] < cend) {
5780: col = in[j] - cstart;
5781: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5782: } else if (in[j] < 0) continue;
5783: #if defined(PETSC_USE_DEBUG)
5784: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5785: #endif
5786: else {
5787: if (mat->was_assembled) {
5788: if (!aij->colmap) {
5789: MatCreateColmap_MPIAIJ_Private(mat);
5790: }
5791: #if defined(PETSC_USE_CTABLE)
5792: PetscTableFind(aij->colmap,in[j]+1,&col);
5793: col--;
5794: #else
5795: col = aij->colmap[in[j]] - 1;
5796: #endif
5797: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5798: MatDisAssemble_MPIAIJ(mat);
5799: col = in[j];
5800: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5801: B = aij->B;
5802: b = (Mat_SeqAIJ*)B->data;
5803: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5804: rp2 = bj + bi[row];
5805: ap2 = ba + bi[row];
5806: rmax2 = bimax[row];
5807: nrow2 = bilen[row];
5808: low2 = 0;
5809: high2 = nrow2;
5810: bm = aij->B->rmap->n;
5811: ba = b->a;
5812: }
5813: } else col = in[j];
5814: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5815: }
5816: }
5817: } else if (!aij->donotstash) {
5818: if (roworiented) {
5819: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5820: } else {
5821: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5822: }
5823: }
5824: }
5825: }
5826: PetscFunctionReturnVoid();
5827: }