Actual source code: mpiaij.c
petsc-3.4.5 2014-06-29
2: #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
3: #include <petsc-private/vecimpl.h>
4: #include <petscblaslapack.h>
5: #include <petscsf.h>
7: /*MC
8: MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10: This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11: and MATMPIAIJ otherwise. As a result, for single process communicators,
12: MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13: for communicators controlling multiple processes. It is recommended that you call both of
14: the above preallocation routines for simplicity.
16: Options Database Keys:
17: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19: Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches
20: over to using inodes when enough of them exist.
22: Level: beginner
24: .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25: M*/
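/*
   A minimal usage sketch of the pattern the MATAIJ manual page above recommends: create the
   matrix, set the type, and call both preallocation routines so the same code works on one or
   many processes (only the routine matching the communicator takes effect). The helper name,
   the global size 100, and the per-row nonzero estimates 5/2 are illustrative placeholders;
   error checking is omitted for brevity.
*/
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,Mat *mat)
{
  PetscInt i,rstart,rend;

  MatCreate(comm,mat);
  MatSetSizes(*mat,PETSC_DECIDE,PETSC_DECIDE,100,100);
  MatSetType(*mat,MATAIJ);                         /* or -mat_type aij together with MatSetFromOptions() */
  MatSeqAIJSetPreallocation(*mat,5,NULL);          /* takes effect on a one-process communicator */
  MatMPIAIJSetPreallocation(*mat,5,NULL,2,NULL);   /* takes effect on a multi-process communicator */
  MatGetOwnershipRange(*mat,&rstart,&rend);
  for (i=rstart; i<rend; i++) {
    MatSetValue(*mat,i,i,1.0,INSERT_VALUES);       /* insert with global indices into locally owned rows */
  }
  MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
  return(0);
}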
27: /*MC
28: MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30: This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31: and MATMPIAIJCRL otherwise. As a result, for single process communicators,
32: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33: for communicators controlling multiple processes. It is recommended that you call both of
34: the above preallocation routines for simplicity.
36: Options Database Keys:
37: . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39: Level: beginner
41: .seealso: MatCreateMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42: M*/
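/*
   A minimal sketch of selecting this format at run time, as the option documented above
   suggests: build the matrix generically and let MatSetFromOptions() pick up -mat_type aijcrl
   from the command line. The helper name, sizes, and nonzero estimates are illustrative
   placeholders; error checking is omitted.
*/
static PetscErrorCode ExampleCreateAIJCRL(MPI_Comm comm,Mat *mat)
{
  MatCreate(comm,mat);
  MatSetSizes(*mat,PETSC_DECIDE,PETSC_DECIDE,100,100);
  MatSetFromOptions(*mat);                         /* run with -mat_type aijcrl to obtain this format */
  MatSeqAIJSetPreallocation(*mat,5,NULL);          /* honored on a one-process communicator */
  MatMPIAIJSetPreallocation(*mat,5,NULL,2,NULL);   /* honored on a multi-process communicator */
  return(0);
}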
46: PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47: {
48: PetscErrorCode ierr;
49: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
50: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data;
51: Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data;
52: const PetscInt *ia,*ib;
53: const MatScalar *aa,*bb;
54: PetscInt na,nb,i,j,*rows,cnt=0,n0rows;
55: PetscInt m = M->rmap->n,rstart = M->rmap->rstart;
58: *keptrows = 0;
59: ia = a->i;
60: ib = b->i;
61: for (i=0; i<m; i++) {
62: na = ia[i+1] - ia[i];
63: nb = ib[i+1] - ib[i];
64: if (!na && !nb) {
65: cnt++;
66: goto ok1;
67: }
68: aa = a->a + ia[i];
69: for (j=0; j<na; j++) {
70: if (aa[j] != 0.0) goto ok1;
71: }
72: bb = b->a + ib[i];
73: for (j=0; j <nb; j++) {
74: if (bb[j] != 0.0) goto ok1;
75: }
76: cnt++;
77: ok1:;
78: }
79: MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));
80: if (!n0rows) return(0);
81: PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);
82: cnt = 0;
83: for (i=0; i<m; i++) {
84: na = ia[i+1] - ia[i];
85: nb = ib[i+1] - ib[i];
86: if (!na && !nb) continue;
87: aa = a->a + ia[i];
88: for (j=0; j<na;j++) {
89: if (aa[j] != 0.0) {
90: rows[cnt++] = rstart + i;
91: goto ok2;
92: }
93: }
94: bb = b->a + ib[i];
95: for (j=0; j<nb; j++) {
96: if (bb[j] != 0.0) {
97: rows[cnt++] = rstart + i;
98: goto ok2;
99: }
100: }
101: ok2:;
102: }
103: ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);
104: return(0);
105: }
109: PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110: {
111: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data;
113: PetscInt i,rstart,nrows,*rows;
116: *zrows = NULL;
117: MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);
118: MatGetOwnershipRange(M,&rstart,NULL);
119: for (i=0; i<nrows; i++) rows[i] += rstart;
120: ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);
121: return(0);
122: }
126: PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127: {
129: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
130: PetscInt i,n,*garray = aij->garray;
131: Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data;
132: Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data;
133: PetscReal *work;
136: MatGetSize(A,NULL,&n);
137: PetscMalloc(n*sizeof(PetscReal),&work);
138: PetscMemzero(work,n*sizeof(PetscReal));
139: if (type == NORM_2) {
140: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142: }
143: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145: }
146: } else if (type == NORM_1) {
147: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149: }
150: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152: }
153: } else if (type == NORM_INFINITY) {
154: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155: work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156: }
157: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158: work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159: }
161: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162: if (type == NORM_INFINITY) {
163: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
164: } else {
165: MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
166: }
167: PetscFree(work);
168: if (type == NORM_2) {
169: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170: }
171: return(0);
172: }
176: /*
177: Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178: MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
180: Only for square matrices
182: Used by a preconditioner, hence PETSC_EXTERN
183: */
184: PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185: {
186: PetscMPIInt rank,size;
187: PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
189: Mat mat;
190: Mat_SeqAIJ *gmata;
191: PetscMPIInt tag;
192: MPI_Status status;
193: PetscBool aij;
194: MatScalar *gmataa,*ao,*ad,*gmataarestore=0;
197: MPI_Comm_rank(comm,&rank);
198: MPI_Comm_size(comm,&size);
199: if (!rank) {
200: PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);
201: if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202: }
203: if (reuse == MAT_INITIAL_MATRIX) {
204: MatCreate(comm,&mat);
205: MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
206: if (!rank) {
207: bses[0] = gmat->rmap->bs;
208: bses[1] = gmat->cmap->bs;
209: }
210: MPI_Bcast(bses,2,MPIU_INT,0,comm);
211: MatSetBlockSizes(mat,bses[0],bses[1]);
212: MatSetType(mat,MATAIJ);
213: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
214: PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);
215: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
217: rowners[0] = 0;
218: for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219: rstart = rowners[rank];
220: rend = rowners[rank+1];
221: PetscObjectGetNewTag((PetscObject)mat,&tag);
222: if (!rank) {
223: gmata = (Mat_SeqAIJ*) gmat->data;
224: /* send row lengths to all processors */
225: for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226: for (i=1; i<size; i++) {
227: MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
228: }
229: /* determine the number of diagonal and off-diagonal nonzeros in each row */
230: PetscMemzero(olens,m*sizeof(PetscInt));
231: PetscMalloc(m*sizeof(PetscInt),&ld);
232: PetscMemzero(ld,m*sizeof(PetscInt));
233: jj = 0;
234: for (i=0; i<m; i++) {
235: for (j=0; j<dlens[i]; j++) {
236: if (gmata->j[jj] < rstart) ld[i]++;
237: if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238: jj++;
239: }
240: }
241: /* send column indices to other processes */
242: for (i=1; i<size; i++) {
243: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244: MPI_Send(&nz,1,MPIU_INT,i,tag,comm);
245: MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);
246: }
248: /* send numerical values to other processes */
249: for (i=1; i<size; i++) {
250: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251: MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
252: }
253: gmataa = gmata->a;
254: gmataj = gmata->j;
256: } else {
257: /* receive row lengths */
258: MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);
259: /* receive column indices */
260: MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);
261: PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);
262: MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);
263: /* determine the number of diagonal and off-diagonal nonzeros in each row */
264: PetscMemzero(olens,m*sizeof(PetscInt));
265: PetscMalloc(m*sizeof(PetscInt),&ld);
266: PetscMemzero(ld,m*sizeof(PetscInt));
267: jj = 0;
268: for (i=0; i<m; i++) {
269: for (j=0; j<dlens[i]; j++) {
270: if (gmataj[jj] < rstart) ld[i]++;
271: if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272: jj++;
273: }
274: }
275: /* receive numerical values */
276: PetscMemzero(gmataa,nz*sizeof(PetscScalar));
277: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
278: }
279: /* set preallocation */
280: for (i=0; i<m; i++) {
281: dlens[i] -= olens[i];
282: }
283: MatSeqAIJSetPreallocation(mat,0,dlens);
284: MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);
286: for (i=0; i<m; i++) {
287: dlens[i] += olens[i];
288: }
289: cnt = 0;
290: for (i=0; i<m; i++) {
291: row = rstart + i;
292: MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);
293: cnt += dlens[i];
294: }
295: if (rank) {
296: PetscFree2(gmataa,gmataj);
297: }
298: PetscFree2(dlens,olens);
299: PetscFree(rowners);
301: ((Mat_MPIAIJ*)(mat->data))->ld = ld;
303: *inmat = mat;
304: } else { /* column indices are already set; only need to move over numerical values from process 0 */
305: Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306: Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307: mat = *inmat;
308: PetscObjectGetNewTag((PetscObject)mat,&tag);
309: if (!rank) {
310: /* send numerical values to other processes */
311: gmata = (Mat_SeqAIJ*) gmat->data;
312: MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);
313: gmataa = gmata->a;
314: for (i=1; i<size; i++) {
315: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316: MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
317: }
318: nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319: } else {
320: /* receive numerical values from process 0*/
321: nz = Ad->nz + Ao->nz;
322: PetscMalloc(nz*sizeof(PetscScalar),&gmataa); gmataarestore = gmataa;
323: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
324: }
325: /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326: ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327: ad = Ad->a;
328: ao = Ao->a;
329: if (mat->rmap->n) {
330: i = 0;
331: nz = ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
332: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
333: }
334: for (i=1; i<mat->rmap->n; i++) {
335: nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
336: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
337: }
338: i--;
339: if (mat->rmap->n) {
340: nz = Ao->i[i+1] - Ao->i[i] - ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));
341: }
342: if (rank) {
343: PetscFree(gmataarestore);
344: }
345: }
346: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
347: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
348: return(0);
349: }
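/*
   A hedged sketch of how a caller might drive MatDistribute_MPIAIJ(), based only on the
   comment and code above: rank 0 supplies a square MATSEQAIJ matrix gmat (ignored on the
   other ranks), every rank passes the number of local rows it wants, and a parallel AIJ
   matrix comes back. The helper name DistributeFromRankZero and the even row split are
   assumptions made for illustration.
*/
static PetscErrorCode DistributeFromRankZero(MPI_Comm comm,Mat gmat,PetscInt Nglobal,Mat *dmat)
{
  PetscMPIInt rank,size;
  PetscInt    m;

  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);
  m = Nglobal/size + ((Nglobal % size) > rank ? 1 : 0);       /* simple balanced split of the rows */
  MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,dmat);
  /* a later call with MAT_REUSE_MATRIX only moves fresh numerical values from rank 0 */
  return(0);
}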
351: /*
352: Local utility routine that creates a mapping from the global column
353: number to the local number in the off-diagonal part of the local
354: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
355: a slightly higher hash-table cost; without it, it is not scalable (each process
356: holds an order-N integer array) but access is fast.
357: */
360: PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361: {
362: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
364: PetscInt n = aij->B->cmap->n,i;
367: if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368: #if defined(PETSC_USE_CTABLE)
369: PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);
370: for (i=0; i<n; i++) {
371: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);
372: }
373: #else
374: PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);
375: PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));
376: PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));
377: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378: #endif
379: return(0);
380: }
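/*
   For reference, the lookup side of the mapping built above, mirroring how it is used later
   in this file (for example in MatSetValues_MPIAIJ()): the table/array stores local_index+1,
   so a result of 0 (hence -1 after the decrement) means the global column does not appear in
   the off-diagonal block. The helper name and the argument gcol are illustrative, and the
   colmap must already have been created by the routine above.
*/
static PetscErrorCode ExampleColmapLookup(Mat mat,PetscInt gcol,PetscInt *lcol)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

#if defined(PETSC_USE_CTABLE)
  PetscTableFind(aij->colmap,gcol+1,lcol);
  (*lcol)--;                              /* -1 when gcol is not in the off-diagonal part */
#else
  *lcol = aij->colmap[gcol] - 1;          /* -1 when gcol is not in the off-diagonal part */
#endif
  return(0);
}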
382: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383: { \
384: if (col <= lastcol1) low1 = 0; \
385: else high1 = nrow1; \
386: lastcol1 = col;\
387: while (high1-low1 > 5) { \
388: t = (low1+high1)/2; \
389: if (rp1[t] > col) high1 = t; \
390: else low1 = t; \
391: } \
392: for (_i=low1; _i<high1; _i++) { \
393: if (rp1[_i] > col) break; \
394: if (rp1[_i] == col) { \
395: if (addv == ADD_VALUES) ap1[_i] += value; \
396: else ap1[_i] = value; \
397: goto a_noinsert; \
398: } \
399: } \
400: if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401: if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
402: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403: MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404: N = nrow1++ - 1; a->nz++; high1++; \
405: /* shift up all the later entries in this row */ \
406: for (ii=N; ii>=_i; ii--) { \
407: rp1[ii+1] = rp1[ii]; \
408: ap1[ii+1] = ap1[ii]; \
409: } \
410: rp1[_i] = col; \
411: ap1[_i] = value; \
412: a_noinsert: ; \
413: ailen[row] = nrow1; \
414: }
417: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418: { \
419: if (col <= lastcol2) low2 = 0; \
420: else high2 = nrow2; \
421: lastcol2 = col; \
422: while (high2-low2 > 5) { \
423: t = (low2+high2)/2; \
424: if (rp2[t] > col) high2 = t; \
425: else low2 = t; \
426: } \
427: for (_i=low2; _i<high2; _i++) { \
428: if (rp2[_i] > col) break; \
429: if (rp2[_i] == col) { \
430: if (addv == ADD_VALUES) ap2[_i] += value; \
431: else ap2[_i] = value; \
432: goto b_noinsert; \
433: } \
434: } \
435: if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436: if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
437: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438: MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439: N = nrow2++ - 1; b->nz++; high2++; \
440: /* shift up all the later entries in this row */ \
441: for (ii=N; ii>=_i; ii--) { \
442: rp2[ii+1] = rp2[ii]; \
443: ap2[ii+1] = ap2[ii]; \
444: } \
445: rp2[_i] = col; \
446: ap2[_i] = value; \
447: b_noinsert: ; \
448: bilen[row] = nrow2; \
449: }
453: PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454: {
455: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
456: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
458: PetscInt l,*garray = mat->garray,diag;
461: /* code only works for square matrices A */
463: /* find size of row to the left of the diagonal part */
464: MatGetOwnershipRange(A,&diag,0);
465: row = row - diag;
466: for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467: if (garray[b->j[b->i[row]+l]] > diag) break;
468: }
469: PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));
471: /* diagonal part */
472: PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));
474: /* right of diagonal part */
475: PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));
476: return(0);
477: }
481: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482: {
483: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
484: PetscScalar value;
486: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
487: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488: PetscBool roworiented = aij->roworiented;
490: /* Some Variables required in the macro */
491: Mat A = aij->A;
492: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
493: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494: MatScalar *aa = a->a;
495: PetscBool ignorezeroentries = a->ignorezeroentries;
496: Mat B = aij->B;
497: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
498: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499: MatScalar *ba = b->a;
501: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502: PetscInt nonew;
503: MatScalar *ap1,*ap2;
507: for (i=0; i<m; i++) {
508: if (im[i] < 0) continue;
509: #if defined(PETSC_USE_DEBUG)
510: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511: #endif
512: if (im[i] >= rstart && im[i] < rend) {
513: row = im[i] - rstart;
514: lastcol1 = -1;
515: rp1 = aj + ai[row];
516: ap1 = aa + ai[row];
517: rmax1 = aimax[row];
518: nrow1 = ailen[row];
519: low1 = 0;
520: high1 = nrow1;
521: lastcol2 = -1;
522: rp2 = bj + bi[row];
523: ap2 = ba + bi[row];
524: rmax2 = bimax[row];
525: nrow2 = bilen[row];
526: low2 = 0;
527: high2 = nrow2;
529: for (j=0; j<n; j++) {
530: if (v) {
531: if (roworiented) value = v[i*n+j];
532: else value = v[i+j*m];
533: } else value = 0.0;
534: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535: if (in[j] >= cstart && in[j] < cend) {
536: col = in[j] - cstart;
537: nonew = a->nonew;
538: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539: } else if (in[j] < 0) continue;
540: #if defined(PETSC_USE_DEBUG)
541: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542: #endif
543: else {
544: if (mat->was_assembled) {
545: if (!aij->colmap) {
546: MatCreateColmap_MPIAIJ_Private(mat);
547: }
548: #if defined(PETSC_USE_CTABLE)
549: PetscTableFind(aij->colmap,in[j]+1,&col);
550: col--;
551: #else
552: col = aij->colmap[in[j]] - 1;
553: #endif
554: if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555: MatDisAssemble_MPIAIJ(mat);
556: col = in[j];
557: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558: B = aij->B;
559: b = (Mat_SeqAIJ*)B->data;
560: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561: rp2 = bj + bi[row];
562: ap2 = ba + bi[row];
563: rmax2 = bimax[row];
564: nrow2 = bilen[row];
565: low2 = 0;
566: high2 = nrow2;
567: bm = aij->B->rmap->n;
568: ba = b->a;
569: } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570: } else col = in[j];
571: nonew = b->nonew;
572: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573: }
574: }
575: } else {
576: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577: if (!aij->donotstash) {
578: mat->assembled = PETSC_FALSE;
579: if (roworiented) {
580: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
581: } else {
582: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
583: }
584: }
585: }
586: }
587: return(0);
588: }
592: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593: {
594: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
596: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
600: for (i=0; i<m; i++) {
601: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603: if (idxm[i] >= rstart && idxm[i] < rend) {
604: row = idxm[i] - rstart;
605: for (j=0; j<n; j++) {
606: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608: if (idxn[j] >= cstart && idxn[j] < cend) {
609: col = idxn[j] - cstart;
610: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
611: } else {
612: if (!aij->colmap) {
613: MatCreateColmap_MPIAIJ_Private(mat);
614: }
615: #if defined(PETSC_USE_CTABLE)
616: PetscTableFind(aij->colmap,idxn[j]+1,&col);
617: col--;
618: #else
619: col = aij->colmap[idxn[j]] - 1;
620: #endif
621: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622: else {
623: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
624: }
625: }
626: }
627: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628: }
629: return(0);
630: }
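/*
   A small sketch reflecting the restriction enforced above ("Only local values currently
   supported"): query entries only from rows this process owns, obtained from the ownership
   range. The helper name, the choice of the first local row, and column 0 are illustrative.
*/
static PetscErrorCode ExampleGetLocalEntry(Mat mat,PetscScalar *val)
{
  PetscInt row,col = 0,rstart,rend;

  MatGetOwnershipRange(mat,&rstart,&rend);
  if (rend <= rstart) return(0);          /* this process owns no rows, nothing to query */
  row = rstart;                           /* any row with rstart <= row < rend is legal here */
  MatGetValues(mat,1,&row,1,&col,val);
  return(0);
}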
632: extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
636: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637: {
638: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
640: PetscInt nstash,reallocs;
641: InsertMode addv;
644: if (aij->donotstash || mat->nooffprocentries) return(0);
646: /* make sure all processors are either in INSERTMODE or ADDMODE */
647: MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
648: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649: mat->insertmode = addv; /* in case this processor had no cache */
651: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
652: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
653: PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
654: return(0);
655: }
659: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660: {
661: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
662: Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data;
664: PetscMPIInt n;
665: PetscInt i,j,rstart,ncols,flg;
666: PetscInt *row,*col;
667: PetscBool other_disassembled;
668: PetscScalar *val;
669: InsertMode addv = mat->insertmode;
671: /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
674: if (!aij->donotstash && !mat->nooffprocentries) {
675: while (1) {
676: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
677: if (!flg) break;
679: for (i=0; i<n; ) {
680: /* Now identify the consecutive vals belonging to the same row */
681: for (j=i,rstart=row[j]; j<n; j++) {
682: if (row[j] != rstart) break;
683: }
684: if (j < n) ncols = j-i;
685: else ncols = n-i;
686: /* Now assemble all these values with a single function call */
687: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
689: i = j;
690: }
691: }
692: MatStashScatterEnd_Private(&mat->stash);
693: }
694: MatAssemblyBegin(aij->A,mode);
695: MatAssemblyEnd(aij->A,mode);
697: /* determine if any processor has disassembled, if so we must
698: also disassemble ourselves, in order that we may reassemble. */
699: /*
700: if nonzero structure of submatrix B cannot change then we know that
701: no processor disassembled thus we can skip this stuff
702: */
703: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));
705: if (mat->was_assembled && !other_disassembled) {
706: MatDisAssemble_MPIAIJ(mat);
707: }
708: }
709: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710: MatSetUpMultiply_MPIAIJ(mat);
711: }
712: MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);
713: MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);
714: MatAssemblyBegin(aij->B,mode);
715: MatAssemblyEnd(aij->B,mode);
717: PetscFree2(aij->rowvalues,aij->rowindices);
719: aij->rowvalues = 0;
721: /* used by MatAXPY() */
722: a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
723: a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */
725: VecDestroy(&aij->diag);
726: if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
727: return(0);
728: }
732: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
733: {
734: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
738: MatZeroEntries(l->A);
739: MatZeroEntries(l->B);
740: return(0);
741: }
745: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
746: {
747: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
748: PetscErrorCode ierr;
749: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
750: PetscInt i,*owners = A->rmap->range;
751: PetscInt *nprocs,j,idx,nsends,row;
752: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
753: PetscInt *rvalues,count,base,slen,*source;
754: PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart;
755: MPI_Comm comm;
756: MPI_Request *send_waits,*recv_waits;
757: MPI_Status recv_status,*send_status;
758: const PetscScalar *xx;
759: PetscScalar *bb;
760: #if defined(PETSC_DEBUG)
761: PetscBool found = PETSC_FALSE;
762: #endif
765: PetscObjectGetComm((PetscObject)A,&comm);
766: /* first count number of contributors to each processor */
767: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
768: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
769: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
770: j = 0;
771: for (i=0; i<N; i++) {
772: if (lastidx > (idx = rows[i])) j = 0;
773: lastidx = idx;
774: for (; j<size; j++) {
775: if (idx >= owners[j] && idx < owners[j+1]) {
776: nprocs[2*j]++;
777: nprocs[2*j+1] = 1;
778: owner[i] = j;
779: #if defined(PETSC_DEBUG)
780: found = PETSC_TRUE;
781: #endif
782: break;
783: }
784: }
785: #if defined(PETSC_DEBUG)
786: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
787: found = PETSC_FALSE;
788: #endif
789: }
790: nsends = 0;
791: for (i=0; i<size; i++) nsends += nprocs[2*i+1];
793: if (A->nooffproczerorows) {
794: if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
795: nrecvs = nsends;
796: nmax = N;
797: } else {
798: /* inform other processors of number of messages and max length*/
799: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
800: }
802: /* post receives: */
803: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
804: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
805: for (i=0; i<nrecvs; i++) {
806: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
807: }
809: /* do sends:
810: 1) starts[i] gives the starting index in svalues for stuff going to
811: the ith processor
812: */
813: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
814: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
815: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
817: starts[0] = 0;
818: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
819: for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
821: starts[0] = 0;
822: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
823: count = 0;
824: for (i=0; i<size; i++) {
825: if (nprocs[2*i+1]) {
826: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
827: }
828: }
829: PetscFree(starts);
831: base = owners[rank];
833: /* wait on receives */
834: PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
835: count = nrecvs; slen = 0;
836: while (count) {
837: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
838: /* unpack receives into our local space */
839: MPI_Get_count(&recv_status,MPIU_INT,&n);
841: source[imdex] = recv_status.MPI_SOURCE;
842: lens[imdex] = n;
843: slen += n;
844: count--;
845: }
846: PetscFree(recv_waits);
848: /* move the data into the send scatter */
849: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
850: count = 0;
851: for (i=0; i<nrecvs; i++) {
852: values = rvalues + i*nmax;
853: for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
854: }
855: PetscFree(rvalues);
856: PetscFree2(lens,source);
857: PetscFree(owner);
858: PetscFree(nprocs);
860: /* fix right hand side if needed */
861: if (x && b) {
862: VecGetArrayRead(x,&xx);
863: VecGetArray(b,&bb);
864: for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
865: VecRestoreArrayRead(x,&xx);
866: VecRestoreArray(b,&bb);
867: }
868: /*
869: Zero the required rows. If the "diagonal block" of the matrix
870: is square and the user wishes to set the diagonal we use separate
871: code so that MatSetValues() is not called for each diagonal entry, which
872: would allocate new memory, triggering many mallocs and slowing things down.
874: */
875: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
876: MatZeroRows(l->B,slen,lrows,0.0,0,0);
877: if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
878: MatZeroRows(l->A,slen,lrows,diag,0,0);
879: } else if (diag != 0.0) {
880: MatZeroRows(l->A,slen,lrows,0.0,0,0);
881: if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
882: for (i = 0; i < slen; i++) {
883: row = lrows[i] + rstart;
884: MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
885: }
886: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
887: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
888: } else {
889: MatZeroRows(l->A,slen,lrows,0.0,0,0);
890: }
891: PetscFree(lrows);
893: /* wait on sends */
894: if (nsends) {
895: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
896: MPI_Waitall(nsends,send_waits,send_status);
897: PetscFree(send_status);
898: }
899: PetscFree(send_waits);
900: PetscFree(svalues);
901: return(0);
902: }
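/*
   A hedged user-level sketch of the x/b handling implemented above: passing the solution
   vector x and right-hand side b together with a nonzero diag makes the routine set
   b[row] = diag*x[row] for every zeroed row, the usual way of imposing Dirichlet values.
   The helper name and the choice of the first locally owned row are illustrative.
*/
static PetscErrorCode ExampleZeroDirichletRow(Mat A,Vec x,Vec b)
{
  PetscInt row,rstart;

  MatGetOwnershipRange(A,&rstart,NULL);
  row = rstart;                           /* assumes this process owns at least one row */
  MatZeroRows(A,1,&row,1.0,x,b);          /* zero the row, put 1.0 on the diagonal, fix b from x */
  return(0);
}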
906: PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907: {
908: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
909: PetscErrorCode ierr;
910: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
911: PetscInt i,*owners = A->rmap->range;
912: PetscInt *nprocs,j,idx,nsends;
913: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
914: PetscInt *rvalues,count,base,slen,*source;
915: PetscInt *lens,*lrows,*values,m;
916: MPI_Comm comm;
917: MPI_Request *send_waits,*recv_waits;
918: MPI_Status recv_status,*send_status;
919: const PetscScalar *xx;
920: PetscScalar *bb,*mask;
921: Vec xmask,lmask;
922: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data;
923: const PetscInt *aj, *ii,*ridx;
924: PetscScalar *aa;
925: #if defined(PETSC_DEBUG)
926: PetscBool found = PETSC_FALSE;
927: #endif
930: PetscObjectGetComm((PetscObject)A,&comm);
931: /* first count number of contributors to each processor */
932: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
933: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
934: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
935: j = 0;
936: for (i=0; i<N; i++) {
937: if (lastidx > (idx = rows[i])) j = 0;
938: lastidx = idx;
939: for (; j<size; j++) {
940: if (idx >= owners[j] && idx < owners[j+1]) {
941: nprocs[2*j]++;
942: nprocs[2*j+1] = 1;
943: owner[i] = j;
944: #if defined(PETSC_DEBUG)
945: found = PETSC_TRUE;
946: #endif
947: break;
948: }
949: }
950: #if defined(PETSC_DEBUG)
951: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
952: found = PETSC_FALSE;
953: #endif
954: }
955: nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1];
957: /* inform other processors of number of messages and max length*/
958: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
960: /* post receives: */
961: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
962: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
963: for (i=0; i<nrecvs; i++) {
964: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
965: }
967: /* do sends:
968: 1) starts[i] gives the starting index in svalues for stuff going to
969: the ith processor
970: */
971: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
972: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
973: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
975: starts[0] = 0;
976: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
977: for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
979: starts[0] = 0;
980: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
981: count = 0;
982: for (i=0; i<size; i++) {
983: if (nprocs[2*i+1]) {
984: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
985: }
986: }
987: PetscFree(starts);
989: base = owners[rank];
991: /* wait on receives */
992: PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
993: count = nrecvs; slen = 0;
994: while (count) {
995: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
996: /* unpack receives into our local space */
997: MPI_Get_count(&recv_status,MPIU_INT,&n);
999: source[imdex] = recv_status.MPI_SOURCE;
1000: lens[imdex] = n;
1001: slen += n;
1002: count--;
1003: }
1004: PetscFree(recv_waits);
1006: /* move the data into the send scatter */
1007: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
1008: count = 0;
1009: for (i=0; i<nrecvs; i++) {
1010: values = rvalues + i*nmax;
1011: for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1012: }
1013: PetscFree(rvalues);
1014: PetscFree2(lens,source);
1015: PetscFree(owner);
1016: PetscFree(nprocs);
1017: /* lrows are the local rows to be zeroed, slen is the number of local rows */
1019: /* zero diagonal part of matrix */
1020: MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);
1022: /* handle off diagonal part of matrix */
1023: MatGetVecs(A,&xmask,NULL);
1024: VecDuplicate(l->lvec,&lmask);
1025: VecGetArray(xmask,&bb);
1026: for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1027: VecRestoreArray(xmask,&bb);
1028: VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1029: VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1030: VecDestroy(&xmask);
1031: if (x) {
1032: VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);
1033: VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);
1034: VecGetArrayRead(l->lvec,&xx);
1035: VecGetArray(b,&bb);
1036: }
1037: VecGetArray(lmask,&mask);
1039: /* remove zeroed rows of off diagonal matrix */
1040: ii = aij->i;
1041: for (i=0; i<slen; i++) {
1042: PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));
1043: }
1045: /* loop over all elements of off process part of matrix zeroing removed columns*/
1046: if (aij->compressedrow.use) {
1047: m = aij->compressedrow.nrows;
1048: ii = aij->compressedrow.i;
1049: ridx = aij->compressedrow.rindex;
1050: for (i=0; i<m; i++) {
1051: n = ii[i+1] - ii[i];
1052: aj = aij->j + ii[i];
1053: aa = aij->a + ii[i];
1055: for (j=0; j<n; j++) {
1056: if (PetscAbsScalar(mask[*aj])) {
1057: if (b) bb[*ridx] -= *aa*xx[*aj];
1058: *aa = 0.0;
1059: }
1060: aa++;
1061: aj++;
1062: }
1063: ridx++;
1064: }
1065: } else { /* do not use compressed row format */
1066: m = l->B->rmap->n;
1067: for (i=0; i<m; i++) {
1068: n = ii[i+1] - ii[i];
1069: aj = aij->j + ii[i];
1070: aa = aij->a + ii[i];
1071: for (j=0; j<n; j++) {
1072: if (PetscAbsScalar(mask[*aj])) {
1073: if (b) bb[i] -= *aa*xx[*aj];
1074: *aa = 0.0;
1075: }
1076: aa++;
1077: aj++;
1078: }
1079: }
1080: }
1081: if (x) {
1082: VecRestoreArray(b,&bb);
1083: VecRestoreArrayRead(l->lvec,&xx);
1084: }
1085: VecRestoreArray(lmask,&mask);
1086: VecDestroy(&lmask);
1087: PetscFree(lrows);
1089: /* wait on sends */
1090: if (nsends) {
1091: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1092: MPI_Waitall(nsends,send_waits,send_status);
1093: PetscFree(send_status);
1094: }
1095: PetscFree(send_waits);
1096: PetscFree(svalues);
1097: return(0);
1098: }
1102: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103: {
1104: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1106: PetscInt nt;
1109: VecGetLocalSize(xx,&nt);
1110: if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1112: (*a->A->ops->mult)(a->A,xx,yy);
1113: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1114: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1115: return(0);
1116: }
1120: PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121: {
1122: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1126: MatMultDiagonalBlock(a->A,bb,xx);
1127: return(0);
1128: }
1132: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133: {
1134: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1138: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1139: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1140: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1141: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1142: return(0);
1143: }
1147: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148: {
1149: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1151: PetscBool merged;
1154: VecScatterGetMerged(a->Mvctx,&merged);
1155: /* do nondiagonal part */
1156: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1157: if (!merged) {
1158: /* send it on its way */
1159: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1160: /* do local part */
1161: (*a->A->ops->multtranspose)(a->A,xx,yy);
1162: /* receive remote parts: note this assumes the values are not actually */
1163: /* added in yy until the next line, */
1164: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1165: } else {
1166: /* do local part */
1167: (*a->A->ops->multtranspose)(a->A,xx,yy);
1168: /* send it on its way */
1169: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1170: /* values actually were received in the Begin() but we need to call this nop */
1171: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1172: }
1173: return(0);
1174: }
1178: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
1179: {
1180: MPI_Comm comm;
1181: Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1182: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1183: IS Me,Notme;
1185: PetscInt M,N,first,last,*notme,i;
1186: PetscMPIInt size;
1189: /* Easy test: symmetric diagonal block */
1190: Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1191: MatIsTranspose(Adia,Bdia,tol,f);
1192: if (!*f) return(0);
1193: PetscObjectGetComm((PetscObject)Amat,&comm);
1194: MPI_Comm_size(comm,&size);
1195: if (size == 1) return(0);
1197: /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1198: MatGetSize(Amat,&M,&N);
1199: MatGetOwnershipRange(Amat,&first,&last);
1200: PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);
1201: for (i=0; i<first; i++) notme[i] = i;
1202: for (i=last; i<M; i++) notme[i-last+first] = i;
1203: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);
1204: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
1205: MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
1206: Aoff = Aoffs[0];
1207: MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
1208: Boff = Boffs[0];
1209: MatIsTranspose(Aoff,Boff,tol,f);
1210: MatDestroyMatrices(1,&Aoffs);
1211: MatDestroyMatrices(1,&Boffs);
1212: ISDestroy(&Me);
1213: ISDestroy(&Notme);
1214: PetscFree(notme);
1215: return(0);
1216: }
1220: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1221: {
1222: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1226: /* do nondiagonal part */
1227: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1228: /* send it on its way */
1229: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1230: /* do local part */
1231: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1232: /* receive remote parts */
1233: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1234: return(0);
1235: }
1237: /*
1238: This only works correctly for square matrices where the subblock A->A is the
1239: diagonal block
1240: */
1243: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1244: {
1246: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1249: if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1250: if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1251: MatGetDiagonal(a->A,v);
1252: return(0);
1253: }
1257: PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1258: {
1259: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1263: MatScale(a->A,aa);
1264: MatScale(a->B,aa);
1265: return(0);
1266: }
1270: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1271: {
1272: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1276: #if defined(PETSC_USE_LOG)
1277: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1278: #endif
1279: MatStashDestroy_Private(&mat->stash);
1280: VecDestroy(&aij->diag);
1281: MatDestroy(&aij->A);
1282: MatDestroy(&aij->B);
1283: #if defined(PETSC_USE_CTABLE)
1284: PetscTableDestroy(&aij->colmap);
1285: #else
1286: PetscFree(aij->colmap);
1287: #endif
1288: PetscFree(aij->garray);
1289: VecDestroy(&aij->lvec);
1290: VecScatterDestroy(&aij->Mvctx);
1291: PetscFree2(aij->rowvalues,aij->rowindices);
1292: PetscFree(aij->ld);
1293: PetscFree(mat->data);
1295: PetscObjectChangeTypeName((PetscObject)mat,0);
1296: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);
1297: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);
1298: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);
1299: PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);
1300: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);
1301: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);
1302: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);
1303: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);
1304: return(0);
1305: }
1309: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1310: {
1311: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1312: Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data;
1313: Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data;
1315: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
1316: int fd;
1317: PetscInt nz,header[4],*row_lengths,*range=0,rlen,i;
1318: PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1319: PetscScalar *column_values;
1320: PetscInt message_count,flowcontrolcount;
1321: FILE *file;
1324: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
1325: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
1326: nz = A->nz + B->nz;
1327: if (!rank) {
1328: header[0] = MAT_FILE_CLASSID;
1329: header[1] = mat->rmap->N;
1330: header[2] = mat->cmap->N;
1332: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1333: PetscViewerBinaryGetDescriptor(viewer,&fd);
1334: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
1335: /* get largest number of rows any processor has */
1336: rlen = mat->rmap->n;
1337: range = mat->rmap->range;
1338: for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1339: } else {
1340: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1341: rlen = mat->rmap->n;
1342: }
1344: /* load up the local row counts */
1345: PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);
1346: for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1348: /* store the row lengths to the file */
1349: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1350: if (!rank) {
1351: PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);
1352: for (i=1; i<size; i++) {
1353: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1354: rlen = range[i+1] - range[i];
1355: MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));
1356: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);
1357: }
1358: PetscViewerFlowControlEndMaster(viewer,&message_count);
1359: } else {
1360: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1361: MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1362: PetscViewerFlowControlEndWorker(viewer,&message_count);
1363: }
1364: PetscFree(row_lengths);
1366: /* load up the local column indices */
1367: nzmax = nz; /* process 0 needs a buffer as large as the largest nz on any process */
1368: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));
1369: PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);
1370: cnt = 0;
1371: for (i=0; i<mat->rmap->n; i++) {
1372: for (j=B->i[i]; j<B->i[i+1]; j++) {
1373: if ((col = garray[B->j[j]]) > cstart) break;
1374: column_indices[cnt++] = col;
1375: }
1376: for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1377: for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1378: }
1379: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1381: /* store the column indices to the file */
1382: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1383: if (!rank) {
1384: MPI_Status status;
1385: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
1386: for (i=1; i<size; i++) {
1387: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1388: MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1389: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1390: MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));
1391: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);
1392: }
1393: PetscViewerFlowControlEndMaster(viewer,&message_count);
1394: } else {
1395: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1396: MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1397: MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1398: PetscViewerFlowControlEndWorker(viewer,&message_count);
1399: }
1400: PetscFree(column_indices);
1402: /* load up the local column values */
1403: PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
1404: cnt = 0;
1405: for (i=0; i<mat->rmap->n; i++) {
1406: for (j=B->i[i]; j<B->i[i+1]; j++) {
1407: if (garray[B->j[j]] > cstart) break;
1408: column_values[cnt++] = B->a[j];
1409: }
1410: for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1411: for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1412: }
1413: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1415: /* store the column values to the file */
1416: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1417: if (!rank) {
1418: MPI_Status status;
1419: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
1420: for (i=1; i<size; i++) {
1421: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1422: MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1423: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1424: MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));
1425: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);
1426: }
1427: PetscViewerFlowControlEndMaster(viewer,&message_count);
1428: } else {
1429: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1430: MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1431: MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
1432: PetscViewerFlowControlEndWorker(viewer,&message_count);
1433: }
1434: PetscFree(column_values);
1436: PetscViewerBinaryGetInfoPointer(viewer,&file);
1437: if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1438: return(0);
1439: }
1441: #include <petscdraw.h>
1444: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1445: {
1446: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1447: PetscErrorCode ierr;
1448: PetscMPIInt rank = aij->rank,size = aij->size;
1449: PetscBool isdraw,iascii,isbinary;
1450: PetscViewer sviewer;
1451: PetscViewerFormat format;
1454: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1455: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1456: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1457: if (iascii) {
1458: PetscViewerGetFormat(viewer,&format);
1459: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1460: MatInfo info;
1461: PetscBool inodes;
1463: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
1464: MatGetInfo(mat,MAT_LOCAL,&info);
1465: MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);
1466: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
1467: if (!inodes) {
1468: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1469: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1470: } else {
1471: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1472: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
1473: }
1474: MatGetInfo(aij->A,MAT_LOCAL,&info);
1475: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1476: MatGetInfo(aij->B,MAT_LOCAL,&info);
1477: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1478: PetscViewerFlush(viewer);
1479: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
1480: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
1481: VecScatterView(aij->Mvctx,viewer);
1482: return(0);
1483: } else if (format == PETSC_VIEWER_ASCII_INFO) {
1484: PetscInt inodecount,inodelimit,*inodes;
1485: MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);
1486: if (inodes) {
1487: PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);
1488: } else {
1489: PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");
1490: }
1491: return(0);
1492: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493: return(0);
1494: }
1495: } else if (isbinary) {
1496: if (size == 1) {
1497: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1498: MatView(aij->A,viewer);
1499: } else {
1500: MatView_MPIAIJ_Binary(mat,viewer);
1501: }
1502: return(0);
1503: } else if (isdraw) {
1504: PetscDraw draw;
1505: PetscBool isnull;
1506: PetscViewerDrawGetDraw(viewer,0,&draw);
1507: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1508: }
1510: if (size == 1) {
1511: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1512: MatView(aij->A,viewer);
1513: } else {
1514: /* assemble the entire matrix onto first processor. */
1515: Mat A;
1516: Mat_SeqAIJ *Aloc;
1517: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1518: MatScalar *a;
1520: if (mat->rmap->N > 1024) {
1521: PetscBool flg = PETSC_FALSE;
1523: PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);
1524: if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1525: }
1527: MatCreate(PetscObjectComm((PetscObject)mat),&A);
1528: if (!rank) {
1529: MatSetSizes(A,M,N,M,N);
1530: } else {
1531: MatSetSizes(A,0,0,M,N);
1532: }
1533: /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1534: MatSetType(A,MATMPIAIJ);
1535: MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);
1536: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
1537: PetscLogObjectParent(mat,A);
1539: /* copy over the A part */
1540: Aloc = (Mat_SeqAIJ*)aij->A->data;
1541: m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542: row = mat->rmap->rstart;
1543: for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1544: for (i=0; i<m; i++) {
1545: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
1546: row++;
1547: a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1548: }
1549: aj = Aloc->j;
1550: for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1552: /* copy over the B part */
1553: Aloc = (Mat_SeqAIJ*)aij->B->data;
1554: m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555: row = mat->rmap->rstart;
1556: PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);
1557: ct = cols;
1558: for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1559: for (i=0; i<m; i++) {
1560: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
1561: row++;
1562: a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1563: }
1564: PetscFree(ct);
1565: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1566: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1567: /*
1568: Everyone has to call to draw the matrix since the graphics waits are
1569: synchronized across all processors that share the PetscDraw object
1570: */
1571: PetscViewerGetSingleton(viewer,&sviewer);
1572: if (!rank) {
1573: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);
1574: /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1575: PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1576: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
1577: }
1578: PetscViewerRestoreSingleton(viewer,&sviewer);
1579: MatDestroy(&A);
1580: }
1581: return(0);
1582: }
1586: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1587: {
1589: PetscBool iascii,isdraw,issocket,isbinary;
1592: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1593: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1594: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1595: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1596: if (iascii || isdraw || isbinary || issocket) {
1597: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1598: }
1599: return(0);
1600: }
1604: PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1605: {
1606: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1608: Vec bb1 = 0;
1609: PetscBool hasop;
1612: if (flag == SOR_APPLY_UPPER) {
1613: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1614: return(0);
1615: }
1617: if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1618: VecDuplicate(bb,&bb1);
1619: }
1621: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1622: if (flag & SOR_ZERO_INITIAL_GUESS) {
1623: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1624: its--;
1625: }
1627: while (its--) {
1628: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1629: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1631: /* update rhs: bb1 = bb - B*x */
1632: VecScale(mat->lvec,-1.0);
1633: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1635: /* local sweep */
1636: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);
1637: }
1638: } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1639: if (flag & SOR_ZERO_INITIAL_GUESS) {
1640: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1641: its--;
1642: }
1643: while (its--) {
1644: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1645: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1647: /* update rhs: bb1 = bb - B*x */
1648: VecScale(mat->lvec,-1.0);
1649: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1651: /* local sweep */
1652: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);
1653: }
1654: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1655: if (flag & SOR_ZERO_INITIAL_GUESS) {
1656: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1657: its--;
1658: }
1659: while (its--) {
1660: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1661: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1663: /* update rhs: bb1 = bb - B*x */
1664: VecScale(mat->lvec,-1.0);
1665: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1667: /* local sweep */
1668: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);
1669: }
1670: } else if (flag & SOR_EISENSTAT) {
1671: Vec xx1;
1673: VecDuplicate(bb,&xx1);
1674: (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);
1676: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1677: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1678: if (!mat->diag) {
1679: MatGetVecs(matin,&mat->diag,NULL);
1680: MatGetDiagonal(matin,mat->diag);
1681: }
1682: MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);
1683: if (hasop) {
1684: MatMultDiagonalBlock(matin,xx,bb1);
1685: } else {
1686: VecPointwiseMult(bb1,mat->diag,xx);
1687: }
1688: VecAYPX(bb1,(omega-2.0)/omega,bb);
1690: MatMultAdd(mat->B,mat->lvec,bb1,bb1);
1692: /* local sweep */
1693: (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);
1694: VecAXPY(xx,1.0,xx1);
1695: VecDestroy(&xx1);
1696: } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1698: VecDestroy(&bb1);
1699: return(0);
1700: }
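/*
   Editor's sketch (not part of petsc-3.4.5): MatSOR_MPIAIJ() above only implements the
   SOR_LOCAL_* variants, i.e. SOR on each process's diagonal block mat->A with the
   off-process coupling mat->B*x folded into the right-hand side; a true global sweep
   errors out with PETSC_ERR_SUP. The hypothetical helper below performs one local
   symmetric sweep on an assembled MPIAIJ matrix A with conforming vectors b and x.
*/
static PetscErrorCode ExampleLocalSORSweep_Editor(Mat A,Vec b,Vec x)
{
  /* omega = 1.0, no diagonal shift, one outer iteration, one local iteration */
  MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);
  return(0);
}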
1704: PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1705: {
1706: Mat aA,aB,Aperm;
1707: const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1708: PetscScalar *aa,*ba;
1709: PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1710: PetscSF rowsf,sf;
1711: IS parcolp = NULL;
1712: PetscBool done;
1716: MatGetLocalSize(A,&m,&n);
1717: ISGetIndices(rowp,&rwant);
1718: ISGetIndices(colp,&cwant);
1719: PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);
1721: /* Invert row permutation to find out where my rows should go */
1722: PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);
1723: PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);
1724: PetscSFSetFromOptions(rowsf);
1725: for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1726: PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);
1727: PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);
1729: /* Invert column permutation to find out where my columns should go */
1730: PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1731: PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);
1732: PetscSFSetFromOptions(sf);
1733: for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1734: PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);
1735: PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);
1736: PetscSFDestroy(&sf);
1738: ISRestoreIndices(rowp,&rwant);
1739: ISRestoreIndices(colp,&cwant);
1740: MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);
1742: /* Find out where my gcols should go */
1743: MatGetSize(aB,NULL,&ng);
1744: PetscMalloc(ng*sizeof(PetscInt),&gcdest);
1745: PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1746: PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);
1747: PetscSFSetFromOptions(sf);
1748: PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);
1749: PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);
1750: PetscSFDestroy(&sf);
1752: PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);
1753: PetscMemzero(dnnz,m*sizeof(PetscInt));
1754: PetscMemzero(onnz,m*sizeof(PetscInt));
1755: MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);
1756: MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);
1757: for (i=0; i<m; i++) {
1758: PetscInt row = rdest[i],rowner;
1759: PetscLayoutFindOwner(A->rmap,row,&rowner);
1760: for (j=ai[i]; j<ai[i+1]; j++) {
1761: PetscInt cowner,col = cdest[aj[j]];
1762: PetscLayoutFindOwner(A->cmap,col,&cowner); /* Could build an index for the columns to eliminate this search */
1763: if (rowner == cowner) dnnz[i]++;
1764: else onnz[i]++;
1765: }
1766: for (j=bi[i]; j<bi[i+1]; j++) {
1767: PetscInt cowner,col = gcdest[bj[j]];
1768: PetscLayoutFindOwner(A->cmap,col,&cowner);
1769: if (rowner == cowner) dnnz[i]++;
1770: else onnz[i]++;
1771: }
1772: }
1773: PetscMemzero(tdnnz,m*sizeof(PetscInt));
1774: PetscMemzero(tonnz,m*sizeof(PetscInt));
1775: PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);
1776: PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);
1777: PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);
1778: PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);
1779: PetscSFDestroy(&rowsf);
1781: MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);
1782: MatSeqAIJGetArray(aA,&aa);
1783: MatSeqAIJGetArray(aB,&ba);
1784: for (i=0; i<m; i++) {
1785: PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1786: PetscInt rowlen;
1787: rowlen = ai[i+1] - ai[i];
1788: for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1789: MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);
1790: rowlen = bi[i+1] - bi[i];
1791: for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1792: MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);
1793: }
1794: MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);
1795: MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);
1796: MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);
1797: MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);
1798: MatSeqAIJRestoreArray(aA,&aa);
1799: MatSeqAIJRestoreArray(aB,&ba);
1800: PetscFree4(dnnz,onnz,tdnnz,tonnz);
1801: PetscFree3(work,rdest,cdest);
1802: PetscFree(gcdest);
1803: if (parcolp) {ISDestroy(&colp);}
1804: *B = Aperm;
1805: return(0);
1806: }
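/*
   Editor's sketch (not part of petsc-3.4.5): MatPermute_MPIAIJ() above uses two PetscSFs to
   invert the row and column permutations, counts diagonal/off-diagonal nonzeros per
   destination row, and then inserts the permuted rows into a freshly preallocated matrix.
   The hypothetical helper below calls it through the public MatPermute() interface with the
   identity permutation (each process passes the global indices of its own rows and columns),
   which simply reproduces A.
*/
static PetscErrorCode ExamplePermuteIdentity_Editor(Mat A,Mat *B)
{
  IS       rowp,colp;
  PetscInt rstart,rend,cstart,cend;

  MatGetOwnershipRange(A,&rstart,&rend);
  MatGetOwnershipRangeColumn(A,&cstart,&cend);
  ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&rowp);
  ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&colp);
  MatPermute(A,rowp,colp,B);
  ISDestroy(&rowp);
  ISDestroy(&colp);
  return(0);
}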
1810: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1811: {
1812: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1813: Mat A = mat->A,B = mat->B;
1815: PetscReal isend[5],irecv[5];
1818: info->block_size = 1.0;
1819: MatGetInfo(A,MAT_LOCAL,info);
1821: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1822: isend[3] = info->memory; isend[4] = info->mallocs;
1824: MatGetInfo(B,MAT_LOCAL,info);
1826: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1827: isend[3] += info->memory; isend[4] += info->mallocs;
1828: if (flag == MAT_LOCAL) {
1829: info->nz_used = isend[0];
1830: info->nz_allocated = isend[1];
1831: info->nz_unneeded = isend[2];
1832: info->memory = isend[3];
1833: info->mallocs = isend[4];
1834: } else if (flag == MAT_GLOBAL_MAX) {
1835: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));
1837: info->nz_used = irecv[0];
1838: info->nz_allocated = irecv[1];
1839: info->nz_unneeded = irecv[2];
1840: info->memory = irecv[3];
1841: info->mallocs = irecv[4];
1842: } else if (flag == MAT_GLOBAL_SUM) {
1843: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));
1845: info->nz_used = irecv[0];
1846: info->nz_allocated = irecv[1];
1847: info->nz_unneeded = irecv[2];
1848: info->memory = irecv[3];
1849: info->mallocs = irecv[4];
1850: }
1851: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1852: info->fill_ratio_needed = 0;
1853: info->factor_mallocs = 0;
1854: return(0);
1855: }
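/*
   Editor's sketch (not part of petsc-3.4.5): the routine above adds the statistics of the
   diagonal block (A) and the off-diagonal block (B) and, for MAT_GLOBAL_MAX/MAT_GLOBAL_SUM,
   reduces them over the matrix communicator. A typical query through the public interface,
   using the hypothetical helper below:
*/
static PetscErrorCode ExamplePrintMatInfo_Editor(Mat A)
{
  MatInfo info;

  MatGetInfo(A,MAT_GLOBAL_SUM,&info);
  PetscPrintf(PetscObjectComm((PetscObject)A),"nz_used %g nz_allocated %g mallocs %g\n",
              (double)info.nz_used,(double)info.nz_allocated,(double)info.mallocs);
  return(0);
}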
1859: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1860: {
1861: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1865: switch (op) {
1866: case MAT_NEW_NONZERO_LOCATIONS:
1867: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1868: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1869: case MAT_KEEP_NONZERO_PATTERN:
1870: case MAT_NEW_NONZERO_LOCATION_ERR:
1871: case MAT_USE_INODES:
1872: case MAT_IGNORE_ZERO_ENTRIES:
1873: MatCheckPreallocated(A,1);
1874: MatSetOption(a->A,op,flg);
1875: MatSetOption(a->B,op,flg);
1876: break;
1877: case MAT_ROW_ORIENTED:
1878: a->roworiented = flg;
1880: MatSetOption(a->A,op,flg);
1881: MatSetOption(a->B,op,flg);
1882: break;
1883: case MAT_NEW_DIAGONALS:
1884: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1885: break;
1886: case MAT_IGNORE_OFF_PROC_ENTRIES:
1887: a->donotstash = flg;
1888: break;
1889: case MAT_SPD:
1890: A->spd_set = PETSC_TRUE;
1891: A->spd = flg;
1892: if (flg) {
1893: A->symmetric = PETSC_TRUE;
1894: A->structurally_symmetric = PETSC_TRUE;
1895: A->symmetric_set = PETSC_TRUE;
1896: A->structurally_symmetric_set = PETSC_TRUE;
1897: }
1898: break;
1899: case MAT_SYMMETRIC:
1900: MatSetOption(a->A,op,flg);
1901: break;
1902: case MAT_STRUCTURALLY_SYMMETRIC:
1903: MatSetOption(a->A,op,flg);
1904: break;
1905: case MAT_HERMITIAN:
1906: MatSetOption(a->A,op,flg);
1907: break;
1908: case MAT_SYMMETRY_ETERNAL:
1909: MatSetOption(a->A,op,flg);
1910: break;
1911: default:
1912: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1913: }
1914: return(0);
1915: }
1919: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1920: {
1921: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1922: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1924: PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1925: PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1926: PetscInt *cmap,*idx_p;
1929: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1930: mat->getrowactive = PETSC_TRUE;
1932: if (!mat->rowvalues && (idx || v)) {
1933: /*
1934: allocate enough space to hold information from the longest row.
1935: */
1936: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1937: PetscInt max = 1,tmp;
1938: for (i=0; i<matin->rmap->n; i++) {
1939: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1940: if (max < tmp) max = tmp;
1941: }
1942: PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);
1943: }
1945: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1946: lrow = row - rstart;
1948: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1949: if (!v) {pvA = 0; pvB = 0;}
1950: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1951: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1952: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1953: nztot = nzA + nzB;
1955: cmap = mat->garray;
1956: if (v || idx) {
1957: if (nztot) {
1958: /* Sort by increasing column numbers, assuming A and B already sorted */
1959: PetscInt imark = -1;
1960: if (v) {
1961: *v = v_p = mat->rowvalues;
1962: for (i=0; i<nzB; i++) {
1963: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1964: else break;
1965: }
1966: imark = i;
1967: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1968: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1969: }
1970: if (idx) {
1971: *idx = idx_p = mat->rowindices;
1972: if (imark > -1) {
1973: for (i=0; i<imark; i++) {
1974: idx_p[i] = cmap[cworkB[i]];
1975: }
1976: } else {
1977: for (i=0; i<nzB; i++) {
1978: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1979: else break;
1980: }
1981: imark = i;
1982: }
1983: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1984: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1985: }
1986: } else {
1987: if (idx) *idx = 0;
1988: if (v) *v = 0;
1989: }
1990: }
1991: *nz = nztot;
1992: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1993: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1994: return(0);
1995: }
1999: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2000: {
2001: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2004: if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2005: aij->getrowactive = PETSC_FALSE;
2006: return(0);
2007: }
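/*
   Editor's sketch (not part of petsc-3.4.5): MatGetRow_MPIAIJ() above merges one row of the
   diagonal and off-diagonal blocks into a single list with global, increasing column indices,
   using the work arrays allocated on first use; only locally owned rows may be requested, and
   each MatGetRow() must be matched by MatRestoreRow() before the next row is fetched. The
   hypothetical helper below walks all locally owned rows.
*/
static PetscErrorCode ExampleTraverseRows_Editor(Mat A)
{
  PetscInt          rstart,rend,row,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  MatGetOwnershipRange(A,&rstart,&rend);
  for (row=rstart; row<rend; row++) {
    MatGetRow(A,row,&ncols,&cols,&vals);
    /* inspect ncols, cols[], vals[] here */
    MatRestoreRow(A,row,&ncols,&cols,&vals);
  }
  return(0);
}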
2011: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2012: {
2013: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2014: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2016: PetscInt i,j,cstart = mat->cmap->rstart;
2017: PetscReal sum = 0.0;
2018: MatScalar *v;
2021: if (aij->size == 1) {
2022: MatNorm(aij->A,type,norm);
2023: } else {
2024: if (type == NORM_FROBENIUS) {
2025: v = amat->a;
2026: for (i=0; i<amat->nz; i++) {
2027: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2028: }
2029: v = bmat->a;
2030: for (i=0; i<bmat->nz; i++) {
2031: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2032: }
2033: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
2034: *norm = PetscSqrtReal(*norm);
2035: } else if (type == NORM_1) { /* max column norm */
2036: PetscReal *tmp,*tmp2;
2037: PetscInt *jj,*garray = aij->garray;
2038: PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);
2039: PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);
2040: PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));
2041: *norm = 0.0;
2042: v = amat->a; jj = amat->j;
2043: for (j=0; j<amat->nz; j++) {
2044: tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
2045: }
2046: v = bmat->a; jj = bmat->j;
2047: for (j=0; j<bmat->nz; j++) {
2048: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2049: }
2050: MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
2051: for (j=0; j<mat->cmap->N; j++) {
2052: if (tmp2[j] > *norm) *norm = tmp2[j];
2053: }
2054: PetscFree(tmp);
2055: PetscFree(tmp2);
2056: } else if (type == NORM_INFINITY) { /* max row norm */
2057: PetscReal ntemp = 0.0;
2058: for (j=0; j<aij->A->rmap->n; j++) {
2059: v = amat->a + amat->i[j];
2060: sum = 0.0;
2061: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2062: sum += PetscAbsScalar(*v); v++;
2063: }
2064: v = bmat->a + bmat->i[j];
2065: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2066: sum += PetscAbsScalar(*v); v++;
2067: }
2068: if (sum > ntemp) ntemp = sum;
2069: }
2070: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));
2071: } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2072: }
2073: return(0);
2074: }
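/*
   Editor's sketch (not part of petsc-3.4.5): only NORM_1 (max column sum), NORM_FROBENIUS and
   NORM_INFINITY (max row sum) are implemented above; NORM_2 raises PETSC_ERR_SUP for MPIAIJ.
   Usage through the public interface, using the hypothetical helper below:
*/
static PetscErrorCode ExampleMatNorms_Editor(Mat A)
{
  PetscReal nrm1,nrmf,nrminf;

  MatNorm(A,NORM_1,&nrm1);
  MatNorm(A,NORM_FROBENIUS,&nrmf);
  MatNorm(A,NORM_INFINITY,&nrminf);
  PetscPrintf(PetscObjectComm((PetscObject)A),"norms: %g %g %g\n",(double)nrm1,(double)nrmf,(double)nrminf);
  return(0);
}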
2078: PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2079: {
2080: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2081: Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2083: PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2084: PetscInt cstart = A->cmap->rstart,ncol;
2085: Mat B;
2086: MatScalar *array;
2089: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2091: ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2092: ai = Aloc->i; aj = Aloc->j;
2093: bi = Bloc->i; bj = Bloc->j;
2094: if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2095: PetscInt *d_nnz,*g_nnz,*o_nnz;
2096: PetscSFNode *oloc;
2097: PETSC_UNUSED PetscSF sf;
2099: PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);
2100: /* compute d_nnz for preallocation */
2101: PetscMemzero(d_nnz,na*sizeof(PetscInt));
2102: for (i=0; i<ai[ma]; i++) {
2103: d_nnz[aj[i]]++;
2104: aj[i] += cstart; /* global col index to be used by MatSetValues() */
2105: }
2106: /* compute local off-diagonal contributions */
2107: PetscMemzero(g_nnz,nb*sizeof(PetscInt));
2108: for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2109: /* map those to global */
2110: PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
2111: PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);
2112: PetscSFSetFromOptions(sf);
2113: PetscMemzero(o_nnz,na*sizeof(PetscInt));
2114: PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);
2115: PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);
2116: PetscSFDestroy(&sf);
2118: MatCreate(PetscObjectComm((PetscObject)A),&B);
2119: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
2120: MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);
2121: MatSetType(B,((PetscObject)A)->type_name);
2122: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
2123: PetscFree4(d_nnz,o_nnz,g_nnz,oloc);
2124: } else {
2125: B = *matout;
2126: MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2127: for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2128: }
2130: /* copy over the A part */
2131: array = Aloc->a;
2132: row = A->rmap->rstart;
2133: for (i=0; i<ma; i++) {
2134: ncol = ai[i+1]-ai[i];
2135: MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);
2136: row++;
2137: array += ncol; aj += ncol;
2138: }
2139: aj = Aloc->j;
2140: for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2142: /* copy over the B part */
2143: PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);
2144: PetscMemzero(cols,bi[mb]*sizeof(PetscInt));
2145: array = Bloc->a;
2146: row = A->rmap->rstart;
2147: for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2148: cols_tmp = cols;
2149: for (i=0; i<mb; i++) {
2150: ncol = bi[i+1]-bi[i];
2151: MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);
2152: row++;
2153: array += ncol; cols_tmp += ncol;
2154: }
2155: PetscFree(cols);
2157: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2158: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2159: if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2160: *matout = B;
2161: } else {
2162: MatHeaderMerge(A,B);
2163: }
2164: return(0);
2165: }
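/*
   Editor's sketch (not part of petsc-3.4.5): with MAT_INITIAL_MATRIX the routine above
   computes the transpose preallocation (d_nnz/o_nnz) through a PetscSF reduction and builds a
   new matrix; with MAT_REUSE_MATRIX it refills an existing transpose, and the in-place case
   (*matout == A) is only allowed for square matrices. The hypothetical helper below shows the
   out-of-place call.
*/
static PetscErrorCode ExampleTranspose_Editor(Mat A,Mat *At)
{
  MatTranspose(A,MAT_INITIAL_MATRIX,At);
  /* for square A, MatTranspose(A,MAT_REUSE_MATRIX,&A) transposes in place */
  return(0);
}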
2169: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2170: {
2171: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2172: Mat a = aij->A,b = aij->B;
2174: PetscInt s1,s2,s3;
2177: MatGetLocalSize(mat,&s2,&s3);
2178: if (rr) {
2179: VecGetLocalSize(rr,&s1);
2180: if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2181: /* Overlap communication with computation. */
2182: VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2183: }
2184: if (ll) {
2185: VecGetLocalSize(ll,&s1);
2186: if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2187: (*b->ops->diagonalscale)(b,ll,0);
2188: }
2189: /* scale the diagonal block */
2190: (*a->ops->diagonalscale)(a,ll,rr);
2192: if (rr) {
2193: /* Do a scatter end and then right scale the off-diagonal block */
2194: VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2195: (*b->ops->diagonalscale)(b,0,aij->lvec);
2196: }
2197: return(0);
2198: }
2202: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2203: {
2204: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2208: MatSetUnfactored(a->A);
2209: return(0);
2210: }
2214: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
2215: {
2216: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2217: Mat a,b,c,d;
2218: PetscBool flg;
2222: a = matA->A; b = matA->B;
2223: c = matB->A; d = matB->B;
2225: MatEqual(a,c,&flg);
2226: if (flg) {
2227: MatEqual(b,d,&flg);
2228: }
2229: MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
2230: return(0);
2231: }
2235: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2236: {
2238: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2239: Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data;
2242: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2243: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2244: /* because of the column compression in the off-processor part of the matrix a->B,
2245: the number of columns in a->B and b->B may be different, hence we cannot call
2246: MatCopy() directly on the two parts. If need be, a more efficient copy than
2247: MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2248: then copying the submatrices */
2249: MatCopy_Basic(A,B,str);
2250: } else {
2251: MatCopy(a->A,b->A,str);
2252: MatCopy(a->B,b->B,str);
2253: }
2254: return(0);
2255: }
2259: PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2260: {
2264: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
2265: return(0);
2266: }
2270: /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2271: static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2272: {
2273: PetscInt i,m=Y->rmap->N;
2274: Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data;
2275: Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data;
2276: const PetscInt *xi = x->i,*yi = y->i;
2279: /* Set the number of nonzeros in the new matrix */
2280: for (i=0; i<m; i++) {
2281: PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2282: const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2283: nnz[i] = 0;
2284: for (j=0,k=0; j<nzx; j++) { /* Point in X */
2285: for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2286: if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */
2287: nnz[i]++;
2288: }
2289: for (; k<nzy; k++) nnz[i]++;
2290: }
2291: return(0);
2292: }
2296: PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2297: {
2299: PetscInt i;
2300: Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2301: PetscBLASInt bnz,one=1;
2302: Mat_SeqAIJ *x,*y;
2305: if (str == SAME_NONZERO_PATTERN) {
2306: PetscScalar alpha = a;
2307: x = (Mat_SeqAIJ*)xx->A->data;
2308: PetscBLASIntCast(x->nz,&bnz);
2309: y = (Mat_SeqAIJ*)yy->A->data;
2310: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2311: x = (Mat_SeqAIJ*)xx->B->data;
2312: y = (Mat_SeqAIJ*)yy->B->data;
2313: PetscBLASIntCast(x->nz,&bnz);
2314: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2315: } else if (str == SUBSET_NONZERO_PATTERN) {
2316: MatAXPY_SeqAIJ(yy->A,a,xx->A,str);
2318: x = (Mat_SeqAIJ*)xx->B->data;
2319: y = (Mat_SeqAIJ*)yy->B->data;
2320: if (y->xtoy && y->XtoY != xx->B) {
2321: PetscFree(y->xtoy);
2322: MatDestroy(&y->XtoY);
2323: }
2324: if (!y->xtoy) { /* get xtoy */
2325: MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
2326: y->XtoY = xx->B;
2327: PetscObjectReference((PetscObject)xx->B);
2328: }
2329: for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2330: } else {
2331: Mat B;
2332: PetscInt *nnz_d,*nnz_o;
2333: PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);
2334: PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);
2335: MatCreate(PetscObjectComm((PetscObject)Y),&B);
2336: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2337: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2338: MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);
2339: MatSetType(B,MATMPIAIJ);
2340: MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);
2341: MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);
2342: MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);
2343: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2344: MatHeaderReplace(Y,B);
2345: PetscFree(nnz_d);
2346: PetscFree(nnz_o);
2347: }
2348: return(0);
2349: }
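/*
   Editor's sketch (not part of petsc-3.4.5): the three branches above correspond to the
   MatStructure argument of MatAXPY(): SAME_NONZERO_PATTERN does a BLAS axpy directly on the
   stored values of both blocks, SUBSET_NONZERO_PATTERN scatters X's entries into Y through a
   cached x-to-y index map, and anything else preallocates and assembles a fresh matrix.
   The hypothetical helper below computes Y <- Y + 2*X for matrices known to share their
   nonzero pattern; pass DIFFERENT_NONZERO_PATTERN when unsure.
*/
static PetscErrorCode ExampleAXPY_Editor(Mat Y,Mat X)
{
  MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);
  return(0);
}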
2351: extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2355: PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2356: {
2357: #if defined(PETSC_USE_COMPLEX)
2359: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2362: MatConjugate_SeqAIJ(aij->A);
2363: MatConjugate_SeqAIJ(aij->B);
2364: #else
2366: #endif
2367: return(0);
2368: }
2372: PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2373: {
2374: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2378: MatRealPart(a->A);
2379: MatRealPart(a->B);
2380: return(0);
2381: }
2385: PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2386: {
2387: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2391: MatImaginaryPart(a->A);
2392: MatImaginaryPart(a->B);
2393: return(0);
2394: }
2396: #if defined(PETSC_HAVE_PBGL)
2398: #include <boost/parallel/mpi/bsp_process_group.hpp>
2399: #include <boost/graph/distributed/ilu_default_graph.hpp>
2400: #include <boost/graph/distributed/ilu_0_block.hpp>
2401: #include <boost/graph/distributed/ilu_preconditioner.hpp>
2402: #include <boost/graph/distributed/petsc/interface.hpp>
2403: #include <boost/multi_array.hpp>
2404: #include <boost/parallel/distributed_property_map.hpp>
2408: /*
2409: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2410: */
2411: PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2412: {
2413: namespace petsc = boost::distributed::petsc;
2415: namespace graph_dist = boost::graph::distributed;
2416: using boost::graph::distributed::ilu_default::process_group_type;
2417: using boost::graph::ilu_permuted;
2419: PetscBool row_identity, col_identity;
2420: PetscContainer c;
2421: PetscInt m, n, M, N;
2425: if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2426: ISIdentity(isrow, &row_identity);
2427: ISIdentity(iscol, &col_identity);
2428: if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2430: process_group_type pg;
2431: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2432: lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2433: lgraph_type& level_graph = *lgraph_p;
2434: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2436: petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2437: ilu_permuted(level_graph);
2439: /* put together the new matrix */
2440: MatCreate(PetscObjectComm((PetscObject)A), fact);
2441: MatGetLocalSize(A, &m, &n);
2442: MatGetSize(A, &M, &N);
2443: MatSetSizes(fact, m, n, M, N);
2444: MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);
2445: MatSetType(fact, ((PetscObject)A)->type_name);
2446: MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);
2447: MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);
2449: PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2450: PetscContainerSetPointer(c, lgraph_p);
2451: PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2452: PetscContainerDestroy(&c);
2453: return(0);
2454: }
2458: PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2459: {
2461: return(0);
2462: }
2466: /*
2467: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2468: */
2469: PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2470: {
2471: namespace graph_dist = boost::graph::distributed;
2473: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2474: lgraph_type *lgraph_p;
2475: PetscContainer c;
2479: PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);
2480: PetscContainerGetPointer(c, (void**) &lgraph_p);
2481: VecCopy(b, x);
2483: PetscScalar *array_x;
2484: VecGetArray(x, &array_x);
2485: PetscInt sx;
2486: VecGetSize(x, &sx);
2488: PetscScalar *array_b;
2489: VecGetArray(b, &array_b);
2490: PetscInt sb;
2491: VecGetSize(b, &sb);
2493: lgraph_type& level_graph = *lgraph_p;
2494: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2496: typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2497: array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]);
2498: array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]);
2500: typedef boost::iterator_property_map<array_ref_type::iterator,
2501: boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2502: gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2503: gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2505: ilu_set_solve(*lgraph_p, vector_b, vector_x);
2506: return(0);
2507: }
2508: #endif
2510: typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2511: PetscInt nzlocal,nsends,nrecvs;
2512: PetscMPIInt *send_rank,*recv_rank;
2513: PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2514: PetscScalar *sbuf_a,**rbuf_a;
2515: PetscErrorCode (*Destroy)(Mat);
2516: } Mat_Redundant;
2520: PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2521: {
2523: Mat_Redundant *redund=(Mat_Redundant*)ptr;
2524: PetscInt i;
2527: PetscFree2(redund->send_rank,redund->recv_rank);
2528: PetscFree(redund->sbuf_j);
2529: PetscFree(redund->sbuf_a);
2530: for (i=0; i<redund->nrecvs; i++) {
2531: PetscFree(redund->rbuf_j[i]);
2532: PetscFree(redund->rbuf_a[i]);
2533: }
2534: PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);
2535: PetscFree(redund);
2536: return(0);
2537: }
2541: PetscErrorCode MatDestroy_MatRedundant(Mat A)
2542: {
2544: PetscContainer container;
2545: Mat_Redundant *redund=NULL;
2548: PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);
2549: if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2550: PetscContainerGetPointer(container,(void**)&redund);
2552: A->ops->destroy = redund->Destroy;
2554: PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);
2555: if (A->ops->destroy) {
2556: (*A->ops->destroy)(A);
2557: }
2558: return(0);
2559: }
2563: PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2564: {
2565: PetscMPIInt rank,size;
2566: MPI_Comm comm;
2568: PetscInt nsends = 0,nrecvs=0,i,rownz_max=0;
2569: PetscMPIInt *send_rank= NULL,*recv_rank=NULL;
2570: PetscInt *rowrange = mat->rmap->range;
2571: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2572: Mat A = aij->A,B=aij->B,C=*matredundant;
2573: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2574: PetscScalar *sbuf_a;
2575: PetscInt nzlocal=a->nz+b->nz;
2576: PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2577: PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2578: PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2579: MatScalar *aworkA,*aworkB;
2580: PetscScalar *vals;
2581: PetscMPIInt tag1,tag2,tag3,imdex;
2582: MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2583: MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2584: MPI_Status recv_status,*send_status;
2585: PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2586: PetscInt **rbuf_j=NULL;
2587: PetscScalar **rbuf_a=NULL;
2588: Mat_Redundant *redund =NULL;
2589: PetscContainer container;
2592: PetscObjectGetComm((PetscObject)mat,&comm);
2593: MPI_Comm_rank(comm,&rank);
2594: MPI_Comm_size(comm,&size);
2596: if (reuse == MAT_REUSE_MATRIX) {
2597: MatGetSize(C,&M,&N);
2598: if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2599: MatGetLocalSize(C,&M,&N);
2600: if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2601: PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);
2602: if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2603: PetscContainerGetPointer(container,(void**)&redund);
2604: if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2606: nsends = redund->nsends;
2607: nrecvs = redund->nrecvs;
2608: send_rank = redund->send_rank;
2609: recv_rank = redund->recv_rank;
2610: sbuf_nz = redund->sbuf_nz;
2611: rbuf_nz = redund->rbuf_nz;
2612: sbuf_j = redund->sbuf_j;
2613: sbuf_a = redund->sbuf_a;
2614: rbuf_j = redund->rbuf_j;
2615: rbuf_a = redund->rbuf_a;
2616: }
2618: if (reuse == MAT_INITIAL_MATRIX) {
2619: PetscMPIInt subrank,subsize;
2620: PetscInt nleftover,np_subcomm;
2621: /* get the destination processors' id send_rank, nsends and nrecvs */
2622: MPI_Comm_rank(subcomm,&subrank);
2623: MPI_Comm_size(subcomm,&subsize);
2624: PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2626: np_subcomm = size/nsubcomm;
2627: nleftover = size - nsubcomm*np_subcomm;
2629: nsends = 0; nrecvs = 0;
2630: for (i=0; i<size; i++) { /* i=rank*/
2631: if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */
2632: send_rank[nsends] = i; nsends++;
2633: recv_rank[nrecvs++] = i;
2634: }
2635: }
2636: if (rank >= size - nleftover) { /* this proc is a leftover processor */
2637: i = size-nleftover-1;
2638: j = 0;
2639: while (j < nsubcomm - nleftover) {
2640: send_rank[nsends++] = i;
2641: i--; j++;
2642: }
2643: }
2645: if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2646: for (i=0; i<nleftover; i++) {
2647: recv_rank[nrecvs++] = size-nleftover+i;
2648: }
2649: }
2651: /* allocate sbuf_j, sbuf_a */
2652: i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2653: PetscMalloc(i*sizeof(PetscInt),&sbuf_j);
2654: PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);
2655: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2657: /* copy mat's local entries into the buffers */
2658: if (reuse == MAT_INITIAL_MATRIX) {
2659: rownz_max = 0;
2660: rptr = sbuf_j;
2661: cols = sbuf_j + rend-rstart + 1;
2662: vals = sbuf_a;
2663: rptr[0] = 0;
2664: for (i=0; i<rend-rstart; i++) {
2665: row = i + rstart;
2666: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2667: ncols = nzA + nzB;
2668: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2669: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2670: /* load the column indices for this row into cols */
2671: lwrite = 0;
2672: for (l=0; l<nzB; l++) {
2673: if ((ctmp = bmap[cworkB[l]]) < cstart) {
2674: vals[lwrite] = aworkB[l];
2675: cols[lwrite++] = ctmp;
2676: }
2677: }
2678: for (l=0; l<nzA; l++) {
2679: vals[lwrite] = aworkA[l];
2680: cols[lwrite++] = cstart + cworkA[l];
2681: }
2682: for (l=0; l<nzB; l++) {
2683: if ((ctmp = bmap[cworkB[l]]) >= cend) {
2684: vals[lwrite] = aworkB[l];
2685: cols[lwrite++] = ctmp;
2686: }
2687: }
2688: vals += ncols;
2689: cols += ncols;
2690: rptr[i+1] = rptr[i] + ncols;
2691: if (rownz_max < ncols) rownz_max = ncols;
2692: }
2693: if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2694: } else { /* only copy matrix values into sbuf_a */
2695: rptr = sbuf_j;
2696: vals = sbuf_a;
2697: rptr[0] = 0;
2698: for (i=0; i<rend-rstart; i++) {
2699: row = i + rstart;
2700: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2701: ncols = nzA + nzB;
2702: cworkB = b->j + b->i[i];
2703: aworkA = a->a + a->i[i];
2704: aworkB = b->a + b->i[i];
2705: lwrite = 0;
2706: for (l=0; l<nzB; l++) {
2707: if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2708: }
2709: for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2710: for (l=0; l<nzB; l++) {
2711: if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2712: }
2713: vals += ncols;
2714: rptr[i+1] = rptr[i] + ncols;
2715: }
2716: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2718: /* send nzlocal to others, and recv other's nzlocal */
2719: /*--------------------------------------------------*/
2720: if (reuse == MAT_INITIAL_MATRIX) {
2721: PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2723: s_waits2 = s_waits3 + nsends;
2724: s_waits1 = s_waits2 + nsends;
2725: r_waits1 = s_waits1 + nsends;
2726: r_waits2 = r_waits1 + nrecvs;
2727: r_waits3 = r_waits2 + nrecvs;
2728: } else {
2729: PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2731: r_waits3 = s_waits3 + nsends;
2732: }
2734: PetscObjectGetNewTag((PetscObject)mat,&tag3);
2735: if (reuse == MAT_INITIAL_MATRIX) {
2736: /* get new tags to keep the communication clean */
2737: PetscObjectGetNewTag((PetscObject)mat,&tag1);
2738: PetscObjectGetNewTag((PetscObject)mat,&tag2);
2739: PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);
2741: /* post receives of other's nzlocal */
2742: for (i=0; i<nrecvs; i++) {
2743: MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);
2744: }
2745: /* send nzlocal to others */
2746: for (i=0; i<nsends; i++) {
2747: sbuf_nz[i] = nzlocal;
2748: MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);
2749: }
2750: /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2751: count = nrecvs;
2752: while (count) {
2753: MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);
2755: recv_rank[imdex] = recv_status.MPI_SOURCE;
2756: /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2757: PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);
2759: i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2761: rbuf_nz[imdex] += i + 2;
2763: PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);
2764: MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);
2765: count--;
2766: }
2767: /* wait on sends of nzlocal */
2768: if (nsends) {MPI_Waitall(nsends,s_waits1,send_status);}
2769: /* send mat->i,j to others, and recv from others */
2770: /*------------------------------------------------*/
2771: for (i=0; i<nsends; i++) {
2772: j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2773: MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);
2774: }
2775: /* wait on receives of mat->i,j */
2776: /*------------------------------*/
2777: count = nrecvs;
2778: while (count) {
2779: MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);
2780: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2781: count--;
2782: }
2783: /* wait on sends of mat->i,j */
2784: /*---------------------------*/
2785: if (nsends) {
2786: MPI_Waitall(nsends,s_waits2,send_status);
2787: }
2788: } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2790: /* post receives, send and receive mat->a */
2791: /*----------------------------------------*/
2792: for (imdex=0; imdex<nrecvs; imdex++) {
2793: MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);
2794: }
2795: for (i=0; i<nsends; i++) {
2796: MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);
2797: }
2798: count = nrecvs;
2799: while (count) {
2800: MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);
2801: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2802: count--;
2803: }
2804: if (nsends) {
2805: MPI_Waitall(nsends,s_waits3,send_status);
2806: }
2808: PetscFree2(s_waits3,send_status);
2810: /* create redundant matrix */
2811: /*-------------------------*/
2812: if (reuse == MAT_INITIAL_MATRIX) {
2813: /* compute rownz_max for preallocation */
2814: for (imdex=0; imdex<nrecvs; imdex++) {
2815: j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2816: rptr = rbuf_j[imdex];
2817: for (i=0; i<j; i++) {
2818: ncols = rptr[i+1] - rptr[i];
2819: if (rownz_max < ncols) rownz_max = ncols;
2820: }
2821: }
2823: MatCreate(subcomm,&C);
2824: MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);
2825: MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);
2826: MatSetFromOptions(C);
2827: MatSeqAIJSetPreallocation(C,rownz_max,NULL);
2828: MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);
2829: } else {
2830: C = *matredundant;
2831: }
2833: /* insert local matrix entries */
2834: rptr = sbuf_j;
2835: cols = sbuf_j + rend-rstart + 1;
2836: vals = sbuf_a;
2837: for (i=0; i<rend-rstart; i++) {
2838: row = i + rstart;
2839: ncols = rptr[i+1] - rptr[i];
2840: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2841: vals += ncols;
2842: cols += ncols;
2843: }
2844: /* insert received matrix entries */
2845: for (imdex=0; imdex<nrecvs; imdex++) {
2846: rstart = rowrange[recv_rank[imdex]];
2847: rend = rowrange[recv_rank[imdex]+1];
2848: rptr = rbuf_j[imdex];
2849: cols = rbuf_j[imdex] + rend-rstart + 1;
2850: vals = rbuf_a[imdex];
2851: for (i=0; i<rend-rstart; i++) {
2852: row = i + rstart;
2853: ncols = rptr[i+1] - rptr[i];
2854: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2855: vals += ncols;
2856: cols += ncols;
2857: }
2858: }
2859: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2860: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2861: MatGetSize(C,&M,&N);
2862: if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2863: if (reuse == MAT_INITIAL_MATRIX) {
2864: PetscContainer container;
2865: *matredundant = C;
2866: /* create a supporting struct and attach it to C for reuse */
2867: PetscNewLog(C,Mat_Redundant,&redund);
2868: PetscContainerCreate(PETSC_COMM_SELF,&container);
2869: PetscContainerSetPointer(container,redund);
2870: PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);
2871: PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);
2872: PetscContainerDestroy(&container);
2874: redund->nzlocal = nzlocal;
2875: redund->nsends = nsends;
2876: redund->nrecvs = nrecvs;
2877: redund->send_rank = send_rank;
2878: redund->recv_rank = recv_rank;
2879: redund->sbuf_nz = sbuf_nz;
2880: redund->rbuf_nz = rbuf_nz;
2881: redund->sbuf_j = sbuf_j;
2882: redund->sbuf_a = sbuf_a;
2883: redund->rbuf_j = rbuf_j;
2884: redund->rbuf_a = rbuf_a;
2886: redund->Destroy = C->ops->destroy;
2887: C->ops->destroy = MatDestroy_MatRedundant;
2888: }
2889: return(0);
2890: }
2894: PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2895: {
2896: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2898: PetscInt i,*idxb = 0;
2899: PetscScalar *va,*vb;
2900: Vec vtmp;
2903: MatGetRowMaxAbs(a->A,v,idx);
2904: VecGetArray(v,&va);
2905: if (idx) {
2906: for (i=0; i<A->rmap->n; i++) {
2907: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2908: }
2909: }
2911: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2912: if (idx) {
2913: PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);
2914: }
2915: MatGetRowMaxAbs(a->B,vtmp,idxb);
2916: VecGetArray(vtmp,&vb);
2918: for (i=0; i<A->rmap->n; i++) {
2919: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2920: va[i] = vb[i];
2921: if (idx) idx[i] = a->garray[idxb[i]];
2922: }
2923: }
2925: VecRestoreArray(v,&va);
2926: VecRestoreArray(vtmp,&vb);
2927: PetscFree(idxb);
2928: VecDestroy(&vtmp);
2929: return(0);
2930: }
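/*
   Editor's sketch (not part of petsc-3.4.5): the row-wise reduction above first queries the
   diagonal block, then the off-diagonal block into a work vector, keeps the larger magnitude
   per row and maps the winning column index back to global numbering through garray. Usage
   through the public interface, with the hypothetical helper below; v must conform to the
   rows of A and idx is optional.
*/
static PetscErrorCode ExampleRowMaxAbs_Editor(Mat A)
{
  Vec      v;
  PetscInt n,*idx;

  MatGetVecs(A,NULL,&v);               /* left vector: one entry per local row */
  MatGetLocalSize(A,&n,NULL);
  PetscMalloc(n*sizeof(PetscInt),&idx);
  MatGetRowMaxAbs(A,v,idx);            /* idx[i] = global column of the row maximum */
  PetscFree(idx);
  VecDestroy(&v);
  return(0);
}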
2934: PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2935: {
2936: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2938: PetscInt i,*idxb = 0;
2939: PetscScalar *va,*vb;
2940: Vec vtmp;
2943: MatGetRowMinAbs(a->A,v,idx);
2944: VecGetArray(v,&va);
2945: if (idx) {
2946: for (i=0; i<A->rmap->n; i++) {
2947: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2948: }
2949: }
2951: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2952: if (idx) {
2953: PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);
2954: }
2955: MatGetRowMinAbs(a->B,vtmp,idxb);
2956: VecGetArray(vtmp,&vb);
2958: for (i=0; i<A->rmap->n; i++) {
2959: if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2960: va[i] = vb[i];
2961: if (idx) idx[i] = a->garray[idxb[i]];
2962: }
2963: }
2965: VecRestoreArray(v,&va);
2966: VecRestoreArray(vtmp,&vb);
2967: PetscFree(idxb);
2968: VecDestroy(&vtmp);
2969: return(0);
2970: }
2974: PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2975: {
2976: Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2977: PetscInt n = A->rmap->n;
2978: PetscInt cstart = A->cmap->rstart;
2979: PetscInt *cmap = mat->garray;
2980: PetscInt *diagIdx, *offdiagIdx;
2981: Vec diagV, offdiagV;
2982: PetscScalar *a, *diagA, *offdiagA;
2983: PetscInt r;
2987: PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);
2988: VecCreateSeq(PETSC_COMM_SELF, n, &diagV);
2989: VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);
2990: MatGetRowMin(mat->A, diagV, diagIdx);
2991: MatGetRowMin(mat->B, offdiagV, offdiagIdx);
2992: VecGetArray(v, &a);
2993: VecGetArray(diagV, &diagA);
2994: VecGetArray(offdiagV, &offdiagA);
2995: for (r = 0; r < n; ++r) {
2996: if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2997: a[r] = diagA[r];
2998: idx[r] = cstart + diagIdx[r];
2999: } else {
3000: a[r] = offdiagA[r];
3001: idx[r] = cmap[offdiagIdx[r]];
3002: }
3003: }
3004: VecRestoreArray(v, &a);
3005: VecRestoreArray(diagV, &diagA);
3006: VecRestoreArray(offdiagV, &offdiagA);
3007: VecDestroy(&diagV);
3008: VecDestroy(&offdiagV);
3009: PetscFree2(diagIdx, offdiagIdx);
3010: return(0);
3011: }
3015: PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3016: {
3017: Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
3018: PetscInt n = A->rmap->n;
3019: PetscInt cstart = A->cmap->rstart;
3020: PetscInt *cmap = mat->garray;
3021: PetscInt *diagIdx, *offdiagIdx;
3022: Vec diagV, offdiagV;
3023: PetscScalar *a, *diagA, *offdiagA;
3024: PetscInt r;
3028: PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);
3029: VecCreateSeq(PETSC_COMM_SELF, n, &diagV);
3030: VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);
3031: MatGetRowMax(mat->A, diagV, diagIdx);
3032: MatGetRowMax(mat->B, offdiagV, offdiagIdx);
3033: VecGetArray(v, &a);
3034: VecGetArray(diagV, &diagA);
3035: VecGetArray(offdiagV, &offdiagA);
3036: for (r = 0; r < n; ++r) {
3037: if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3038: a[r] = diagA[r];
3039: idx[r] = cstart + diagIdx[r];
3040: } else {
3041: a[r] = offdiagA[r];
3042: idx[r] = cmap[offdiagIdx[r]];
3043: }
3044: }
3045: VecRestoreArray(v, &a);
3046: VecRestoreArray(diagV, &diagA);
3047: VecRestoreArray(offdiagV, &offdiagA);
3048: VecDestroy(&diagV);
3049: VecDestroy(&offdiagV);
3050: PetscFree2(diagIdx, offdiagIdx);
3051: return(0);
3052: }
3056: PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3057: {
3059: Mat *dummy;
3062: MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);
3063: *newmat = *dummy;
3064: PetscFree(dummy);
3065: return(0);
3066: }
3068: extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3072: PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3073: {
3074: Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
3078: MatInvertBlockDiagonal(a->A,values);
3079: return(0);
3080: }
3084: static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3085: {
3087: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data;
3090: MatSetRandom(aij->A,rctx);
3091: MatSetRandom(aij->B,rctx);
3092: MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);
3093: MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);
3094: return(0);
3095: }
3097: /* -------------------------------------------------------------------*/
3098: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3099: MatGetRow_MPIAIJ,
3100: MatRestoreRow_MPIAIJ,
3101: MatMult_MPIAIJ,
3102: /* 4*/ MatMultAdd_MPIAIJ,
3103: MatMultTranspose_MPIAIJ,
3104: MatMultTransposeAdd_MPIAIJ,
3105: #if defined(PETSC_HAVE_PBGL)
3106: MatSolve_MPIAIJ,
3107: #else
3108: 0,
3109: #endif
3110: 0,
3111: 0,
3112: /*10*/ 0,
3113: 0,
3114: 0,
3115: MatSOR_MPIAIJ,
3116: MatTranspose_MPIAIJ,
3117: /*15*/ MatGetInfo_MPIAIJ,
3118: MatEqual_MPIAIJ,
3119: MatGetDiagonal_MPIAIJ,
3120: MatDiagonalScale_MPIAIJ,
3121: MatNorm_MPIAIJ,
3122: /*20*/ MatAssemblyBegin_MPIAIJ,
3123: MatAssemblyEnd_MPIAIJ,
3124: MatSetOption_MPIAIJ,
3125: MatZeroEntries_MPIAIJ,
3126: /*24*/ MatZeroRows_MPIAIJ,
3127: 0,
3128: #if defined(PETSC_HAVE_PBGL)
3129: 0,
3130: #else
3131: 0,
3132: #endif
3133: 0,
3134: 0,
3135: /*29*/ MatSetUp_MPIAIJ,
3136: #if defined(PETSC_HAVE_PBGL)
3137: 0,
3138: #else
3139: 0,
3140: #endif
3141: 0,
3142: 0,
3143: 0,
3144: /*34*/ MatDuplicate_MPIAIJ,
3145: 0,
3146: 0,
3147: 0,
3148: 0,
3149: /*39*/ MatAXPY_MPIAIJ,
3150: MatGetSubMatrices_MPIAIJ,
3151: MatIncreaseOverlap_MPIAIJ,
3152: MatGetValues_MPIAIJ,
3153: MatCopy_MPIAIJ,
3154: /*44*/ MatGetRowMax_MPIAIJ,
3155: MatScale_MPIAIJ,
3156: 0,
3157: 0,
3158: MatZeroRowsColumns_MPIAIJ,
3159: /*49*/ MatSetRandom_MPIAIJ,
3160: 0,
3161: 0,
3162: 0,
3163: 0,
3164: /*54*/ MatFDColoringCreate_MPIAIJ,
3165: 0,
3166: MatSetUnfactored_MPIAIJ,
3167: MatPermute_MPIAIJ,
3168: 0,
3169: /*59*/ MatGetSubMatrix_MPIAIJ,
3170: MatDestroy_MPIAIJ,
3171: MatView_MPIAIJ,
3172: 0,
3173: MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3174: /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3175: MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3176: 0,
3177: 0,
3178: 0,
3179: /*69*/ MatGetRowMaxAbs_MPIAIJ,
3180: MatGetRowMinAbs_MPIAIJ,
3181: 0,
3182: MatSetColoring_MPIAIJ,
3183: 0,
3184: MatSetValuesAdifor_MPIAIJ,
3185: /*75*/ MatFDColoringApply_AIJ,
3186: 0,
3187: 0,
3188: 0,
3189: MatFindZeroDiagonals_MPIAIJ,
3190: /*80*/ 0,
3191: 0,
3192: 0,
3193: /*83*/ MatLoad_MPIAIJ,
3194: 0,
3195: 0,
3196: 0,
3197: 0,
3198: 0,
3199: /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3200: MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3201: MatMatMultNumeric_MPIAIJ_MPIAIJ,
3202: MatPtAP_MPIAIJ_MPIAIJ,
3203: MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3204: /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3205: 0,
3206: 0,
3207: 0,
3208: 0,
3209: /*99*/ 0,
3210: 0,
3211: 0,
3212: MatConjugate_MPIAIJ,
3213: 0,
3214: /*104*/MatSetValuesRow_MPIAIJ,
3215: MatRealPart_MPIAIJ,
3216: MatImaginaryPart_MPIAIJ,
3217: 0,
3218: 0,
3219: /*109*/0,
3220: MatGetRedundantMatrix_MPIAIJ,
3221: MatGetRowMin_MPIAIJ,
3222: 0,
3223: 0,
3224: /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3225: 0,
3226: 0,
3227: 0,
3228: 0,
3229: /*119*/0,
3230: 0,
3231: 0,
3232: 0,
3233: MatGetMultiProcBlock_MPIAIJ,
3234: /*124*/MatFindNonzeroRows_MPIAIJ,
3235: MatGetColumnNorms_MPIAIJ,
3236: MatInvertBlockDiagonal_MPIAIJ,
3237: 0,
3238: MatGetSubMatricesParallel_MPIAIJ,
3239: /*129*/0,
3240: MatTransposeMatMult_MPIAIJ_MPIAIJ,
3241: MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3242: MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3243: 0,
3244: /*134*/0,
3245: 0,
3246: 0,
3247: 0,
3248: 0,
3249: /*139*/0,
3250: 0
3251: };
3253: /* ----------------------------------------------------------------------------------------*/
3257: PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
3258: {
3259: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
3263: MatStoreValues(aij->A);
3264: MatStoreValues(aij->B);
3265: return(0);
3266: }
3270: PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
3271: {
3272: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
3276: MatRetrieveValues(aij->A);
3277: MatRetrieveValues(aij->B);
3278: return(0);
3279: }
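/*
   Editor's sketch (not part of petsc-3.4.5): MatStoreValues()/MatRetrieveValues() simply
   forward to the sequential diagonal and off-diagonal blocks. The usual pattern, shown in
   the hypothetical helper below, caches the numerical values of a matrix whose nonzero
   pattern is frozen so they can be restored after being overwritten.
*/
static PetscErrorCode ExampleStoreRetrieve_Editor(Mat A)
{
  MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE); /* pattern must be frozen first */
  MatStoreValues(A);
  /* ... overwrite values with MatSetValues()/MatZeroEntries() and reassemble ... */
  MatRetrieveValues(A);                                   /* restore the cached values */
  return(0);
}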
3283: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3284: {
3285: Mat_MPIAIJ *b;
3289: PetscLayoutSetUp(B->rmap);
3290: PetscLayoutSetUp(B->cmap);
3291: b = (Mat_MPIAIJ*)B->data;
3293: if (!B->preallocated) {
3294: /* Explicitly create 2 MATSEQAIJ matrices. */
3295: MatCreate(PETSC_COMM_SELF,&b->A);
3296: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
3297: MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);
3298: MatSetType(b->A,MATSEQAIJ);
3299: PetscLogObjectParent(B,b->A);
3300: MatCreate(PETSC_COMM_SELF,&b->B);
3301: MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
3302: MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);
3303: MatSetType(b->B,MATSEQAIJ);
3304: PetscLogObjectParent(B,b->B);
3305: }
3307: MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
3308: MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
3309: B->preallocated = PETSC_TRUE;
3310: return(0);
3311: }
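/*
   Editor's sketch (not part of petsc-3.4.5): the routine above creates the two sequential
   blocks on first use (the n x n diagonal block b->A and the n x N off-diagonal block b->B)
   and forwards d_nz/d_nnz and o_nz/o_nnz to their SEQAIJ preallocation. A typical caller,
   in the hypothetical helper below, preallocates before inserting any values.
*/
static PetscErrorCode ExampleCreatePreallocated_Editor(MPI_Comm comm,PetscInt mlocal,PetscInt nlocal,Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);
  MatSetType(*A,MATMPIAIJ);
  /* at most 5 nonzeros per row in the diagonal block, 2 per row in the off-diagonal block */
  MatMPIAIJSetPreallocation(*A,5,NULL,2,NULL);
  return(0);
}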
3315: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3316: {
3317: Mat mat;
3318: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3322: *newmat = 0;
3323: MatCreate(PetscObjectComm((PetscObject)matin),&mat);
3324: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
3325: MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);
3326: MatSetType(mat,((PetscObject)matin)->type_name);
3327: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
3328: a = (Mat_MPIAIJ*)mat->data;
3330: mat->factortype = matin->factortype;
3331: mat->rmap->bs = matin->rmap->bs;
3332: mat->cmap->bs = matin->cmap->bs;
3333: mat->assembled = PETSC_TRUE;
3334: mat->insertmode = NOT_SET_VALUES;
3335: mat->preallocated = PETSC_TRUE;
3337: a->size = oldmat->size;
3338: a->rank = oldmat->rank;
3339: a->donotstash = oldmat->donotstash;
3340: a->roworiented = oldmat->roworiented;
3341: a->rowindices = 0;
3342: a->rowvalues = 0;
3343: a->getrowactive = PETSC_FALSE;
3345: PetscLayoutReference(matin->rmap,&mat->rmap);
3346: PetscLayoutReference(matin->cmap,&mat->cmap);
3348: if (oldmat->colmap) {
3349: #if defined(PETSC_USE_CTABLE)
3350: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
3351: #else
3352: PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);
3353: PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));
3354: PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));
3355: #endif
3356: } else a->colmap = 0;
3357: if (oldmat->garray) {
3358: PetscInt len;
3359: len = oldmat->B->cmap->n;
3360: PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);
3361: PetscLogObjectMemory(mat,len*sizeof(PetscInt));
3362: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt)); }
3363: } else a->garray = 0;
3365: VecDuplicate(oldmat->lvec,&a->lvec);
3366: PetscLogObjectParent(mat,a->lvec);
3367: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
3368: PetscLogObjectParent(mat,a->Mvctx);
3369: MatDuplicate(oldmat->A,cpvalues,&a->A);
3370: PetscLogObjectParent(mat,a->A);
3371: MatDuplicate(oldmat->B,cpvalues,&a->B);
3372: PetscLogObjectParent(mat,a->B);
3373: PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
3374: *newmat = mat;
3375: return(0);
3376: }
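/*
   Editor's sketch (not part of petsc-3.4.5): MatDuplicate_MPIAIJ() above copies the layouts,
   the column map and the communication scatter and duplicates both sequential blocks, so the
   result is immediately assembled and usable. Usage via the hypothetical helper below.
*/
static PetscErrorCode ExampleDuplicate_Editor(Mat A,Mat *B)
{
  /* MAT_DO_NOT_COPY_VALUES would copy only the nonzero structure */
  MatDuplicate(A,MAT_COPY_VALUES,B);
  return(0);
}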
3382: PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3383: {
3384: PetscScalar *vals,*svals;
3385: MPI_Comm comm;
3387: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
3388: PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3389: PetscInt header[4],*rowlengths = 0,M,N,m,*cols;
3390: PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3391: PetscInt cend,cstart,n,*rowners,sizesset=1;
3392: int fd;
3393: PetscInt bs = 1;
3396: PetscObjectGetComm((PetscObject)viewer,&comm);
3397: MPI_Comm_size(comm,&size);
3398: MPI_Comm_rank(comm,&rank);
3399: if (!rank) {
3400: PetscViewerBinaryGetDescriptor(viewer,&fd);
3401: PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
3402: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3403: }
3405: PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");
3406: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
3407: PetscOptionsEnd();
3409: if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3411: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
3412: M = header[1]; N = header[2];
3413: /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3414: if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3415: if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3417: /* If global sizes are set, check if they are consistent with that given in the file */
3418: if (sizesset) {
3419: MatGetSize(newMat,&grows,&gcols);
3420: }
3421: if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3422: if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3424: /* determine ownership of all (block) rows */
3425: if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3426: if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3427: else m = newMat->rmap->n; /* Set by user */
3429: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
3430: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
3432: /* First process needs enough room for process with most rows */
3433: if (!rank) {
3434: mmax = rowners[1];
3435: for (i=2; i<=size; i++) {
3436: mmax = PetscMax(mmax, rowners[i]);
3437: }
3438: } else mmax = -1; /* unused, but compilers complain */
3440: rowners[0] = 0;
3441: for (i=2; i<=size; i++) {
3442: rowners[i] += rowners[i-1];
3443: }
3444: rstart = rowners[rank];
3445: rend = rowners[rank+1];
3447: /* distribute row lengths to all processors */
3448: PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);
3449: if (!rank) {
3450: PetscBinaryRead(fd,ourlens,m,PETSC_INT);
3451: PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);
3452: PetscMalloc(size*sizeof(PetscInt),&procsnz);
3453: PetscMemzero(procsnz,size*sizeof(PetscInt));
3454: for (j=0; j<m; j++) {
3455: procsnz[0] += ourlens[j];
3456: }
3457: for (i=1; i<size; i++) {
3458: PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);
3459: /* calculate the number of nonzeros on each processor */
3460: for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3461: procsnz[i] += rowlengths[j];
3462: }
3463: MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
3464: }
3465: PetscFree(rowlengths);
3466: } else {
3467: MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);
3468: }
3470: if (!rank) {
3471: /* determine max buffer needed and allocate it */
3472: maxnz = 0;
3473: for (i=0; i<size; i++) {
3474: maxnz = PetscMax(maxnz,procsnz[i]);
3475: }
3476: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
3478: /* read in my part of the matrix column indices */
3479: nz = procsnz[0];
3480: PetscMalloc(nz*sizeof(PetscInt),&mycols);
3481: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
3483: /* read in everyone else's and ship off */
3484: for (i=1; i<size; i++) {
3485: nz = procsnz[i];
3486: PetscBinaryRead(fd,cols,nz,PETSC_INT);
3487: MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);
3488: }
3489: PetscFree(cols);
3490: } else {
3491: /* determine buffer space needed for message */
3492: nz = 0;
3493: for (i=0; i<m; i++) {
3494: nz += ourlens[i];
3495: }
3496: PetscMalloc(nz*sizeof(PetscInt),&mycols);
3498: /* receive message of column indices*/
3499: MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);
3500: }
3502: /* determine column ownership if matrix is not square */
3503: if (N != M) {
3504: if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3505: else n = newMat->cmap->n;
3506: MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);
3507: cstart = cend - n;
3508: } else {
3509: cstart = rstart;
3510: cend = rend;
3511: n = cend - cstart;
3512: }
3514: /* loop over local rows, determining number of off diagonal entries */
3515: PetscMemzero(offlens,m*sizeof(PetscInt));
3516: jj = 0;
3517: for (i=0; i<m; i++) {
3518: for (j=0; j<ourlens[i]; j++) {
3519: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3520: jj++;
3521: }
3522: }
3524: for (i=0; i<m; i++) {
3525: ourlens[i] -= offlens[i];
3526: }
3527: if (!sizesset) {
3528: MatSetSizes(newMat,m,n,M,N);
3529: }
3531: if (bs > 1) {MatSetBlockSize(newMat,bs);}
3533: MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);
3535: for (i=0; i<m; i++) {
3536: ourlens[i] += offlens[i];
3537: }
3539: if (!rank) {
3540: PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);
3542: /* read in my part of the matrix numerical values */
3543: nz = procsnz[0];
3544: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3546: /* insert into matrix */
3547: jj = rstart;
3548: smycols = mycols;
3549: svals = vals;
3550: for (i=0; i<m; i++) {
3551: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3552: smycols += ourlens[i];
3553: svals += ourlens[i];
3554: jj++;
3555: }
3557: /* read in other processors and ship out */
3558: for (i=1; i<size; i++) {
3559: nz = procsnz[i];
3560: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3561: MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);
3562: }
3563: PetscFree(procsnz);
3564: } else {
3565: /* receive numeric values */
3566: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
3568: /* receive message of values*/
3569: MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);
3571: /* insert into matrix */
3572: jj = rstart;
3573: smycols = mycols;
3574: svals = vals;
3575: for (i=0; i<m; i++) {
3576: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
3577: smycols += ourlens[i];
3578: svals += ourlens[i];
3579: jj++;
3580: }
3581: }
3582: PetscFree2(ourlens,offlens);
3583: PetscFree(vals);
3584: PetscFree(mycols);
3585: PetscFree(rowners);
3586: MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);
3587: MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);
3588: return(0);
3589: }
3593: PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3594: {
3596: IS iscol_local;
3597: PetscInt csize;
3600: ISGetLocalSize(iscol,&csize);
3601: if (call == MAT_REUSE_MATRIX) {
3602: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
3603: if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3604: } else {
3605: PetscInt cbs;
3606: ISGetBlockSize(iscol,&cbs);
3607: ISAllGather(iscol,&iscol_local);
3608: ISSetBlockSize(iscol_local,cbs);
3609: }
3610: MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
3611: if (call == MAT_INITIAL_MATRIX) {
3612: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
3613: ISDestroy(&iscol_local);
3614: }
3615: return(0);
3616: }
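/*
   Illustrative sketch (not part of the original source): typical use of the public
   MatGetSubMatrix() interface that dispatches to the routine above.  The parallel matrix A
   and the index sets isrow/iscol are assumed to have been created by the caller; the point
   is the MAT_INITIAL_MATRIX/MAT_REUSE_MATRIX pattern, which lets the "ISAllGather" object
   composed on the submatrix be reused on later calls with the same index sets.

     Mat            A,sub;
     IS             isrow,iscol;
     PetscErrorCode ierr;

     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ...  change the numerical values of A, keeping its nonzero pattern  ...
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatDestroy(&sub);CHKERRQ(ierr);
*/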
3618: extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3621: /*
3622: Not great since it makes two copies of the submatrix: first a SeqAIJ
3623: on each process, and then the final result by concatenating the local matrices.
3624: Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3626: Note: This requires a sequential iscol containing all the indices.
3627: */
3628: PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3629: {
3631: PetscMPIInt rank,size;
3632: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3633: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3634: PetscBool allcolumns, colflag;
3635: Mat M,Mreuse;
3636: MatScalar *vwork,*aa;
3637: MPI_Comm comm;
3638: Mat_SeqAIJ *aij;
3641: PetscObjectGetComm((PetscObject)mat,&comm);
3642: MPI_Comm_rank(comm,&rank);
3643: MPI_Comm_size(comm,&size);
3645: ISIdentity(iscol,&colflag);
3646: ISGetLocalSize(iscol,&ncol);
3647: if (colflag && ncol == mat->cmap->N) {
3648: allcolumns = PETSC_TRUE;
3649: } else {
3650: allcolumns = PETSC_FALSE;
3651: }
3652: if (call == MAT_REUSE_MATRIX) {
3653: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);
3654: if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3655: MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);
3656: } else {
3657: MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);
3658: }
3660: /*
3661: m - number of local rows
3662: n - number of columns (same on all processors)
3663: rstart - first row in new global matrix generated
3664: */
3665: MatGetSize(Mreuse,&m,&n);
3666: MatGetBlockSizes(Mreuse,&bs,&cbs);
3667: if (call == MAT_INITIAL_MATRIX) {
3668: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3669: ii = aij->i;
3670: jj = aij->j;
3672: /*
3673: Determine the number of non-zeros in the diagonal and off-diagonal
3674: portions of the matrix in order to do correct preallocation
3675: */
3677: /* first get start and end of "diagonal" columns */
3678: if (csize == PETSC_DECIDE) {
3679: ISGetSize(isrow,&mglobal);
3680: if (mglobal == n) { /* square matrix */
3681: nlocal = m;
3682: } else {
3683: nlocal = n/size + ((n % size) > rank);
3684: }
3685: } else {
3686: nlocal = csize;
3687: }
3688: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
3689: rstart = rend - nlocal;
3690: if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3692: /* next, compute all the lengths */
3693: PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);
3694: olens = dlens + m;
3695: for (i=0; i<m; i++) {
3696: jend = ii[i+1] - ii[i];
3697: olen = 0;
3698: dlen = 0;
3699: for (j=0; j<jend; j++) {
3700: if (*jj < rstart || *jj >= rend) olen++;
3701: else dlen++;
3702: jj++;
3703: }
3704: olens[i] = olen;
3705: dlens[i] = dlen;
3706: }
3707: MatCreate(comm,&M);
3708: MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);
3709: MatSetBlockSizes(M,bs,cbs);
3710: MatSetType(M,((PetscObject)mat)->type_name);
3711: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
3712: PetscFree(dlens);
3713: } else {
3714: PetscInt ml,nl;
3716: M = *newmat;
3717: MatGetLocalSize(M,&ml,&nl);
3718: if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3719: MatZeroEntries(M);
3720: /*
3721: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3722: rather than the slower MatSetValues().
3723: */
3724: M->was_assembled = PETSC_TRUE;
3725: M->assembled = PETSC_FALSE;
3726: }
3727: MatGetOwnershipRange(M,&rstart,&rend);
3728: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3729: ii = aij->i;
3730: jj = aij->j;
3731: aa = aij->a;
3732: for (i=0; i<m; i++) {
3733: row = rstart + i;
3734: nz = ii[i+1] - ii[i];
3735: cwork = jj; jj += nz;
3736: vwork = aa; aa += nz;
3737: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
3738: }
3740: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
3741: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
3742: *newmat = M;
3744: /* save submatrix used in processor for next request */
3745: if (call == MAT_INITIAL_MATRIX) {
3746: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
3747: MatDestroy(&Mreuse);
3748: }
3749: return(0);
3750: }
3754: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3755: {
3756: PetscInt m,cstart, cend,j,nnz,i,d;
3757: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3758: const PetscInt *JJ;
3759: PetscScalar *values;
3763: if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3765: PetscLayoutSetUp(B->rmap);
3766: PetscLayoutSetUp(B->cmap);
3767: m = B->rmap->n;
3768: cstart = B->cmap->rstart;
3769: cend = B->cmap->rend;
3770: rstart = B->rmap->rstart;
3772: PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);
3774: #if defined(PETSC_USE_DEBUG)
3775: for (i=0; i<m; i++) {
3776: nnz = Ii[i+1]- Ii[i];
3777: JJ = J + Ii[i];
3778: if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3779: if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3780: if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3781: }
3782: #endif
3784: for (i=0; i<m; i++) {
3785: nnz = Ii[i+1]- Ii[i];
3786: JJ = J + Ii[i];
3787: nnz_max = PetscMax(nnz_max,nnz);
3788: d = 0;
3789: for (j=0; j<nnz; j++) {
3790: if (cstart <= JJ[j] && JJ[j] < cend) d++;
3791: }
3792: d_nnz[i] = d;
3793: o_nnz[i] = nnz - d;
3794: }
3795: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
3796: PetscFree2(d_nnz,o_nnz);
3798: if (v) values = (PetscScalar*)v;
3799: else {
3800: PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);
3801: PetscMemzero(values,nnz_max*sizeof(PetscScalar));
3802: }
3804: for (i=0; i<m; i++) {
3805: ii = i + rstart;
3806: nnz = Ii[i+1]- Ii[i];
3807: MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);
3808: }
3809: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3810: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3812: if (!v) {
3813: PetscFree(values);
3814: }
3815: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3816: return(0);
3817: }
3821: /*@
3822: MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3823: (the default parallel PETSc format).
3825: Collective on MPI_Comm
3827: Input Parameters:
3828: + B - the matrix
3829: . i - the indices into j for the start of each local row (starts with zero)
3830: . j - the column indices for each local row (starts with zero)
3831: - v - optional values in the matrix
3833: Level: developer
3835: Notes:
3836: The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3837: thus you CANNOT change the matrix entries by changing the values of v[] after you have
3838: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3840: The i and j indices are 0 based, and the i indices are offsets into the local j array.
3842: The format used for the sparse matrix input is equivalent to a
3843: row-major ordering, i.e. for the following matrix, the input data expected is
3844: as shown:
3846: 1 0 0
3847: 2 0 3 P0
3848: -------
3849: 4 5 6 P1
3851: Process0 [P0]: rows_owned=[0,1]
3852: i = {0,1,3} [size = nrow+1 = 2+1]
3853: j = {0,0,2} [size = nz = 3]
3854: v = {1,2,3} [size = nz = 3]
3856: Process1 [P1]: rows_owned=[2]
3857: i = {0,3} [size = nrow+1 = 1+1]
3858: j = {0,1,2} [size = nz = 3]
3859: v = {4,5,6} [size = nz = 3]
3861: .keywords: matrix, aij, compressed row, sparse, parallel
3863: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3864: MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3865: @*/
3866: PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3867: {
3871: PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3872: return(0);
3873: }
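/*
   Illustrative sketch (not part of the original source): assembling the 3x3 example from the
   manual page above on a two-process communicator with MatMPIAIJSetPreallocationCSR().  The
   local CSR triples (i,j,v) and the local row counts are hard-coded per rank purely for
   illustration; the routine both preallocates and inserts the values, so no further
   MatSetValues() calls are needed.

     Mat               B;
     PetscMPIInt       rank;
     PetscErrorCode    ierr;
     const PetscInt    i0[] = {0,1,3},       i1[] = {0,3};
     const PetscInt    j0[] = {0,0,2},       j1[] = {0,1,2};
     const PetscScalar v0[] = {1.0,2.0,3.0}, v1[] = {4.0,5.0,6.0};

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,!rank ? 2 : 1,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,!rank ? i0 : i1,!rank ? j0 : j1,!rank ? v0 : v1);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/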
3877: /*@C
3878: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3879: (the default parallel PETSc format). For good matrix assembly performance
3880: the user should preallocate the matrix storage by setting the parameters
3881: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3882: performance can be increased by more than a factor of 50.
3884: Collective on MPI_Comm
3886: Input Parameters:
3887: + A - the matrix
3888: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3889: (same value is used for all local rows)
3890: . d_nnz - array containing the number of nonzeros in the various rows of the
3891: DIAGONAL portion of the local submatrix (possibly different for each row)
3892: or NULL, if d_nz is used to specify the nonzero structure.
3893: The size of this array is equal to the number of local rows, i.e 'm'.
3894: For matrices that will be factored, you must leave room for (and set)
3895: the diagonal entry even if it is zero.
3896: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3897: submatrix (same value is used for all local rows).
3898: - o_nnz - array containing the number of nonzeros in the various rows of the
3899: OFF-DIAGONAL portion of the local submatrix (possibly different for
3900: each row) or NULL, if o_nz is used to specify the nonzero
3901: structure. The size of this array is equal to the number
3902: of local rows, i.e 'm'.
3904: If the *_nnz parameter is given then the *_nz parameter is ignored
3906: The AIJ format (also called the Yale sparse matrix format or
3907: compressed row storage (CSR)) is fully compatible with standard Fortran 77
3908: storage. The stored row and column indices begin with zero.
3909: See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3911: The parallel matrix is partitioned such that the first m0 rows belong to
3912: process 0, the next m1 rows belong to process 1, the next m2 rows belong
3913: to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3915: The DIAGONAL portion of the local submatrix of a processor can be defined
3916: as the submatrix which is obtained by extracting the part corresponding to
3917: the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3918: first row that belongs to the processor, r2 is the last row belonging to
3919: this processor, and c1-c2 is the range of indices of the local part of a
3920: vector suitable for applying the matrix to. This is an m x n matrix. In the
3921: common case of a square matrix, the row and column ranges are the same and
3922: the DIAGONAL part is also square. The remaining portion of the local
3923: submatrix, of size m x (N-n), constitutes the OFF-DIAGONAL portion.
3925: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3927: You can call MatGetInfo() to get information on how effective the preallocation was;
3928: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3929: You can also run with the option -info and look for messages with the string
3930: malloc in them to see if additional memory allocation was needed.
3932: Example usage:
3934: Consider the following 8x8 matrix with 34 non-zero values, that is
3935: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3936: proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3937: as follows:
3939: .vb
3940: 1 2 0 | 0 3 0 | 0 4
3941: Proc0 0 5 6 | 7 0 0 | 8 0
3942: 9 0 10 | 11 0 0 | 12 0
3943: -------------------------------------
3944: 13 0 14 | 15 16 17 | 0 0
3945: Proc1 0 18 0 | 19 20 21 | 0 0
3946: 0 0 0 | 22 23 0 | 24 0
3947: -------------------------------------
3948: Proc2 25 26 27 | 0 0 28 | 29 0
3949: 30 0 0 | 31 32 33 | 0 34
3950: .ve
3952: This can be represented as a collection of submatrices as:
3954: .vb
3955: A B C
3956: D E F
3957: G H I
3958: .ve
3960: Where the submatrices A,B,C are owned by proc0, D,E,F are
3961: owned by proc1, G,H,I are owned by proc2.
3963: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3964: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3965: The 'M','N' parameters are 8,8, and have the same values on all procs.
3967: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3968: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3969: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3970: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3971: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3972: matrix, and [DF] as another SeqAIJ matrix.
3974: When d_nz, o_nz parameters are specified, d_nz storage elements are
3975: allocated for every row of the local diagonal submatrix, and o_nz
3976: storage locations are allocated for every row of the OFF-DIAGONAL submat.
3977: One way to choose d_nz and o_nz is to use the max nonzeros per local
3978: row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3979: In this case, the values of d_nz,o_nz are:
3980: .vb
3981: proc0 : d_nz = 2, o_nz = 2
3982: proc1 : d_nz = 3, o_nz = 2
3983: proc2 : d_nz = 1, o_nz = 4
3984: .ve
3985: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3986: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3987: for proc2, i.e. we are using 12+15+10=37 storage locations to store
3988: 34 values.
3990: When d_nnz, o_nnz parameters are specified, the storage is specified
3991: for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3992: In the above case the values for d_nnz,o_nnz are:
3993: .vb
3994: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3995: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3996: proc2: d_nnz = [1,1] and o_nnz = [4,4]
3997: .ve
3998: Here the space allocated is the sum of all the above values, i.e. 34, and
3999: hence the preallocation is perfect.
4001: Level: intermediate
4003: .keywords: matrix, aij, compressed row, sparse, parallel
4005: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4006: MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4007: @*/
4008: PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4009: {
4015: PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
4016: return(0);
4017: }
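/*
   Illustrative sketch (not part of the original source): the preallocation call that proc0
   of the 8x8 example in the manual page above would make (3 local rows, d_nnz=[2,2,2],
   o_nnz=[2,2,2]).  The other ranks pass their own counts; the sizes here are hard-coded for
   illustration only.

     Mat            A;
     PetscErrorCode ierr;
     const PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     ...  MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual  ...
*/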
4021: /*@
4022: MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the local
4023: rows in standard CSR format.
4025: Collective on MPI_Comm
4027: Input Parameters:
4028: + comm - MPI communicator
4029: . m - number of local rows (Cannot be PETSC_DECIDE)
4030: . n - This value should be the same as the local size used in creating the
4031: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4032: calculated if N is given) For square matrices n is almost always m.
4033: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4034: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4035: . i - row indices
4036: . j - column indices
4037: - a - matrix values
4039: Output Parameter:
4040: . mat - the matrix
4042: Level: intermediate
4044: Notes:
4045: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4046: thus you CANNOT change the matrix entries by changing the values of a[] after you have
4047: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4049: The i and j indices are 0 based, and the i indices are offsets into the local j array.
4051: The format used for the sparse matrix input is equivalent to a
4052: row-major ordering, i.e. for the following matrix, the input data expected is
4053: as shown:
4055: 1 0 0
4056: 2 0 3 P0
4057: -------
4058: 4 5 6 P1
4060: Process0 [P0]: rows_owned=[0,1]
4061: i = {0,1,3} [size = nrow+1 = 2+1]
4062: j = {0,0,2} [size = nz = 3]
4063: v = {1,2,3} [size = nz = 3]
4065: Process1 [P1]: rows_owned=[2]
4066: i = {0,3} [size = nrow+1 = 1+1]
4067: j = {0,1,2} [size = nz = 3]
4068: v = {4,5,6} [size = nz = 3]
4070: .keywords: matrix, aij, compressed row, sparse, parallel
4072: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4073: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4074: @*/
4075: PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4076: {
4080: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4081: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4082: MatCreate(comm,mat);
4083: MatSetSizes(*mat,m,n,M,N);
4084: /* MatSetBlockSizes(M,bs,cbs); */
4085: MatSetType(*mat,MATMPIAIJ);
4086: MatMPIAIJSetPreallocationCSR(*mat,i,j,a);
4087: return(0);
4088: }
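/*
   Illustrative sketch (not part of the original source): creating the 3x3 example from the
   manual page above in a single call.  Each rank passes its own local CSR arrays; the data
   below is hard-coded for a two-process run purely for illustration.

     Mat               C;
     PetscMPIInt       rank;
     PetscErrorCode    ierr;
     const PetscInt    i0[] = {0,1,3},       i1[] = {0,3};
     const PetscInt    j0[] = {0,0,2},       j1[] = {0,1,2};
     const PetscScalar a0[] = {1.0,2.0,3.0}, a1[] = {4.0,5.0,6.0};

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,!rank ? 2 : 1,PETSC_DECIDE,3,3,
                                      !rank ? i0 : i1,!rank ? j0 : j1,!rank ? a0 : a1,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/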
4092: /*@C
4093: MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4094: (the default parallel PETSc format). For good matrix assembly performance
4095: the user should preallocate the matrix storage by setting the parameters
4096: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4097: performance can be increased by more than a factor of 50.
4099: Collective on MPI_Comm
4101: Input Parameters:
4102: + comm - MPI communicator
4103: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4104: This value should be the same as the local size used in creating the
4105: y vector for the matrix-vector product y = Ax.
4106: . n - This value should be the same as the local size used in creating the
4107: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4108: calculated if N is given) For square matrices n is almost always m.
4109: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4110: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4111: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4112: (same value is used for all local rows)
4113: . d_nnz - array containing the number of nonzeros in the various rows of the
4114: DIAGONAL portion of the local submatrix (possibly different for each row)
4115: or NULL, if d_nz is used to specify the nonzero structure.
4116: The size of this array is equal to the number of local rows, i.e 'm'.
4117: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4118: submatrix (same value is used for all local rows).
4119: - o_nnz - array containing the number of nonzeros in the various rows of the
4120: OFF-DIAGONAL portion of the local submatrix (possibly different for
4121: each row) or NULL, if o_nz is used to specify the nonzero
4122: structure. The size of this array is equal to the number
4123: of local rows, i.e 'm'.
4125: Output Parameter:
4126: . A - the matrix
4128: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4129: MatXXXXSetPreallocation() paradigm instead of this routine directly.
4130: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4132: Notes:
4133: If the *_nnz parameter is given then the *_nz parameter is ignored
4135: m,n,M,N parameters specify the size of the matrix, and its partitioning across
4136: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4137: storage requirements for this matrix.
4139: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4140: processor then it must be used on all processors that share the object for
4141: that argument.
4143: The user MUST specify either the local or global matrix dimensions
4144: (possibly both).
4146: The parallel matrix is partitioned across processors such that the
4147: first m0 rows belong to process 0, the next m1 rows belong to
4148: process 1, the next m2 rows belong to process 2, etc., where
4149: m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4150: values corresponding to an [m x N] submatrix.
4152: The columns are logically partitioned with the n0 columns belonging
4153: to the 0th partition, the next n1 columns belonging to the next
4154: partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4156: The DIAGONAL portion of the local submatrix on any given processor
4157: is the submatrix corresponding to the rows and columns m,n
4158: owned by the given processor, i.e. the diagonal matrix on
4159: process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
4160: etc. The remaining portion of the local submatrix [m x (N-n)]
4161: constitute the OFF-DIAGONAL portion. The example below better
4162: illustrates this concept.
4164: For a square global matrix we define each processor's diagonal portion
4165: to be its local rows and the corresponding columns (a square submatrix);
4166: each processor's off-diagonal portion encompasses the remainder of the
4167: local matrix (a rectangular submatrix).
4169: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4171: When calling this routine with a single process communicator, a matrix of
4172: type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4173: type of communicator, use the construction mechanism:
4174: MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4176: By default, this format uses inodes (identical nodes) when possible.
4177: We search for consecutive rows with the same nonzero structure, thereby
4178: reusing matrix information to achieve increased efficiency.
4180: Options Database Keys:
4181: + -mat_no_inode - Do not use inodes
4182: . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4183: - -mat_aij_oneindex - Internally use indexing starting at 1
4184: rather than 0. Note that when calling MatSetValues(),
4185: the user still MUST index entries starting at 0!
4188: Example usage:
4190: Consider the following 8x8 matrix with 34 non-zero values, that is
4191: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4192: proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4193: as follows:
4195: .vb
4196: 1 2 0 | 0 3 0 | 0 4
4197: Proc0 0 5 6 | 7 0 0 | 8 0
4198: 9 0 10 | 11 0 0 | 12 0
4199: -------------------------------------
4200: 13 0 14 | 15 16 17 | 0 0
4201: Proc1 0 18 0 | 19 20 21 | 0 0
4202: 0 0 0 | 22 23 0 | 24 0
4203: -------------------------------------
4204: Proc2 25 26 27 | 0 0 28 | 29 0
4205: 30 0 0 | 31 32 33 | 0 34
4206: .ve
4208: This can be represented as a collection of submatrices as:
4210: .vb
4211: A B C
4212: D E F
4213: G H I
4214: .ve
4216: Where the submatrices A,B,C are owned by proc0, D,E,F are
4217: owned by proc1, G,H,I are owned by proc2.
4219: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4220: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4221: The 'M','N' parameters are 8,8, and have the same values on all procs.
4223: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4224: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4225: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4226: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4227: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4228: matrix, and [DF] as another SeqAIJ matrix.
4230: When d_nz, o_nz parameters are specified, d_nz storage elements are
4231: allocated for every row of the local diagonal submatrix, and o_nz
4232: storage locations are allocated for every row of the OFF-DIAGONAL submat.
4233: One way to choose d_nz and o_nz is to use the max nonzeros per local
4234: row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4235: In this case, the values of d_nz,o_nz are:
4236: .vb
4237: proc0 : d_nz = 2, o_nz = 2
4238: proc1 : d_nz = 3, o_nz = 2
4239: proc2 : d_nz = 1, o_nz = 4
4240: .ve
4241: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4242: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4243: for proc2, i.e. we are using 12+15+10=37 storage locations to store
4244: 34 values.
4246: When d_nnz, o_nnz parameters are specified, the storage is specified
4247: for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4248: In the above case the values for d_nnz,o_nnz are:
4249: .vb
4250: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4251: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4252: proc2: d_nnz = [1,1] and o_nnz = [4,4]
4253: .ve
4254: Here the space allocated is the sum of all the above values, i.e. 34, and
4255: hence the preallocation is perfect.
4257: Level: intermediate
4259: .keywords: matrix, aij, compressed row, sparse, parallel
4261: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4262: MPIAIJ, MatCreateMPIAIJWithArrays()
4263: @*/
4264: PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4265: {
4267: PetscMPIInt size;
4270: MatCreate(comm,A);
4271: MatSetSizes(*A,m,n,M,N);
4272: MPI_Comm_size(comm,&size);
4273: if (size > 1) {
4274: MatSetType(*A,MATMPIAIJ);
4275: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
4276: } else {
4277: MatSetType(*A,MATSEQAIJ);
4278: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
4279: }
4280: return(0);
4281: }
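/*
   Illustrative sketch (not part of the original source): the MatCreate()/MatSetType()/
   MatXXXXSetPreallocation() paradigm recommended in the manual page above, which builds the
   same MPIAIJ matrix as a direct MatCreateAIJ() call.  The communicator comm, the sizes
   m,n,M,N, and the per-row counts d_nnz/o_nnz are assumed to have been computed by the caller.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/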
4285: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4286: {
4287: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4290: *Ad = a->A;
4291: *Ao = a->B;
4292: *colmap = a->garray;
4293: return(0);
4294: }
4298: PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4299: {
4301: PetscInt i;
4302: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4305: if (coloring->ctype == IS_COLORING_GLOBAL) {
4306: ISColoringValue *allcolors,*colors;
4307: ISColoring ocoloring;
4309: /* set coloring for diagonal portion */
4310: MatSetColoring_SeqAIJ(a->A,coloring);
4312: /* set coloring for off-diagonal portion */
4313: ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);
4314: PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);
4315: for (i=0; i<a->B->cmap->n; i++) {
4316: colors[i] = allcolors[a->garray[i]];
4317: }
4318: PetscFree(allcolors);
4319: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4320: MatSetColoring_SeqAIJ(a->B,ocoloring);
4321: ISColoringDestroy(&ocoloring);
4322: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4323: ISColoringValue *colors;
4324: PetscInt *larray;
4325: ISColoring ocoloring;
4327: /* set coloring for diagonal portion */
4328: PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);
4329: for (i=0; i<a->A->cmap->n; i++) {
4330: larray[i] = i + A->cmap->rstart;
4331: }
4332: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);
4333: PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);
4334: for (i=0; i<a->A->cmap->n; i++) {
4335: colors[i] = coloring->colors[larray[i]];
4336: }
4337: PetscFree(larray);
4338: ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);
4339: MatSetColoring_SeqAIJ(a->A,ocoloring);
4340: ISColoringDestroy(&ocoloring);
4342: /* set coloring for off-diagonal portion */
4343: PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);
4344: ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);
4345: PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);
4346: for (i=0; i<a->B->cmap->n; i++) {
4347: colors[i] = coloring->colors[larray[i]];
4348: }
4349: PetscFree(larray);
4350: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);
4351: MatSetColoring_SeqAIJ(a->B,ocoloring);
4352: ISColoringDestroy(&ocoloring);
4353: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4354: return(0);
4355: }
4359: PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4360: {
4361: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4365: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
4366: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
4367: return(0);
4368: }
4372: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4373: {
4375: PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4376: PetscInt *indx;
4379: /* This routine will ONLY return MPIAIJ type matrix */
4380: MatGetSize(inmat,&m,&N);
4381: MatGetBlockSizes(inmat,&bs,&cbs);
4382: if (n == PETSC_DECIDE) {
4383: PetscSplitOwnership(comm,&n,&N);
4384: }
4385: /* Check sum(n) = N */
4386: MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
4387: if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4389: MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);
4390: rstart -= m;
4392: MatPreallocateInitialize(comm,m,n,dnz,onz);
4393: for (i=0; i<m; i++) {
4394: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);
4395: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
4396: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);
4397: }
4399: MatCreate(comm,outmat);
4400: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4401: MatSetBlockSizes(*outmat,bs,cbs);
4402: MatSetType(*outmat,MATMPIAIJ);
4403: MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
4404: MatPreallocateFinalize(dnz,onz);
4405: return(0);
4406: }
4410: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4411: {
4413: PetscInt m,N,i,rstart,nnz,Ii;
4414: PetscInt *indx;
4415: PetscScalar *values;
4418: MatGetSize(inmat,&m,&N);
4419: MatGetOwnershipRange(outmat,&rstart,NULL);
4420: for (i=0; i<m; i++) {
4421: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4422: Ii = i + rstart;
4423: MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
4424: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4425: }
4426: MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);
4427: MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);
4428: return(0);
4429: }
4433: /*@
4434: MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4435: matrices from each processor
4437: Collective on MPI_Comm
4439: Input Parameters:
4440: + comm - the communicator the parallel matrix will live on
4441: . inmat - the input sequential matrix
4442: . n - number of local columns (or PETSC_DECIDE)
4443: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4445: Output Parameter:
4446: . outmat - the parallel matrix generated
4448: Level: advanced
4450: Notes: The number of columns of the matrix in EACH processor MUST be the same.
4452: @*/
4453: PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4454: {
4458: PetscLogEventBegin(MAT_Merge,inmat,0,0,0);
4459: if (scall == MAT_INITIAL_MATRIX) {
4460: MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);
4461: }
4462: MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);
4463: PetscLogEventEnd(MAT_Merge,inmat,0,0,0);
4464: return(0);
4465: }
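/*
   Illustrative sketch (not part of the original source): concatenating one small SeqAIJ
   matrix per process into a parallel matrix.  Each rank contributes its own rows; the column
   count (5 here, chosen only for illustration) must be the same on every rank, as noted in
   the manual page above, and the sequential matrix must be assembled before the call.

     Mat            local,global;
     PetscErrorCode ierr;

     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,2,5,3,NULL,&local);CHKERRQ(ierr);
     ...  fill "local" with MatSetValues() and assemble it  ...
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,local,PETSC_DECIDE,MAT_INITIAL_MATRIX,&global);CHKERRQ(ierr);
     ierr = MatDestroy(&local);CHKERRQ(ierr);
     ierr = MatDestroy(&global);CHKERRQ(ierr);
*/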
4469: PetscErrorCode MatFileSplit(Mat A,char *outfile)
4470: {
4471: PetscErrorCode ierr;
4472: PetscMPIInt rank;
4473: PetscInt m,N,i,rstart,nnz;
4474: size_t len;
4475: const PetscInt *indx;
4476: PetscViewer out;
4477: char *name;
4478: Mat B;
4479: const PetscScalar *values;
4482: MatGetLocalSize(A,&m,0);
4483: MatGetSize(A,0,&N);
4484: /* Should this be the type of the diagonal block of A? */
4485: MatCreate(PETSC_COMM_SELF,&B);
4486: MatSetSizes(B,m,N,m,N);
4487: MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);
4488: MatSetType(B,MATSEQAIJ);
4489: MatSeqAIJSetPreallocation(B,0,NULL);
4490: MatGetOwnershipRange(A,&rstart,0);
4491: for (i=0; i<m; i++) {
4492: MatGetRow(A,i+rstart,&nnz,&indx,&values);
4493: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
4494: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
4495: }
4496: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
4497: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
4499: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
4500: PetscStrlen(outfile,&len);
4501: PetscMalloc((len+5)*sizeof(char),&name);
4502: sprintf(name,"%s.%d",outfile,rank);
4503: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);
4504: PetscFree(name);
4505: MatView(B,out);
4506: PetscViewerDestroy(&out);
4507: MatDestroy(&B);
4508: return(0);
4509: }
4511: extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4514: PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4515: {
4516: PetscErrorCode ierr;
4517: Mat_Merge_SeqsToMPI *merge;
4518: PetscContainer container;
4521: PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);
4522: if (container) {
4523: PetscContainerGetPointer(container,(void**)&merge);
4524: PetscFree(merge->id_r);
4525: PetscFree(merge->len_s);
4526: PetscFree(merge->len_r);
4527: PetscFree(merge->bi);
4528: PetscFree(merge->bj);
4529: PetscFree(merge->buf_ri[0]);
4530: PetscFree(merge->buf_ri);
4531: PetscFree(merge->buf_rj[0]);
4532: PetscFree(merge->buf_rj);
4533: PetscFree(merge->coi);
4534: PetscFree(merge->coj);
4535: PetscFree(merge->owners_co);
4536: PetscLayoutDestroy(&merge->rowmap);
4537: PetscFree(merge);
4538: PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);
4539: }
4540: MatDestroy_MPIAIJ(A);
4541: return(0);
4542: }
4544: #include <../src/mat/utils/freespace.h>
4545: #include <petscbt.h>
4549: PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4550: {
4551: PetscErrorCode ierr;
4552: MPI_Comm comm;
4553: Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data;
4554: PetscMPIInt size,rank,taga,*len_s;
4555: PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4556: PetscInt proc,m;
4557: PetscInt **buf_ri,**buf_rj;
4558: PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4559: PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
4560: MPI_Request *s_waits,*r_waits;
4561: MPI_Status *status;
4562: MatScalar *aa=a->a;
4563: MatScalar **abuf_r,*ba_i;
4564: Mat_Merge_SeqsToMPI *merge;
4565: PetscContainer container;
4568: PetscObjectGetComm((PetscObject)mpimat,&comm);
4569: PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);
4571: MPI_Comm_size(comm,&size);
4572: MPI_Comm_rank(comm,&rank);
4574: PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);
4575: PetscContainerGetPointer(container,(void**)&merge);
4577: bi = merge->bi;
4578: bj = merge->bj;
4579: buf_ri = merge->buf_ri;
4580: buf_rj = merge->buf_rj;
4582: PetscMalloc(size*sizeof(MPI_Status),&status);
4583: owners = merge->rowmap->range;
4584: len_s = merge->len_s;
4586: /* send and recv matrix values */
4587: /*-----------------------------*/
4588: PetscObjectGetNewTag((PetscObject)mpimat,&taga);
4589: PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);
4591: PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);
4592: for (proc=0,k=0; proc<size; proc++) {
4593: if (!len_s[proc]) continue;
4594: i = owners[proc];
4595: MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
4596: k++;
4597: }
4599: if (merge->nrecv) {MPI_Waitall(merge->nrecv,r_waits,status);}
4600: if (merge->nsend) {MPI_Waitall(merge->nsend,s_waits,status);}
4601: PetscFree(status);
4603: PetscFree(s_waits);
4604: PetscFree(r_waits);
4606: /* insert mat values of mpimat */
4607: /*----------------------------*/
4608: PetscMalloc(N*sizeof(PetscScalar),&ba_i);
4609: PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);
4611: for (k=0; k<merge->nrecv; k++) {
4612: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4613: nrows = *(buf_ri_k[k]);
4614: nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */
4615: nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4616: }
4618: /* set values of ba */
4619: m = merge->rowmap->n;
4620: for (i=0; i<m; i++) {
4621: arow = owners[rank] + i;
4622: bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
4623: bnzi = bi[i+1] - bi[i];
4624: PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));
4626: /* add local non-zero vals of this proc's seqmat into ba */
4627: anzi = ai[arow+1] - ai[arow];
4628: aj = a->j + ai[arow];
4629: aa = a->a + ai[arow];
4630: nextaj = 0;
4631: for (j=0; nextaj<anzi; j++) {
4632: if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4633: ba_i[j] += aa[nextaj++];
4634: }
4635: }
4637: /* add received vals into ba */
4638: for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4639: /* i-th row */
4640: if (i == *nextrow[k]) {
4641: anzi = *(nextai[k]+1) - *nextai[k];
4642: aj = buf_rj[k] + *(nextai[k]);
4643: aa = abuf_r[k] + *(nextai[k]);
4644: nextaj = 0;
4645: for (j=0; nextaj<anzi; j++) {
4646: if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4647: ba_i[j] += aa[nextaj++];
4648: }
4649: }
4650: nextrow[k]++; nextai[k]++;
4651: }
4652: }
4653: MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
4654: }
4655: MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
4656: MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);
4658: PetscFree(abuf_r[0]);
4659: PetscFree(abuf_r);
4660: PetscFree(ba_i);
4661: PetscFree3(buf_ri_k,nextrow,nextai);
4662: PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);
4663: return(0);
4664: }
4666: extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4670: PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4671: {
4672: PetscErrorCode ierr;
4673: Mat B_mpi;
4674: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4675: PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4676: PetscInt **buf_rj,**buf_ri,**buf_ri_k;
4677: PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4678: PetscInt len,proc,*dnz,*onz,bs,cbs;
4679: PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4680: PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4681: MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
4682: MPI_Status *status;
4683: PetscFreeSpaceList free_space=NULL,current_space=NULL;
4684: PetscBT lnkbt;
4685: Mat_Merge_SeqsToMPI *merge;
4686: PetscContainer container;
4689: PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);
4691: /* make sure it is a PETSc comm */
4692: PetscCommDuplicate(comm,&comm,NULL);
4693: MPI_Comm_size(comm,&size);
4694: MPI_Comm_rank(comm,&rank);
4696: PetscNew(Mat_Merge_SeqsToMPI,&merge);
4697: PetscMalloc(size*sizeof(MPI_Status),&status);
4699: /* determine row ownership */
4700: /*---------------------------------------------------------*/
4701: PetscLayoutCreate(comm,&merge->rowmap);
4702: PetscLayoutSetLocalSize(merge->rowmap,m);
4703: PetscLayoutSetSize(merge->rowmap,M);
4704: PetscLayoutSetBlockSize(merge->rowmap,1);
4705: PetscLayoutSetUp(merge->rowmap);
4706: PetscMalloc(size*sizeof(PetscMPIInt),&len_si);
4707: PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);
4709: m = merge->rowmap->n;
4710: owners = merge->rowmap->range;
4712: /* determine the number of messages to send, their lengths */
4713: /*---------------------------------------------------------*/
4714: len_s = merge->len_s;
4716: len = 0; /* length of buf_si[] */
4717: merge->nsend = 0;
4718: for (proc=0; proc<size; proc++) {
4719: len_si[proc] = 0;
4720: if (proc == rank) {
4721: len_s[proc] = 0;
4722: } else {
4723: len_si[proc] = owners[proc+1] - owners[proc] + 1;
4724: len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4725: }
4726: if (len_s[proc]) {
4727: merge->nsend++;
4728: nrows = 0;
4729: for (i=owners[proc]; i<owners[proc+1]; i++) {
4730: if (ai[i+1] > ai[i]) nrows++;
4731: }
4732: len_si[proc] = 2*(nrows+1);
4733: len += len_si[proc];
4734: }
4735: }
4737: /* determine the number and length of messages to receive for ij-structure */
4738: /*-------------------------------------------------------------------------*/
4739: PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);
4740: PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);
4742: /* post the Irecv of j-structure */
4743: /*-------------------------------*/
4744: PetscCommGetNewTag(comm,&tagj);
4745: PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);
4747: /* post the Isend of j-structure */
4748: /*--------------------------------*/
4749: PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);
4751: for (proc=0, k=0; proc<size; proc++) {
4752: if (!len_s[proc]) continue;
4753: i = owners[proc];
4754: MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
4755: k++;
4756: }
4758: /* receives and sends of j-structure are complete */
4759: /*------------------------------------------------*/
4760: if (merge->nrecv) {MPI_Waitall(merge->nrecv,rj_waits,status);}
4761: if (merge->nsend) {MPI_Waitall(merge->nsend,sj_waits,status);}
4763: /* send and recv i-structure */
4764: /*---------------------------*/
4765: PetscCommGetNewTag(comm,&tagi);
4766: PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);
4768: PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);
4769: buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4770: for (proc=0,k=0; proc<size; proc++) {
4771: if (!len_s[proc]) continue;
4772: /* form outgoing message for i-structure:
4773: buf_si[0]: nrows to be sent
4774: [1:nrows]: row index (global)
4775: [nrows+1:2*nrows+1]: i-structure index
4776: */
4777: /*-------------------------------------------*/
4778: nrows = len_si[proc]/2 - 1;
4779: buf_si_i = buf_si + nrows+1;
4780: buf_si[0] = nrows;
4781: buf_si_i[0] = 0;
4782: nrows = 0;
4783: for (i=owners[proc]; i<owners[proc+1]; i++) {
4784: anzi = ai[i+1] - ai[i];
4785: if (anzi) {
4786: buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4787: buf_si[nrows+1] = i-owners[proc]; /* local row index */
4788: nrows++;
4789: }
4790: }
4791: MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
4792: k++;
4793: buf_si += len_si[proc];
4794: }
4796: if (merge->nrecv) {MPI_Waitall(merge->nrecv,ri_waits,status);}
4797: if (merge->nsend) {MPI_Waitall(merge->nsend,si_waits,status);}
4799: PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);
4800: for (i=0; i<merge->nrecv; i++) {
4801: PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);
4802: }
4804: PetscFree(len_si);
4805: PetscFree(len_ri);
4806: PetscFree(rj_waits);
4807: PetscFree2(si_waits,sj_waits);
4808: PetscFree(ri_waits);
4809: PetscFree(buf_s);
4810: PetscFree(status);
4812: /* compute a local seq matrix in each processor */
4813: /*----------------------------------------------*/
4814: /* allocate bi array and free space for accumulating nonzero column info */
4815: PetscMalloc((m+1)*sizeof(PetscInt),&bi);
4816: bi[0] = 0;
4818: /* create and initialize a linked list */
4819: nlnk = N+1;
4820: PetscLLCreate(N,N,nlnk,lnk,lnkbt);
4822: /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4823: len = ai[owners[rank+1]] - ai[owners[rank]];
4824: PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);
4826: current_space = free_space;
4828: /* determine symbolic info for each local row */
4829: PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);
4831: for (k=0; k<merge->nrecv; k++) {
4832: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4833: nrows = *buf_ri_k[k];
4834: nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4835: nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4836: }
4838: MatPreallocateInitialize(comm,m,n,dnz,onz);
4839: len = 0;
4840: for (i=0; i<m; i++) {
4841: bnzi = 0;
4842: /* add local non-zero cols of this proc's seqmat into lnk */
4843: arow = owners[rank] + i;
4844: anzi = ai[arow+1] - ai[arow];
4845: aj = a->j + ai[arow];
4846: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
4847: bnzi += nlnk;
4848: /* add received col data into lnk */
4849: for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4850: if (i == *nextrow[k]) { /* i-th row */
4851: anzi = *(nextai[k]+1) - *nextai[k];
4852: aj = buf_rj[k] + *nextai[k];
4853: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
4854: bnzi += nlnk;
4855: nextrow[k]++; nextai[k]++;
4856: }
4857: }
4858: if (len < bnzi) len = bnzi; /* =max(bnzi) */
4860: /* if free space is not available, make more free space */
4861: if (current_space->local_remaining<bnzi) {
4862: PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);
4863: nspacedouble++;
4864: }
4865: /* copy data into free space, then initialize lnk */
4866: PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
4867: MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);
4869: current_space->array += bnzi;
4870: current_space->local_used += bnzi;
4871: current_space->local_remaining -= bnzi;
4873: bi[i+1] = bi[i] + bnzi;
4874: }
4876: PetscFree3(buf_ri_k,nextrow,nextai);
4878: PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);
4879: PetscFreeSpaceContiguous(&free_space,bj);
4880: PetscLLDestroy(lnk,lnkbt);
4882: /* create symbolic parallel matrix B_mpi */
4883: /*---------------------------------------*/
4884: MatGetBlockSizes(seqmat,&bs,&cbs);
4885: MatCreate(comm,&B_mpi);
4886: if (n==PETSC_DECIDE) {
4887: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);
4888: } else {
4889: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4890: }
4891: MatSetBlockSizes(B_mpi,bs,cbs);
4892: MatSetType(B_mpi,MATMPIAIJ);
4893: MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
4894: MatPreallocateFinalize(dnz,onz);
4895: MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
4897: /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4898: B_mpi->assembled = PETSC_FALSE;
4899: B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4900: merge->bi = bi;
4901: merge->bj = bj;
4902: merge->buf_ri = buf_ri;
4903: merge->buf_rj = buf_rj;
4904: merge->coi = NULL;
4905: merge->coj = NULL;
4906: merge->owners_co = NULL;
4908: PetscCommDestroy(&comm);
4910: /* attach the supporting struct to B_mpi for reuse */
4911: PetscContainerCreate(PETSC_COMM_SELF,&container);
4912: PetscContainerSetPointer(container,merge);
4913: PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
4914: PetscContainerDestroy(&container);
4915: *mpimat = B_mpi;
4917: PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);
4918: return(0);
4919: }
4923: /*@C
4924: MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4925: matrices from each processor
4927: Collective on MPI_Comm
4929: Input Parameters:
4930: + comm - the communicator the parallel matrix will live on
4931: . seqmat - the input sequential matrix
4932: . m - number of local rows (or PETSC_DECIDE)
4933: . n - number of local columns (or PETSC_DECIDE)
4934: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4936: Output Parameter:
4937: . mpimat - the parallel matrix generated
4939: Level: advanced
4941: Notes:
4942: The dimensions of the sequential matrix in each processor MUST be the same.
4943: The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4944: destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4945: @*/
4946: PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4947: {
4949: PetscMPIInt size;
4952: MPI_Comm_size(comm,&size);
4953: if (size == 1) {
4954: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4955: if (scall == MAT_INITIAL_MATRIX) {
4956: MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);
4957: } else {
4958: MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);
4959: }
4960: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4961: return(0);
4962: }
4963: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4964: if (scall == MAT_INITIAL_MATRIX) {
4965: MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);
4966: }
4967: MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);
4968: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4969: return(0);
4970: }
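/*
   Illustrative sketch (not part of the original source): summing per-process SeqAIJ
   contributions into one MPIAIJ matrix, e.g. when every rank assembles a (mostly empty)
   sequential matrix of the full global dimensions.  The construction of seqmat is assumed;
   the symbolic work is reused on the second call via MAT_REUSE_MATRIX.

     Mat            seqmat,mpimat;
     PetscErrorCode ierr;

     ...  build and assemble the global-sized SeqAIJ matrix "seqmat" on each rank  ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ...  later, after changing only the numerical values of seqmat  ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
*/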
4974: /*@
4975: MatMPIAIJGetLocalMat - Creates a SeqAIJ from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4976: mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4977: with MatGetSize()
4979: Not Collective
4981: Input Parameters:
4982: + A - the matrix
4983: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4985: Output Parameter:
4986: . A_loc - the local sequential matrix generated
4988: Level: developer
4990: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4992: @*/
4993: PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4994: {
4996: Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
4997: Mat_SeqAIJ *mat,*a,*b;
4998: PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4999: MatScalar *aa,*ba,*cam;
5000: PetscScalar *ca;
5001: PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5002: PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
5003: PetscBool match;
5006: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);
5007: if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5008: PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);
5009: a = (Mat_SeqAIJ*)(mpimat->A)->data;
5010: b = (Mat_SeqAIJ*)(mpimat->B)->data;
5011: ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5012: aa = a->a; ba = b->a;
5013: if (scall == MAT_INITIAL_MATRIX) {
5014: PetscMalloc((1+am)*sizeof(PetscInt),&ci);
5015: ci[0] = 0;
5016: for (i=0; i<am; i++) {
5017: ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5018: }
5019: PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);
5020: PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);
5021: k = 0;
5022: for (i=0; i<am; i++) {
5023: ncols_o = bi[i+1] - bi[i];
5024: ncols_d = ai[i+1] - ai[i];
5025: /* off-diagonal portion of A */
5026: for (jo=0; jo<ncols_o; jo++) {
5027: col = cmap[*bj];
5028: if (col >= cstart) break;
5029: cj[k] = col; bj++;
5030: ca[k++] = *ba++;
5031: }
5032: /* diagonal portion of A */
5033: for (j=0; j<ncols_d; j++) {
5034: cj[k] = cstart + *aj++;
5035: ca[k++] = *aa++;
5036: }
5037: /* off-diagonal portion of A */
5038: for (j=jo; j<ncols_o; j++) {
5039: cj[k] = cmap[*bj++];
5040: ca[k++] = *ba++;
5041: }
5042: }
5043: /* put together the new matrix */
5044: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);
5045: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5046: /* Since these are PETSc arrays, change flags to free them as necessary. */
5047: mat = (Mat_SeqAIJ*)(*A_loc)->data;
5048: mat->free_a = PETSC_TRUE;
5049: mat->free_ij = PETSC_TRUE;
5050: mat->nonew = 0;
5051: } else if (scall == MAT_REUSE_MATRIX) {
5052: mat=(Mat_SeqAIJ*)(*A_loc)->data;
5053: ci = mat->i; cj = mat->j; cam = mat->a;
5054: for (i=0; i<am; i++) {
5055: /* off-diagonal portion of A */
5056: ncols_o = bi[i+1] - bi[i];
5057: for (jo=0; jo<ncols_o; jo++) {
5058: col = cmap[*bj];
5059: if (col >= cstart) break;
5060: *cam++ = *ba++; bj++;
5061: }
5062: /* diagonal portion of A */
5063: ncols_d = ai[i+1] - ai[i];
5064: for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5065: /* off-diagonal portion of A */
5066: for (j=jo; j<ncols_o; j++) {
5067: *cam++ = *ba++; bj++;
5068: }
5069: }
5070: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5071: PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);
5072: return(0);
5073: }
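/*
   Editor's note: an illustrative usage sketch for MatMPIAIJGetLocalMat(), not part of the
   original source. A is assumed to be an assembled MPIAIJ matrix; error checking is omitted,
   matching the style of this listing.

      Mat A_loc;

      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      (A_loc now holds the local rows of A against all N global columns)
      MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
      (refreshes only the numerical values after A has changed with the same nonzero pattern)
      MatDestroy(&A_loc);
*/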
5077: /*@C
5078: MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5080: Not Collective
5082: Input Parameters:
5083: + A - the matrix
5084: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5085: - row, col - index sets of rows and columns to extract (or NULL)
5087: Output Parameter:
5088: . A_loc - the local sequential matrix generated
5090: Level: developer
5092: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5094: @*/
5095: PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5096: {
5097: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5099: PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5100: IS isrowa,iscola;
5101: Mat *aloc;
5102: PetscBool match;
5105: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);
5106: if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5107: PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);
5108: if (!row) {
5109: start = A->rmap->rstart; end = A->rmap->rend;
5110: ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
5111: } else {
5112: isrowa = *row;
5113: }
5114: if (!col) {
5115: start = A->cmap->rstart;
5116: cmap = a->garray;
5117: nzA = a->A->cmap->n;
5118: nzB = a->B->cmap->n;
5119: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
5120: ncols = 0;
5121: for (i=0; i<nzB; i++) {
5122: if (cmap[i] < start) idx[ncols++] = cmap[i];
5123: else break;
5124: }
5125: imark = i;
5126: for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5127: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5128: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);
5129: } else {
5130: iscola = *col;
5131: }
5132: if (scall != MAT_INITIAL_MATRIX) {
5133: PetscMalloc(sizeof(Mat),&aloc);
5134: aloc[0] = *A_loc;
5135: }
5136: MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);
5137: *A_loc = aloc[0];
5138: PetscFree(aloc);
5139: if (!row) {
5140: ISDestroy(&isrowa);
5141: }
5142: if (!col) {
5143: ISDestroy(&iscola);
5144: }
5145: PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);
5146: return(0);
5147: }
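/*
   Editor's note: an illustrative usage sketch for MatMPIAIJGetLocalMatCondensed(), not part of
   the original source. Passing NULL for row and col selects all local rows and all columns that
   contain nonzeros; error checking is omitted.

      Mat A_loc;

      MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
      MatMPIAIJGetLocalMatCondensed(A,MAT_REUSE_MATRIX,NULL,NULL,&A_loc);
      MatDestroy(&A_loc);
*/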
5151: /*@C
5152: MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5154: Collective on Mat
5156: Input Parameters:
5157: + A,B - the matrices in mpiaij format
5158: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5159: - rowb, colb - index sets of rows and columns of B to extract (or NULL)
5161: Output Parameters:
5162: + rowb, colb - index sets of rows and columns of B to extract
5163: - B_seq - the sequential matrix generated
5165: Level: developer
5167: @*/
5168: PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5169: {
5170: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5172: PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5173: IS isrowb,iscolb;
5174: Mat *bseq=NULL;
5177: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5178: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5179: }
5180: PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);
5182: if (scall == MAT_INITIAL_MATRIX) {
5183: start = A->cmap->rstart;
5184: cmap = a->garray;
5185: nzA = a->A->cmap->n;
5186: nzB = a->B->cmap->n;
5187: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
5188: ncols = 0;
5189: for (i=0; i<nzB; i++) { /* row < local row index */
5190: if (cmap[i] < start) idx[ncols++] = cmap[i];
5191: else break;
5192: }
5193: imark = i;
5194: for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
5195: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5196: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);
5197: ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);
5198: } else {
5199: if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5200: isrowb = *rowb; iscolb = *colb;
5201: PetscMalloc(sizeof(Mat),&bseq);
5202: bseq[0] = *B_seq;
5203: }
5204: MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);
5205: *B_seq = bseq[0];
5206: PetscFree(bseq);
5207: if (!rowb) {
5208: ISDestroy(&isrowb);
5209: } else {
5210: *rowb = isrowb;
5211: }
5212: if (!colb) {
5213: ISDestroy(&iscolb);
5214: } else {
5215: *colb = iscolb;
5216: }
5217: PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);
5218: return(0);
5219: }
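/*
   Editor's note: an illustrative usage sketch for MatGetBrowsOfAcols(), not part of the original
   source. The index sets built by the first call are returned through rowb and colb so that a
   later MAT_REUSE_MATRIX call can reuse them; the caller is then responsible for destroying
   rowb, colb and B_seq. Error checking is omitted.

      IS  rowb = NULL,colb = NULL;
      Mat B_seq;

      MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
      MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
      ISDestroy(&rowb);
      ISDestroy(&colb);
      MatDestroy(&B_seq);
*/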
5223: /*
5224: MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5225: of the OFF-DIAGONAL portion of local A
5227: Collective on Mat
5229: Input Parameters:
5230: + A,B - the matrices in mpiaij format
5231: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5233: Output Parameters:
5234: + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5235: . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5236: . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5237: - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5239: Level: developer
5241: */
5242: PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5243: {
5244: VecScatter_MPI_General *gen_to,*gen_from;
5245: PetscErrorCode ierr;
5246: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5247: Mat_SeqAIJ *b_oth;
5248: VecScatter ctx =a->Mvctx;
5249: MPI_Comm comm;
5250: PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5251: PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5252: PetscScalar *rvalues,*svalues;
5253: MatScalar *b_otha,*bufa,*bufA;
5254: PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5255: MPI_Request *rwaits = NULL,*swaits = NULL;
5256: MPI_Status *sstatus,rstatus;
5257: PetscMPIInt jj;
5258: PetscInt *cols,sbs,rbs;
5259: PetscScalar *vals;
5262: PetscObjectGetComm((PetscObject)A,&comm);
5263: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5264: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5265: }
5266: PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);
5267: MPI_Comm_rank(comm,&rank);
5269: gen_to = (VecScatter_MPI_General*)ctx->todata;
5270: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5271: rvalues = gen_from->values; /* holds the length of receiving row */
5272: svalues = gen_to->values; /* holds the length of sending row */
5273: nrecvs = gen_from->n;
5274: nsends = gen_to->n;
5276: PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);
5277: srow = gen_to->indices; /* local row index to be sent */
5278: sstarts = gen_to->starts;
5279: sprocs = gen_to->procs;
5280: sstatus = gen_to->sstatus;
5281: sbs = gen_to->bs;
5282: rstarts = gen_from->starts;
5283: rprocs = gen_from->procs;
5284: rbs = gen_from->bs;
5286: if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5287: if (scall == MAT_INITIAL_MATRIX) {
5288: /* i-array */
5289: /*---------*/
5290: /* post receives */
5291: for (i=0; i<nrecvs; i++) {
5292: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5293: nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5294: MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5295: }
5297: /* pack the outgoing message */
5298: PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);
5300: sstartsj[0] = 0;
5301: rstartsj[0] = 0;
5302: len = 0; /* total length of j or a array to be sent */
5303: k = 0;
5304: for (i=0; i<nsends; i++) {
5305: rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5306: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5307: for (j=0; j<nrows; j++) {
5308: row = srow[k] + B->rmap->range[rank]; /* global row idx */
5309: for (l=0; l<sbs; l++) {
5310: MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL); /* rowlength */
5312: rowlen[j*sbs+l] = ncols;
5314: len += ncols;
5315: MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);
5316: }
5317: k++;
5318: }
5319: MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);
5321: sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5322: }
5323: /* recvs and sends of i-array are completed */
5324: i = nrecvs;
5325: while (i--) {
5326: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5327: }
5328: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5330: /* allocate buffers for sending j and a arrays */
5331: PetscMalloc((len+1)*sizeof(PetscInt),&bufj);
5332: PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);
5334: /* create i-array of B_oth */
5335: PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);
5337: b_othi[0] = 0;
5338: len = 0; /* total length of j or a array to be received */
5339: k = 0;
5340: for (i=0; i<nrecvs; i++) {
5341: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5342: nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5343: for (j=0; j<nrows; j++) {
5344: b_othi[k+1] = b_othi[k] + rowlen[j];
5345: len += rowlen[j]; k++;
5346: }
5347: rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5348: }
5350: /* allocate space for j and a arrays of B_oth */
5351: PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);
5352: PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);
5354: /* j-array */
5355: /*---------*/
5356: /* post receives of j-array */
5357: for (i=0; i<nrecvs; i++) {
5358: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5359: MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5360: }
5362: /* pack the outgoing message j-array */
5363: k = 0;
5364: for (i=0; i<nsends; i++) {
5365: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5366: bufJ = bufj+sstartsj[i];
5367: for (j=0; j<nrows; j++) {
5368: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5369: for (ll=0; ll<sbs; ll++) {
5370: MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);
5371: for (l=0; l<ncols; l++) {
5372: *bufJ++ = cols[l];
5373: }
5374: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);
5375: }
5376: }
5377: MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);
5378: }
5380: /* recvs and sends of j-array are completed */
5381: i = nrecvs;
5382: while (i--) {
5383: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5384: }
5385: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5386: } else if (scall == MAT_REUSE_MATRIX) {
5387: sstartsj = *startsj_s;
5388: rstartsj = *startsj_r;
5389: bufa = *bufa_ptr;
5390: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5391: b_otha = b_oth->a;
5392: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5394: /* a-array */
5395: /*---------*/
5396: /* post receives of a-array */
5397: for (i=0; i<nrecvs; i++) {
5398: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5399: MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
5400: }
5402: /* pack the outgoing message a-array */
5403: k = 0;
5404: for (i=0; i<nsends; i++) {
5405: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5406: bufA = bufa+sstartsj[i];
5407: for (j=0; j<nrows; j++) {
5408: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5409: for (ll=0; ll<sbs; ll++) {
5410: MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);
5411: for (l=0; l<ncols; l++) {
5412: *bufA++ = vals[l];
5413: }
5414: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);
5415: }
5416: }
5417: MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
5418: }
5419: /* recvs and sends of a-array are completed */
5420: i = nrecvs;
5421: while (i--) {
5422: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5423: }
5424: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5425: PetscFree2(rwaits,swaits);
5427: if (scall == MAT_INITIAL_MATRIX) {
5428: /* put together the new matrix */
5429: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);
5431: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5432: /* Since these are PETSc arrays, change flags to free them as necessary. */
5433: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5434: b_oth->free_a = PETSC_TRUE;
5435: b_oth->free_ij = PETSC_TRUE;
5436: b_oth->nonew = 0;
5438: PetscFree(bufj);
5439: if (!startsj_s || !bufa_ptr) {
5440: PetscFree2(sstartsj,rstartsj);
5441: PetscFree(bufa_ptr);
5442: } else {
5443: *startsj_s = sstartsj;
5444: *startsj_r = rstartsj;
5445: *bufa_ptr = bufa;
5446: }
5447: }
5448: PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);
5449: return(0);
5450: }
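/*
   Editor's note: an illustrative usage sketch for MatGetBrowsOfAoCols_MPIAIJ(), not part of the
   original source. The startsj_s, startsj_r and bufa buffers created by the first call are
   handed back so that a MAT_REUSE_MATRIX call can skip the symbolic communication; since they
   are obtained with PetscMalloc2()/PetscMalloc() above, the caller is assumed to release them
   with PetscFree2()/PetscFree() when done. Error checking is omitted.

      Mat       B_oth;
      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa      = NULL;

      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
      (update the numerical values of B, keeping the same nonzero pattern, then)
      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
      PetscFree2(startsj_s,startsj_r);
      PetscFree(bufa);
      MatDestroy(&B_oth);
*/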
5454: /*@C
5455: MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5457: Not Collective
5459: Input Parameter:
5460: . A - The matrix in mpiaij format
5462: Output Parameters:
5463: + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5464: . colmap - A map from global column index to local index into lvec
5465: - multScatter - A scatter from the argument of a matrix-vector product to lvec
5467: Level: developer
5469: @*/
5470: #if defined(PETSC_USE_CTABLE)
5471: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5472: #else
5473: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5474: #endif
5475: {
5476: Mat_MPIAIJ *a;
5483: a = (Mat_MPIAIJ*) A->data;
5484: if (lvec) *lvec = a->lvec;
5485: if (colmap) *colmap = a->colmap;
5486: if (multScatter) *multScatter = a->Mvctx;
5487: return(0);
5488: }
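/*
   Editor's note: an illustrative usage sketch for MatGetCommunicationStructs(), not part of the
   original source. The returned objects are owned by the matrix and must not be destroyed by
   the caller; x is assumed to be a parallel vector compatible with A, as in y = A*x. Error
   checking is omitted.

      Vec        lvec;
      VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
   #else
      PetscInt   *colmap;
   #endif

      MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
      (gather the off-process entries of x into lvec, as MatMult() does internally)
      VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);
*/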
5490: PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5491: PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5492: PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5496: /*
5497: Computes (B'*A')' since computing B*A directly is untenable
5499:            n                   p                    p
5500:      (           )       (           )        (           )
5501:    m (     A     )  *  n (     B     )  =   m (     C     )
5502:      (           )       (           )        (           )
5504: */
5505: PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5506: {
5508: Mat At,Bt,Ct;
5511: MatTranspose(A,MAT_INITIAL_MATRIX,&At);
5512: MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);
5513: MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);
5514: MatDestroy(&At);
5515: MatDestroy(&Bt);
5516: MatTranspose(Ct,MAT_REUSE_MATRIX,&C);
5517: MatDestroy(&Ct);
5518: return(0);
5519: }
5523: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5524: {
5526: PetscInt m=A->rmap->n,n=B->cmap->n;
5527: Mat Cmat;
5530: if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5531: MatCreate(PetscObjectComm((PetscObject)A),&Cmat);
5532: MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
5533: MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);
5534: MatSetType(Cmat,MATMPIDENSE);
5535: MatMPIDenseSetPreallocation(Cmat,NULL);
5536: MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);
5537: MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);
5539: Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5541: *C = Cmat;
5542: return(0);
5543: }
5545: /* ----------------------------------------------------------------*/
5548: PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5549: {
5553: if (scall == MAT_INITIAL_MATRIX) {
5554: PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);
5555: MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);
5556: PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);
5557: }
5558: PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);
5559: MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);
5560: PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);
5561: return(0);
5562: }
5564: #if defined(PETSC_HAVE_MUMPS)
5565: PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5566: #endif
5567: #if defined(PETSC_HAVE_PASTIX)
5568: PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5569: #endif
5570: #if defined(PETSC_HAVE_SUPERLU_DIST)
5571: PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5572: #endif
5573: #if defined(PETSC_HAVE_CLIQUE)
5574: PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5575: #endif
5577: /*MC
5578: MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5580: Options Database Keys:
5581: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5583: Level: beginner
5585: .seealso: MatCreateAIJ()
5586: M*/
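/*
   Editor's note: an illustrative creation sketch for a MATMPIAIJ matrix, not part of the
   original source. m and n are the local sizes chosen by the caller, and the preallocation
   figures (5 diagonal and 2 off-diagonal nonzeros per row) are placeholders for this example
   only. Error checking is omitted.

      Mat A;

      MatCreate(comm,&A);
      MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(A,MATMPIAIJ);
      (or select the type at run time with -mat_type mpiaij and MatSetFromOptions())
      MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
*/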
5590: PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5591: {
5592: Mat_MPIAIJ *b;
5594: PetscMPIInt size;
5597: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
5599: PetscNewLog(B,Mat_MPIAIJ,&b);
5600: B->data = (void*)b;
5601: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
5602: B->assembled = PETSC_FALSE;
5603: B->insertmode = NOT_SET_VALUES;
5604: b->size = size;
5606: MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);
5608: /* build cache for off array entries formed */
5609: MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);
5611: b->donotstash = PETSC_FALSE;
5612: b->colmap = 0;
5613: b->garray = 0;
5614: b->roworiented = PETSC_TRUE;
5616: /* stuff used for matrix vector multiply */
5617: b->lvec = NULL;
5618: b->Mvctx = NULL;
5620: /* stuff for MatGetRow() */
5621: b->rowindices = 0;
5622: b->rowvalues = 0;
5623: b->getrowactive = PETSC_FALSE;
5625: /* flexible pointer used in CUSP/CUSPARSE classes */
5626: b->spptr = NULL;
5628: #if defined(PETSC_HAVE_MUMPS)
5629: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);
5630: #endif
5631: #if defined(PETSC_HAVE_PASTIX)
5632: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);
5633: #endif
5634: #if defined(PETSC_HAVE_SUPERLU_DIST)
5635: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);
5636: #endif
5637: #if defined(PETSC_HAVE_CLIQUE)
5638: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);
5639: #endif
5640: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);
5641: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);
5642: PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);
5643: PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);
5644: PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);
5645: PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);
5646: PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);
5647: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);
5648: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);
5649: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);
5650: PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);
5651: PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);
5652: PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);
5653: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);
5654: return(0);
5655: }
5659: /*@
5660: MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5661: and "off-diagonal" parts of the matrix in CSR format.
5663: Collective on MPI_Comm
5665: Input Parameters:
5666: + comm - MPI communicator
5667: . m - number of local rows (Cannot be PETSC_DECIDE)
5668: . n - This value should be the same as the local size used in creating the
5669: x vector for the matrix-vector product y = Ax, or PETSC_DECIDE to have it
5670: calculated if N is given. For square matrices n is almost always m.
5671: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5672: . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5673: . i - row indices for "diagonal" portion of matrix
5674: . j - column indices
5675: . a - matrix values
5676: . oi - row indices for "off-diagonal" portion of matrix
5677: . oj - column indices
5678: - oa - matrix values
5680: Output Parameter:
5681: . mat - the matrix
5683: Level: advanced
5685: Notes:
5686: The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5687: must free the arrays once the matrix has been destroyed and not before.
5689: The i and j indices are 0 based
5691: See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5693: This sets local rows and cannot be used to set off-processor values.
5695: Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5696: legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5697: not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5698: the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5699: keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5700: communication if it is known that only local entries will be set.
5702: .keywords: matrix, aij, compressed row, sparse, parallel
5704: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5705: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5706: @*/
5707: PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5708: {
5710: Mat_MPIAIJ *maij;
5713: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5714: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5715: if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5716: MatCreate(comm,mat);
5717: MatSetSizes(*mat,m,n,M,N);
5718: MatSetType(*mat,MATMPIAIJ);
5719: maij = (Mat_MPIAIJ*) (*mat)->data;
5721: (*mat)->preallocated = PETSC_TRUE;
5723: PetscLayoutSetUp((*mat)->rmap);
5724: PetscLayoutSetUp((*mat)->cmap);
5726: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);
5727: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);
5729: MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);
5730: MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);
5731: MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);
5732: MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);
5734: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
5735: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
5736: MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
5737: return(0);
5738: }
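/*
   Editor's note: an illustrative usage sketch for MatCreateMPIAIJWithSplitArrays(), not part of
   the original source; run it on exactly two MPI ranks. It assembles the 4x4 matrix

         1 0 2 0
         0 3 0 4
         5 0 6 0
         0 7 0 8

   where rank 0 owns rows/columns 0-1 and rank 1 owns rows/columns 2-3. The j[] indices of the
   "diagonal" block are local, while the oj[] indices of the "off-diagonal" block are global, as
   can be seen from the two MatCreateSeqAIJWithArrays() calls above. The arrays must stay valid
   until the matrix is destroyed, since they are not copied. Error checking is omitted.

      PetscMPIInt rank;
      Mat         C;
      PetscInt    i[]   = {0,1,2},oi[] = {0,1,2};
      PetscInt    j[]   = {0,1};
      PetscInt    oj0[] = {2,3},oj1[] = {0,1};
      PetscScalar a0[]  = {1,3},a1[]  = {6,8};
      PetscScalar oa0[] = {2,4},oa1[] = {5,7};

      MPI_Comm_rank(comm,&rank);
      MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,rank ? a1 : a0,
                                     oi,rank ? oj1 : oj0,rank ? oa1 : oa0,&C);
      MatDestroy(&C);
*/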
5740: /*
5741: Special version for direct calls from Fortran
5742: */
5743: #include <petsc-private/fortranimpl.h>
5745: #if defined(PETSC_HAVE_FORTRAN_CAPS)
5746: #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5747: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5748: #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5749: #endif
5751: /* Change these macros so that they can be used in a void function */
5752: #undef CHKERRQ
5753: #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5754: #undef SETERRQ2
5755: #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5756: #undef SETERRQ3
5757: #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5758: #undef SETERRQ
5759: #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5763: PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5764: {
5765: Mat mat = *mmat;
5766: PetscInt m = *mm, n = *mn;
5767: InsertMode addv = *maddv;
5768: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
5769: PetscScalar value;
5772: MatCheckPreallocated(mat,1);
5773: if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5775: #if defined(PETSC_USE_DEBUG)
5776: else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5777: #endif
5778: {
5779: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5780: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5781: PetscBool roworiented = aij->roworiented;
5783: /* Some Variables required in the macro */
5784: Mat A = aij->A;
5785: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
5786: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5787: MatScalar *aa = a->a;
5788: PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5789: Mat B = aij->B;
5790: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
5791: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5792: MatScalar *ba = b->a;
5794: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5795: PetscInt nonew = a->nonew;
5796: MatScalar *ap1,*ap2;
5799: for (i=0; i<m; i++) {
5800: if (im[i] < 0) continue;
5801: #if defined(PETSC_USE_DEBUG)
5802: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5803: #endif
5804: if (im[i] >= rstart && im[i] < rend) {
5805: row = im[i] - rstart;
5806: lastcol1 = -1;
5807: rp1 = aj + ai[row];
5808: ap1 = aa + ai[row];
5809: rmax1 = aimax[row];
5810: nrow1 = ailen[row];
5811: low1 = 0;
5812: high1 = nrow1;
5813: lastcol2 = -1;
5814: rp2 = bj + bi[row];
5815: ap2 = ba + bi[row];
5816: rmax2 = bimax[row];
5817: nrow2 = bilen[row];
5818: low2 = 0;
5819: high2 = nrow2;
5821: for (j=0; j<n; j++) {
5822: if (roworiented) value = v[i*n+j];
5823: else value = v[i+j*m];
5824: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5825: if (in[j] >= cstart && in[j] < cend) {
5826: col = in[j] - cstart;
5827: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5828: } else if (in[j] < 0) continue;
5829: #if defined(PETSC_USE_DEBUG)
5830: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5831: #endif
5832: else {
5833: if (mat->was_assembled) {
5834: if (!aij->colmap) {
5835: MatCreateColmap_MPIAIJ_Private(mat);
5836: }
5837: #if defined(PETSC_USE_CTABLE)
5838: PetscTableFind(aij->colmap,in[j]+1,&col);
5839: col--;
5840: #else
5841: col = aij->colmap[in[j]] - 1;
5842: #endif
5843: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5844: MatDisAssemble_MPIAIJ(mat);
5845: col = in[j];
5846: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5847: B = aij->B;
5848: b = (Mat_SeqAIJ*)B->data;
5849: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5850: rp2 = bj + bi[row];
5851: ap2 = ba + bi[row];
5852: rmax2 = bimax[row];
5853: nrow2 = bilen[row];
5854: low2 = 0;
5855: high2 = nrow2;
5856: bm = aij->B->rmap->n;
5857: ba = b->a;
5858: }
5859: } else col = in[j];
5860: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5861: }
5862: }
5863: } else if (!aij->donotstash) {
5864: if (roworiented) {
5865: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5866: } else {
5867: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5868: }
5869: }
5870: }
5871: }
5872: PetscFunctionReturnVoid();
5873: }