Actual source code: mpibaij.c
petsc-3.6.1 2015-08-06
2: #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
4: #include <petscblaslapack.h>
5: #include <petscsf.h>
9: PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
10: {
11: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
13: PetscInt i,*idxb = 0;
14: PetscScalar *va,*vb;
15: Vec vtmp;
18: MatGetRowMaxAbs(a->A,v,idx);
19: VecGetArray(v,&va);
20: if (idx) {
21: for (i=0; i<A->rmap->n; i++) {
22: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
23: }
24: }
26: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
27: if (idx) {PetscMalloc1(A->rmap->n,&idxb);}
28: MatGetRowMaxAbs(a->B,vtmp,idxb);
29: VecGetArray(vtmp,&vb);
31: for (i=0; i<A->rmap->n; i++) {
32: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
33: va[i] = vb[i];
34: if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
35: }
36: }
38: VecRestoreArray(v,&va);
39: VecRestoreArray(vtmp,&vb);
40: PetscFree(idxb);
41: VecDestroy(&vtmp);
42: return(0);
43: }
47: PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
48: {
49: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;
53: MatStoreValues(aij->A);
54: MatStoreValues(aij->B);
55: return(0);
56: }
60: PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
61: {
62: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;
66: MatRetrieveValues(aij->A);
67: MatRetrieveValues(aij->B);
68: return(0);
69: }
71: /*
72: Local utility routine that creates a mapping from the global column
73: number to the local number in the off-diagonal part of the local
74: storage of the matrix. This is done in a non scalable way since the
75: length of colmap equals the global matrix length.
76: */
79: PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
80: {
81: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
82: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
84: PetscInt nbs = B->nbs,i,bs=mat->rmap->bs;
87: #if defined(PETSC_USE_CTABLE)
88: PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);
89: for (i=0; i<nbs; i++) {
90: PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);
91: }
92: #else
93: PetscMalloc1(baij->Nbs+1,&baij->colmap);
94: PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));
95: PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));
96: for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
97: #endif
98: return(0);
99: }
101: #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \
102: { \
103: \
104: brow = row/bs; \
105: rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
106: rmax = aimax[brow]; nrow = ailen[brow]; \
107: bcol = col/bs; \
108: ridx = row % bs; cidx = col % bs; \
109: low = 0; high = nrow; \
110: while (high-low > 3) { \
111: t = (low+high)/2; \
112: if (rp[t] > bcol) high = t; \
113: else low = t; \
114: } \
115: for (_i=low; _i<high; _i++) { \
116: if (rp[_i] > bcol) break; \
117: if (rp[_i] == bcol) { \
118: bap = ap + bs2*_i + bs*cidx + ridx; \
119: if (addv == ADD_VALUES) *bap += value; \
120: else *bap = value; \
121: goto a_noinsert; \
122: } \
123: } \
124: if (a->nonew == 1) goto a_noinsert; \
125: if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
126: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
127: N = nrow++ - 1; \
128: /* shift up all the later entries in this row */ \
129: for (ii=N; ii>=_i; ii--) { \
130: rp[ii+1] = rp[ii]; \
131: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
132: } \
133: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar)); } \
134: rp[_i] = bcol; \
135: ap[bs2*_i + bs*cidx + ridx] = value; \
136: a_noinsert:; \
137: ailen[brow] = nrow; \
138: }
140: #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \
141: { \
142: brow = row/bs; \
143: rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
144: rmax = bimax[brow]; nrow = bilen[brow]; \
145: bcol = col/bs; \
146: ridx = row % bs; cidx = col % bs; \
147: low = 0; high = nrow; \
148: while (high-low > 3) { \
149: t = (low+high)/2; \
150: if (rp[t] > bcol) high = t; \
151: else low = t; \
152: } \
153: for (_i=low; _i<high; _i++) { \
154: if (rp[_i] > bcol) break; \
155: if (rp[_i] == bcol) { \
156: bap = ap + bs2*_i + bs*cidx + ridx; \
157: if (addv == ADD_VALUES) *bap += value; \
158: else *bap = value; \
159: goto b_noinsert; \
160: } \
161: } \
162: if (b->nonew == 1) goto b_noinsert; \
163: if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
164: MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
165: N = nrow++ - 1; \
166: /* shift up all the later entries in this row */ \
167: for (ii=N; ii>=_i; ii--) { \
168: rp[ii+1] = rp[ii]; \
169: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
170: } \
171: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));} \
172: rp[_i] = bcol; \
173: ap[bs2*_i + bs*cidx + ridx] = value; \
174: b_noinsert:; \
175: bilen[brow] = nrow; \
176: }
180: PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
181: {
182: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
183: MatScalar value;
184: PetscBool roworiented = baij->roworiented;
186: PetscInt i,j,row,col;
187: PetscInt rstart_orig=mat->rmap->rstart;
188: PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
189: PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs;
191: /* Some Variables required in the macro */
192: Mat A = baij->A;
193: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
194: PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
195: MatScalar *aa =a->a;
197: Mat B = baij->B;
198: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
199: PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
200: MatScalar *ba =b->a;
202: PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol;
203: PetscInt low,high,t,ridx,cidx,bs2=a->bs2;
204: MatScalar *ap,*bap;
207: for (i=0; i<m; i++) {
208: if (im[i] < 0) continue;
209: #if defined(PETSC_USE_DEBUG)
210: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
211: #endif
212: if (im[i] >= rstart_orig && im[i] < rend_orig) {
213: row = im[i] - rstart_orig;
214: for (j=0; j<n; j++) {
215: if (in[j] >= cstart_orig && in[j] < cend_orig) {
216: col = in[j] - cstart_orig;
217: if (roworiented) value = v[i*n+j];
218: else value = v[i+j*m];
219: MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
220: /* MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv); */
221: } else if (in[j] < 0) continue;
222: #if defined(PETSC_USE_DEBUG)
223: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
224: #endif
225: else {
226: if (mat->was_assembled) {
227: if (!baij->colmap) {
228: MatCreateColmap_MPIBAIJ_Private(mat);
229: }
230: #if defined(PETSC_USE_CTABLE)
231: PetscTableFind(baij->colmap,in[j]/bs + 1,&col);
232: col = col - 1;
233: #else
234: col = baij->colmap[in[j]/bs] - 1;
235: #endif
236: if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
237: MatDisAssemble_MPIBAIJ(mat);
238: col = in[j];
239: /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
240: B = baij->B;
241: b = (Mat_SeqBAIJ*)(B)->data;
242: bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
243: ba =b->a;
244: } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
245: else col += in[j]%bs;
246: } else col = in[j];
247: if (roworiented) value = v[i*n+j];
248: else value = v[i+j*m];
249: MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
250: /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
251: }
252: }
253: } else {
254: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
255: if (!baij->donotstash) {
256: mat->assembled = PETSC_FALSE;
257: if (roworiented) {
258: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);
259: } else {
260: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);
261: }
262: }
263: }
264: }
265: return(0);
266: }
270: PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
271: {
272: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
273: PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N;
274: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
275: PetscErrorCode ierr;
276: PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
277: PetscBool roworiented=a->roworiented;
278: const PetscScalar *value = v;
279: MatScalar *ap,*aa = a->a,*bap;
282: rp = aj + ai[row];
283: ap = aa + bs2*ai[row];
284: rmax = imax[row];
285: nrow = ailen[row];
286: low = 0;
287: high = nrow;
288: value = v;
289: low = 0;
290: high = nrow;
291: while (high-low > 7) {
292: t = (low+high)/2;
293: if (rp[t] > col) high = t;
294: else low = t;
295: }
296: for (i=low; i<high; i++) {
297: if (rp[i] > col) break;
298: if (rp[i] == col) {
299: bap = ap + bs2*i;
300: if (roworiented) {
301: if (is == ADD_VALUES) {
302: for (ii=0; ii<bs; ii++) {
303: for (jj=ii; jj<bs2; jj+=bs) {
304: bap[jj] += *value++;
305: }
306: }
307: } else {
308: for (ii=0; ii<bs; ii++) {
309: for (jj=ii; jj<bs2; jj+=bs) {
310: bap[jj] = *value++;
311: }
312: }
313: }
314: } else {
315: if (is == ADD_VALUES) {
316: for (ii=0; ii<bs; ii++,value+=bs) {
317: for (jj=0; jj<bs; jj++) {
318: bap[jj] += value[jj];
319: }
320: bap += bs;
321: }
322: } else {
323: for (ii=0; ii<bs; ii++,value+=bs) {
324: for (jj=0; jj<bs; jj++) {
325: bap[jj] = value[jj];
326: }
327: bap += bs;
328: }
329: }
330: }
331: goto noinsert2;
332: }
333: }
334: if (nonew == 1) goto noinsert2;
335: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
336: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
337: N = nrow++ - 1; high++;
338: /* shift up all the later entries in this row */
339: for (ii=N; ii>=i; ii--) {
340: rp[ii+1] = rp[ii];
341: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
342: }
343: if (N >= i) {
344: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
345: }
346: rp[i] = col;
347: bap = ap + bs2*i;
348: if (roworiented) {
349: for (ii=0; ii<bs; ii++) {
350: for (jj=ii; jj<bs2; jj+=bs) {
351: bap[jj] = *value++;
352: }
353: }
354: } else {
355: for (ii=0; ii<bs; ii++) {
356: for (jj=0; jj<bs; jj++) {
357: *bap++ = *value++;
358: }
359: }
360: }
361: noinsert2:;
362: ailen[row] = nrow;
363: return(0);
364: }
368: /*
369: This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
370: by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
371: */
372: PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
373: {
374: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
375: const PetscScalar *value;
376: MatScalar *barray = baij->barray;
377: PetscBool roworiented = baij->roworiented;
378: PetscErrorCode ierr;
379: PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs;
380: PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval;
381: PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
384: if (!barray) {
385: PetscMalloc1(bs2,&barray);
386: baij->barray = barray;
387: }
389: if (roworiented) stepval = (n-1)*bs;
390: else stepval = (m-1)*bs;
392: for (i=0; i<m; i++) {
393: if (im[i] < 0) continue;
394: #if defined(PETSC_USE_DEBUG)
395: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
396: #endif
397: if (im[i] >= rstart && im[i] < rend) {
398: row = im[i] - rstart;
399: for (j=0; j<n; j++) {
400: /* If NumCol = 1 then a copy is not required */
401: if ((roworiented) && (n == 1)) {
402: barray = (MatScalar*)v + i*bs2;
403: } else if ((!roworiented) && (m == 1)) {
404: barray = (MatScalar*)v + j*bs2;
405: } else { /* Here a copy is required */
406: if (roworiented) {
407: value = v + (i*(stepval+bs) + j)*bs;
408: } else {
409: value = v + (j*(stepval+bs) + i)*bs;
410: }
411: for (ii=0; ii<bs; ii++,value+=bs+stepval) {
412: for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
413: barray += bs;
414: }
415: barray -= bs2;
416: }
418: if (in[j] >= cstart && in[j] < cend) {
419: col = in[j] - cstart;
420: MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);
421: } else if (in[j] < 0) continue;
422: #if defined(PETSC_USE_DEBUG)
423: else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
424: #endif
425: else {
426: if (mat->was_assembled) {
427: if (!baij->colmap) {
428: MatCreateColmap_MPIBAIJ_Private(mat);
429: }
431: #if defined(PETSC_USE_DEBUG)
432: #if defined(PETSC_USE_CTABLE)
433: { PetscInt data;
434: PetscTableFind(baij->colmap,in[j]+1,&data);
435: if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
436: }
437: #else
438: if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
439: #endif
440: #endif
441: #if defined(PETSC_USE_CTABLE)
442: PetscTableFind(baij->colmap,in[j]+1,&col);
443: col = (col - 1)/bs;
444: #else
445: col = (baij->colmap[in[j]] - 1)/bs;
446: #endif
447: if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
448: MatDisAssemble_MPIBAIJ(mat);
449: col = in[j];
450: } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
451: } else col = in[j];
452: MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);
453: }
454: }
455: } else {
456: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
457: if (!baij->donotstash) {
458: if (roworiented) {
459: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
460: } else {
461: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
462: }
463: }
464: }
465: }
466: return(0);
467: }
469: #define HASH_KEY 0.6180339887
470: #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
471: /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
472: /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
475: PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
476: {
477: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
478: PetscBool roworiented = baij->roworiented;
480: PetscInt i,j,row,col;
481: PetscInt rstart_orig=mat->rmap->rstart;
482: PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs;
483: PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
484: PetscReal tmp;
485: MatScalar **HD = baij->hd,value;
486: #if defined(PETSC_USE_DEBUG)
487: PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
488: #endif
491: for (i=0; i<m; i++) {
492: #if defined(PETSC_USE_DEBUG)
493: if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
494: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
495: #endif
496: row = im[i];
497: if (row >= rstart_orig && row < rend_orig) {
498: for (j=0; j<n; j++) {
499: col = in[j];
500: if (roworiented) value = v[i*n+j];
501: else value = v[i+j*m];
502: /* Look up PetscInto the Hash Table */
503: key = (row/bs)*Nbs+(col/bs)+1;
504: h1 = HASH(size,key,tmp);
507: idx = h1;
508: #if defined(PETSC_USE_DEBUG)
509: insert_ct++;
510: total_ct++;
511: if (HT[idx] != key) {
512: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
513: if (idx == size) {
514: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
515: if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
516: }
517: }
518: #else
519: if (HT[idx] != key) {
520: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
521: if (idx == size) {
522: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
523: if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
524: }
525: }
526: #endif
527: /* A HASH table entry is found, so insert the values at the correct address */
528: if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
529: else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value;
530: }
531: } else if (!baij->donotstash) {
532: if (roworiented) {
533: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);
534: } else {
535: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);
536: }
537: }
538: }
539: #if defined(PETSC_USE_DEBUG)
540: baij->ht_total_ct = total_ct;
541: baij->ht_insert_ct = insert_ct;
542: #endif
543: return(0);
544: }
548: PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
549: {
550: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
551: PetscBool roworiented = baij->roworiented;
552: PetscErrorCode ierr;
553: PetscInt i,j,ii,jj,row,col;
554: PetscInt rstart=baij->rstartbs;
555: PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
556: PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
557: PetscReal tmp;
558: MatScalar **HD = baij->hd,*baij_a;
559: const PetscScalar *v_t,*value;
560: #if defined(PETSC_USE_DEBUG)
561: PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
562: #endif
565: if (roworiented) stepval = (n-1)*bs;
566: else stepval = (m-1)*bs;
568: for (i=0; i<m; i++) {
569: #if defined(PETSC_USE_DEBUG)
570: if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
571: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
572: #endif
573: row = im[i];
574: v_t = v + i*nbs2;
575: if (row >= rstart && row < rend) {
576: for (j=0; j<n; j++) {
577: col = in[j];
579: /* Look up into the Hash Table */
580: key = row*Nbs+col+1;
581: h1 = HASH(size,key,tmp);
583: idx = h1;
584: #if defined(PETSC_USE_DEBUG)
585: total_ct++;
586: insert_ct++;
587: if (HT[idx] != key) {
588: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
589: if (idx == size) {
590: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
591: if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
592: }
593: }
594: #else
595: if (HT[idx] != key) {
596: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
597: if (idx == size) {
598: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
599: if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
600: }
601: }
602: #endif
603: baij_a = HD[idx];
604: if (roworiented) {
605: /*value = v + i*(stepval+bs)*bs + j*bs;*/
606: /* value = v + (i*(stepval+bs)+j)*bs; */
607: value = v_t;
608: v_t += bs;
609: if (addv == ADD_VALUES) {
610: for (ii=0; ii<bs; ii++,value+=stepval) {
611: for (jj=ii; jj<bs2; jj+=bs) {
612: baij_a[jj] += *value++;
613: }
614: }
615: } else {
616: for (ii=0; ii<bs; ii++,value+=stepval) {
617: for (jj=ii; jj<bs2; jj+=bs) {
618: baij_a[jj] = *value++;
619: }
620: }
621: }
622: } else {
623: value = v + j*(stepval+bs)*bs + i*bs;
624: if (addv == ADD_VALUES) {
625: for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
626: for (jj=0; jj<bs; jj++) {
627: baij_a[jj] += *value++;
628: }
629: }
630: } else {
631: for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
632: for (jj=0; jj<bs; jj++) {
633: baij_a[jj] = *value++;
634: }
635: }
636: }
637: }
638: }
639: } else {
640: if (!baij->donotstash) {
641: if (roworiented) {
642: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
643: } else {
644: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
645: }
646: }
647: }
648: }
649: #if defined(PETSC_USE_DEBUG)
650: baij->ht_total_ct = total_ct;
651: baij->ht_insert_ct = insert_ct;
652: #endif
653: return(0);
654: }
658: PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
659: {
660: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
662: PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
663: PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
666: for (i=0; i<m; i++) {
667: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
668: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
669: if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
670: row = idxm[i] - bsrstart;
671: for (j=0; j<n; j++) {
672: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
673: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
674: if (idxn[j] >= bscstart && idxn[j] < bscend) {
675: col = idxn[j] - bscstart;
676: MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);
677: } else {
678: if (!baij->colmap) {
679: MatCreateColmap_MPIBAIJ_Private(mat);
680: }
681: #if defined(PETSC_USE_CTABLE)
682: PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);
683: data--;
684: #else
685: data = baij->colmap[idxn[j]/bs]-1;
686: #endif
687: if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
688: else {
689: col = data + idxn[j]%bs;
690: MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);
691: }
692: }
693: }
694: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
695: }
696: return(0);
697: }
701: PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
702: {
703: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
704: Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
706: PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
707: PetscReal sum = 0.0;
708: MatScalar *v;
711: if (baij->size == 1) {
712: MatNorm(baij->A,type,nrm);
713: } else {
714: if (type == NORM_FROBENIUS) {
715: v = amat->a;
716: nz = amat->nz*bs2;
717: for (i=0; i<nz; i++) {
718: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
719: }
720: v = bmat->a;
721: nz = bmat->nz*bs2;
722: for (i=0; i<nz; i++) {
723: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
724: }
725: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
726: *nrm = PetscSqrtReal(*nrm);
727: } else if (type == NORM_1) { /* max column sum */
728: PetscReal *tmp,*tmp2;
729: PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs;
730: PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);
731: PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));
732: v = amat->a; jj = amat->j;
733: for (i=0; i<amat->nz; i++) {
734: for (j=0; j<bs; j++) {
735: col = bs*(cstart + *jj) + j; /* column index */
736: for (row=0; row<bs; row++) {
737: tmp[col] += PetscAbsScalar(*v); v++;
738: }
739: }
740: jj++;
741: }
742: v = bmat->a; jj = bmat->j;
743: for (i=0; i<bmat->nz; i++) {
744: for (j=0; j<bs; j++) {
745: col = bs*garray[*jj] + j;
746: for (row=0; row<bs; row++) {
747: tmp[col] += PetscAbsScalar(*v); v++;
748: }
749: }
750: jj++;
751: }
752: MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
753: *nrm = 0.0;
754: for (j=0; j<mat->cmap->N; j++) {
755: if (tmp2[j] > *nrm) *nrm = tmp2[j];
756: }
757: PetscFree2(tmp,tmp2);
758: } else if (type == NORM_INFINITY) { /* max row sum */
759: PetscReal *sums;
760: PetscMalloc1(bs,&sums);
761: sum = 0.0;
762: for (j=0; j<amat->mbs; j++) {
763: for (row=0; row<bs; row++) sums[row] = 0.0;
764: v = amat->a + bs2*amat->i[j];
765: nz = amat->i[j+1]-amat->i[j];
766: for (i=0; i<nz; i++) {
767: for (col=0; col<bs; col++) {
768: for (row=0; row<bs; row++) {
769: sums[row] += PetscAbsScalar(*v); v++;
770: }
771: }
772: }
773: v = bmat->a + bs2*bmat->i[j];
774: nz = bmat->i[j+1]-bmat->i[j];
775: for (i=0; i<nz; i++) {
776: for (col=0; col<bs; col++) {
777: for (row=0; row<bs; row++) {
778: sums[row] += PetscAbsScalar(*v); v++;
779: }
780: }
781: }
782: for (row=0; row<bs; row++) {
783: if (sums[row] > sum) sum = sums[row];
784: }
785: }
786: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));
787: PetscFree(sums);
788: } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
789: }
790: return(0);
791: }
793: /*
794: Creates the hash table, and sets the table
795: This table is created only once.
796: If new entried need to be added to the matrix
797: then the hash table has to be destroyed and
798: recreated.
799: */
802: PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
803: {
804: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
805: Mat A = baij->A,B=baij->B;
806: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
807: PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
809: PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
810: PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
811: PetscInt *HT,key;
812: MatScalar **HD;
813: PetscReal tmp;
814: #if defined(PETSC_USE_INFO)
815: PetscInt ct=0,max=0;
816: #endif
819: if (baij->ht) return(0);
821: baij->ht_size = (PetscInt)(factor*nz);
822: ht_size = baij->ht_size;
824: /* Allocate Memory for Hash Table */
825: PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);
826: HD = baij->hd;
827: HT = baij->ht;
829: /* Loop Over A */
830: for (i=0; i<a->mbs; i++) {
831: for (j=ai[i]; j<ai[i+1]; j++) {
832: row = i+rstart;
833: col = aj[j]+cstart;
835: key = row*Nbs + col + 1;
836: h1 = HASH(ht_size,key,tmp);
837: for (k=0; k<ht_size; k++) {
838: if (!HT[(h1+k)%ht_size]) {
839: HT[(h1+k)%ht_size] = key;
840: HD[(h1+k)%ht_size] = a->a + j*bs2;
841: break;
842: #if defined(PETSC_USE_INFO)
843: } else {
844: ct++;
845: #endif
846: }
847: }
848: #if defined(PETSC_USE_INFO)
849: if (k> max) max = k;
850: #endif
851: }
852: }
853: /* Loop Over B */
854: for (i=0; i<b->mbs; i++) {
855: for (j=bi[i]; j<bi[i+1]; j++) {
856: row = i+rstart;
857: col = garray[bj[j]];
858: key = row*Nbs + col + 1;
859: h1 = HASH(ht_size,key,tmp);
860: for (k=0; k<ht_size; k++) {
861: if (!HT[(h1+k)%ht_size]) {
862: HT[(h1+k)%ht_size] = key;
863: HD[(h1+k)%ht_size] = b->a + j*bs2;
864: break;
865: #if defined(PETSC_USE_INFO)
866: } else {
867: ct++;
868: #endif
869: }
870: }
871: #if defined(PETSC_USE_INFO)
872: if (k> max) max = k;
873: #endif
874: }
875: }
877: /* Print Summary */
878: #if defined(PETSC_USE_INFO)
879: for (i=0,j=0; i<ht_size; i++) {
880: if (HT[i]) j++;
881: }
882: PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
883: #endif
884: return(0);
885: }
889: PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
890: {
891: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
893: PetscInt nstash,reallocs;
894: InsertMode addv;
897: if (baij->donotstash || mat->nooffprocentries) return(0);
899: /* make sure all processors are either in INSERTMODE or ADDMODE */
900: MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
901: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
902: mat->insertmode = addv; /* in case this processor had no cache */
904: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
905: MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);
906: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
907: PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
908: MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);
909: PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
910: return(0);
911: }
915: PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
916: {
917: Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
918: Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data;
920: PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2;
921: PetscInt *row,*col;
922: PetscBool r1,r2,r3,other_disassembled;
923: MatScalar *val;
924: InsertMode addv = mat->insertmode;
925: PetscMPIInt n;
928: /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
929: if (!baij->donotstash && !mat->nooffprocentries) {
930: while (1) {
931: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
932: if (!flg) break;
934: for (i=0; i<n;) {
935: /* Now identify the consecutive vals belonging to the same row */
936: for (j=i,rstart=row[j]; j<n; j++) {
937: if (row[j] != rstart) break;
938: }
939: if (j < n) ncols = j-i;
940: else ncols = n-i;
941: /* Now assemble all these values with a single function call */
942: MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
943: i = j;
944: }
945: }
946: MatStashScatterEnd_Private(&mat->stash);
947: /* Now process the block-stash. Since the values are stashed column-oriented,
948: set the roworiented flag to column oriented, and after MatSetValues()
949: restore the original flags */
950: r1 = baij->roworiented;
951: r2 = a->roworiented;
952: r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
954: baij->roworiented = PETSC_FALSE;
955: a->roworiented = PETSC_FALSE;
957: (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
958: while (1) {
959: MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);
960: if (!flg) break;
962: for (i=0; i<n;) {
963: /* Now identify the consecutive vals belonging to the same row */
964: for (j=i,rstart=row[j]; j<n; j++) {
965: if (row[j] != rstart) break;
966: }
967: if (j < n) ncols = j-i;
968: else ncols = n-i;
969: MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);
970: i = j;
971: }
972: }
973: MatStashScatterEnd_Private(&mat->bstash);
975: baij->roworiented = r1;
976: a->roworiented = r2;
978: ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
979: }
981: MatAssemblyBegin(baij->A,mode);
982: MatAssemblyEnd(baij->A,mode);
984: /* determine if any processor has disassembled, if so we must
985: also disassemble ourselfs, in order that we may reassemble. */
986: /*
987: if nonzero structure of submatrix B cannot change then we know that
988: no processor disassembled thus we can skip this stuff
989: */
990: if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
991: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));
992: if (mat->was_assembled && !other_disassembled) {
993: MatDisAssemble_MPIBAIJ(mat);
994: }
995: }
997: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
998: MatSetUpMultiply_MPIBAIJ(mat);
999: }
1000: MatAssemblyBegin(baij->B,mode);
1001: MatAssemblyEnd(baij->B,mode);
1003: #if defined(PETSC_USE_INFO)
1004: if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1005: PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
1007: baij->ht_total_ct = 0;
1008: baij->ht_insert_ct = 0;
1009: }
1010: #endif
1011: if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1012: MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);
1014: mat->ops->setvalues = MatSetValues_MPIBAIJ_HT;
1015: mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1016: }
1018: PetscFree2(baij->rowvalues,baij->rowindices);
1020: baij->rowvalues = 0;
1022: /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
1023: if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
1024: PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
1025: MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));
1026: }
1027: return(0);
1028: }
1030: extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
1031: #include <petscdraw.h>
1034: static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1035: {
1036: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1037: PetscErrorCode ierr;
1038: PetscMPIInt rank = baij->rank;
1039: PetscInt bs = mat->rmap->bs;
1040: PetscBool iascii,isdraw;
1041: PetscViewer sviewer;
1042: PetscViewerFormat format;
1045: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1046: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1047: if (iascii) {
1048: PetscViewerGetFormat(viewer,&format);
1049: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1050: MatInfo info;
1051: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
1052: MatGetInfo(mat,MAT_LOCAL,&info);
1053: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
1054: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
1055: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);
1056: MatGetInfo(baij->A,MAT_LOCAL,&info);
1057: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1058: MatGetInfo(baij->B,MAT_LOCAL,&info);
1059: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1060: PetscViewerFlush(viewer);
1061: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
1062: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
1063: VecScatterView(baij->Mvctx,viewer);
1064: return(0);
1065: } else if (format == PETSC_VIEWER_ASCII_INFO) {
1066: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1067: return(0);
1068: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1069: return(0);
1070: }
1071: }
1073: if (isdraw) {
1074: PetscDraw draw;
1075: PetscBool isnull;
1076: PetscViewerDrawGetDraw(viewer,0,&draw);
1077: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1078: }
1080: {
1081: /* assemble the entire matrix onto first processor. */
1082: Mat A;
1083: Mat_SeqBAIJ *Aloc;
1084: PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1085: MatScalar *a;
1086: const char *matname;
1088: /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1089: /* Perhaps this should be the type of mat? */
1090: MatCreate(PetscObjectComm((PetscObject)mat),&A);
1091: if (!rank) {
1092: MatSetSizes(A,M,N,M,N);
1093: } else {
1094: MatSetSizes(A,0,0,M,N);
1095: }
1096: MatSetType(A,MATMPIBAIJ);
1097: MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);
1098: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
1099: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
1101: /* copy over the A part */
1102: Aloc = (Mat_SeqBAIJ*)baij->A->data;
1103: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1104: PetscMalloc1(bs,&rvals);
1106: for (i=0; i<mbs; i++) {
1107: rvals[0] = bs*(baij->rstartbs + i);
1108: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1109: for (j=ai[i]; j<ai[i+1]; j++) {
1110: col = (baij->cstartbs+aj[j])*bs;
1111: for (k=0; k<bs; k++) {
1112: MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
1113: col++; a += bs;
1114: }
1115: }
1116: }
1117: /* copy over the B part */
1118: Aloc = (Mat_SeqBAIJ*)baij->B->data;
1119: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1120: for (i=0; i<mbs; i++) {
1121: rvals[0] = bs*(baij->rstartbs + i);
1122: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1123: for (j=ai[i]; j<ai[i+1]; j++) {
1124: col = baij->garray[aj[j]]*bs;
1125: for (k=0; k<bs; k++) {
1126: MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
1127: col++; a += bs;
1128: }
1129: }
1130: }
1131: PetscFree(rvals);
1132: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1133: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1134: /*
1135: Everyone has to call to draw the matrix since the graphics waits are
1136: synchronized across all processors that share the PetscDraw object
1137: */
1138: PetscViewerGetSingleton(viewer,&sviewer);
1139: PetscObjectGetName((PetscObject)mat,&matname);
1140: if (!rank) {
1141: PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);
1142: MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);
1143: }
1144: PetscViewerRestoreSingleton(viewer,&sviewer);
1145: MatDestroy(&A);
1146: }
1147: return(0);
1148: }
1152: static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1153: {
1154: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data;
1155: Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data;
1156: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data;
1158: PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1159: PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1160: int fd;
1161: PetscScalar *column_values;
1162: FILE *file;
1163: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
1164: PetscInt message_count,flowcontrolcount;
1167: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
1168: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
1169: nz = bs2*(A->nz + B->nz);
1170: rlen = mat->rmap->n;
1171: PetscViewerBinaryGetDescriptor(viewer,&fd);
1172: if (!rank) {
1173: header[0] = MAT_FILE_CLASSID;
1174: header[1] = mat->rmap->N;
1175: header[2] = mat->cmap->N;
1177: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1178: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
1179: /* get largest number of rows any processor has */
1180: range = mat->rmap->range;
1181: for (i=1; i<size; i++) {
1182: rlen = PetscMax(rlen,range[i+1] - range[i]);
1183: }
1184: } else {
1185: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1186: }
1188: PetscMalloc1(rlen/bs,&crow_lens);
1189: /* compute lengths of each row */
1190: for (i=0; i<a->mbs; i++) {
1191: crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1192: }
1193: /* store the row lengths to the file */
1194: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1195: if (!rank) {
1196: MPI_Status status;
1197: PetscMalloc1(rlen,&row_lens);
1198: rlen = (range[1] - range[0])/bs;
1199: for (i=0; i<rlen; i++) {
1200: for (j=0; j<bs; j++) {
1201: row_lens[i*bs+j] = bs*crow_lens[i];
1202: }
1203: }
1204: PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);
1205: for (i=1; i<size; i++) {
1206: rlen = (range[i+1] - range[i])/bs;
1207: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1208: MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1209: for (k=0; k<rlen; k++) {
1210: for (j=0; j<bs; j++) {
1211: row_lens[k*bs+j] = bs*crow_lens[k];
1212: }
1213: }
1214: PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);
1215: }
1216: PetscViewerFlowControlEndMaster(viewer,&message_count);
1217: PetscFree(row_lens);
1218: } else {
1219: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1220: MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1221: PetscViewerFlowControlEndWorker(viewer,&message_count);
1222: }
1223: PetscFree(crow_lens);
1225: /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1226: information needed to make it for each row from a block row. This does require more communication but still not more than
1227: the communication needed for the nonzero values */
1228: nzmax = nz; /* space a largest processor needs */
1229: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));
1230: PetscMalloc1(nzmax,&column_indices);
1231: cnt = 0;
1232: for (i=0; i<a->mbs; i++) {
1233: pcnt = cnt;
1234: for (j=B->i[i]; j<B->i[i+1]; j++) {
1235: if ((col = garray[B->j[j]]) > cstart) break;
1236: for (l=0; l<bs; l++) {
1237: column_indices[cnt++] = bs*col+l;
1238: }
1239: }
1240: for (k=A->i[i]; k<A->i[i+1]; k++) {
1241: for (l=0; l<bs; l++) {
1242: column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1243: }
1244: }
1245: for (; j<B->i[i+1]; j++) {
1246: for (l=0; l<bs; l++) {
1247: column_indices[cnt++] = bs*garray[B->j[j]]+l;
1248: }
1249: }
1250: len = cnt - pcnt;
1251: for (k=1; k<bs; k++) {
1252: PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));
1253: cnt += len;
1254: }
1255: }
1256: if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1258: /* store the columns to the file */
1259: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1260: if (!rank) {
1261: MPI_Status status;
1262: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
1263: for (i=1; i<size; i++) {
1264: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1265: MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1266: MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1267: PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);
1268: }
1269: PetscViewerFlowControlEndMaster(viewer,&message_count);
1270: } else {
1271: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1272: MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1273: MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1274: PetscViewerFlowControlEndWorker(viewer,&message_count);
1275: }
1276: PetscFree(column_indices);
1278: /* load up the numerical values */
1279: PetscMalloc1(nzmax,&column_values);
1280: cnt = 0;
1281: for (i=0; i<a->mbs; i++) {
1282: rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1283: for (j=B->i[i]; j<B->i[i+1]; j++) {
1284: if (garray[B->j[j]] > cstart) break;
1285: for (l=0; l<bs; l++) {
1286: for (ll=0; ll<bs; ll++) {
1287: column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1288: }
1289: }
1290: cnt += bs;
1291: }
1292: for (k=A->i[i]; k<A->i[i+1]; k++) {
1293: for (l=0; l<bs; l++) {
1294: for (ll=0; ll<bs; ll++) {
1295: column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1296: }
1297: }
1298: cnt += bs;
1299: }
1300: for (; j<B->i[i+1]; j++) {
1301: for (l=0; l<bs; l++) {
1302: for (ll=0; ll<bs; ll++) {
1303: column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1304: }
1305: }
1306: cnt += bs;
1307: }
1308: cnt += (bs-1)*rlen;
1309: }
1310: if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1312: /* store the column values to the file */
1313: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1314: if (!rank) {
1315: MPI_Status status;
1316: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
1317: for (i=1; i<size; i++) {
1318: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1319: MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1320: MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);
1321: PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);
1322: }
1323: PetscViewerFlowControlEndMaster(viewer,&message_count);
1324: } else {
1325: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1326: MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1327: MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
1328: PetscViewerFlowControlEndWorker(viewer,&message_count);
1329: }
1330: PetscFree(column_values);
1332: PetscViewerBinaryGetInfoPointer(viewer,&file);
1333: if (file) {
1334: fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1335: }
1336: return(0);
1337: }
1341: PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1342: {
1344: PetscBool iascii,isdraw,issocket,isbinary;
1347: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1348: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1349: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1350: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1351: if (iascii || isdraw || issocket) {
1352: MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);
1353: } else if (isbinary) {
1354: MatView_MPIBAIJ_Binary(mat,viewer);
1355: }
1356: return(0);
1357: }
1361: PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1362: {
1363: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1367: #if defined(PETSC_USE_LOG)
1368: PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1369: #endif
1370: MatStashDestroy_Private(&mat->stash);
1371: MatStashDestroy_Private(&mat->bstash);
1372: MatDestroy(&baij->A);
1373: MatDestroy(&baij->B);
1374: #if defined(PETSC_USE_CTABLE)
1375: PetscTableDestroy(&baij->colmap);
1376: #else
1377: PetscFree(baij->colmap);
1378: #endif
1379: PetscFree(baij->garray);
1380: VecDestroy(&baij->lvec);
1381: VecScatterDestroy(&baij->Mvctx);
1382: PetscFree2(baij->rowvalues,baij->rowindices);
1383: PetscFree(baij->barray);
1384: PetscFree2(baij->hd,baij->ht);
1385: PetscFree(baij->rangebs);
1386: PetscFree(mat->data);
1388: PetscObjectChangeTypeName((PetscObject)mat,0);
1389: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);
1390: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);
1391: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);
1392: PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);
1393: PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);
1394: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);
1395: PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);
1396: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);
1397: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);
1398: return(0);
1399: }
1403: PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1404: {
1405: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1407: PetscInt nt;
1410: VecGetLocalSize(xx,&nt);
1411: if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1412: VecGetLocalSize(yy,&nt);
1413: if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1414: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1415: (*a->A->ops->mult)(a->A,xx,yy);
1416: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1417: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1418: return(0);
1419: }
1423: PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1424: {
1425: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1429: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1430: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1431: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1432: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1433: return(0);
1434: }
1438: PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1439: {
1440: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1442: PetscBool merged;
1445: VecScatterGetMerged(a->Mvctx,&merged);
1446: /* do nondiagonal part */
1447: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1448: if (!merged) {
1449: /* send it on its way */
1450: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1451: /* do local part */
1452: (*a->A->ops->multtranspose)(a->A,xx,yy);
1453: /* receive remote parts: note this assumes the values are not actually */
1454: /* inserted in yy until the next line */
1455: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1456: } else {
1457: /* do local part */
1458: (*a->A->ops->multtranspose)(a->A,xx,yy);
1459: /* send it on its way */
1460: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1461: /* values actually were received in the Begin() but we need to call this nop */
1462: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1463: }
1464: return(0);
1465: }
1469: PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1470: {
1471: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1475: /* do nondiagonal part */
1476: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1477: /* send it on its way */
1478: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1479: /* do local part */
1480: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1481: /* receive remote parts: note this assumes the values are not actually */
1482: /* inserted in yy until the next line, which is true for my implementation*/
1483: /* but is not perhaps always true. */
1484: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1485: return(0);
1486: }
1488: /*
1489: This only works correctly for square matrices where the subblock A->A is the
1490: diagonal block
1491: */
1494: PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1495: {
1496: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1500: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1501: MatGetDiagonal(a->A,v);
1502: return(0);
1503: }
1507: PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1508: {
1509: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1513: MatScale(a->A,aa);
1514: MatScale(a->B,aa);
1515: return(0);
1516: }
1520: PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1521: {
1522: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1523: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1525: PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1526: PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1527: PetscInt *cmap,*idx_p,cstart = mat->cstartbs;
1530: if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1531: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1532: mat->getrowactive = PETSC_TRUE;
1534: if (!mat->rowvalues && (idx || v)) {
1535: /*
1536: allocate enough space to hold information from the longest row.
1537: */
1538: Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1539: PetscInt max = 1,mbs = mat->mbs,tmp;
1540: for (i=0; i<mbs; i++) {
1541: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1542: if (max < tmp) max = tmp;
1543: }
1544: PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);
1545: }
1546: lrow = row - brstart;
1548: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1549: if (!v) {pvA = 0; pvB = 0;}
1550: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1551: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1552: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1553: nztot = nzA + nzB;
1555: cmap = mat->garray;
1556: if (v || idx) {
1557: if (nztot) {
1558: /* Sort by increasing column numbers, assuming A and B already sorted */
1559: PetscInt imark = -1;
1560: if (v) {
1561: *v = v_p = mat->rowvalues;
1562: for (i=0; i<nzB; i++) {
1563: if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1564: else break;
1565: }
1566: imark = i;
1567: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1568: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1569: }
1570: if (idx) {
1571: *idx = idx_p = mat->rowindices;
1572: if (imark > -1) {
1573: for (i=0; i<imark; i++) {
1574: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1575: }
1576: } else {
1577: for (i=0; i<nzB; i++) {
1578: if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1579: else break;
1580: }
1581: imark = i;
1582: }
1583: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i];
1584: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1585: }
1586: } else {
1587: if (idx) *idx = 0;
1588: if (v) *v = 0;
1589: }
1590: }
1591: *nz = nztot;
1592: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1593: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1594: return(0);
1595: }
1599: PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1600: {
1601: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1604: if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1605: baij->getrowactive = PETSC_FALSE;
1606: return(0);
1607: }
1611: PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1612: {
1613: Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1617: MatZeroEntries(l->A);
1618: MatZeroEntries(l->B);
1619: return(0);
1620: }
1624: PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1625: {
1626: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1627: Mat A = a->A,B = a->B;
1629: PetscReal isend[5],irecv[5];
1632: info->block_size = (PetscReal)matin->rmap->bs;
1634: MatGetInfo(A,MAT_LOCAL,info);
1636: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1637: isend[3] = info->memory; isend[4] = info->mallocs;
1639: MatGetInfo(B,MAT_LOCAL,info);
1641: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1642: isend[3] += info->memory; isend[4] += info->mallocs;
1644: if (flag == MAT_LOCAL) {
1645: info->nz_used = isend[0];
1646: info->nz_allocated = isend[1];
1647: info->nz_unneeded = isend[2];
1648: info->memory = isend[3];
1649: info->mallocs = isend[4];
1650: } else if (flag == MAT_GLOBAL_MAX) {
1651: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));
1653: info->nz_used = irecv[0];
1654: info->nz_allocated = irecv[1];
1655: info->nz_unneeded = irecv[2];
1656: info->memory = irecv[3];
1657: info->mallocs = irecv[4];
1658: } else if (flag == MAT_GLOBAL_SUM) {
1659: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));
1661: info->nz_used = irecv[0];
1662: info->nz_allocated = irecv[1];
1663: info->nz_unneeded = irecv[2];
1664: info->memory = irecv[3];
1665: info->mallocs = irecv[4];
1666: } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1667: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1668: info->fill_ratio_needed = 0;
1669: info->factor_mallocs = 0;
1670: return(0);
1671: }
1675: PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1676: {
1677: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1681: switch (op) {
1682: case MAT_NEW_NONZERO_LOCATIONS:
1683: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1684: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1685: case MAT_KEEP_NONZERO_PATTERN:
1686: case MAT_NEW_NONZERO_LOCATION_ERR:
1687: MatSetOption(a->A,op,flg);
1688: MatSetOption(a->B,op,flg);
1689: break;
1690: case MAT_ROW_ORIENTED:
1691: a->roworiented = flg;
1693: MatSetOption(a->A,op,flg);
1694: MatSetOption(a->B,op,flg);
1695: break;
1696: case MAT_NEW_DIAGONALS:
1697: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1698: break;
1699: case MAT_IGNORE_OFF_PROC_ENTRIES:
1700: a->donotstash = flg;
1701: break;
1702: case MAT_USE_HASH_TABLE:
1703: a->ht_flag = flg;
1704: break;
1705: case MAT_SYMMETRIC:
1706: case MAT_STRUCTURALLY_SYMMETRIC:
1707: case MAT_HERMITIAN:
1708: case MAT_SYMMETRY_ETERNAL:
1709: MatSetOption(a->A,op,flg);
1710: break;
1711: default:
1712: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1713: }
1714: return(0);
1715: }
1719: PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1720: {
1721: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1722: Mat_SeqBAIJ *Aloc;
1723: Mat B;
1725: PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1726: PetscInt bs=A->rmap->bs,mbs=baij->mbs;
1727: MatScalar *a;
1730: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1731: if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1732: MatCreate(PetscObjectComm((PetscObject)A),&B);
1733: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
1734: MatSetType(B,((PetscObject)A)->type_name);
1735: /* Do not know preallocation information, but must set block size */
1736: MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);
1737: } else {
1738: B = *matout;
1739: }
1741: /* copy over the A part */
1742: Aloc = (Mat_SeqBAIJ*)baij->A->data;
1743: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1744: PetscMalloc1(bs,&rvals);
1746: for (i=0; i<mbs; i++) {
1747: rvals[0] = bs*(baij->rstartbs + i);
1748: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1749: for (j=ai[i]; j<ai[i+1]; j++) {
1750: col = (baij->cstartbs+aj[j])*bs;
1751: for (k=0; k<bs; k++) {
1752: MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);
1754: col++; a += bs;
1755: }
1756: }
1757: }
1758: /* copy over the B part */
1759: Aloc = (Mat_SeqBAIJ*)baij->B->data;
1760: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1761: for (i=0; i<mbs; i++) {
1762: rvals[0] = bs*(baij->rstartbs + i);
1763: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1764: for (j=ai[i]; j<ai[i+1]; j++) {
1765: col = baij->garray[aj[j]]*bs;
1766: for (k=0; k<bs; k++) {
1767: MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);
1768: col++;
1769: a += bs;
1770: }
1771: }
1772: }
1773: PetscFree(rvals);
1774: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1775: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1777: if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1778: else {
1779: MatHeaderMerge(A,B);
1780: }
1781: return(0);
1782: }
1786: PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1787: {
1788: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1789: Mat a = baij->A,b = baij->B;
1791: PetscInt s1,s2,s3;
1794: MatGetLocalSize(mat,&s2,&s3);
1795: if (rr) {
1796: VecGetLocalSize(rr,&s1);
1797: if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1798: /* Overlap communication with computation. */
1799: VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1800: }
1801: if (ll) {
1802: VecGetLocalSize(ll,&s1);
1803: if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1804: (*b->ops->diagonalscale)(b,ll,NULL);
1805: }
1806: /* scale the diagonal block */
1807: (*a->ops->diagonalscale)(a,ll,rr);
1809: if (rr) {
1810: /* Do a scatter end and then right scale the off-diagonal block */
1811: VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1812: (*b->ops->diagonalscale)(b,NULL,baij->lvec);
1813: }
1814: return(0);
1815: }
1819: PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1820: {
1821: Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1822: PetscInt *owners = A->rmap->range;
1823: PetscInt n = A->rmap->n;
1824: PetscSF sf;
1825: PetscInt *lrows;
1826: PetscSFNode *rrows;
1827: PetscInt r, p = 0, len = 0;
1831: /* Create SF where leaves are input rows and roots are owned rows */
1832: PetscMalloc1(n, &lrows);
1833: for (r = 0; r < n; ++r) lrows[r] = -1;
1834: if (!A->nooffproczerorows) {PetscMalloc1(N, &rrows);}
1835: for (r = 0; r < N; ++r) {
1836: const PetscInt idx = rows[r];
1837: if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1838: if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1839: PetscLayoutFindOwner(A->rmap,idx,&p);
1840: }
1841: if (A->nooffproczerorows) {
1842: if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank);
1843: lrows[len++] = idx - owners[p];
1844: } else {
1845: rrows[r].rank = p;
1846: rrows[r].index = rows[r] - owners[p];
1847: }
1848: }
1849: if (!A->nooffproczerorows) {
1850: PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);
1851: PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);
1852: /* Collect flags for rows to be zeroed */
1853: PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
1854: PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
1855: PetscSFDestroy(&sf);
1856: /* Compress and put in row numbers */
1857: for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1858: }
1859: /* fix right hand side if needed */
1860: if (x && b) {
1861: const PetscScalar *xx;
1862: PetscScalar *bb;
1864: VecGetArrayRead(x,&xx);
1865: VecGetArray(b,&bb);
1866: for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1867: VecRestoreArrayRead(x,&xx);
1868: VecRestoreArray(b,&bb);
1869: }
1871: /* actually zap the local rows */
1872: /*
1873: Zero the required rows. If the "diagonal block" of the matrix
1874: is square and the user wishes to set the diagonal we use separate
1875: code so that MatSetValues() is not called for each diagonal allocating
1876: new memory, thus calling lots of mallocs and slowing things down.
1878: */
1879: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1880: MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);
1881: if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1882: MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);
1883: } else if (diag != 0.0) {
1884: MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);
1885: if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1886: MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1887: for (r = 0; r < len; ++r) {
1888: const PetscInt row = lrows[r] + A->rmap->rstart;
1889: MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
1890: }
1891: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1892: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1893: } else {
1894: MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);
1895: }
1896: PetscFree(lrows);
1898: /* only change matrix nonzero state if pattern was allowed to be changed */
1899: if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1900: PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1901: MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));
1902: }
1903: return(0);
1904: }
1908: PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1909: {
1910: Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1911: PetscErrorCode ierr;
1912: PetscMPIInt n = A->rmap->n;
1913: PetscInt i,j,k,r,p = 0,len = 0,row,col,count;
1914: PetscInt *lrows,*owners = A->rmap->range;
1915: PetscSFNode *rrows;
1916: PetscSF sf;
1917: const PetscScalar *xx;
1918: PetscScalar *bb,*mask;
1919: Vec xmask,lmask;
1920: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data;
1921: PetscInt bs = A->rmap->bs, bs2 = baij->bs2;
1922: PetscScalar *aa;
1925: /* Create SF where leaves are input rows and roots are owned rows */
1926: PetscMalloc1(n, &lrows);
1927: for (r = 0; r < n; ++r) lrows[r] = -1;
1928: PetscMalloc1(N, &rrows);
1929: for (r = 0; r < N; ++r) {
1930: const PetscInt idx = rows[r];
1931: if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1932: if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1933: PetscLayoutFindOwner(A->rmap,idx,&p);
1934: }
1935: rrows[r].rank = p;
1936: rrows[r].index = rows[r] - owners[p];
1937: }
1938: PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);
1939: PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);
1940: /* Collect flags for rows to be zeroed */
1941: PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
1942: PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
1943: PetscSFDestroy(&sf);
1944: /* Compress and put in row numbers */
1945: for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1946: /* zero diagonal part of matrix */
1947: MatZeroRowsColumns(l->A,len,lrows,diag,x,b);
1948: /* handle off diagonal part of matrix */
1949: MatCreateVecs(A,&xmask,NULL);
1950: VecDuplicate(l->lvec,&lmask);
1951: VecGetArray(xmask,&bb);
1952: for (i=0; i<len; i++) bb[lrows[i]] = 1;
1953: VecRestoreArray(xmask,&bb);
1954: VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1955: VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
1956: VecDestroy(&xmask);
1957: if (x) {
1958: VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);
1959: VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);
1960: VecGetArrayRead(l->lvec,&xx);
1961: VecGetArray(b,&bb);
1962: }
1963: VecGetArray(lmask,&mask);
1964: /* remove zeroed rows of off diagonal matrix */
1965: for (i = 0; i < len; ++i) {
1966: row = lrows[i];
1967: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1968: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1969: for (k = 0; k < count; ++k) {
1970: aa[0] = 0.0;
1971: aa += bs;
1972: }
1973: }
1974: /* loop over all elements of off process part of matrix zeroing removed columns*/
1975: for (i = 0; i < l->B->rmap->N; ++i) {
1976: row = i/bs;
1977: for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1978: for (k = 0; k < bs; ++k) {
1979: col = bs*baij->j[j] + k;
1980: if (PetscAbsScalar(mask[col])) {
1981: aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1982: if (b) bb[i] -= aa[0]*xx[col];
1983: aa[0] = 0.0;
1984: }
1985: }
1986: }
1987: }
1988: if (x) {
1989: VecRestoreArray(b,&bb);
1990: VecRestoreArrayRead(l->lvec,&xx);
1991: }
1992: VecRestoreArray(lmask,&mask);
1993: VecDestroy(&lmask);
1994: PetscFree(lrows);
1996: /* only change matrix nonzero state if pattern was allowed to be changed */
1997: if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1998: PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1999: MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));
2000: }
2001: return(0);
2002: }
2006: PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
2007: {
2008: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
2012: MatSetUnfactored(a->A);
2013: return(0);
2014: }
2016: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
2020: PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag)
2021: {
2022: Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
2023: Mat a,b,c,d;
2024: PetscBool flg;
2028: a = matA->A; b = matA->B;
2029: c = matB->A; d = matB->B;
2031: MatEqual(a,c,&flg);
2032: if (flg) {
2033: MatEqual(b,d,&flg);
2034: }
2035: MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
2036: return(0);
2037: }
2041: PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
2042: {
2044: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
2045: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data;
2048: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2049: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2050: MatCopy_Basic(A,B,str);
2051: } else {
2052: MatCopy(a->A,b->A,str);
2053: MatCopy(a->B,b->B,str);
2054: }
2055: return(0);
2056: }
2060: PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
2061: {
2065: MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
2066: return(0);
2067: }
2071: PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2072: {
2074: PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs;
2075: Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data;
2076: Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data;
2079: MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);
2080: return(0);
2081: }
2085: PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2086: {
2088: Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
2089: PetscBLASInt bnz,one=1;
2090: Mat_SeqBAIJ *x,*y;
2093: if (str == SAME_NONZERO_PATTERN) {
2094: PetscScalar alpha = a;
2095: x = (Mat_SeqBAIJ*)xx->A->data;
2096: y = (Mat_SeqBAIJ*)yy->A->data;
2097: PetscBLASIntCast(x->nz,&bnz);
2098: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2099: x = (Mat_SeqBAIJ*)xx->B->data;
2100: y = (Mat_SeqBAIJ*)yy->B->data;
2101: PetscBLASIntCast(x->nz,&bnz);
2102: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2103: PetscObjectStateIncrease((PetscObject)Y);
2104: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2105: MatAXPY_Basic(Y,a,X,str);
2106: } else {
2107: Mat B;
2108: PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
2109: PetscMalloc1(yy->A->rmap->N,&nnz_d);
2110: PetscMalloc1(yy->B->rmap->N,&nnz_o);
2111: MatCreate(PetscObjectComm((PetscObject)Y),&B);
2112: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2113: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2114: MatSetBlockSizesFromMats(B,Y,Y);
2115: MatSetType(B,MATMPIBAIJ);
2116: MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);
2117: MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);
2118: MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);
2119: /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
2120: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2121: MatHeaderReplace(Y,B);
2122: PetscFree(nnz_d);
2123: PetscFree(nnz_o);
2124: }
2125: return(0);
2126: }
2130: PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2131: {
2132: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
2136: MatRealPart(a->A);
2137: MatRealPart(a->B);
2138: return(0);
2139: }
2143: PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2144: {
2145: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
2149: MatImaginaryPart(a->A);
2150: MatImaginaryPart(a->B);
2151: return(0);
2152: }
2156: PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2157: {
2159: IS iscol_local;
2160: PetscInt csize;
2163: ISGetLocalSize(iscol,&csize);
2164: if (call == MAT_REUSE_MATRIX) {
2165: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
2166: if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2167: } else {
2168: ISAllGather(iscol,&iscol_local);
2169: }
2170: MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
2171: if (call == MAT_INITIAL_MATRIX) {
2172: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
2173: ISDestroy(&iscol_local);
2174: }
2175: return(0);
2176: }
2177: extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2180: /*
2181: Not great since it makes two copies of the submatrix, first an SeqBAIJ
2182: in local and then by concatenating the local matrices the end result.
2183: Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2184: This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
2185: */
2186: PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2187: {
2189: PetscMPIInt rank,size;
2190: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2191: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2192: Mat M,Mreuse;
2193: MatScalar *vwork,*aa;
2194: MPI_Comm comm;
2195: IS isrow_new, iscol_new;
2196: PetscBool idflag,allrows, allcols;
2197: Mat_SeqBAIJ *aij;
2200: PetscObjectGetComm((PetscObject)mat,&comm);
2201: MPI_Comm_rank(comm,&rank);
2202: MPI_Comm_size(comm,&size);
2203: /* The compression and expansion should be avoided. Doesn't point
2204: out errors, might change the indices, hence buggey */
2205: ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);
2206: ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);
2208: /* Check for special case: each processor gets entire matrix columns */
2209: ISIdentity(iscol,&idflag);
2210: ISGetLocalSize(iscol,&ncol);
2211: if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2212: else allcols = PETSC_FALSE;
2214: ISIdentity(isrow,&idflag);
2215: ISGetLocalSize(isrow,&nrow);
2216: if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2217: else allrows = PETSC_FALSE;
2219: if (call == MAT_REUSE_MATRIX) {
2220: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);
2221: if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2222: MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);
2223: } else {
2224: MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);
2225: }
2226: ISDestroy(&isrow_new);
2227: ISDestroy(&iscol_new);
2228: /*
2229: m - number of local rows
2230: n - number of columns (same on all processors)
2231: rstart - first row in new global matrix generated
2232: */
2233: MatGetBlockSize(mat,&bs);
2234: MatGetSize(Mreuse,&m,&n);
2235: m = m/bs;
2236: n = n/bs;
2238: if (call == MAT_INITIAL_MATRIX) {
2239: aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2240: ii = aij->i;
2241: jj = aij->j;
2243: /*
2244: Determine the number of non-zeros in the diagonal and off-diagonal
2245: portions of the matrix in order to do correct preallocation
2246: */
2248: /* first get start and end of "diagonal" columns */
2249: if (csize == PETSC_DECIDE) {
2250: ISGetSize(isrow,&mglobal);
2251: if (mglobal == n*bs) { /* square matrix */
2252: nlocal = m;
2253: } else {
2254: nlocal = n/size + ((n % size) > rank);
2255: }
2256: } else {
2257: nlocal = csize/bs;
2258: }
2259: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
2260: rstart = rend - nlocal;
2261: if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2263: /* next, compute all the lengths */
2264: PetscMalloc2(m+1,&dlens,m+1,&olens);
2265: for (i=0; i<m; i++) {
2266: jend = ii[i+1] - ii[i];
2267: olen = 0;
2268: dlen = 0;
2269: for (j=0; j<jend; j++) {
2270: if (*jj < rstart || *jj >= rend) olen++;
2271: else dlen++;
2272: jj++;
2273: }
2274: olens[i] = olen;
2275: dlens[i] = dlen;
2276: }
2277: MatCreate(comm,&M);
2278: MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);
2279: MatSetType(M,((PetscObject)mat)->type_name);
2280: MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);
2281: MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);
2282: PetscFree2(dlens,olens);
2283: } else {
2284: PetscInt ml,nl;
2286: M = *newmat;
2287: MatGetLocalSize(M,&ml,&nl);
2288: if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2289: MatZeroEntries(M);
2290: /*
2291: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2292: rather than the slower MatSetValues().
2293: */
2294: M->was_assembled = PETSC_TRUE;
2295: M->assembled = PETSC_FALSE;
2296: }
2297: MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);
2298: MatGetOwnershipRange(M,&rstart,&rend);
2299: aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2300: ii = aij->i;
2301: jj = aij->j;
2302: aa = aij->a;
2303: for (i=0; i<m; i++) {
2304: row = rstart/bs + i;
2305: nz = ii[i+1] - ii[i];
2306: cwork = jj; jj += nz;
2307: vwork = aa; aa += nz*bs*bs;
2308: MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
2309: }
2311: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
2312: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
2313: *newmat = M;
2315: /* save submatrix used in processor for next request */
2316: if (call == MAT_INITIAL_MATRIX) {
2317: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
2318: PetscObjectDereference((PetscObject)Mreuse);
2319: }
2320: return(0);
2321: }
2325: PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2326: {
2327: MPI_Comm comm,pcomm;
2328: PetscInt clocal_size,nrows;
2329: const PetscInt *rows;
2330: PetscMPIInt size;
2331: IS crowp,lcolp;
2335: PetscObjectGetComm((PetscObject)A,&comm);
2336: /* make a collective version of 'rowp' */
2337: PetscObjectGetComm((PetscObject)rowp,&pcomm);
2338: if (pcomm==comm) {
2339: crowp = rowp;
2340: } else {
2341: ISGetSize(rowp,&nrows);
2342: ISGetIndices(rowp,&rows);
2343: ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);
2344: ISRestoreIndices(rowp,&rows);
2345: }
2346: ISSetPermutation(crowp);
2347: /* make a local version of 'colp' */
2348: PetscObjectGetComm((PetscObject)colp,&pcomm);
2349: MPI_Comm_size(pcomm,&size);
2350: if (size==1) {
2351: lcolp = colp;
2352: } else {
2353: ISAllGather(colp,&lcolp);
2354: }
2355: ISSetPermutation(lcolp);
2356: /* now we just get the submatrix */
2357: MatGetLocalSize(A,NULL,&clocal_size);
2358: MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);
2359: /* clean up */
2360: if (pcomm!=comm) {
2361: ISDestroy(&crowp);
2362: }
2363: if (size>1) {
2364: ISDestroy(&lcolp);
2365: }
2366: return(0);
2367: }
2371: PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2372: {
2373: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2374: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
2377: if (nghosts) *nghosts = B->nbs;
2378: if (ghosts) *ghosts = baij->garray;
2379: return(0);
2380: }
2384: PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2385: {
2386: Mat B;
2387: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
2388: Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2389: Mat_SeqAIJ *b;
2391: PetscMPIInt size,rank,*recvcounts = 0,*displs = 0;
2392: PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2393: PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2396: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
2397: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
2399: /* ----------------------------------------------------------------
2400: Tell every processor the number of nonzeros per row
2401: */
2402: PetscMalloc1(A->rmap->N/bs,&lens);
2403: for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2404: lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2405: }
2406: sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2407: PetscMalloc1(2*size,&recvcounts);
2408: displs = recvcounts + size;
2409: for (i=0; i<size; i++) {
2410: recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2411: displs[i] = A->rmap->range[i]/bs;
2412: }
2413: #if defined(PETSC_HAVE_MPI_IN_PLACE)
2414: MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
2415: #else
2416: MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
2417: #endif
2418: /* ---------------------------------------------------------------
2419: Create the sequential matrix of the same type as the local block diagonal
2420: */
2421: MatCreate(PETSC_COMM_SELF,&B);
2422: MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);
2423: MatSetType(B,MATSEQAIJ);
2424: MatSeqAIJSetPreallocation(B,0,lens);
2425: b = (Mat_SeqAIJ*)B->data;
2427: /*--------------------------------------------------------------------
2428: Copy my part of matrix column indices over
2429: */
2430: sendcount = ad->nz + bd->nz;
2431: jsendbuf = b->j + b->i[rstarts[rank]/bs];
2432: a_jsendbuf = ad->j;
2433: b_jsendbuf = bd->j;
2434: n = A->rmap->rend/bs - A->rmap->rstart/bs;
2435: cnt = 0;
2436: for (i=0; i<n; i++) {
2438: /* put in lower diagonal portion */
2439: m = bd->i[i+1] - bd->i[i];
2440: while (m > 0) {
2441: /* is it above diagonal (in bd (compressed) numbering) */
2442: if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2443: jsendbuf[cnt++] = garray[*b_jsendbuf++];
2444: m--;
2445: }
2447: /* put in diagonal portion */
2448: for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2449: jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2450: }
2452: /* put in upper diagonal portion */
2453: while (m-- > 0) {
2454: jsendbuf[cnt++] = garray[*b_jsendbuf++];
2455: }
2456: }
2457: if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2459: /*--------------------------------------------------------------------
2460: Gather all column indices to all processors
2461: */
2462: for (i=0; i<size; i++) {
2463: recvcounts[i] = 0;
2464: for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2465: recvcounts[i] += lens[j];
2466: }
2467: }
2468: displs[0] = 0;
2469: for (i=1; i<size; i++) {
2470: displs[i] = displs[i-1] + recvcounts[i-1];
2471: }
2472: #if defined(PETSC_HAVE_MPI_IN_PLACE)
2473: MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
2474: #else
2475: MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
2476: #endif
2477: /*--------------------------------------------------------------------
2478: Assemble the matrix into useable form (note numerical values not yet set)
2479: */
2480: /* set the b->ilen (length of each row) values */
2481: PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));
2482: /* set the b->i indices */
2483: b->i[0] = 0;
2484: for (i=1; i<=A->rmap->N/bs; i++) {
2485: b->i[i] = b->i[i-1] + lens[i-1];
2486: }
2487: PetscFree(lens);
2488: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2489: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2490: PetscFree(recvcounts);
2492: if (A->symmetric) {
2493: MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
2494: } else if (A->hermitian) {
2495: MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);
2496: } else if (A->structurally_symmetric) {
2497: MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
2498: }
2499: *newmat = B;
2500: return(0);
2501: }
2505: PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2506: {
2507: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
2509: Vec bb1 = 0;
2512: if (flag == SOR_APPLY_UPPER) {
2513: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2514: return(0);
2515: }
2517: if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2518: VecDuplicate(bb,&bb1);
2519: }
2521: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2522: if (flag & SOR_ZERO_INITIAL_GUESS) {
2523: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2524: its--;
2525: }
2527: while (its--) {
2528: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2529: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2531: /* update rhs: bb1 = bb - B*x */
2532: VecScale(mat->lvec,-1.0);
2533: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
2535: /* local sweep */
2536: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);
2537: }
2538: } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2539: if (flag & SOR_ZERO_INITIAL_GUESS) {
2540: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2541: its--;
2542: }
2543: while (its--) {
2544: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2545: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2547: /* update rhs: bb1 = bb - B*x */
2548: VecScale(mat->lvec,-1.0);
2549: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
2551: /* local sweep */
2552: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);
2553: }
2554: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2555: if (flag & SOR_ZERO_INITIAL_GUESS) {
2556: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2557: its--;
2558: }
2559: while (its--) {
2560: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2561: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2563: /* update rhs: bb1 = bb - B*x */
2564: VecScale(mat->lvec,-1.0);
2565: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
2567: /* local sweep */
2568: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);
2569: }
2570: } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2572: VecDestroy(&bb1);
2573: return(0);
2574: }
2578: PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2579: {
2581: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data;
2582: PetscInt N,i,*garray = aij->garray;
2583: PetscInt ib,jb,bs = A->rmap->bs;
2584: Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2585: MatScalar *a_val = a_aij->a;
2586: Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2587: MatScalar *b_val = b_aij->a;
2588: PetscReal *work;
2591: MatGetSize(A,NULL,&N);
2592: PetscCalloc1(N,&work);
2593: if (type == NORM_2) {
2594: for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2595: for (jb=0; jb<bs; jb++) {
2596: for (ib=0; ib<bs; ib++) {
2597: work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2598: a_val++;
2599: }
2600: }
2601: }
2602: for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2603: for (jb=0; jb<bs; jb++) {
2604: for (ib=0; ib<bs; ib++) {
2605: work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2606: b_val++;
2607: }
2608: }
2609: }
2610: } else if (type == NORM_1) {
2611: for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2612: for (jb=0; jb<bs; jb++) {
2613: for (ib=0; ib<bs; ib++) {
2614: work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2615: a_val++;
2616: }
2617: }
2618: }
2619: for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2620: for (jb=0; jb<bs; jb++) {
2621: for (ib=0; ib<bs; ib++) {
2622: work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2623: b_val++;
2624: }
2625: }
2626: }
2627: } else if (type == NORM_INFINITY) {
2628: for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2629: for (jb=0; jb<bs; jb++) {
2630: for (ib=0; ib<bs; ib++) {
2631: int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2632: work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2633: a_val++;
2634: }
2635: }
2636: }
2637: for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2638: for (jb=0; jb<bs; jb++) {
2639: for (ib=0; ib<bs; ib++) {
2640: int col = garray[b_aij->j[i]] * bs + jb;
2641: work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2642: b_val++;
2643: }
2644: }
2645: }
2646: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2647: if (type == NORM_INFINITY) {
2648: MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
2649: } else {
2650: MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
2651: }
2652: PetscFree(work);
2653: if (type == NORM_2) {
2654: for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2655: }
2656: return(0);
2657: }
2661: PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2662: {
2663: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
2667: MatInvertBlockDiagonal(a->A,values);
2668: return(0);
2669: }
2673: PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2674: {
2676: Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data;
2677: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data,*bij = (Mat_SeqBAIJ*)maij->B->data;
2680: if (!aij->nz && !bij->nz) {
2681: MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);
2682: }
2683: MatShift_Basic(Y,a);
2684: return(0);
2685: }
2687: /* -------------------------------------------------------------------*/
2688: static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2689: MatGetRow_MPIBAIJ,
2690: MatRestoreRow_MPIBAIJ,
2691: MatMult_MPIBAIJ,
2692: /* 4*/ MatMultAdd_MPIBAIJ,
2693: MatMultTranspose_MPIBAIJ,
2694: MatMultTransposeAdd_MPIBAIJ,
2695: 0,
2696: 0,
2697: 0,
2698: /*10*/ 0,
2699: 0,
2700: 0,
2701: MatSOR_MPIBAIJ,
2702: MatTranspose_MPIBAIJ,
2703: /*15*/ MatGetInfo_MPIBAIJ,
2704: MatEqual_MPIBAIJ,
2705: MatGetDiagonal_MPIBAIJ,
2706: MatDiagonalScale_MPIBAIJ,
2707: MatNorm_MPIBAIJ,
2708: /*20*/ MatAssemblyBegin_MPIBAIJ,
2709: MatAssemblyEnd_MPIBAIJ,
2710: MatSetOption_MPIBAIJ,
2711: MatZeroEntries_MPIBAIJ,
2712: /*24*/ MatZeroRows_MPIBAIJ,
2713: 0,
2714: 0,
2715: 0,
2716: 0,
2717: /*29*/ MatSetUp_MPIBAIJ,
2718: 0,
2719: 0,
2720: 0,
2721: 0,
2722: /*34*/ MatDuplicate_MPIBAIJ,
2723: 0,
2724: 0,
2725: 0,
2726: 0,
2727: /*39*/ MatAXPY_MPIBAIJ,
2728: MatGetSubMatrices_MPIBAIJ,
2729: MatIncreaseOverlap_MPIBAIJ,
2730: MatGetValues_MPIBAIJ,
2731: MatCopy_MPIBAIJ,
2732: /*44*/ 0,
2733: MatScale_MPIBAIJ,
2734: MatShift_MPIBAIJ,
2735: 0,
2736: MatZeroRowsColumns_MPIBAIJ,
2737: /*49*/ 0,
2738: 0,
2739: 0,
2740: 0,
2741: 0,
2742: /*54*/ MatFDColoringCreate_MPIXAIJ,
2743: 0,
2744: MatSetUnfactored_MPIBAIJ,
2745: MatPermute_MPIBAIJ,
2746: MatSetValuesBlocked_MPIBAIJ,
2747: /*59*/ MatGetSubMatrix_MPIBAIJ,
2748: MatDestroy_MPIBAIJ,
2749: MatView_MPIBAIJ,
2750: 0,
2751: 0,
2752: /*64*/ 0,
2753: 0,
2754: 0,
2755: 0,
2756: 0,
2757: /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2758: 0,
2759: 0,
2760: 0,
2761: 0,
2762: /*74*/ 0,
2763: MatFDColoringApply_BAIJ,
2764: 0,
2765: 0,
2766: 0,
2767: /*79*/ 0,
2768: 0,
2769: 0,
2770: 0,
2771: MatLoad_MPIBAIJ,
2772: /*84*/ 0,
2773: 0,
2774: 0,
2775: 0,
2776: 0,
2777: /*89*/ 0,
2778: 0,
2779: 0,
2780: 0,
2781: 0,
2782: /*94*/ 0,
2783: 0,
2784: 0,
2785: 0,
2786: 0,
2787: /*99*/ 0,
2788: 0,
2789: 0,
2790: 0,
2791: 0,
2792: /*104*/0,
2793: MatRealPart_MPIBAIJ,
2794: MatImaginaryPart_MPIBAIJ,
2795: 0,
2796: 0,
2797: /*109*/0,
2798: 0,
2799: 0,
2800: 0,
2801: 0,
2802: /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2803: 0,
2804: MatGetGhosts_MPIBAIJ,
2805: 0,
2806: 0,
2807: /*119*/0,
2808: 0,
2809: 0,
2810: 0,
2811: MatGetMultiProcBlock_MPIBAIJ,
2812: /*124*/0,
2813: MatGetColumnNorms_MPIBAIJ,
2814: MatInvertBlockDiagonal_MPIBAIJ,
2815: 0,
2816: 0,
2817: /*129*/ 0,
2818: 0,
2819: 0,
2820: 0,
2821: 0,
2822: /*134*/ 0,
2823: 0,
2824: 0,
2825: 0,
2826: 0,
2827: /*139*/ 0,
2828: 0,
2829: 0,
2830: MatFDColoringSetUp_MPIXAIJ,
2831: 0,
2832: /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2833: };
2837: PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2838: {
2840: *a = ((Mat_MPIBAIJ*)A->data)->A;
2841: return(0);
2842: }
2844: PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2848: PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2849: {
2850: PetscInt m,rstart,cstart,cend;
2851: PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2852: const PetscInt *JJ =0;
2853: PetscScalar *values=0;
2854: PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2858: PetscLayoutSetBlockSize(B->rmap,bs);
2859: PetscLayoutSetBlockSize(B->cmap,bs);
2860: PetscLayoutSetUp(B->rmap);
2861: PetscLayoutSetUp(B->cmap);
2862: PetscLayoutGetBlockSize(B->rmap,&bs);
2863: m = B->rmap->n/bs;
2864: rstart = B->rmap->rstart/bs;
2865: cstart = B->cmap->rstart/bs;
2866: cend = B->cmap->rend/bs;
2868: if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2869: PetscMalloc2(m,&d_nnz,m,&o_nnz);
2870: for (i=0; i<m; i++) {
2871: nz = ii[i+1] - ii[i];
2872: if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2873: nz_max = PetscMax(nz_max,nz);
2874: JJ = jj + ii[i];
2875: for (j=0; j<nz; j++) {
2876: if (*JJ >= cstart) break;
2877: JJ++;
2878: }
2879: d = 0;
2880: for (; j<nz; j++) {
2881: if (*JJ++ >= cend) break;
2882: d++;
2883: }
2884: d_nnz[i] = d;
2885: o_nnz[i] = nz - d;
2886: }
2887: MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);
2888: PetscFree2(d_nnz,o_nnz);
2890: values = (PetscScalar*)V;
2891: if (!values) {
2892: PetscMalloc1(bs*bs*nz_max,&values);
2893: PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));
2894: }
2895: for (i=0; i<m; i++) {
2896: PetscInt row = i + rstart;
2897: PetscInt ncols = ii[i+1] - ii[i];
2898: const PetscInt *icols = jj + ii[i];
2899: if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2900: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2901: MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);
2902: } else { /* block ordering does not match so we can only insert one block at a time. */
2903: PetscInt j;
2904: for (j=0; j<ncols; j++) {
2905: const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2906: MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);
2907: }
2908: }
2909: }
2911: if (!V) { PetscFree(values); }
2912: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2913: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2914: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2915: return(0);
2916: }
2920: /*@C
2921: MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2922: (the default parallel PETSc format).
2924: Collective on MPI_Comm
2926: Input Parameters:
2927: + B - the matrix
2928: . bs - the block size
2929: . i - the indices into j for the start of each local row (starts with zero)
2930: . j - the column indices for each local row (starts with zero) these must be sorted for each row
2931: - v - optional values in the matrix
2933: Level: developer
2935: Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs
2936: may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2937: over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
2938: MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2939: block column and the second index is over columns within a block.
2941: .keywords: matrix, aij, compressed row, sparse, parallel
2943: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2944: @*/
2945: PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2946: {
2953: PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
2954: return(0);
2955: }
2959: PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2960: {
2961: Mat_MPIBAIJ *b;
2963: PetscInt i;
2966: MatSetBlockSize(B,PetscAbs(bs));
2967: PetscLayoutSetUp(B->rmap);
2968: PetscLayoutSetUp(B->cmap);
2969: PetscLayoutGetBlockSize(B->rmap,&bs);
2971: if (d_nnz) {
2972: for (i=0; i<B->rmap->n/bs; i++) {
2973: if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2974: }
2975: }
2976: if (o_nnz) {
2977: for (i=0; i<B->rmap->n/bs; i++) {
2978: if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2979: }
2980: }
2982: b = (Mat_MPIBAIJ*)B->data;
2983: b->bs2 = bs*bs;
2984: b->mbs = B->rmap->n/bs;
2985: b->nbs = B->cmap->n/bs;
2986: b->Mbs = B->rmap->N/bs;
2987: b->Nbs = B->cmap->N/bs;
2989: for (i=0; i<=b->size; i++) {
2990: b->rangebs[i] = B->rmap->range[i]/bs;
2991: }
2992: b->rstartbs = B->rmap->rstart/bs;
2993: b->rendbs = B->rmap->rend/bs;
2994: b->cstartbs = B->cmap->rstart/bs;
2995: b->cendbs = B->cmap->rend/bs;
2997: if (!B->preallocated) {
2998: MatCreate(PETSC_COMM_SELF,&b->A);
2999: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
3000: MatSetType(b->A,MATSEQBAIJ);
3001: PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);
3002: MatCreate(PETSC_COMM_SELF,&b->B);
3003: MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
3004: MatSetType(b->B,MATSEQBAIJ);
3005: PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);
3006: MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);
3007: }
3009: MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);
3010: MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);
3011: B->preallocated = PETSC_TRUE;
3012: return(0);
3013: }
3015: extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3016: extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3020: PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
3021: {
3022: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data;
3024: Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3025: PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3026: const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3029: PetscMalloc1(M+1,&ii);
3030: ii[0] = 0;
3031: for (i=0; i<M; i++) {
3032: if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3033: if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3034: ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3035: /* remove one from count of matrix has diagonal */
3036: for (j=id[i]; j<id[i+1]; j++) {
3037: if (jd[j] == i) {ii[i+1]--;break;}
3038: }
3039: }
3040: PetscMalloc1(ii[M],&jj);
3041: cnt = 0;
3042: for (i=0; i<M; i++) {
3043: for (j=io[i]; j<io[i+1]; j++) {
3044: if (garray[jo[j]] > rstart) break;
3045: jj[cnt++] = garray[jo[j]];
3046: }
3047: for (k=id[i]; k<id[i+1]; k++) {
3048: if (jd[k] != i) {
3049: jj[cnt++] = rstart + jd[k];
3050: }
3051: }
3052: for (; j<io[i+1]; j++) {
3053: jj[cnt++] = garray[jo[j]];
3054: }
3055: }
3056: MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);
3057: return(0);
3058: }
3060: #include <../src/mat/impls/aij/mpi/mpiaij.h>
3062: PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3066: PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
3067: {
3069: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3070: Mat B;
3071: Mat_MPIAIJ *b;
3074: if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
3076: MatCreate(PetscObjectComm((PetscObject)A),&B);
3077: MatSetType(B,MATMPIAIJ);
3078: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3079: MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);
3080: MatSeqAIJSetPreallocation(B,0,NULL);
3081: MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);
3082: b = (Mat_MPIAIJ*) B->data;
3084: MatDestroy(&b->A);
3085: MatDestroy(&b->B);
3086: MatDisAssemble_MPIBAIJ(A);
3087: MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);
3088: MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);
3089: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3090: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3091: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
3092: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
3093: if (reuse == MAT_REUSE_MATRIX) {
3094: MatHeaderReplace(A,B);
3095: } else {
3096: *newmat = B;
3097: }
3098: return(0);
3099: }
3101: /*MC
3102: MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3104: Options Database Keys:
3105: + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3106: . -mat_block_size <bs> - set the blocksize used to store the matrix
3107: - -mat_use_hash_table <fact>
3109: Level: beginner
3111: .seealso: MatCreateMPIBAIJ
3112: M*/
3114: PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3118: PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3119: {
3120: Mat_MPIBAIJ *b;
3122: PetscBool flg = PETSC_FALSE;
3125: PetscNewLog(B,&b);
3126: B->data = (void*)b;
3128: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
3129: B->assembled = PETSC_FALSE;
3131: B->insertmode = NOT_SET_VALUES;
3132: MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);
3133: MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);
3135: /* build local table of row and column ownerships */
3136: PetscMalloc1(b->size+1,&b->rangebs);
3138: /* build cache for off array entries formed */
3139: MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);
3141: b->donotstash = PETSC_FALSE;
3142: b->colmap = NULL;
3143: b->garray = NULL;
3144: b->roworiented = PETSC_TRUE;
3146: /* stuff used in block assembly */
3147: b->barray = 0;
3149: /* stuff used for matrix vector multiply */
3150: b->lvec = 0;
3151: b->Mvctx = 0;
3153: /* stuff for MatGetRow() */
3154: b->rowindices = 0;
3155: b->rowvalues = 0;
3156: b->getrowactive = PETSC_FALSE;
3158: /* hash table stuff */
3159: b->ht = 0;
3160: b->hd = 0;
3161: b->ht_size = 0;
3162: b->ht_flag = PETSC_FALSE;
3163: b->ht_fact = 0;
3164: b->ht_total_ct = 0;
3165: b->ht_insert_ct = 0;
3167: /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3168: b->ijonly = PETSC_FALSE;
3171: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);
3172: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);
3173: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);
3174: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);
3175: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);
3176: PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);
3177: PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);
3178: PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);
3179: PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);
3180: PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);
3181: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);
3182: PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);
3184: PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");
3185: PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);
3186: if (flg) {
3187: PetscReal fact = 1.39;
3188: MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);
3189: PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);
3190: if (fact <= 1.0) fact = 1.39;
3191: MatMPIBAIJSetHashTableFactor(B,fact);
3192: PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);
3193: }
3194: PetscOptionsEnd();
3195: return(0);
3196: }
3198: /*MC
3199: MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3201: This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3202: and MATMPIBAIJ otherwise.
3204: Options Database Keys:
3205: . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3207: Level: beginner
3209: .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3210: M*/
3214: /*@C
3215: MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3216: (block compressed row). For good matrix assembly performance
3217: the user should preallocate the matrix storage by setting the parameters
3218: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3219: performance can be increased by more than a factor of 50.
3221: Collective on Mat
3223: Input Parameters:
3224: + B - the matrix
3225: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3226: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3227: . d_nz - number of block nonzeros per block row in diagonal portion of local
3228: submatrix (same for all local rows)
3229: . d_nnz - array containing the number of block nonzeros in the various block rows
3230: of the in diagonal portion of the local (possibly different for each block
3231: row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
3232: set it even if it is zero.
3233: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
3234: submatrix (same for all local rows).
3235: - o_nnz - array containing the number of nonzeros in the various block rows of the
3236: off-diagonal portion of the local submatrix (possibly different for
3237: each block row) or NULL.
3239: If the *_nnz parameter is given then the *_nz parameter is ignored
3241: Options Database Keys:
3242: + -mat_block_size - size of the blocks to use
3243: - -mat_use_hash_table <fact>
3245: Notes:
3246: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3247: than it must be used on all processors that share the object for that argument.
3249: Storage Information:
3250: For a square global matrix we define each processor's diagonal portion
3251: to be its local rows and the corresponding columns (a square submatrix);
3252: each processor's off-diagonal portion encompasses the remainder of the
3253: local matrix (a rectangular submatrix).
3255: The user can specify preallocated storage for the diagonal part of
3256: the local submatrix with either d_nz or d_nnz (not both). Set
3257: d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3258: memory allocation. Likewise, specify preallocated storage for the
3259: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3261: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3262: the figure below we depict these three local rows and all columns (0-11).
3264: .vb
3265: 0 1 2 3 4 5 6 7 8 9 10 11
3266: --------------------------
3267: row 3 |o o o d d d o o o o o o
3268: row 4 |o o o d d d o o o o o o
3269: row 5 |o o o d d d o o o o o o
3270: --------------------------
3271: .ve
3273: Thus, any entries in the d locations are stored in the d (diagonal)
3274: submatrix, and any entries in the o locations are stored in the
3275: o (off-diagonal) submatrix. Note that the d and the o submatrices are
3276: stored simply in the MATSEQBAIJ format for compressed row storage.
3278: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3279: and o_nz should indicate the number of block nonzeros per row in the o matrix.
3280: In general, for PDE problems in which most nonzeros are near the diagonal,
3281: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
3282: or you will get TERRIBLE performance; see the users' manual chapter on
3283: matrices.
3285: You can call MatGetInfo() to get information on how effective the preallocation was;
3286: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3287: You can also run with the option -info and look for messages with the string
3288: malloc in them to see if additional memory allocation was needed.
3290: Level: intermediate
3292: .keywords: matrix, block, aij, compressed row, sparse, parallel
3294: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3295: @*/
3296: PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3297: {
3304: PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
3305: return(0);
3306: }
3310: /*@C
3311: MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3312: (block compressed row). For good matrix assembly performance
3313: the user should preallocate the matrix storage by setting the parameters
3314: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3315: performance can be increased by more than a factor of 50.
3317: Collective on MPI_Comm
3319: Input Parameters:
3320: + comm - MPI communicator
3321: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3322: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3323: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3324: This value should be the same as the local size used in creating the
3325: y vector for the matrix-vector product y = Ax.
3326: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3327: This value should be the same as the local size used in creating the
3328: x vector for the matrix-vector product y = Ax.
3329: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3330: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3331: . d_nz - number of nonzero blocks per block row in diagonal portion of local
3332: submatrix (same for all local rows)
3333: . d_nnz - array containing the number of nonzero blocks in the various block rows
3334: of the in diagonal portion of the local (possibly different for each block
3335: row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
3336: and set it even if it is zero.
3337: . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
3338: submatrix (same for all local rows).
3339: - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3340: off-diagonal portion of the local submatrix (possibly different for
3341: each block row) or NULL.
3343: Output Parameter:
3344: . A - the matrix
3346: Options Database Keys:
3347: + -mat_block_size - size of the blocks to use
3348: - -mat_use_hash_table <fact>
3350: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3351: MatXXXXSetPreallocation() paradgm instead of this routine directly.
3352: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3354: Notes:
3355: If the *_nnz parameter is given then the *_nz parameter is ignored
3357: A nonzero block is any block that as 1 or more nonzeros in it
3359: The user MUST specify either the local or global matrix dimensions
3360: (possibly both).
3362: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3363: than it must be used on all processors that share the object for that argument.
3365: Storage Information:
3366: For a square global matrix we define each processor's diagonal portion
3367: to be its local rows and the corresponding columns (a square submatrix);
3368: each processor's off-diagonal portion encompasses the remainder of the
3369: local matrix (a rectangular submatrix).
3371: The user can specify preallocated storage for the diagonal part of
3372: the local submatrix with either d_nz or d_nnz (not both). Set
3373: d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3374: memory allocation. Likewise, specify preallocated storage for the
3375: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3377: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3378: the figure below we depict these three local rows and all columns (0-11).
3380: .vb
3381: 0 1 2 3 4 5 6 7 8 9 10 11
3382: --------------------------
3383: row 3 |o o o d d d o o o o o o
3384: row 4 |o o o d d d o o o o o o
3385: row 5 |o o o d d d o o o o o o
3386: --------------------------
3387: .ve
3389: Thus, any entries in the d locations are stored in the d (diagonal)
3390: submatrix, and any entries in the o locations are stored in the
3391: o (off-diagonal) submatrix. Note that the d and the o submatrices are
3392: stored simply in the MATSEQBAIJ format for compressed row storage.
3394: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3395: and o_nz should indicate the number of block nonzeros per row in the o matrix.
3396: In general, for PDE problems in which most nonzeros are near the diagonal,
3397: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
3398: or you will get TERRIBLE performance; see the users' manual chapter on
3399: matrices.
3401: Level: intermediate
3403: .keywords: matrix, block, aij, compressed row, sparse, parallel
3405: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3406: @*/
3407: PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3408: {
3410: PetscMPIInt size;
3413: MatCreate(comm,A);
3414: MatSetSizes(*A,m,n,M,N);
3415: MPI_Comm_size(comm,&size);
3416: if (size > 1) {
3417: MatSetType(*A,MATMPIBAIJ);
3418: MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
3419: } else {
3420: MatSetType(*A,MATSEQBAIJ);
3421: MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
3422: }
3423: return(0);
3424: }
3428: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3429: {
3430: Mat mat;
3431: Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3433: PetscInt len=0;
3436: *newmat = 0;
3437: MatCreate(PetscObjectComm((PetscObject)matin),&mat);
3438: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
3439: MatSetType(mat,((PetscObject)matin)->type_name);
3440: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
3442: mat->factortype = matin->factortype;
3443: mat->preallocated = PETSC_TRUE;
3444: mat->assembled = PETSC_TRUE;
3445: mat->insertmode = NOT_SET_VALUES;
3447: a = (Mat_MPIBAIJ*)mat->data;
3448: mat->rmap->bs = matin->rmap->bs;
3449: a->bs2 = oldmat->bs2;
3450: a->mbs = oldmat->mbs;
3451: a->nbs = oldmat->nbs;
3452: a->Mbs = oldmat->Mbs;
3453: a->Nbs = oldmat->Nbs;
3455: PetscLayoutReference(matin->rmap,&mat->rmap);
3456: PetscLayoutReference(matin->cmap,&mat->cmap);
3458: a->size = oldmat->size;
3459: a->rank = oldmat->rank;
3460: a->donotstash = oldmat->donotstash;
3461: a->roworiented = oldmat->roworiented;
3462: a->rowindices = 0;
3463: a->rowvalues = 0;
3464: a->getrowactive = PETSC_FALSE;
3465: a->barray = 0;
3466: a->rstartbs = oldmat->rstartbs;
3467: a->rendbs = oldmat->rendbs;
3468: a->cstartbs = oldmat->cstartbs;
3469: a->cendbs = oldmat->cendbs;
3471: /* hash table stuff */
3472: a->ht = 0;
3473: a->hd = 0;
3474: a->ht_size = 0;
3475: a->ht_flag = oldmat->ht_flag;
3476: a->ht_fact = oldmat->ht_fact;
3477: a->ht_total_ct = 0;
3478: a->ht_insert_ct = 0;
3480: PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));
3481: if (oldmat->colmap) {
3482: #if defined(PETSC_USE_CTABLE)
3483: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
3484: #else
3485: PetscMalloc1(a->Nbs,&a->colmap);
3486: PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));
3487: PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));
3488: #endif
3489: } else a->colmap = 0;
3491: if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3492: PetscMalloc1(len,&a->garray);
3493: PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));
3494: PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));
3495: } else a->garray = 0;
3497: MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);
3498: VecDuplicate(oldmat->lvec,&a->lvec);
3499: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);
3500: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
3501: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);
3503: MatDuplicate(oldmat->A,cpvalues,&a->A);
3504: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
3505: MatDuplicate(oldmat->B,cpvalues,&a->B);
3506: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);
3507: PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
3508: *newmat = mat;
3509: return(0);
3510: }
3514: PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3515: {
3517: int fd;
3518: PetscInt i,nz,j,rstart,rend;
3519: PetscScalar *vals,*buf;
3520: MPI_Comm comm;
3521: MPI_Status status;
3522: PetscMPIInt rank,size,maxnz;
3523: PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3524: PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3525: PetscInt jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
3526: PetscMPIInt tag = ((PetscObject)viewer)->tag;
3527: PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3528: PetscInt dcount,kmax,k,nzcount,tmp,mend;
3531: /* force binary viewer to load .info file if it has not yet done so */
3532: PetscViewerSetUp(viewer);
3533: PetscObjectGetComm((PetscObject)viewer,&comm);
3534: PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");
3535: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
3536: PetscOptionsEnd();
3537: if (bs < 0) bs = 1;
3539: MPI_Comm_size(comm,&size);
3540: MPI_Comm_rank(comm,&rank);
3541: PetscViewerBinaryGetDescriptor(viewer,&fd);
3542: if (!rank) {
3543: PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
3544: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3545: }
3546: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
3547: M = header[1]; N = header[2];
3549: /* If global sizes are set, check if they are consistent with that given in the file */
3550: if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
3551: if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
3553: if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3555: /*
3556: This code adds extra rows to make sure the number of rows is
3557: divisible by the blocksize
3558: */
3559: Mbs = M/bs;
3560: extra_rows = bs - M + bs*Mbs;
3561: if (extra_rows == bs) extra_rows = 0;
3562: else Mbs++;
3563: if (extra_rows && !rank) {
3564: PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3565: }
3567: /* determine ownership of all rows */
3568: if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3569: mbs = Mbs/size + ((Mbs % size) > rank);
3570: m = mbs*bs;
3571: } else { /* User set */
3572: m = newmat->rmap->n;
3573: mbs = m/bs;
3574: }
3575: PetscMalloc2(size+1,&rowners,size+1,&browners);
3576: MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
3578: /* process 0 needs enough room for process with most rows */
3579: if (!rank) {
3580: mmax = rowners[1];
3581: for (i=2; i<=size; i++) {
3582: mmax = PetscMax(mmax,rowners[i]);
3583: }
3584: mmax*=bs;
3585: } else mmax = -1; /* unused, but compiler warns anyway */
3587: rowners[0] = 0;
3588: for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3589: for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3590: rstart = rowners[rank];
3591: rend = rowners[rank+1];
3593: /* distribute row lengths to all processors */
3594: PetscMalloc1(m,&locrowlens);
3595: if (!rank) {
3596: mend = m;
3597: if (size == 1) mend = mend - extra_rows;
3598: PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);
3599: for (j=mend; j<m; j++) locrowlens[j] = 1;
3600: PetscMalloc1(mmax,&rowlengths);
3601: PetscCalloc1(size,&procsnz);
3602: for (j=0; j<m; j++) {
3603: procsnz[0] += locrowlens[j];
3604: }
3605: for (i=1; i<size; i++) {
3606: mend = browners[i+1] - browners[i];
3607: if (i == size-1) mend = mend - extra_rows;
3608: PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);
3609: for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3610: /* calculate the number of nonzeros on each processor */
3611: for (j=0; j<browners[i+1]-browners[i]; j++) {
3612: procsnz[i] += rowlengths[j];
3613: }
3614: MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);
3615: }
3616: PetscFree(rowlengths);
3617: } else {
3618: MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);
3619: }
3621: if (!rank) {
3622: /* determine max buffer needed and allocate it */
3623: maxnz = procsnz[0];
3624: for (i=1; i<size; i++) {
3625: maxnz = PetscMax(maxnz,procsnz[i]);
3626: }
3627: PetscMalloc1(maxnz,&cols);
3629: /* read in my part of the matrix column indices */
3630: nz = procsnz[0];
3631: PetscMalloc1(nz+1,&ibuf);
3632: mycols = ibuf;
3633: if (size == 1) nz -= extra_rows;
3634: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
3635: if (size == 1) {
3636: for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3637: }
3639: /* read in every ones (except the last) and ship off */
3640: for (i=1; i<size-1; i++) {
3641: nz = procsnz[i];
3642: PetscBinaryRead(fd,cols,nz,PETSC_INT);
3643: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
3644: }
3645: /* read in the stuff for the last proc */
3646: if (size != 1) {
3647: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
3648: PetscBinaryRead(fd,cols,nz,PETSC_INT);
3649: for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3650: MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);
3651: }
3652: PetscFree(cols);
3653: } else {
3654: /* determine buffer space needed for message */
3655: nz = 0;
3656: for (i=0; i<m; i++) {
3657: nz += locrowlens[i];
3658: }
3659: PetscMalloc1(nz+1,&ibuf);
3660: mycols = ibuf;
3661: /* receive message of column indices*/
3662: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
3663: MPI_Get_count(&status,MPIU_INT,&maxnz);
3664: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3665: }
3667: /* loop over local rows, determining number of off diagonal entries */
3668: PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);
3669: PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);
3670: rowcount = 0; nzcount = 0;
3671: for (i=0; i<mbs; i++) {
3672: dcount = 0;
3673: odcount = 0;
3674: for (j=0; j<bs; j++) {
3675: kmax = locrowlens[rowcount];
3676: for (k=0; k<kmax; k++) {
3677: tmp = mycols[nzcount++]/bs;
3678: if (!mask[tmp]) {
3679: mask[tmp] = 1;
3680: if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3681: else masked1[dcount++] = tmp;
3682: }
3683: }
3684: rowcount++;
3685: }
3687: dlens[i] = dcount;
3688: odlens[i] = odcount;
3690: /* zero out the mask elements we set */
3691: for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3692: for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3693: }
3695: MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);
3696: MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);
3698: if (!rank) {
3699: PetscMalloc1(maxnz+1,&buf);
3700: /* read in my part of the matrix numerical values */
3701: nz = procsnz[0];
3702: vals = buf;
3703: mycols = ibuf;
3704: if (size == 1) nz -= extra_rows;
3705: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3706: if (size == 1) {
3707: for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3708: }
3710: /* insert into matrix */
3711: jj = rstart*bs;
3712: for (i=0; i<m; i++) {
3713: MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
3714: mycols += locrowlens[i];
3715: vals += locrowlens[i];
3716: jj++;
3717: }
3718: /* read in other processors (except the last one) and ship out */
3719: for (i=1; i<size-1; i++) {
3720: nz = procsnz[i];
3721: vals = buf;
3722: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3723: MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
3724: }
3725: /* the last proc */
3726: if (size != 1) {
3727: nz = procsnz[i] - extra_rows;
3728: vals = buf;
3729: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3730: for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3731: MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);
3732: }
3733: PetscFree(procsnz);
3734: } else {
3735: /* receive numeric values */
3736: PetscMalloc1(nz+1,&buf);
3738: /* receive message of values*/
3739: vals = buf;
3740: mycols = ibuf;
3741: MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);
3743: /* insert into matrix */
3744: jj = rstart*bs;
3745: for (i=0; i<m; i++) {
3746: MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
3747: mycols += locrowlens[i];
3748: vals += locrowlens[i];
3749: jj++;
3750: }
3751: }
3752: PetscFree(locrowlens);
3753: PetscFree(buf);
3754: PetscFree(ibuf);
3755: PetscFree2(rowners,browners);
3756: PetscFree2(dlens,odlens);
3757: PetscFree3(mask,masked1,masked2);
3758: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
3759: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
3760: return(0);
3761: }
3765: /*@
3766: MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3768: Input Parameters:
3769: . mat - the matrix
3770: . fact - factor
3772: Not Collective, each process can use a different factor
3774: Level: advanced
3776: Notes:
3777: This can also be set by the command line option: -mat_use_hash_table <fact>
3779: .keywords: matrix, hashtable, factor, HT
3781: .seealso: MatSetOption()
3782: @*/
3783: PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3784: {
3788: PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));
3789: return(0);
3790: }
3794: PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3795: {
3796: Mat_MPIBAIJ *baij;
3799: baij = (Mat_MPIBAIJ*)mat->data;
3800: baij->ht_fact = fact;
3801: return(0);
3802: }
3806: PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3807: {
3808: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3811: if (Ad) *Ad = a->A;
3812: if (Ao) *Ao = a->B;
3813: if (colmap) *colmap = a->garray;
3814: return(0);
3815: }
3817: /*
3818: Special version for direct calls from Fortran (to eliminate two function call overheads
3819: */
3820: #if defined(PETSC_HAVE_FORTRAN_CAPS)
3821: #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3822: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3823: #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3824: #endif
3828: /*@C
3829: MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3831: Collective on Mat
3833: Input Parameters:
3834: + mat - the matrix
3835: . min - number of input rows
3836: . im - input rows
3837: . nin - number of input columns
3838: . in - input columns
3839: . v - numerical values input
3840: - addvin - INSERT_VALUES or ADD_VALUES
3842: Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3844: Level: advanced
3846: .seealso: MatSetValuesBlocked()
3847: @*/
3848: PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3849: {
3850: /* convert input arguments to C version */
3851: Mat mat = *matin;
3852: PetscInt m = *min, n = *nin;
3853: InsertMode addv = *addvin;
3855: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
3856: const MatScalar *value;
3857: MatScalar *barray = baij->barray;
3858: PetscBool roworiented = baij->roworiented;
3859: PetscErrorCode ierr;
3860: PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs;
3861: PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3862: PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3865: /* tasks normally handled by MatSetValuesBlocked() */
3866: if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3867: #if defined(PETSC_USE_DEBUG)
3868: else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3869: if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3870: #endif
3871: if (mat->assembled) {
3872: mat->was_assembled = PETSC_TRUE;
3873: mat->assembled = PETSC_FALSE;
3874: }
3875: PetscLogEventBegin(MAT_SetValues,mat,0,0,0);
3878: if (!barray) {
3879: PetscMalloc1(bs2,&barray);
3880: baij->barray = barray;
3881: }
3883: if (roworiented) stepval = (n-1)*bs;
3884: else stepval = (m-1)*bs;
3886: for (i=0; i<m; i++) {
3887: if (im[i] < 0) continue;
3888: #if defined(PETSC_USE_DEBUG)
3889: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3890: #endif
3891: if (im[i] >= rstart && im[i] < rend) {
3892: row = im[i] - rstart;
3893: for (j=0; j<n; j++) {
3894: /* If NumCol = 1 then a copy is not required */
3895: if ((roworiented) && (n == 1)) {
3896: barray = (MatScalar*)v + i*bs2;
3897: } else if ((!roworiented) && (m == 1)) {
3898: barray = (MatScalar*)v + j*bs2;
3899: } else { /* Here a copy is required */
3900: if (roworiented) {
3901: value = v + i*(stepval+bs)*bs + j*bs;
3902: } else {
3903: value = v + j*(stepval+bs)*bs + i*bs;
3904: }
3905: for (ii=0; ii<bs; ii++,value+=stepval) {
3906: for (jj=0; jj<bs; jj++) {
3907: *barray++ = *value++;
3908: }
3909: }
3910: barray -=bs2;
3911: }
3913: if (in[j] >= cstart && in[j] < cend) {
3914: col = in[j] - cstart;
3915: MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);
3916: } else if (in[j] < 0) continue;
3917: #if defined(PETSC_USE_DEBUG)
3918: else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3919: #endif
3920: else {
3921: if (mat->was_assembled) {
3922: if (!baij->colmap) {
3923: MatCreateColmap_MPIBAIJ_Private(mat);
3924: }
3926: #if defined(PETSC_USE_DEBUG)
3927: #if defined(PETSC_USE_CTABLE)
3928: { PetscInt data;
3929: PetscTableFind(baij->colmap,in[j]+1,&data);
3930: if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3931: }
3932: #else
3933: if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3934: #endif
3935: #endif
3936: #if defined(PETSC_USE_CTABLE)
3937: PetscTableFind(baij->colmap,in[j]+1,&col);
3938: col = (col - 1)/bs;
3939: #else
3940: col = (baij->colmap[in[j]] - 1)/bs;
3941: #endif
3942: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3943: MatDisAssemble_MPIBAIJ(mat);
3944: col = in[j];
3945: }
3946: } else col = in[j];
3947: MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);
3948: }
3949: }
3950: } else {
3951: if (!baij->donotstash) {
3952: if (roworiented) {
3953: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
3954: } else {
3955: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
3956: }
3957: }
3958: }
3959: }
3961: /* task normally handled by MatSetValuesBlocked() */
3962: PetscLogEventEnd(MAT_SetValues,mat,0,0,0);
3963: return(0);
3964: }
3968: /*@
3969: MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
3970: CSR format the local rows.
3972: Collective on MPI_Comm
3974: Input Parameters:
3975: + comm - MPI communicator
3976: . bs - the block size, only a block size of 1 is supported
3977: . m - number of local rows (Cannot be PETSC_DECIDE)
3978: . n - This value should be the same as the local size used in creating the
3979: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3980: calculated if N is given) For square matrices n is almost always m.
3981: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3982: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3983: . i - row indices
3984: . j - column indices
3985: - a - matrix values
3987: Output Parameter:
3988: . mat - the matrix
3990: Level: intermediate
3992: Notes:
3993: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3994: thus you CANNOT change the matrix entries by changing the values of a[] after you have
3995: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3997: The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3998: the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3999: block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory
4000: with column-major ordering within blocks.
4002: The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4004: .keywords: matrix, aij, compressed row, sparse, parallel
4006: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4007: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4008: @*/
4009: PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4010: {
4014: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4015: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4016: MatCreate(comm,mat);
4017: MatSetSizes(*mat,m,n,M,N);
4018: MatSetType(*mat,MATMPISBAIJ);
4019: MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);
4020: MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);
4021: MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);
4022: return(0);
4023: }
4027: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4028: {
4030: PetscInt m,N,i,rstart,nnz,Ii,bs,cbs;
4031: PetscInt *indx;
4032: PetscScalar *values;
4035: MatGetSize(inmat,&m,&N);
4036: if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4037: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data;
4038: PetscInt *dnz,*onz,sum,mbs,Nbs;
4039: PetscInt *bindx,rmax=a->rmax,j;
4040:
4041: MatGetBlockSizes(inmat,&bs,&cbs);
4042: mbs = m/bs; Nbs = N/cbs;
4043: if (n == PETSC_DECIDE) {
4044: PetscSplitOwnership(comm,&n,&Nbs);
4045: }
4046: /* Check sum(n) = Nbs */
4047: MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
4048: if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
4050: MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);
4051: rstart -= mbs;
4053: PetscMalloc1(rmax,&bindx);
4054: MatPreallocateInitialize(comm,mbs,n,dnz,onz);
4055: for (i=0; i<mbs; i++) {
4056: MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL); /* non-blocked nnz and indx */
4057: nnz = nnz/bs;
4058: for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
4059: MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);
4060: MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);
4061: }
4062: PetscFree(bindx);
4064: MatCreate(comm,outmat);
4065: MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);
4066: MatSetBlockSizes(*outmat,bs,cbs);
4067: MatSetType(*outmat,MATMPIBAIJ);
4068: MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);
4069: MatPreallocateFinalize(dnz,onz);
4070: }
4071:
4072: /* numeric phase */
4073: MatGetBlockSizes(inmat,&bs,&cbs);
4074: MatGetOwnershipRange(*outmat,&rstart,NULL);
4076: for (i=0; i<m; i++) {
4077: MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);
4078: Ii = i + rstart;
4079: MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
4080: MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);
4081: }
4082: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
4083: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
4084: return(0);
4085: }