Actual source code: mpisbaij.c
petsc-3.11.4 2019-09-28
2: #include <../src/mat/impls/baij/mpi/mpibaij.h>
3: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4: #include <../src/mat/impls/sbaij/seq/sbaij.h>
5: #include <petscblaslapack.h>
7: #if defined(PETSC_HAVE_ELEMENTAL)
8: PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9: #endif
11: /* This could be moved to matimpl.h */
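/*
   MatPreallocateWithMats_Private - derives the preallocation of B from the nonzero patterns
   of the matrices X[]: a MATPREALLOCATOR matrix with the same layout is filled with the rows
   of every X[i] (and with the transposed entries as well when symm[i] is true), assembled,
   and MatPreallocatorPreallocate() then transfers the resulting pattern to B.
*/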
12: static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
13: {
14: Mat preallocator;
15: PetscInt r,rstart,rend;
16: PetscInt bs,i,m,n,M,N;
17: PetscBool cong = PETSC_TRUE;
23: for (i = 0; i < nm; i++) {
25: PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);
26: if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
27: }
29: MatGetBlockSize(B,&bs);
30: MatGetSize(B,&M,&N);
31: MatGetLocalSize(B,&m,&n);
32: MatCreate(PetscObjectComm((PetscObject)B),&preallocator);
33: MatSetType(preallocator,MATPREALLOCATOR);
34: MatSetBlockSize(preallocator,bs);
35: MatSetSizes(preallocator,m,n,M,N);
36: MatSetUp(preallocator);
37: MatGetOwnershipRange(preallocator,&rstart,&rend);
38: for (r = rstart; r < rend; ++r) {
39: PetscInt ncols;
40: const PetscInt *row;
41: const PetscScalar *vals;
43: for (i = 0; i < nm; i++) {
44: MatGetRow(X[i],r,&ncols,&row,&vals);
45: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
46: if (symm && symm[i]) {
47: MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);
48: }
49: MatRestoreRow(X[i],r,&ncols,&row,&vals);
50: }
51: }
52: MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
53: MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);
54: MatPreallocatorPreallocate(preallocator,fill,B);
55: MatDestroy(&preallocator);
56: return(0);
57: }
59: static PetscErrorCode MatConvert_MPISBAIJ_XAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
60: {
61: Mat B;
63: PetscInt r;
66: if (reuse != MAT_REUSE_MATRIX) {
67: PetscBool symm = PETSC_TRUE;
68: PetscInt bs;
70: MatCreate(PetscObjectComm((PetscObject)A),&B);
71: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
72: MatSetType(B,newtype);
73: MatGetBlockSize(A,&bs);
74: MatSetBlockSize(B,bs);
75: PetscLayoutSetUp(B->rmap);
76: PetscLayoutSetUp(B->cmap);
77: MatGetRowUpperTriangular(A);
78: MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);
79: MatRestoreRowUpperTriangular(A);
80: } else B = *newmat;
82: MatGetRowUpperTriangular(A);
83: for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
84: PetscInt ncols;
85: const PetscInt *row;
86: const PetscScalar *vals;
88: MatGetRow(A,r,&ncols,&row,&vals);
89: MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);
90: MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);
91: MatRestoreRow(A,r,&ncols,&row,&vals);
92: }
93: MatRestoreRowUpperTriangular(A);
94: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
95: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
97: if (reuse == MAT_INPLACE_MATRIX) {
98: MatHeaderReplace(A,&B);
99: } else {
100: *newmat = B;
101: }
102: return(0);
103: }
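/*
   Usage sketch (illustration only, not part of the original source): converting an assembled
   MPISBAIJ matrix S, which stores only the upper triangular blocks, to a standard AIJ matrix
   through the public MatConvert() interface; the conversion fills in both triangles.
*/
#if 0
static PetscErrorCode ExampleConvertToAIJ(Mat S)
{
  Mat A;

  MatConvert(S,MATAIJ,MAT_INITIAL_MATRIX,&A); /* A now holds the full symmetric matrix */
  /* ... use A ... */
  MatDestroy(&A);
  return(0);
}
#endif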
105: PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
106: {
107: Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;
111: MatStoreValues(aij->A);
112: MatStoreValues(aij->B);
113: return(0);
114: }
116: PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
117: {
118: Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;
122: MatRetrieveValues(aij->A);
123: MatRetrieveValues(aij->B);
124: return(0);
125: }
127: #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
128: { \
129: \
130: brow = row/bs; \
131: rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
132: rmax = aimax[brow]; nrow = ailen[brow]; \
133: bcol = col/bs; \
134: ridx = row % bs; cidx = col % bs; \
135: low = 0; high = nrow; \
136: while (high-low > 3) { \
137: t = (low+high)/2; \
138: if (rp[t] > bcol) high = t; \
139: else low = t; \
140: } \
141: for (_i=low; _i<high; _i++) { \
142: if (rp[_i] > bcol) break; \
143: if (rp[_i] == bcol) { \
144: bap = ap + bs2*_i + bs*cidx + ridx; \
145: if (addv == ADD_VALUES) *bap += value; \
146: else *bap = value; \
147: goto a_noinsert; \
148: } \
149: } \
150: if (a->nonew == 1) goto a_noinsert; \
151: if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
152: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
153: N = nrow++ - 1; \
154: /* shift up all the later entries in this row */ \
155: for (ii=N; ii>=_i; ii--) { \
156: rp[ii+1] = rp[ii]; \
157: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
158: } \
159: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar)); } \
160: rp[_i] = bcol; \
161: ap[bs2*_i + bs*cidx + ridx] = value; \
162: A->nonzerostate++;\
163: a_noinsert:; \
164: ailen[brow] = nrow; \
165: }
167: #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
168: { \
169: brow = row/bs; \
170: rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
171: rmax = bimax[brow]; nrow = bilen[brow]; \
172: bcol = col/bs; \
173: ridx = row % bs; cidx = col % bs; \
174: low = 0; high = nrow; \
175: while (high-low > 3) { \
176: t = (low+high)/2; \
177: if (rp[t] > bcol) high = t; \
178: else low = t; \
179: } \
180: for (_i=low; _i<high; _i++) { \
181: if (rp[_i] > bcol) break; \
182: if (rp[_i] == bcol) { \
183: bap = ap + bs2*_i + bs*cidx + ridx; \
184: if (addv == ADD_VALUES) *bap += value; \
185: else *bap = value; \
186: goto b_noinsert; \
187: } \
188: } \
189: if (b->nonew == 1) goto b_noinsert; \
190: if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
191: MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
192: N = nrow++ - 1; \
193: /* shift up all the later entries in this row */ \
194: for (ii=N; ii>=_i; ii--) { \
195: rp[ii+1] = rp[ii]; \
196: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
197: } \
198: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));} \
199: rp[_i] = bcol; \
200: ap[bs2*_i + bs*cidx + ridx] = value; \
201: B->nonzerostate++;\
202: b_noinsert:; \
203: bilen[brow] = nrow; \
204: }
206: /* Only add/insert a(i,j) with i<=j (blocks).
207: Any a(i,j) with i>j input by the user is ignored.
208: */
209: PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
210: {
211: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
212: MatScalar value;
213: PetscBool roworiented = baij->roworiented;
215: PetscInt i,j,row,col;
216: PetscInt rstart_orig=mat->rmap->rstart;
217: PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
218: PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs;
220: /* Some variables required by the macros defined above */
221: Mat A = baij->A;
222: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
223: PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
224: MatScalar *aa =a->a;
226: Mat B = baij->B;
227: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
228: PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
229: MatScalar *ba =b->a;
231: PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol;
232: PetscInt low,high,t,ridx,cidx,bs2=a->bs2;
233: MatScalar *ap,*bap;
235: /* for stash */
236: PetscInt n_loc, *in_loc = NULL;
237: MatScalar *v_loc = NULL;
240: if (!baij->donotstash) {
241: if (n > baij->n_loc) {
242: PetscFree(baij->in_loc);
243: PetscFree(baij->v_loc);
244: PetscMalloc1(n,&baij->in_loc);
245: PetscMalloc1(n,&baij->v_loc);
247: baij->n_loc = n;
248: }
249: in_loc = baij->in_loc;
250: v_loc = baij->v_loc;
251: }
253: for (i=0; i<m; i++) {
254: if (im[i] < 0) continue;
255: #if defined(PETSC_USE_DEBUG)
256: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
257: #endif
258: if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
259: row = im[i] - rstart_orig; /* local row index */
260: for (j=0; j<n; j++) {
261: if (im[i]/bs > in[j]/bs) {
262: if (a->ignore_ltriangular) {
263: continue; /* ignore lower triangular blocks */
264: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
265: }
266: if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
267: col = in[j] - cstart_orig; /* local col index */
268: brow = row/bs; bcol = col/bs;
269: if (brow > bcol) continue; /* ignore lower triangular blocks of A */
270: if (roworiented) value = v[i*n+j];
271: else value = v[i+j*m];
272: MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
273: /* MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv); */
274: } else if (in[j] < 0) continue;
275: #if defined(PETSC_USE_DEBUG)
276: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
277: #endif
278: else { /* off-diag entry (B) */
279: if (mat->was_assembled) {
280: if (!baij->colmap) {
281: MatCreateColmap_MPIBAIJ_Private(mat);
282: }
283: #if defined(PETSC_USE_CTABLE)
284: PetscTableFind(baij->colmap,in[j]/bs + 1,&col);
285: col = col - 1;
286: #else
287: col = baij->colmap[in[j]/bs] - 1;
288: #endif
289: if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
290: MatDisAssemble_MPISBAIJ(mat);
291: col = in[j];
292: /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
293: B = baij->B;
294: b = (Mat_SeqBAIJ*)(B)->data;
295: bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
296: ba = b->a;
297: } else col += in[j]%bs;
298: } else col = in[j];
299: if (roworiented) value = v[i*n+j];
300: else value = v[i+j*m];
301: MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
302: /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
303: }
304: }
305: } else { /* off processor entry */
306: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
307: if (!baij->donotstash) {
308: mat->assembled = PETSC_FALSE;
309: n_loc = 0;
310: for (j=0; j<n; j++) {
311: if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
312: in_loc[n_loc] = in[j];
313: if (roworiented) {
314: v_loc[n_loc] = v[i*n+j];
315: } else {
316: v_loc[n_loc] = v[j*m+i];
317: }
318: n_loc++;
319: }
320: MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);
321: }
322: }
323: }
324: return(0);
325: }
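/*
   Usage sketch (illustration only, not part of the original source): inserting point entries
   into an MPISBAIJ matrix with MatSetValues(). Only entries whose block row is <= the block
   column are stored; lower triangular entries either raise an error or, when
   MAT_IGNORE_LOWER_TRIANGULAR is set, are dropped by the routine above. Here i is assumed to
   be a locally owned global row index with i+1 a valid column.
*/
#if 0
static PetscErrorCode ExampleSetUpperTriangle(Mat mat,PetscInt i)
{
  PetscInt    cols[2];
  PetscScalar vals[2];

  MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE); /* drop, rather than error on, lower entries */
  cols[0] = i;   vals[0] = 4.0;  /* diagonal entry */
  cols[1] = i+1; vals[1] = -1.0; /* upper triangular entry */
  MatSetValues(mat,1,&i,2,cols,vals,INSERT_VALUES);
  MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
  return(0);
}
#endif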
327: PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
328: {
329: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
330: PetscErrorCode ierr;
331: PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N;
332: PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen;
333: PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
334: PetscBool roworiented=a->roworiented;
335: const PetscScalar *value = v;
336: MatScalar *ap,*aa = a->a,*bap;
339: if (col < row) {
340: if (a->ignore_ltriangular) return(0); /* ignore lower triangular block */
341: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
342: }
343: rp = aj + ai[row];
344: ap = aa + bs2*ai[row];
345: rmax = imax[row];
346: nrow = ailen[row];
347: value = v;
348: low = 0;
349: high = nrow;
351: while (high-low > 7) {
352: t = (low+high)/2;
353: if (rp[t] > col) high = t;
354: else low = t;
355: }
356: for (i=low; i<high; i++) {
357: if (rp[i] > col) break;
358: if (rp[i] == col) {
359: bap = ap + bs2*i;
360: if (roworiented) {
361: if (is == ADD_VALUES) {
362: for (ii=0; ii<bs; ii++) {
363: for (jj=ii; jj<bs2; jj+=bs) {
364: bap[jj] += *value++;
365: }
366: }
367: } else {
368: for (ii=0; ii<bs; ii++) {
369: for (jj=ii; jj<bs2; jj+=bs) {
370: bap[jj] = *value++;
371: }
372: }
373: }
374: } else {
375: if (is == ADD_VALUES) {
376: for (ii=0; ii<bs; ii++) {
377: for (jj=0; jj<bs; jj++) {
378: *bap++ += *value++;
379: }
380: }
381: } else {
382: for (ii=0; ii<bs; ii++) {
383: for (jj=0; jj<bs; jj++) {
384: *bap++ = *value++;
385: }
386: }
387: }
388: }
389: goto noinsert2;
390: }
391: }
392: if (nonew == 1) goto noinsert2;
393: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
394: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
395: N = nrow++ - 1; high++;
396: /* shift up all the later entries in this row */
397: for (ii=N; ii>=i; ii--) {
398: rp[ii+1] = rp[ii];
399: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
400: }
401: if (N >= i) {
402: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
403: }
404: rp[i] = col;
405: bap = ap + bs2*i;
406: if (roworiented) {
407: for (ii=0; ii<bs; ii++) {
408: for (jj=ii; jj<bs2; jj+=bs) {
409: bap[jj] = *value++;
410: }
411: }
412: } else {
413: for (ii=0; ii<bs; ii++) {
414: for (jj=0; jj<bs; jj++) {
415: *bap++ = *value++;
416: }
417: }
418: }
419: noinsert2:;
420: ailen[row] = nrow;
421: return(0);
422: }
424: /*
425: This routine is exactly duplicated in mpibaij.c
426: */
427: PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
428: {
429: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
430: PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N;
431: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
432: PetscErrorCode ierr;
433: PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
434: PetscBool roworiented=a->roworiented;
435: const PetscScalar *value = v;
436: MatScalar *ap,*aa = a->a,*bap;
439: rp = aj + ai[row];
440: ap = aa + bs2*ai[row];
441: rmax = imax[row];
442: nrow = ailen[row];
443: low = 0;
444: high = nrow;
445: value = v;
446: while (high-low > 7) {
447: t = (low+high)/2;
448: if (rp[t] > col) high = t;
449: else low = t;
450: }
451: for (i=low; i<high; i++) {
452: if (rp[i] > col) break;
453: if (rp[i] == col) {
454: bap = ap + bs2*i;
455: if (roworiented) {
456: if (is == ADD_VALUES) {
457: for (ii=0; ii<bs; ii++) {
458: for (jj=ii; jj<bs2; jj+=bs) {
459: bap[jj] += *value++;
460: }
461: }
462: } else {
463: for (ii=0; ii<bs; ii++) {
464: for (jj=ii; jj<bs2; jj+=bs) {
465: bap[jj] = *value++;
466: }
467: }
468: }
469: } else {
470: if (is == ADD_VALUES) {
471: for (ii=0; ii<bs; ii++,value+=bs) {
472: for (jj=0; jj<bs; jj++) {
473: bap[jj] += value[jj];
474: }
475: bap += bs;
476: }
477: } else {
478: for (ii=0; ii<bs; ii++,value+=bs) {
479: for (jj=0; jj<bs; jj++) {
480: bap[jj] = value[jj];
481: }
482: bap += bs;
483: }
484: }
485: }
486: goto noinsert2;
487: }
488: }
489: if (nonew == 1) goto noinsert2;
490: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
491: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
492: N = nrow++ - 1; high++;
493: /* shift up all the later entries in this row */
494: for (ii=N; ii>=i; ii--) {
495: rp[ii+1] = rp[ii];
496: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
497: }
498: if (N >= i) {
499: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
500: }
501: rp[i] = col;
502: bap = ap + bs2*i;
503: if (roworiented) {
504: for (ii=0; ii<bs; ii++) {
505: for (jj=ii; jj<bs2; jj+=bs) {
506: bap[jj] = *value++;
507: }
508: }
509: } else {
510: for (ii=0; ii<bs; ii++) {
511: for (jj=0; jj<bs; jj++) {
512: *bap++ = *value++;
513: }
514: }
515: }
516: noinsert2:;
517: ailen[row] = nrow;
518: return(0);
519: }
521: /*
522: This routine could be optimized by removing the need for the block copy below and passing stride information
523: to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
524: */
525: PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
526: {
527: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
528: const MatScalar *value;
529: MatScalar *barray =baij->barray;
530: PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
531: PetscErrorCode ierr;
532: PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs;
533: PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval;
534: PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
537: if (!barray) {
538: PetscMalloc1(bs2,&barray);
539: baij->barray = barray;
540: }
542: if (roworiented) {
543: stepval = (n-1)*bs;
544: } else {
545: stepval = (m-1)*bs;
546: }
547: for (i=0; i<m; i++) {
548: if (im[i] < 0) continue;
549: #if defined(PETSC_USE_DEBUG)
550: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
551: #endif
552: if (im[i] >= rstart && im[i] < rend) {
553: row = im[i] - rstart;
554: for (j=0; j<n; j++) {
555: if (im[i] > in[j]) {
556: if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
557: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
558: }
559: /* If only one block column (row oriented) or one block row (column oriented) is given, a copy is not required */
560: if ((roworiented) && (n == 1)) {
561: barray = (MatScalar*) v + i*bs2;
562: } else if ((!roworiented) && (m == 1)) {
563: barray = (MatScalar*) v + j*bs2;
564: } else { /* Here a copy is required */
565: if (roworiented) {
566: value = v + i*(stepval+bs)*bs + j*bs;
567: } else {
568: value = v + j*(stepval+bs)*bs + i*bs;
569: }
570: for (ii=0; ii<bs; ii++,value+=stepval) {
571: for (jj=0; jj<bs; jj++) {
572: *barray++ = *value++;
573: }
574: }
575: barray -=bs2;
576: }
578: if (in[j] >= cstart && in[j] < cend) {
579: col = in[j] - cstart;
580: MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);
581: } else if (in[j] < 0) continue;
582: #if defined(PETSC_USE_DEBUG)
583: else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
584: #endif
585: else {
586: if (mat->was_assembled) {
587: if (!baij->colmap) {
588: MatCreateColmap_MPIBAIJ_Private(mat);
589: }
591: #if defined(PETSC_USE_DEBUG)
592: #if defined(PETSC_USE_CTABLE)
593: { PetscInt data;
594: PetscTableFind(baij->colmap,in[j]+1,&data);
595: if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
596: }
597: #else
598: if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
599: #endif
600: #endif
601: #if defined(PETSC_USE_CTABLE)
602: PetscTableFind(baij->colmap,in[j]+1,&col);
603: col = (col - 1)/bs;
604: #else
605: col = (baij->colmap[in[j]] - 1)/bs;
606: #endif
607: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
608: MatDisAssemble_MPISBAIJ(mat);
609: col = in[j];
610: }
611: } else col = in[j];
612: MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);
613: }
614: }
615: } else {
616: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
617: if (!baij->donotstash) {
618: if (roworiented) {
619: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
620: } else {
621: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
622: }
623: }
624: }
625: }
626: return(0);
627: }
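/*
   Usage sketch (illustration only, not part of the original source): inserting a single
   bs x bs block with MatSetValuesBlocked(), using block (not point) indices. As in the
   routine above, only blocks with block row <= block column are kept; ib <= jb are assumed
   to be global block indices owned by this process and block points to bs*bs row-oriented values.
*/
#if 0
static PetscErrorCode ExampleSetOneBlock(Mat mat,PetscInt ib,PetscInt jb,const PetscScalar *block)
{
  MatSetValuesBlocked(mat,1,&ib,1,&jb,block,INSERT_VALUES);
  MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
  return(0);
}
#endif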
629: PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
630: {
631: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
633: PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
634: PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
637: for (i=0; i<m; i++) {
638: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
639: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
640: if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
641: row = idxm[i] - bsrstart;
642: for (j=0; j<n; j++) {
643: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
644: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
645: if (idxn[j] >= bscstart && idxn[j] < bscend) {
646: col = idxn[j] - bscstart;
647: MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);
648: } else {
649: if (!baij->colmap) {
650: MatCreateColmap_MPIBAIJ_Private(mat);
651: }
652: #if defined(PETSC_USE_CTABLE)
653: PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);
654: data--;
655: #else
656: data = baij->colmap[idxn[j]/bs]-1;
657: #endif
658: if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
659: else {
660: col = data + idxn[j]%bs;
661: MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);
662: }
663: }
664: }
665: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
666: }
667: return(0);
668: }
670: PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
671: {
672: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
674: PetscReal sum[2],*lnorm2;
677: if (baij->size == 1) {
678: MatNorm(baij->A,type,norm);
679: } else {
680: if (type == NORM_FROBENIUS) {
681: PetscMalloc1(2,&lnorm2);
682: MatNorm(baij->A,type,lnorm2);
683: *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* square of norm(A) */
684: MatNorm(baij->B,type,lnorm2);
685: *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* square of norm(B) */
686: MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
687: *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
688: PetscFree(lnorm2);
689: } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
690: Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
691: Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data;
692: PetscReal *rsum,*rsum2,vabs;
693: PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
694: PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
695: MatScalar *v;
697: PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);
698: PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));
699: /* Amat */
700: v = amat->a; jj = amat->j;
701: for (brow=0; brow<mbs; brow++) {
702: grow = bs*(rstart + brow);
703: nz = amat->i[brow+1] - amat->i[brow];
704: for (bcol=0; bcol<nz; bcol++) {
705: gcol = bs*(rstart + *jj); jj++;
706: for (col=0; col<bs; col++) {
707: for (row=0; row<bs; row++) {
708: vabs = PetscAbsScalar(*v); v++;
709: rsum[gcol+col] += vabs;
710: /* non-diagonal block */
711: if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
712: }
713: }
714: }
715: PetscLogFlops(nz*bs*bs);
716: }
717: /* Bmat */
718: v = bmat->a; jj = bmat->j;
719: for (brow=0; brow<mbs; brow++) {
720: grow = bs*(rstart + brow);
721: nz = bmat->i[brow+1] - bmat->i[brow];
722: for (bcol=0; bcol<nz; bcol++) {
723: gcol = bs*garray[*jj]; jj++;
724: for (col=0; col<bs; col++) {
725: for (row=0; row<bs; row++) {
726: vabs = PetscAbsScalar(*v); v++;
727: rsum[gcol+col] += vabs;
728: rsum[grow+row] += vabs;
729: }
730: }
731: }
732: PetscLogFlops(nz*bs*bs);
733: }
734: MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
735: *norm = 0.0;
736: for (col=0; col<mat->cmap->N; col++) {
737: if (rsum2[col] > *norm) *norm = rsum2[col];
738: }
739: PetscFree2(rsum,rsum2);
740: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
741: }
742: return(0);
743: }
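/*
   Note on the Frobenius branch above: sum[0] and sum[1] are the squared local Frobenius norms
   of the diagonal (SBAIJ) and off-diagonal (BAIJ) parts; the factor of 2 on sum[1] accounts
   for each off-diagonal block stored in B representing two entries of the symmetric matrix,
   one in each triangle, giving ||A||_F = sqrt(sum[0] + 2*sum[1]).
*/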
745: PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
746: {
747: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
749: PetscInt nstash,reallocs;
752: if (baij->donotstash || mat->nooffprocentries) return(0);
754: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
755: MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);
756: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
757: PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
758: MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);
759: PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
760: return(0);
761: }
763: PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
764: {
765: Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
766: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data;
768: PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2;
769: PetscInt *row,*col;
770: PetscBool other_disassembled;
771: PetscMPIInt n;
772: PetscBool r1,r2,r3;
773: MatScalar *val;
775: /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
777: if (!baij->donotstash && !mat->nooffprocentries) {
778: while (1) {
779: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
780: if (!flg) break;
782: for (i=0; i<n;) {
783: /* Now identify the consecutive vals belonging to the same row */
784: for (j=i,rstart=row[j]; j<n; j++) {
785: if (row[j] != rstart) break;
786: }
787: if (j < n) ncols = j-i;
788: else ncols = n-i;
789: /* Now assemble all these values with a single function call */
790: MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
791: i = j;
792: }
793: }
794: MatStashScatterEnd_Private(&mat->stash);
795: /* Now process the block-stash. Since the values are stashed column-oriented,
796: set the roworiented flag to column oriented, and after MatSetValues()
797: restore the original flags */
798: r1 = baij->roworiented;
799: r2 = a->roworiented;
800: r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
802: baij->roworiented = PETSC_FALSE;
803: a->roworiented = PETSC_FALSE;
805: ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
806: while (1) {
807: MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);
808: if (!flg) break;
810: for (i=0; i<n;) {
811: /* Now identify the consecutive vals belonging to the same row */
812: for (j=i,rstart=row[j]; j<n; j++) {
813: if (row[j] != rstart) break;
814: }
815: if (j < n) ncols = j-i;
816: else ncols = n-i;
817: MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);
818: i = j;
819: }
820: }
821: MatStashScatterEnd_Private(&mat->bstash);
823: baij->roworiented = r1;
824: a->roworiented = r2;
826: ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
827: }
829: MatAssemblyBegin(baij->A,mode);
830: MatAssemblyEnd(baij->A,mode);
832: /* determine if any processor has disassembled, if so we must
833: also disassemble ourselves, in order that we may reassemble. */
834: /*
835: if nonzero structure of submatrix B cannot change then we know that
836: no processor disassembled thus we can skip this stuff
837: */
838: if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
839: MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));
840: if (mat->was_assembled && !other_disassembled) {
841: MatDisAssemble_MPISBAIJ(mat);
842: }
843: }
845: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
846: MatSetUpMultiply_MPISBAIJ(mat); /* setup Mvctx and sMvctx */
847: }
848: MatAssemblyBegin(baij->B,mode);
849: MatAssemblyEnd(baij->B,mode);
851: PetscFree2(baij->rowvalues,baij->rowindices);
853: baij->rowvalues = 0;
855: /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
856: if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
857: PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
858: MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));
859: }
860: return(0);
861: }
863: extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
864: #include <petscdraw.h>
865: static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
866: {
867: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
868: PetscErrorCode ierr;
869: PetscInt bs = mat->rmap->bs;
870: PetscMPIInt rank = baij->rank;
871: PetscBool iascii,isdraw;
872: PetscViewer sviewer;
873: PetscViewerFormat format;
876: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
877: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
878: if (iascii) {
879: PetscViewerGetFormat(viewer,&format);
880: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
881: MatInfo info;
882: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
883: MatGetInfo(mat,MAT_LOCAL,&info);
884: PetscViewerASCIIPushSynchronized(viewer);
885: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);
886: MatGetInfo(baij->A,MAT_LOCAL,&info);
887: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
888: MatGetInfo(baij->B,MAT_LOCAL,&info);
889: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
890: PetscViewerFlush(viewer);
891: PetscViewerASCIIPopSynchronized(viewer);
892: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
893: VecScatterView(baij->Mvctx,viewer);
894: return(0);
895: } else if (format == PETSC_VIEWER_ASCII_INFO) {
896: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
897: return(0);
898: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
899: return(0);
900: }
901: }
903: if (isdraw) {
904: PetscDraw draw;
905: PetscBool isnull;
906: PetscViewerDrawGetDraw(viewer,0,&draw);
907: PetscDrawIsNull(draw,&isnull);
908: if (isnull) return(0);
909: }
911: {
912: /* assemble the entire matrix onto first processor. */
913: Mat A;
914: Mat_SeqSBAIJ *Aloc;
915: Mat_SeqBAIJ *Bloc;
916: PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
917: MatScalar *a;
918: const char *matname;
920: /* Should this be the same type as mat? */
921: MatCreate(PetscObjectComm((PetscObject)mat),&A);
922: if (!rank) {
923: MatSetSizes(A,M,N,M,N);
924: } else {
925: MatSetSizes(A,0,0,M,N);
926: }
927: MatSetType(A,MATMPISBAIJ);
928: MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);
929: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
930: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
932: /* copy over the A part */
933: Aloc = (Mat_SeqSBAIJ*)baij->A->data;
934: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
935: PetscMalloc1(bs,&rvals);
937: for (i=0; i<mbs; i++) {
938: rvals[0] = bs*(baij->rstartbs + i);
939: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
940: for (j=ai[i]; j<ai[i+1]; j++) {
941: col = (baij->cstartbs+aj[j])*bs;
942: for (k=0; k<bs; k++) {
943: MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
944: col++;
945: a += bs;
946: }
947: }
948: }
949: /* copy over the B part */
950: Bloc = (Mat_SeqBAIJ*)baij->B->data;
951: ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
952: for (i=0; i<mbs; i++) {
954: rvals[0] = bs*(baij->rstartbs + i);
955: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
956: for (j=ai[i]; j<ai[i+1]; j++) {
957: col = baij->garray[aj[j]]*bs;
958: for (k=0; k<bs; k++) {
959: MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
960: col++;
961: a += bs;
962: }
963: }
964: }
965: PetscFree(rvals);
966: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
967: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
968: /*
969: Everyone has to participate in drawing the matrix since the graphics waits are
970: synchronized across all processors that share the PetscDraw object
971: */
972: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
973: PetscObjectGetName((PetscObject)mat,&matname);
974: if (!rank) {
975: PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);
976: MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);
977: }
978: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
979: PetscViewerFlush(viewer);
980: MatDestroy(&A);
981: }
982: return(0);
983: }
985: static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
986: {
987: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data;
988: Mat_SeqSBAIJ *A = (Mat_SeqSBAIJ*)a->A->data;
989: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data;
991: PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
992: PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
993: int fd;
994: PetscScalar *column_values;
995: FILE *file;
996: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
997: PetscInt message_count,flowcontrolcount;
1000: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
1001: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
1002: nz = bs2*(A->nz + B->nz);
1003: rlen = mat->rmap->n;
1004: PetscViewerBinaryGetDescriptor(viewer,&fd);
1005: if (!rank) {
1006: header[0] = MAT_FILE_CLASSID;
1007: header[1] = mat->rmap->N;
1008: header[2] = mat->cmap->N;
1010: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1011: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
1012: /* get largest number of rows any processor has */
1013: range = mat->rmap->range;
1014: for (i=1; i<size; i++) {
1015: rlen = PetscMax(rlen,range[i+1] - range[i]);
1016: }
1017: } else {
1018: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1019: }
1021: PetscMalloc1(rlen/bs,&crow_lens);
1022: /* compute lengths of each row */
1023: for (i=0; i<a->mbs; i++) {
1024: crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1025: }
1026: /* store the row lengths to the file */
1027: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1028: if (!rank) {
1029: MPI_Status status;
1030: PetscMalloc1(rlen,&row_lens);
1031: rlen = (range[1] - range[0])/bs;
1032: for (i=0; i<rlen; i++) {
1033: for (j=0; j<bs; j++) {
1034: row_lens[i*bs+j] = bs*crow_lens[i];
1035: }
1036: }
1037: PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);
1038: for (i=1; i<size; i++) {
1039: rlen = (range[i+1] - range[i])/bs;
1040: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1041: MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1042: for (k=0; k<rlen; k++) {
1043: for (j=0; j<bs; j++) {
1044: row_lens[k*bs+j] = bs*crow_lens[k];
1045: }
1046: }
1047: PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);
1048: }
1049: PetscViewerFlowControlEndMaster(viewer,&message_count);
1050: PetscFree(row_lens);
1051: } else {
1052: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1053: MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1054: PetscViewerFlowControlEndWorker(viewer,&message_count);
1055: }
1056: PetscFree(crow_lens);
1058: /* load up the local column indices. Include them for all rows, not just one per block row, since process 0 does not have the
1059: information needed to reconstruct them for each row from a block row. This does require more communication, but still no more than
1060: the communication needed for the nonzero values */
1061: nzmax = nz; /* space the largest processor needs */
1062: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));
1063: PetscMalloc1(nzmax,&column_indices);
1064: cnt = 0;
1065: for (i=0; i<a->mbs; i++) {
1066: pcnt = cnt;
1067: for (j=B->i[i]; j<B->i[i+1]; j++) {
1068: if ((col = garray[B->j[j]]) > cstart) break;
1069: for (l=0; l<bs; l++) {
1070: column_indices[cnt++] = bs*col+l;
1071: }
1072: }
1073: for (k=A->i[i]; k<A->i[i+1]; k++) {
1074: for (l=0; l<bs; l++) {
1075: column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1076: }
1077: }
1078: for (; j<B->i[i+1]; j++) {
1079: for (l=0; l<bs; l++) {
1080: column_indices[cnt++] = bs*garray[B->j[j]]+l;
1081: }
1082: }
1083: len = cnt - pcnt;
1084: for (k=1; k<bs; k++) {
1085: PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));
1086: cnt += len;
1087: }
1088: }
1089: if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1091: /* store the columns to the file */
1092: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1093: if (!rank) {
1094: MPI_Status status;
1095: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
1096: for (i=1; i<size; i++) {
1097: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1098: MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1099: MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1100: PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);
1101: }
1102: PetscViewerFlowControlEndMaster(viewer,&message_count);
1103: } else {
1104: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1105: MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1106: MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1107: PetscViewerFlowControlEndWorker(viewer,&message_count);
1108: }
1109: PetscFree(column_indices);
1111: /* load up the numerical values */
1112: PetscMalloc1(nzmax,&column_values);
1113: cnt = 0;
1114: for (i=0; i<a->mbs; i++) {
1115: rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1116: for (j=B->i[i]; j<B->i[i+1]; j++) {
1117: if (garray[B->j[j]] > cstart) break;
1118: for (l=0; l<bs; l++) {
1119: for (ll=0; ll<bs; ll++) {
1120: column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1121: }
1122: }
1123: cnt += bs;
1124: }
1125: for (k=A->i[i]; k<A->i[i+1]; k++) {
1126: for (l=0; l<bs; l++) {
1127: for (ll=0; ll<bs; ll++) {
1128: column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1129: }
1130: }
1131: cnt += bs;
1132: }
1133: for (; j<B->i[i+1]; j++) {
1134: for (l=0; l<bs; l++) {
1135: for (ll=0; ll<bs; ll++) {
1136: column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1137: }
1138: }
1139: cnt += bs;
1140: }
1141: cnt += (bs-1)*rlen;
1142: }
1143: if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1145: /* store the column values to the file */
1146: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1147: if (!rank) {
1148: MPI_Status status;
1149: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
1150: for (i=1; i<size; i++) {
1151: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1152: MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1153: MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);
1154: PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);
1155: }
1156: PetscViewerFlowControlEndMaster(viewer,&message_count);
1157: } else {
1158: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1159: MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1160: MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
1161: PetscViewerFlowControlEndWorker(viewer,&message_count);
1162: }
1163: PetscFree(column_values);
1165: PetscViewerBinaryGetInfoPointer(viewer,&file);
1166: if (file) {
1167: fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1168: }
1169: return(0);
1170: }
1172: PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
1173: {
1175: PetscBool iascii,isdraw,issocket,isbinary;
1178: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1179: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1180: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1181: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1182: if (iascii || isdraw || issocket) {
1183: MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);
1184: } else if (isbinary) {
1185: MatView_MPISBAIJ_Binary(mat,viewer);
1186: }
1187: return(0);
1188: }
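/*
   Usage sketch (illustration only, not part of the original source): writing an assembled
   MPISBAIJ matrix to a PETSc binary file, which dispatches to MatView_MPISBAIJ_Binary()
   above; the block size is recorded in the companion .info file as -matload_block_size so
   that a later MatLoad() can recover it. The file name "sbaij.dat" is just an example.
*/
#if 0
static PetscErrorCode ExampleSaveToBinary(Mat mat)
{
  PetscViewer viewer;

  PetscViewerBinaryOpen(PetscObjectComm((PetscObject)mat),"sbaij.dat",FILE_MODE_WRITE,&viewer);
  MatView(mat,viewer);
  PetscViewerDestroy(&viewer);
  return(0);
}
#endif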
1190: PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1191: {
1192: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1196: #if defined(PETSC_USE_LOG)
1197: PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1198: #endif
1199: MatStashDestroy_Private(&mat->stash);
1200: MatStashDestroy_Private(&mat->bstash);
1201: MatDestroy(&baij->A);
1202: MatDestroy(&baij->B);
1203: #if defined(PETSC_USE_CTABLE)
1204: PetscTableDestroy(&baij->colmap);
1205: #else
1206: PetscFree(baij->colmap);
1207: #endif
1208: PetscFree(baij->garray);
1209: VecDestroy(&baij->lvec);
1210: VecScatterDestroy(&baij->Mvctx);
1211: VecDestroy(&baij->slvec0);
1212: VecDestroy(&baij->slvec0b);
1213: VecDestroy(&baij->slvec1);
1214: VecDestroy(&baij->slvec1a);
1215: VecDestroy(&baij->slvec1b);
1216: VecScatterDestroy(&baij->sMvctx);
1217: PetscFree2(baij->rowvalues,baij->rowindices);
1218: PetscFree(baij->barray);
1219: PetscFree(baij->hd);
1220: VecDestroy(&baij->diag);
1221: VecDestroy(&baij->bb1);
1222: VecDestroy(&baij->xx1);
1223: #if defined(PETSC_USE_REAL_MAT_SINGLE)
1224: PetscFree(baij->setvaluescopy);
1225: #endif
1226: PetscFree(baij->in_loc);
1227: PetscFree(baij->v_loc);
1228: PetscFree(baij->rangebs);
1229: PetscFree(mat->data);
1231: PetscObjectChangeTypeName((PetscObject)mat,0);
1232: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);
1233: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);
1234: PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);
1235: #if defined(PETSC_HAVE_ELEMENTAL)
1236: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);
1237: #endif
1238: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);
1239: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);
1240: return(0);
1241: }
1243: PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1244: {
1245: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1246: PetscErrorCode ierr;
1247: PetscInt nt,mbs=a->mbs,bs=A->rmap->bs;
1248: PetscScalar *from;
1249: const PetscScalar *x;
1252: VecGetLocalSize(xx,&nt);
1253: if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1255: /* diagonal part */
1256: (*a->A->ops->mult)(a->A,xx,a->slvec1a);
1257: VecSet(a->slvec1b,0.0);
1259: /* subdiagonal part */
1260: (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);
1262: /* copy x into the vec slvec0 */
1263: VecGetArray(a->slvec0,&from);
1264: VecGetArrayRead(xx,&x);
1266: PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));
1267: VecRestoreArray(a->slvec0,&from);
1268: VecRestoreArrayRead(xx,&x);
1270: VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1271: VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1272: /* superdiagonal part */
1273: (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);
1274: return(0);
1275: }
1277: PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1278: {
1279: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1280: PetscErrorCode ierr;
1281: PetscInt nt,mbs=a->mbs,bs=A->rmap->bs;
1282: PetscScalar *from;
1283: const PetscScalar *x;
1286: VecGetLocalSize(xx,&nt);
1287: if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1289: /* diagonal part */
1290: (*a->A->ops->mult)(a->A,xx,a->slvec1a);
1291: VecSet(a->slvec1b,0.0);
1293: /* subdiagonal part */
1294: (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);
1296: /* copy x into the vec slvec0 */
1297: VecGetArray(a->slvec0,&from);
1298: VecGetArrayRead(xx,&x);
1300: PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));
1301: VecRestoreArray(a->slvec0,&from);
1302: VecRestoreArrayRead(xx,&x);
1304: VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1305: VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1306: /* superdiagonal part */
1307: (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);
1308: return(0);
1309: }
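/*
   In outline, the two MatMult routines above compute y = (D + U + U^T) x, where D + U is the
   locally stored upper part: slvec0 = [x_local; B^T x_local] is scattered with ADD_VALUES
   into slvec1 = [A x_local; 0], so that afterwards slvec1a holds A x_local plus the remote
   U^T x contributions and slvec1b holds the needed ghost values of x; the final
   (*a->B->ops->multadd) call then adds B x_ghost to give y. The Hermitian variant uses B^H
   in place of B^T.
*/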
1311: PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1312: {
1313: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1315: PetscInt nt;
1318: VecGetLocalSize(xx,&nt);
1319: if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1321: VecGetLocalSize(yy,&nt);
1322: if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1324: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1325: /* do diagonal part */
1326: (*a->A->ops->mult)(a->A,xx,yy);
1327: /* do superdiagonal part */
1328: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1329: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1330: /* do subdiagonal part */
1331: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1332: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1333: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1334: return(0);
1335: }
1337: PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1338: {
1339: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1340: PetscErrorCode ierr;
1341: PetscInt mbs=a->mbs,bs=A->rmap->bs;
1342: PetscScalar *from,zero=0.0;
1343: const PetscScalar *x;
1346: /*
1347: PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1348: PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1349: */
1350: /* diagonal part */
1351: (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);
1352: VecSet(a->slvec1b,zero);
1354: /* subdiagonal part */
1355: (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);
1357: /* copy x into the vec slvec0 */
1358: VecGetArray(a->slvec0,&from);
1359: VecGetArrayRead(xx,&x);
1360: PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));
1361: VecRestoreArray(a->slvec0,&from);
1363: VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1364: VecRestoreArrayRead(xx,&x);
1365: VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1367: /* superdiagonal part */
1368: (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);
1369: return(0);
1370: }
1372: PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1373: {
1374: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1378: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1379: /* do diagonal part */
1380: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1381: /* do superdiagonal part */
1382: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1383: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1385: /* do subdiagonal part */
1386: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1387: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1388: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1389: return(0);
1390: }
1392: /*
1393: This only works correctly for square matrices where the subblock A->A is the
1394: diagonal block
1395: */
1396: PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1397: {
1398: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1402: /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1403: MatGetDiagonal(a->A,v);
1404: return(0);
1405: }
1407: PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1408: {
1409: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1413: MatScale(a->A,aa);
1414: MatScale(a->B,aa);
1415: return(0);
1416: }
1418: PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1419: {
1420: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1421: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1423: PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1424: PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1425: PetscInt *cmap,*idx_p,cstart = mat->rstartbs;
1428: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1429: mat->getrowactive = PETSC_TRUE;
1431: if (!mat->rowvalues && (idx || v)) {
1432: /*
1433: allocate enough space to hold information from the longest row.
1434: */
1435: Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1436: Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data;
1437: PetscInt max = 1,mbs = mat->mbs,tmp;
1438: for (i=0; i<mbs; i++) {
1439: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1440: if (max < tmp) max = tmp;
1441: }
1442: PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);
1443: }
1445: if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1446: lrow = row - brstart; /* local row index */
1448: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1449: if (!v) {pvA = 0; pvB = 0;}
1450: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1451: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1452: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1453: nztot = nzA + nzB;
1455: cmap = mat->garray;
1456: if (v || idx) {
1457: if (nztot) {
1458: /* Sort by increasing column numbers, assuming A and B already sorted */
1459: PetscInt imark = -1;
1460: if (v) {
1461: *v = v_p = mat->rowvalues;
1462: for (i=0; i<nzB; i++) {
1463: if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1464: else break;
1465: }
1466: imark = i;
1467: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1468: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1469: }
1470: if (idx) {
1471: *idx = idx_p = mat->rowindices;
1472: if (imark > -1) {
1473: for (i=0; i<imark; i++) {
1474: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1475: }
1476: } else {
1477: for (i=0; i<nzB; i++) {
1478: if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1479: else break;
1480: }
1481: imark = i;
1482: }
1483: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i];
1484: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1485: }
1486: } else {
1487: if (idx) *idx = 0;
1488: if (v) *v = 0;
1489: }
1490: }
1491: *nz = nztot;
1492: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1493: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1494: return(0);
1495: }
1497: PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1498: {
1499: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1502: if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1503: baij->getrowactive = PETSC_FALSE;
1504: return(0);
1505: }
1507: PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1508: {
1509: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1510: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1513: aA->getrow_utriangular = PETSC_TRUE;
1514: return(0);
1515: }
1516: PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1517: {
1518: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1519: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1522: aA->getrow_utriangular = PETSC_FALSE;
1523: return(0);
1524: }
1526: PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1527: {
1528: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1532: MatRealPart(a->A);
1533: MatRealPart(a->B);
1534: return(0);
1535: }
1537: PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1538: {
1539: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1543: MatImaginaryPart(a->A);
1544: MatImaginaryPart(a->B);
1545: return(0);
1546: }
1548: /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1549: Input: isrow - distributed (parallel),
1550: iscol_local - locally owned (seq)
1551: */
1552: PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg)
1553: {
1555: PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch;
1556: const PetscInt *ptr1,*ptr2;
1559: ISGetLocalSize(isrow,&sz1);
1560: ISGetLocalSize(iscol_local,&sz2);
1561: if (sz1 > sz2) {
1562: *flg = PETSC_FALSE;
1563: return(0);
1564: }
1566: ISGetIndices(isrow,&ptr1);
1567: ISGetIndices(iscol_local,&ptr2);
1569: PetscMalloc1(sz1,&a1);
1570: PetscMalloc1(sz2,&a2);
1571: PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));
1572: PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));
1573: PetscSortInt(sz1,a1);
1574: PetscSortInt(sz2,a2);
1576: nmatch=0;
1577: k = 0;
1578: for (i=0; i<sz1; i++){
1579: for (j=k; j<sz2; j++){
1580: if (a1[i] == a2[j]) {
1581: k = j; nmatch++;
1582: break;
1583: }
1584: }
1585: }
1586: ISRestoreIndices(isrow,&ptr1);
1587: ISRestoreIndices(iscol_local,&ptr2);
1588: PetscFree(a1);
1589: PetscFree(a2);
1590: if (nmatch < sz1) {
1591: *flg = PETSC_FALSE;
1592: } else {
1593: *flg = PETSC_TRUE;
1594: }
1595: return(0);
1596: }
1598: PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1599: {
1601: IS iscol_local;
1602: PetscInt csize;
1603: PetscBool isequal;
1606: ISGetLocalSize(iscol,&csize);
1607: if (call == MAT_REUSE_MATRIX) {
1608: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
1609: if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1610: } else {
1611: ISAllGather(iscol,&iscol_local);
1612: ISEqual_private(isrow,iscol_local,&isequal);
1613: if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1614: }
1616: /* now call MatCreateSubMatrix_MPIBAIJ() */
1617: MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
1618: if (call == MAT_INITIAL_MATRIX) {
1619: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
1620: ISDestroy(&iscol_local);
1621: }
1622: return(0);
1623: }
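/*
   Usage sketch (illustration only, not part of the original source): extracting a principal
   submatrix of an MPISBAIJ matrix. As enforced above, the row and column index sets must
   select the same global indices, so the same IS is passed twice; nloc and first are assumed
   to be chosen consistently with the block size and the parallel layout.
*/
#if 0
static PetscErrorCode ExamplePrincipalSubmatrix(Mat mat,PetscInt nloc,PetscInt first,Mat *sub)
{
  IS is;

  ISCreateStride(PetscObjectComm((PetscObject)mat),nloc,first,1,&is);
  MatCreateSubMatrix(mat,is,is,MAT_INITIAL_MATRIX,sub);
  ISDestroy(&is);
  return(0);
}
#endif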
1625: PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1626: {
1627: Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1631: MatZeroEntries(l->A);
1632: MatZeroEntries(l->B);
1633: return(0);
1634: }
1636: PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1637: {
1638: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1639: Mat A = a->A,B = a->B;
1641: PetscReal isend[5],irecv[5];
1644: info->block_size = (PetscReal)matin->rmap->bs;
1646: MatGetInfo(A,MAT_LOCAL,info);
1648: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1649: isend[3] = info->memory; isend[4] = info->mallocs;
1651: MatGetInfo(B,MAT_LOCAL,info);
1653: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1654: isend[3] += info->memory; isend[4] += info->mallocs;
1655: if (flag == MAT_LOCAL) {
1656: info->nz_used = isend[0];
1657: info->nz_allocated = isend[1];
1658: info->nz_unneeded = isend[2];
1659: info->memory = isend[3];
1660: info->mallocs = isend[4];
1661: } else if (flag == MAT_GLOBAL_MAX) {
1662: MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));
1664: info->nz_used = irecv[0];
1665: info->nz_allocated = irecv[1];
1666: info->nz_unneeded = irecv[2];
1667: info->memory = irecv[3];
1668: info->mallocs = irecv[4];
1669: } else if (flag == MAT_GLOBAL_SUM) {
1670: MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));
1672: info->nz_used = irecv[0];
1673: info->nz_allocated = irecv[1];
1674: info->nz_unneeded = irecv[2];
1675: info->memory = irecv[3];
1676: info->mallocs = irecv[4];
1677: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1678: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1679: info->fill_ratio_needed = 0;
1680: info->factor_mallocs = 0;
1681: return(0);
1682: }
1684: PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1685: {
1686: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1687: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1691: switch (op) {
1692: case MAT_NEW_NONZERO_LOCATIONS:
1693: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1694: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1695: case MAT_KEEP_NONZERO_PATTERN:
1696: case MAT_SUBMAT_SINGLEIS:
1697: case MAT_NEW_NONZERO_LOCATION_ERR:
1698: MatCheckPreallocated(A,1);
1699: MatSetOption(a->A,op,flg);
1700: MatSetOption(a->B,op,flg);
1701: break;
1702: case MAT_ROW_ORIENTED:
1703: MatCheckPreallocated(A,1);
1704: a->roworiented = flg;
1706: MatSetOption(a->A,op,flg);
1707: MatSetOption(a->B,op,flg);
1708: break;
1709: case MAT_NEW_DIAGONALS:
1710: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1711: break;
1712: case MAT_IGNORE_OFF_PROC_ENTRIES:
1713: a->donotstash = flg;
1714: break;
1715: case MAT_USE_HASH_TABLE:
1716: a->ht_flag = flg;
1717: break;
1718: case MAT_HERMITIAN:
1719: MatCheckPreallocated(A,1);
1720: if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1721: MatSetOption(a->A,op,flg);
1722: #if defined(PETSC_USE_COMPLEX)
1723: A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1724: #endif
1725: break;
1726: case MAT_SPD:
1727: A->spd_set = PETSC_TRUE;
1728: A->spd = flg;
1729: if (flg) {
1730: A->symmetric = PETSC_TRUE;
1731: A->structurally_symmetric = PETSC_TRUE;
1732: A->symmetric_set = PETSC_TRUE;
1733: A->structurally_symmetric_set = PETSC_TRUE;
1734: }
1735: break;
1736: case MAT_SYMMETRIC:
1737: MatCheckPreallocated(A,1);
1738: MatSetOption(a->A,op,flg);
1739: break;
1740: case MAT_STRUCTURALLY_SYMMETRIC:
1741: MatCheckPreallocated(A,1);
1742: MatSetOption(a->A,op,flg);
1743: break;
1744: case MAT_SYMMETRY_ETERNAL:
1745: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1746: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1747: break;
1748: case MAT_IGNORE_LOWER_TRIANGULAR:
1749: aA->ignore_ltriangular = flg;
1750: break;
1751: case MAT_ERROR_LOWER_TRIANGULAR:
1752: aA->ignore_ltriangular = flg;
1753: break;
1754: case MAT_GETROW_UPPERTRIANGULAR:
1755: aA->getrow_utriangular = flg;
1756: break;
1757: default:
1758: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1759: }
1760: return(0);
1761: }
1763: PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1764: {
1768: if (reuse == MAT_INITIAL_MATRIX) {
1769: MatDuplicate(A,MAT_COPY_VALUES,B);
1770: } else if (reuse == MAT_REUSE_MATRIX) {
1771: MatCopy(A,*B,SAME_NONZERO_PATTERN);
1772: }
1773: return(0);
1774: }
1776: PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1777: {
1778: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1779: Mat a = baij->A, b=baij->B;
1781: PetscInt nv,m,n;
1782: PetscBool flg;
1785: if (ll != rr) {
1786: VecEqual(ll,rr,&flg);
1787: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1788: }
1789: if (!ll) return(0);
1791: MatGetLocalSize(mat,&m,&n);
1792: if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1794: VecGetLocalSize(rr,&nv);
1795: if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local sizes");
1797: VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1799: /* left diagonalscale the off-diagonal part */
1800: (*b->ops->diagonalscale)(b,ll,NULL);
1802: /* scale the diagonal part */
1803: (*a->ops->diagonalscale)(a,ll,rr);
1805: /* right diagonalscale the off-diagonal part */
1806: VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1807: (*b->ops->diagonalscale)(b,NULL,baij->lvec);
1808: return(0);
1809: }
1811: PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1812: {
1813: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1817: MatSetUnfactored(a->A);
1818: return(0);
1819: }
1821: static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1823: PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag)
1824: {
1825: Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1826: Mat a,b,c,d;
1827: PetscBool flg;
1831: a = matA->A; b = matA->B;
1832: c = matB->A; d = matB->B;
1834: MatEqual(a,c,&flg);
1835: if (flg) {
1836: MatEqual(b,d,&flg);
1837: }
1838: MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1839: return(0);
1840: }
1842: PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1843: {
1845: PetscBool isbaij;
1848: PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");
1849: if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1850: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1851: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1852: MatGetRowUpperTriangular(A);
1853: MatCopy_Basic(A,B,str);
1854: MatRestoreRowUpperTriangular(A);
1855: } else {
1856: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1857: Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1859: MatCopy(a->A,b->A,str);
1860: MatCopy(a->B,b->B,str);
1861: }
1862: PetscObjectStateIncrease((PetscObject)B);
1863: return(0);
1864: }
1866: PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1867: {
1871: MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1872: return(0);
1873: }
1875: PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1876: {
1878: Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1879: PetscBLASInt bnz,one=1;
1880: Mat_SeqSBAIJ *xa,*ya;
1881: Mat_SeqBAIJ *xb,*yb;
1884: if (str == SAME_NONZERO_PATTERN) {
1885: PetscScalar alpha = a;
1886: xa = (Mat_SeqSBAIJ*)xx->A->data;
1887: ya = (Mat_SeqSBAIJ*)yy->A->data;
1888: PetscBLASIntCast(xa->nz,&bnz);
1889: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1890: xb = (Mat_SeqBAIJ*)xx->B->data;
1891: yb = (Mat_SeqBAIJ*)yy->B->data;
1892: PetscBLASIntCast(xb->nz,&bnz);
1893: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1894: PetscObjectStateIncrease((PetscObject)Y);
1895: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1896: MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);
1897: MatAXPY_Basic(Y,a,X,str);
1898: MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);
1899: } else {
1900: Mat B;
1901: PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1902: if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1903: MatGetRowUpperTriangular(X);
1904: MatGetRowUpperTriangular(Y);
1905: PetscMalloc1(yy->A->rmap->N,&nnz_d);
1906: PetscMalloc1(yy->B->rmap->N,&nnz_o);
1907: MatCreate(PetscObjectComm((PetscObject)Y),&B);
1908: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
1909: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
1910: MatSetBlockSizesFromMats(B,Y,Y);
1911: MatSetType(B,MATMPISBAIJ);
1912: MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);
1913: MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);
1914: MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);
1915: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
1916: MatHeaderReplace(Y,&B);
1917: PetscFree(nnz_d);
1918: PetscFree(nnz_o);
1919: MatRestoreRowUpperTriangular(X);
1920: MatRestoreRowUpperTriangular(Y);
1921: }
1922: return(0);
1923: }
1925: PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1926: {
1928: PetscInt i;
1929: PetscBool flg;
1932: MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B); /* B[] are sbaij matrices */
1933: for (i=0; i<n; i++) {
1934: ISEqual(irow[i],icol[i],&flg);
1935: if (!flg) {
1936: MatSeqSBAIJZeroOps_Private(*B[i]);
1937: }
1938: }
1939: return(0);
1940: }
1942: PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1943: {
1945: Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data;
1946: Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data;
1949: if (!Y->preallocated) {
1950: MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);
1951: } else if (!aij->nz) {
1952: PetscInt nonew = aij->nonew;
1953: MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);
1954: aij->nonew = nonew;
1955: }
1956: MatShift_Basic(Y,a);
1957: return(0);
1958: }
1960: PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d)
1961: {
1962: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1966: if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1967: MatMissingDiagonal(a->A,missing,d);
1968: if (d) {
1969: PetscInt rstart;
1970: MatGetOwnershipRange(A,&rstart,NULL);
1971: *d += rstart/A->rmap->bs;
1973: }
1974: return(0);
1975: }
1977: PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1978: {
1980: *a = ((Mat_MPISBAIJ*)A->data)->A;
1981: return(0);
1982: }
1984: /* -------------------------------------------------------------------*/
1985: static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1986: MatGetRow_MPISBAIJ,
1987: MatRestoreRow_MPISBAIJ,
1988: MatMult_MPISBAIJ,
1989: /* 4*/ MatMultAdd_MPISBAIJ,
1990: MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1991: MatMultAdd_MPISBAIJ,
1992: 0,
1993: 0,
1994: 0,
1995: /* 10*/ 0,
1996: 0,
1997: 0,
1998: MatSOR_MPISBAIJ,
1999: MatTranspose_MPISBAIJ,
2000: /* 15*/ MatGetInfo_MPISBAIJ,
2001: MatEqual_MPISBAIJ,
2002: MatGetDiagonal_MPISBAIJ,
2003: MatDiagonalScale_MPISBAIJ,
2004: MatNorm_MPISBAIJ,
2005: /* 20*/ MatAssemblyBegin_MPISBAIJ,
2006: MatAssemblyEnd_MPISBAIJ,
2007: MatSetOption_MPISBAIJ,
2008: MatZeroEntries_MPISBAIJ,
2009: /* 24*/ 0,
2010: 0,
2011: 0,
2012: 0,
2013: 0,
2014: /* 29*/ MatSetUp_MPISBAIJ,
2015: 0,
2016: 0,
2017: MatGetDiagonalBlock_MPISBAIJ,
2018: 0,
2019: /* 34*/ MatDuplicate_MPISBAIJ,
2020: 0,
2021: 0,
2022: 0,
2023: 0,
2024: /* 39*/ MatAXPY_MPISBAIJ,
2025: MatCreateSubMatrices_MPISBAIJ,
2026: MatIncreaseOverlap_MPISBAIJ,
2027: MatGetValues_MPISBAIJ,
2028: MatCopy_MPISBAIJ,
2029: /* 44*/ 0,
2030: MatScale_MPISBAIJ,
2031: MatShift_MPISBAIJ,
2032: 0,
2033: 0,
2034: /* 49*/ 0,
2035: 0,
2036: 0,
2037: 0,
2038: 0,
2039: /* 54*/ 0,
2040: 0,
2041: MatSetUnfactored_MPISBAIJ,
2042: 0,
2043: MatSetValuesBlocked_MPISBAIJ,
2044: /* 59*/ MatCreateSubMatrix_MPISBAIJ,
2045: 0,
2046: 0,
2047: 0,
2048: 0,
2049: /* 64*/ 0,
2050: 0,
2051: 0,
2052: 0,
2053: 0,
2054: /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
2055: 0,
2056: 0,
2057: 0,
2058: 0,
2059: /* 74*/ 0,
2060: 0,
2061: 0,
2062: 0,
2063: 0,
2064: /* 79*/ 0,
2065: 0,
2066: 0,
2067: 0,
2068: MatLoad_MPISBAIJ,
2069: /* 84*/ 0,
2070: 0,
2071: 0,
2072: 0,
2073: 0,
2074: /* 89*/ 0,
2075: 0,
2076: 0,
2077: 0,
2078: 0,
2079: /* 94*/ 0,
2080: 0,
2081: 0,
2082: 0,
2083: 0,
2084: /* 99*/ 0,
2085: 0,
2086: 0,
2087: 0,
2088: 0,
2089: /*104*/ 0,
2090: MatRealPart_MPISBAIJ,
2091: MatImaginaryPart_MPISBAIJ,
2092: MatGetRowUpperTriangular_MPISBAIJ,
2093: MatRestoreRowUpperTriangular_MPISBAIJ,
2094: /*109*/ 0,
2095: 0,
2096: 0,
2097: 0,
2098: MatMissingDiagonal_MPISBAIJ,
2099: /*114*/ 0,
2100: 0,
2101: 0,
2102: 0,
2103: 0,
2104: /*119*/ 0,
2105: 0,
2106: 0,
2107: 0,
2108: 0,
2109: /*124*/ 0,
2110: 0,
2111: 0,
2112: 0,
2113: 0,
2114: /*129*/ 0,
2115: 0,
2116: 0,
2117: 0,
2118: 0,
2119: /*134*/ 0,
2120: 0,
2121: 0,
2122: 0,
2123: 0,
2124: /*139*/ MatSetBlockSizes_Default,
2125: 0,
2126: 0,
2127: 0,
2128: 0,
2129: /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
2130: };
2132: PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2133: {
2134: Mat_MPISBAIJ *b;
2136: PetscInt i,mbs,Mbs;
2137: PetscMPIInt size;
2140: MatSetBlockSize(B,PetscAbs(bs));
2141: PetscLayoutSetUp(B->rmap);
2142: PetscLayoutSetUp(B->cmap);
2143: PetscLayoutGetBlockSize(B->rmap,&bs);
2145: b = (Mat_MPISBAIJ*)B->data;
2146: mbs = B->rmap->n/bs;
2147: Mbs = B->rmap->N/bs;
2148: if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
2150: B->rmap->bs = bs;
2151: b->bs2 = bs*bs;
2152: b->mbs = mbs;
2153: b->Mbs = Mbs;
2154: b->nbs = B->cmap->n/bs;
2155: b->Nbs = B->cmap->N/bs;
2157: for (i=0; i<=b->size; i++) {
2158: b->rangebs[i] = B->rmap->range[i]/bs;
2159: }
2160: b->rstartbs = B->rmap->rstart/bs;
2161: b->rendbs = B->rmap->rend/bs;
2163: b->cstartbs = B->cmap->rstart/bs;
2164: b->cendbs = B->cmap->rend/bs;
2166: #if defined(PETSC_USE_CTABLE)
2167: PetscTableDestroy(&b->colmap);
2168: #else
2169: PetscFree(b->colmap);
2170: #endif
2171: PetscFree(b->garray);
2172: VecDestroy(&b->lvec);
2173: VecScatterDestroy(&b->Mvctx);
2174: VecDestroy(&b->slvec0);
2175: VecDestroy(&b->slvec0b);
2176: VecDestroy(&b->slvec1);
2177: VecDestroy(&b->slvec1a);
2178: VecDestroy(&b->slvec1b);
2179: VecScatterDestroy(&b->sMvctx);
2181: /* Because B will have been resized we simply destroy it and create a new one each time */
2182: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
2183: MatDestroy(&b->B);
2184: MatCreate(PETSC_COMM_SELF,&b->B);
2185: MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);
2186: MatSetType(b->B,MATSEQBAIJ);
2187: PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);
2189: if (!B->preallocated) {
2190: MatCreate(PETSC_COMM_SELF,&b->A);
2191: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
2192: MatSetType(b->A,MATSEQSBAIJ);
2193: PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);
2194: MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);
2195: }
2197: MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);
2198: MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);
2200: B->preallocated = PETSC_TRUE;
2201: B->was_assembled = PETSC_FALSE;
2202: B->assembled = PETSC_FALSE;
2203: return(0);
2204: }
2206: PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2207: {
2208: PetscInt m,rstart,cstart,cend;
2209: PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2210: const PetscInt *JJ =0;
2211: PetscScalar *values=0;
2215: if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2216: PetscLayoutSetBlockSize(B->rmap,bs);
2217: PetscLayoutSetBlockSize(B->cmap,bs);
2218: PetscLayoutSetUp(B->rmap);
2219: PetscLayoutSetUp(B->cmap);
2220: PetscLayoutGetBlockSize(B->rmap,&bs);
2221: m = B->rmap->n/bs;
2222: rstart = B->rmap->rstart/bs;
2223: cstart = B->cmap->rstart/bs;
2224: cend = B->cmap->rend/bs;
2226: if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2227: PetscMalloc2(m,&d_nnz,m,&o_nnz);
2228: for (i=0; i<m; i++) {
2229: nz = ii[i+1] - ii[i];
2230: if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2231: nz_max = PetscMax(nz_max,nz);
2232: JJ = jj + ii[i];
2233: for (j=0; j<nz; j++) {
2234: if (*JJ >= cstart) break;
2235: JJ++;
2236: }
2237: d = 0;
2238: for (; j<nz; j++) {
2239: if (*JJ++ >= cend) break;
2240: d++;
2241: }
2242: d_nnz[i] = d;
2243: o_nnz[i] = nz - d;
2244: }
2245: MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);
2246: PetscFree2(d_nnz,o_nnz);
2248: values = (PetscScalar*)V;
2249: if (!values) {
2250: PetscMalloc1(bs*bs*nz_max,&values);
2251: PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));
2252: }
2253: for (i=0; i<m; i++) {
2254: PetscInt row = i + rstart;
2255: PetscInt ncols = ii[i+1] - ii[i];
2256: const PetscInt *icols = jj + ii[i];
2257: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2258: MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);
2259: }
2261: if (!V) { PetscFree(values); }
2262: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2263: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2264: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2265: return(0);
2266: }
2268: /*MC
2269: MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2270: based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of
2271: the matrix is stored.
2273: For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2274: can call MatSetOption(Mat, MAT_HERMITIAN);
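
   A minimal usage sketch (the sizes and preallocation counts below are illustrative
   placeholders, not taken from this source); note that MAT_HERMITIAN can only be set
   after the matrix has been assembled:

.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);        /* global size divisible by bs */
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,2,5,NULL,2,NULL);          /* bs=2, d_nz=5, o_nz=2 */
   /* ... MatSetValuesBlocked() on the upper triangular part ... */
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
   MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);
.ve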
2276: Options Database Keys:
2277: . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2279: Level: beginner
2281: .seealso: MatCreateMPISBAIJ
2282: M*/
2284: PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2285: {
2286: Mat_MPISBAIJ *b;
2288: PetscBool flg = PETSC_FALSE;
2291: PetscNewLog(B,&b);
2292: B->data = (void*)b;
2293: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2295: B->ops->destroy = MatDestroy_MPISBAIJ;
2296: B->ops->view = MatView_MPISBAIJ;
2297: B->assembled = PETSC_FALSE;
2298: B->insertmode = NOT_SET_VALUES;
2300: MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);
2301: MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);
2303: /* build local table of row and column ownerships */
2304: PetscMalloc1(b->size+2,&b->rangebs);
2306: /* build cache for off array entries formed */
2307: MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);
2309: b->donotstash = PETSC_FALSE;
2310: b->colmap = NULL;
2311: b->garray = NULL;
2312: b->roworiented = PETSC_TRUE;
2314: /* stuff used in block assembly */
2315: b->barray = 0;
2317: /* stuff used for matrix vector multiply */
2318: b->lvec = 0;
2319: b->Mvctx = 0;
2320: b->slvec0 = 0;
2321: b->slvec0b = 0;
2322: b->slvec1 = 0;
2323: b->slvec1a = 0;
2324: b->slvec1b = 0;
2325: b->sMvctx = 0;
2327: /* stuff for MatGetRow() */
2328: b->rowindices = 0;
2329: b->rowvalues = 0;
2330: b->getrowactive = PETSC_FALSE;
2332: /* hash table stuff */
2333: b->ht = 0;
2334: b->hd = 0;
2335: b->ht_size = 0;
2336: b->ht_flag = PETSC_FALSE;
2337: b->ht_fact = 0;
2338: b->ht_total_ct = 0;
2339: b->ht_insert_ct = 0;
2341: /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2342: b->ijonly = PETSC_FALSE;
2344: b->in_loc = 0;
2345: b->v_loc = 0;
2346: b->n_loc = 0;
2348: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);
2349: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);
2350: PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);
2351: PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);
2352: #if defined(PETSC_HAVE_ELEMENTAL)
2353: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);
2354: #endif
2355: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_XAIJ);
2356: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_XAIJ);
2358: B->symmetric = PETSC_TRUE;
2359: B->structurally_symmetric = PETSC_TRUE;
2360: B->symmetric_set = PETSC_TRUE;
2361: B->structurally_symmetric_set = PETSC_TRUE;
2362: B->symmetric_eternal = PETSC_TRUE;
2364: B->hermitian = PETSC_FALSE;
2365: B->hermitian_set = PETSC_FALSE;
2367: PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);
2368: PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");
2369: PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);
2370: if (flg) {
2371: PetscReal fact = 1.39;
2372: MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);
2373: PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);
2374: if (fact <= 1.0) fact = 1.39;
2375: MatMPIBAIJSetHashTableFactor(B,fact);
2376: PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);
2377: }
2378: PetscOptionsEnd();
2379: return(0);
2380: }
2382: /*MC
2383: MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2385: This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2386: and MATMPISBAIJ otherwise.
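
   A minimal sketch of the usual creation paradigm (the communicator and sizes are placeholders):

.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
   MatSetType(A,MATSBAIJ);    /* resolves to MATSEQSBAIJ or MATMPISBAIJ from the communicator size */
   MatSetFromOptions(A);
.ve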
2388: Options Database Keys:
2389: . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2391: Level: beginner
2393: .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2394: M*/
2396: /*@C
2397: MatMPISBAIJSetPreallocation - For good matrix assembly performance
2398: the user should preallocate the matrix storage by setting the parameters
2399: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2400: performance can be increased by more than a factor of 50.
2402: Collective on Mat
2404: Input Parameters:
2405: + B - the matrix
2406: . bs - size of block; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize, but the row
2407: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2408: . d_nz - number of block nonzeros per block row in diagonal portion of local
2409: submatrix (same for all local rows)
2410: . d_nnz - array containing the number of block nonzeros in the various block rows
2411: in the upper triangular and diagonal part of the diagonal portion of the local
2412: submatrix (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room
2413: for the diagonal entry and set a value even if it is zero.
2414: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2415: submatrix (same for all local rows).
2416: - o_nnz - array containing the number of nonzeros in the various block rows of the
2417: off-diagonal portion of the local submatrix that is right of the diagonal
2418: (possibly different for each block row) or NULL.
2421: Options Database Keys:
2422: . -mat_no_unroll - uses code that does not unroll the loops in the
2423: block calculations (much slower)
2424: . -mat_block_size - size of the blocks to use
2426: Notes:
2428: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2429: then it must be used on all processors that share the object for that argument.
2431: If the *_nnz parameter is given then the *_nz parameter is ignored
2433: Storage Information:
2434: For a square global matrix we define each processor's diagonal portion
2435: to be its local rows and the corresponding columns (a square submatrix);
2436: each processor's off-diagonal portion encompasses the remainder of the
2437: local matrix (a rectangular submatrix).
2439: The user can specify preallocated storage for the diagonal part of
2440: the local submatrix with either d_nz or d_nnz (not both). Set
2441: d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2442: memory allocation. Likewise, specify preallocated storage for the
2443: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2445: You can call MatGetInfo() to get information on how effective the preallocation was;
2446: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2447: You can also run with the option -info and look for messages with the string
2448: malloc in them to see if additional memory allocation was needed.
2450: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2451: the figure below we depict these three local rows and all columns (0-11).
2453: .vb
2454: 0 1 2 3 4 5 6 7 8 9 10 11
2455: --------------------------
2456: row 3 |. . . d d d o o o o o o
2457: row 4 |. . . d d d o o o o o o
2458: row 5 |. . . d d d o o o o o o
2459: --------------------------
2460: .ve
2462: Thus, any entries in the d locations are stored in the d (diagonal)
2463: submatrix, and any entries in the o locations are stored in the
2464: o (off-diagonal) submatrix. Note that the d matrix is stored in
2465: MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2467: Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2468: plus the diagonal part of the d matrix,
2469: and o_nz should indicate the number of block nonzeros per row in the o matrix
2471: In general, for PDE problems in which most nonzeros are near the diagonal,
2472: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2473: or you will get TERRIBLE performance; see the users' manual chapter on
2474: matrices.
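
   As a sketch only (assuming every depicted d and o location above is a nonzero block and bs=1),
   the preallocation for the three example rows could be written as:

.vb
   PetscInt d_nnz[] = {3,2,1};   /* upper triangular + diagonal entries of the d part */
   PetscInt o_nnz[] = {6,6,6};   /* entries of the o part, all right of the diagonal  */
   MatMPISBAIJSetPreallocation(B,1,0,d_nnz,0,o_nnz);
.ve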
2476: Level: intermediate
2478: .keywords: matrix, block, aij, compressed row, sparse, parallel
2480: .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2481: @*/
2482: PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2483: {
2490: PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
2491: return(0);
2492: }
2494: /*@C
2495: MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2496: (block compressed row). For good matrix assembly performance
2497: the user should preallocate the matrix storage by setting the parameters
2498: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2499: performance can be increased by more than a factor of 50.
2501: Collective on MPI_Comm
2503: Input Parameters:
2504: + comm - MPI communicator
2505: . bs - size of block; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize, but the row
2506: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2507: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2508: This value should be the same as the local size used in creating the
2509: y vector for the matrix-vector product y = Ax.
2510: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2511: This value should be the same as the local size used in creating the
2512: x vector for the matrix-vector product y = Ax.
2513: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2514: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2515: . d_nz - number of block nonzeros per block row in diagonal portion of local
2516: submatrix (same for all local rows)
2517: . d_nnz - array containing the number of block nonzeros in the various block rows
2518: in the upper triangular portion of the diagonal portion of the local
2519: submatrix (possibly different for each block row) or NULL.
2520: If you plan to factor the matrix you must leave room for the diagonal entry and
2521: set its value even if it is zero.
2522: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2523: submatrix (same for all local rows).
2524: - o_nnz - array containing the number of nonzeros in the various block rows of the
2525: off-diagonal portion of the local submatrix (possibly different for
2526: each block row) or NULL.
2528: Output Parameter:
2529: . A - the matrix
2531: Options Database Keys:
2532: . -mat_no_unroll - uses code that does not unroll the loops in the
2533: block calculations (much slower)
2534: . -mat_block_size - size of the blocks to use
2535: . -mat_mpi - use the parallel matrix data structures even on one processor
2536: (defaults to using SeqSBAIJ format on one processor)
2538: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2539: MatXXXXSetPreallocation() paradigm instead of this routine directly.
2540: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2542: Notes:
2543: The number of rows and columns must be divisible by blocksize.
2544: This matrix type does not support complex Hermitian operations.
2546: The user MUST specify either the local or global matrix dimensions
2547: (possibly both).
2549: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2550: then it must be used on all processors that share the object for that argument.
2552: If the *_nnz parameter is given then the *_nz parameter is ignored
2554: Storage Information:
2555: For a square global matrix we define each processor's diagonal portion
2556: to be its local rows and the corresponding columns (a square submatrix);
2557: each processor's off-diagonal portion encompasses the remainder of the
2558: local matrix (a rectangular submatrix).
2560: The user can specify preallocated storage for the diagonal part of
2561: the local submatrix with either d_nz or d_nnz (not both). Set
2562: d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2563: memory allocation. Likewise, specify preallocated storage for the
2564: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2566: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2567: the figure below we depict these three local rows and all columns (0-11).
2569: .vb
2570: 0 1 2 3 4 5 6 7 8 9 10 11
2571: --------------------------
2572: row 3 |. . . d d d o o o o o o
2573: row 4 |. . . d d d o o o o o o
2574: row 5 |. . . d d d o o o o o o
2575: --------------------------
2576: .ve
2578: Thus, any entries in the d locations are stored in the d (diagonal)
2579: submatrix, and any entries in the o locations are stored in the
2580: o (off-diagonal) submatrix. Note that the d matrix is stored in
2581: MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2583: Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2584: plus the diagonal part of the d matrix,
2585: and o_nz should indicate the number of block nonzeros per row in the o matrix.
2586: In general, for PDE problems in which most nonzeros are near the diagonal,
2587: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2588: or you will get TERRIBLE performance; see the users' manual chapter on
2589: matrices.
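
   A sketch of a direct call (the global size, block size, and nonzero counts are placeholders only):

.vb
   Mat A;
   MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,5,NULL,2,NULL,&A);
.ve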
2591: Level: intermediate
2593: .keywords: matrix, block, aij, compressed row, sparse, parallel
2595: .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2596: @*/
2598: PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2599: {
2601: PetscMPIInt size;
2604: MatCreate(comm,A);
2605: MatSetSizes(*A,m,n,M,N);
2606: MPI_Comm_size(comm,&size);
2607: if (size > 1) {
2608: MatSetType(*A,MATMPISBAIJ);
2609: MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
2610: } else {
2611: MatSetType(*A,MATSEQSBAIJ);
2612: MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
2613: }
2614: return(0);
2615: }
2618: static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2619: {
2620: Mat mat;
2621: Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2623: PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2624: PetscScalar *array;
2627: *newmat = 0;
2629: MatCreate(PetscObjectComm((PetscObject)matin),&mat);
2630: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
2631: MatSetType(mat,((PetscObject)matin)->type_name);
2632: PetscLayoutReference(matin->rmap,&mat->rmap);
2633: PetscLayoutReference(matin->cmap,&mat->cmap);
2635: mat->factortype = matin->factortype;
2636: mat->preallocated = PETSC_TRUE;
2637: mat->assembled = PETSC_TRUE;
2638: mat->insertmode = NOT_SET_VALUES;
2640: a = (Mat_MPISBAIJ*)mat->data;
2641: a->bs2 = oldmat->bs2;
2642: a->mbs = oldmat->mbs;
2643: a->nbs = oldmat->nbs;
2644: a->Mbs = oldmat->Mbs;
2645: a->Nbs = oldmat->Nbs;
2647: a->size = oldmat->size;
2648: a->rank = oldmat->rank;
2649: a->donotstash = oldmat->donotstash;
2650: a->roworiented = oldmat->roworiented;
2651: a->rowindices = 0;
2652: a->rowvalues = 0;
2653: a->getrowactive = PETSC_FALSE;
2654: a->barray = 0;
2655: a->rstartbs = oldmat->rstartbs;
2656: a->rendbs = oldmat->rendbs;
2657: a->cstartbs = oldmat->cstartbs;
2658: a->cendbs = oldmat->cendbs;
2660: /* hash table stuff */
2661: a->ht = 0;
2662: a->hd = 0;
2663: a->ht_size = 0;
2664: a->ht_flag = oldmat->ht_flag;
2665: a->ht_fact = oldmat->ht_fact;
2666: a->ht_total_ct = 0;
2667: a->ht_insert_ct = 0;
2669: PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));
2670: if (oldmat->colmap) {
2671: #if defined(PETSC_USE_CTABLE)
2672: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
2673: #else
2674: PetscMalloc1(a->Nbs,&a->colmap);
2675: PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));
2676: PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));
2677: #endif
2678: } else a->colmap = 0;
2680: if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2681: PetscMalloc1(len,&a->garray);
2682: PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));
2683: PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));
2684: } else a->garray = 0;
2686: MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);
2687: VecDuplicate(oldmat->lvec,&a->lvec);
2688: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);
2689: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
2690: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);
2692: VecDuplicate(oldmat->slvec0,&a->slvec0);
2693: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);
2694: VecDuplicate(oldmat->slvec1,&a->slvec1);
2695: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);
2697: VecGetLocalSize(a->slvec1,&nt);
2698: VecGetArray(a->slvec1,&array);
2699: VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);
2700: VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);
2701: VecRestoreArray(a->slvec1,&array);
2702: VecGetArray(a->slvec0,&array);
2703: VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);
2704: VecRestoreArray(a->slvec0,&array);
2705: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);
2706: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);
2707: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);
2708: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);
2709: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);
2711: /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet; instead we take a reference to the old scatter: */
2712: PetscObjectReference((PetscObject)oldmat->sMvctx);
2713: a->sMvctx = oldmat->sMvctx;
2714: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);
2716: MatDuplicate(oldmat->A,cpvalues,&a->A);
2717: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
2718: MatDuplicate(oldmat->B,cpvalues,&a->B);
2719: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);
2720: PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
2721: *newmat = mat;
2722: return(0);
2723: }
2725: PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2726: {
2728: PetscInt i,nz,j,rstart,rend;
2729: PetscScalar *vals,*buf;
2730: MPI_Comm comm;
2731: MPI_Status status;
2732: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2733: PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2734: PetscInt *procsnz = 0,jj,*mycols,*ibuf;
2735: PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2736: PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2737: PetscInt dcount,kmax,k,nzcount,tmp;
2738: int fd;
2739: PetscBool isbinary;
2742: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
2743: if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
2745: /* force binary viewer to load .info file if it has not yet done so */
2746: PetscViewerSetUp(viewer);
2747: PetscObjectGetComm((PetscObject)viewer,&comm);
2748: PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");
2749: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
2750: PetscOptionsEnd();
2751: if (bs < 0) bs = 1;
2753: MPI_Comm_size(comm,&size);
2754: MPI_Comm_rank(comm,&rank);
2755: PetscViewerBinaryGetDescriptor(viewer,&fd);
2756: if (!rank) {
2757: PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
2758: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2759: if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2760: }
2762: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
2763: M = header[1];
2764: N = header[2];
2766: /* If global sizes are set, check if they are consistent with that given in the file */
2767: if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2768: if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2770: if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2772: /*
2773: This code adds extra rows to make sure the number of rows is
2774: divisible by the blocksize
2775: */
2776: Mbs = M/bs;
2777: extra_rows = bs - M + bs*(Mbs);
2778: if (extra_rows == bs) extra_rows = 0;
2779: else Mbs++;
2780: if (extra_rows && !rank) {
2781: PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
2782: }
2784: /* determine ownership of all rows */
2785: if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2786: mbs = Mbs/size + ((Mbs % size) > rank);
2787: m = mbs*bs;
2788: } else { /* User Set */
2789: m = newmat->rmap->n;
2790: mbs = m/bs;
2791: }
2792: PetscMalloc2(size+1,&rowners,size+1,&browners);
2793: PetscMPIIntCast(mbs,&mmbs);
2794: MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
2795: rowners[0] = 0;
2796: for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2797: for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2798: rstart = rowners[rank];
2799: rend = rowners[rank+1];
2801: /* distribute row lengths to all processors */
2802: PetscMalloc1((rend-rstart)*bs,&locrowlens);
2803: if (!rank) {
2804: PetscMalloc1(M+extra_rows,&rowlengths);
2805: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2806: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2807: PetscMalloc1(size,&sndcounts);
2808: for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2809: MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);
2810: PetscFree(sndcounts);
2811: } else {
2812: MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);
2813: }
2815: if (!rank) { /* procs[0] */
2816: /* calculate the number of nonzeros on each processor */
2817: PetscMalloc1(size,&procsnz);
2818: PetscMemzero(procsnz,size*sizeof(PetscInt));
2819: for (i=0; i<size; i++) {
2820: for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2821: procsnz[i] += rowlengths[j];
2822: }
2823: }
2824: PetscFree(rowlengths);
2826: /* determine max buffer needed and allocate it */
2827: maxnz = 0;
2828: for (i=0; i<size; i++) {
2829: maxnz = PetscMax(maxnz,procsnz[i]);
2830: }
2831: PetscMalloc1(maxnz,&cols);
2833: /* read in my part of the matrix column indices */
2834: nz = procsnz[0];
2835: PetscMalloc1(nz,&ibuf);
2836: mycols = ibuf;
2837: if (size == 1) nz -= extra_rows;
2838: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
2839: if (size == 1) {
2840: for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2841: }
2843: /* read in the other processes' parts (except the last) and ship them off */
2844: for (i=1; i<size-1; i++) {
2845: nz = procsnz[i];
2846: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2847: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
2848: }
2849: /* read in the stuff for the last proc */
2850: if (size != 1) {
2851: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
2852: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2853: for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2854: MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);
2855: }
2856: PetscFree(cols);
2857: } else { /* procs[i], i>0 */
2858: /* determine buffer space needed for message */
2859: nz = 0;
2860: for (i=0; i<m; i++) nz += locrowlens[i];
2861: PetscMalloc1(nz,&ibuf);
2862: mycols = ibuf;
2863: /* receive message of column indices*/
2864: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
2865: MPI_Get_count(&status,MPIU_INT,&maxnz);
2866: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2867: }
2869: /* loop over local rows, determining number of off diagonal entries */
2870: PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);
2871: PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);
2872: PetscMemzero(mask,Mbs*sizeof(PetscInt));
2873: PetscMemzero(masked1,Mbs*sizeof(PetscInt));
2874: PetscMemzero(masked2,Mbs*sizeof(PetscInt));
2875: rowcount = 0;
2876: nzcount = 0;
2877: for (i=0; i<mbs; i++) {
2878: dcount = 0;
2879: odcount = 0;
2880: for (j=0; j<bs; j++) {
2881: kmax = locrowlens[rowcount];
2882: for (k=0; k<kmax; k++) {
2883: tmp = mycols[nzcount++]/bs; /* block col. index */
2884: if (!mask[tmp]) {
2885: mask[tmp] = 1;
2886: if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2887: else masked1[dcount++] = tmp; /* entry in diag portion */
2888: }
2889: }
2890: rowcount++;
2891: }
2893: dlens[i] = dcount; /* d_nzz[i] */
2894: odlens[i] = odcount; /* o_nzz[i] */
2896: /* zero out the mask elements we set */
2897: for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2898: for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2899: }
2900: MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);
2901: MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);
2902: MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);
2904: if (!rank) {
2905: PetscMalloc1(maxnz,&buf);
2906: /* read in my part of the matrix numerical values */
2907: nz = procsnz[0];
2908: vals = buf;
2909: mycols = ibuf;
2910: if (size == 1) nz -= extra_rows;
2911: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2912: if (size == 1) {
2913: for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2914: }
2916: /* insert into matrix */
2917: jj = rstart*bs;
2918: for (i=0; i<m; i++) {
2919: MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2920: mycols += locrowlens[i];
2921: vals += locrowlens[i];
2922: jj++;
2923: }
2925: /* read in other processors (except the last one) and ship out */
2926: for (i=1; i<size-1; i++) {
2927: nz = procsnz[i];
2928: vals = buf;
2929: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2930: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
2931: }
2932: /* the last proc */
2933: if (size != 1) {
2934: nz = procsnz[i] - extra_rows;
2935: vals = buf;
2936: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2937: for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2938: MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);
2939: }
2940: PetscFree(procsnz);
2942: } else {
2943: /* receive numeric values */
2944: PetscMalloc1(nz,&buf);
2946: /* receive message of values*/
2947: vals = buf;
2948: mycols = ibuf;
2949: MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
2950: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2951: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2953: /* insert into matrix */
2954: jj = rstart*bs;
2955: for (i=0; i<m; i++) {
2956: MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2957: mycols += locrowlens[i];
2958: vals += locrowlens[i];
2959: jj++;
2960: }
2961: }
2963: PetscFree(locrowlens);
2964: PetscFree(buf);
2965: PetscFree(ibuf);
2966: PetscFree2(rowners,browners);
2967: PetscFree2(dlens,odlens);
2968: PetscFree3(mask,masked1,masked2);
2969: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
2970: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
2971: return(0);
2972: }
2974: /*XXXXX@
2975: MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2977: Input Parameters:
2978: . mat - the matrix
2979: . fact - factor
2981: Not Collective on Mat, each process can have a different hash factor
2983: Level: advanced
2985: Notes:
2986: This can also be set by the command line option: -mat_use_hash_table fact
2988: .keywords: matrix, hashtable, factor, HT
2990: .seealso: MatSetOption()
2991: @XXXXX*/
2994: PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2995: {
2996: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2997: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data;
2998: PetscReal atmp;
2999: PetscReal *work,*svalues,*rvalues;
3001: PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
3002: PetscMPIInt rank,size;
3003: PetscInt *rowners_bs,dest,count,source;
3004: PetscScalar *va;
3005: MatScalar *ba;
3006: MPI_Status stat;
3009: if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Returning the column indices of the row maxima (idx) is not supported; send email to petsc-maint@mcs.anl.gov");
3010: MatGetRowMaxAbs(a->A,v,NULL);
3011: VecGetArray(v,&va);
3013: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3014: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
3016: bs = A->rmap->bs;
3017: mbs = a->mbs;
3018: Mbs = a->Mbs;
3019: ba = b->a;
3020: bi = b->i;
3021: bj = b->j;
3023: /* find ownerships */
3024: rowners_bs = A->rmap->range;
3026: /* each proc creates an array to be distributed */
3027: PetscMalloc1(bs*Mbs,&work);
3028: PetscMemzero(work,bs*Mbs*sizeof(PetscReal));
3030: /* row_max for B */
3031: if (rank != size-1) {
3032: for (i=0; i<mbs; i++) {
3033: ncols = bi[1] - bi[0]; bi++;
3034: brow = bs*i;
3035: for (j=0; j<ncols; j++) {
3036: bcol = bs*(*bj);
3037: for (kcol=0; kcol<bs; kcol++) {
3038: col = bcol + kcol; /* local col index */
3039: col += rowners_bs[rank+1]; /* global col index */
3040: for (krow=0; krow<bs; krow++) {
3041: atmp = PetscAbsScalar(*ba); ba++;
3042: row = brow + krow; /* local row index */
3043: if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
3044: if (work[col] < atmp) work[col] = atmp;
3045: }
3046: }
3047: bj++;
3048: }
3049: }
3051: /* send values to its owners */
3052: for (dest=rank+1; dest<size; dest++) {
3053: svalues = work + rowners_bs[dest];
3054: count = rowners_bs[dest+1]-rowners_bs[dest];
3055: MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));
3056: }
3057: }
3059: /* receive values */
3060: if (rank) {
3061: rvalues = work;
3062: count = rowners_bs[rank+1]-rowners_bs[rank];
3063: for (source=0; source<rank; source++) {
3064: MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);
3065: /* process values */
3066: for (i=0; i<count; i++) {
3067: if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
3068: }
3069: }
3070: }
3072: VecRestoreArray(v,&va);
3073: PetscFree(work);
3074: return(0);
3075: }
3077: PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3078: {
3079: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
3080: PetscErrorCode ierr;
3081: PetscInt mbs=mat->mbs,bs=matin->rmap->bs;
3082: PetscScalar *x,*ptr,*from;
3083: Vec bb1;
3084: const PetscScalar *b;
3087: if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3088: if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3090: if (flag == SOR_APPLY_UPPER) {
3091: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
3092: return(0);
3093: }
3095: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3096: if (flag & SOR_ZERO_INITIAL_GUESS) {
3097: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
3098: its--;
3099: }
3101: VecDuplicate(bb,&bb1);
3102: while (its--) {
3104: /* lower triangular part: slvec0b = - B^T*xx */
3105: (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);
3107: /* copy xx into slvec0a */
3108: VecGetArray(mat->slvec0,&ptr);
3109: VecGetArray(xx,&x);
3110: PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));
3111: VecRestoreArray(mat->slvec0,&ptr);
3113: VecScale(mat->slvec0,-1.0);
3115: /* copy bb into slvec1a */
3116: VecGetArray(mat->slvec1,&ptr);
3117: VecGetArrayRead(bb,&b);
3118: PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));
3119: VecRestoreArray(mat->slvec1,&ptr);
3121: /* set slvec1b = 0 */
3122: VecSet(mat->slvec1b,0.0);
3124: VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
3125: VecRestoreArray(xx,&x);
3126: VecRestoreArrayRead(bb,&b);
3127: VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
3129: /* upper triangular part: bb1 = bb1 - B*x */
3130: (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);
3132: /* local diagonal sweep */
3133: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
3134: }
3135: VecDestroy(&bb1);
3136: } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3137: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
3138: } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3139: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
3140: } else if (flag & SOR_EISENSTAT) {
3141: Vec xx1;
3142: PetscBool hasop;
3143: const PetscScalar *diag;
3144: PetscScalar *sl,scale = (omega - 2.0)/omega;
3145: PetscInt i,n;
3147: if (!mat->xx1) {
3148: VecDuplicate(bb,&mat->xx1);
3149: VecDuplicate(bb,&mat->bb1);
3150: }
3151: xx1 = mat->xx1;
3152: bb1 = mat->bb1;
3154: (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);
3156: if (!mat->diag) {
3157: /* this is wrong if the same matrix is reused with new nonzero values */
3158: MatCreateVecs(matin,&mat->diag,NULL);
3159: MatGetDiagonal(matin,mat->diag);
3160: }
3161: MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);
3163: if (hasop) {
3164: MatMultDiagonalBlock(matin,xx,bb1);
3165: VecAYPX(mat->slvec1a,scale,bb);
3166: } else {
3167: /*
3168: These two lines are replaced by code that may be a bit faster for a good compiler
3169: VecPointwiseMult(mat->slvec1a,mat->diag,xx);
3170: VecAYPX(mat->slvec1a,scale,bb);
3171: */
3172: VecGetArray(mat->slvec1a,&sl);
3173: VecGetArrayRead(mat->diag,&diag);
3174: VecGetArrayRead(bb,&b);
3175: VecGetArray(xx,&x);
3176: VecGetLocalSize(xx,&n);
3177: if (omega == 1.0) {
3178: for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
3179: PetscLogFlops(2.0*n);
3180: } else {
3181: for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
3182: PetscLogFlops(3.0*n);
3183: }
3184: VecRestoreArray(mat->slvec1a,&sl);
3185: VecRestoreArrayRead(mat->diag,&diag);
3186: VecRestoreArrayRead(bb,&b);
3187: VecRestoreArray(xx,&x);
3188: }
3190: /* multiply off-diagonal portion of matrix */
3191: VecSet(mat->slvec1b,0.0);
3192: (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);
3193: VecGetArray(mat->slvec0,&from);
3194: VecGetArray(xx,&x);
3195: PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));
3196: VecRestoreArray(mat->slvec0,&from);
3197: VecRestoreArray(xx,&x);
3198: VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
3199: VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
3200: (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);
3202: /* local sweep */
3203: (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);
3204: VecAXPY(xx,1.0,xx1);
3205: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3206: return(0);
3207: }
3209: PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3210: {
3211: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
3213: Vec lvec1,bb1;
3216: if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3217: if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3219: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3220: if (flag & SOR_ZERO_INITIAL_GUESS) {
3221: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
3222: its--;
3223: }
3225: VecDuplicate(mat->lvec,&lvec1);
3226: VecDuplicate(bb,&bb1);
3227: while (its--) {
3228: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
3230: /* lower diagonal part: bb1 = bb - B^T*xx */
3231: (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);
3232: VecScale(lvec1,-1.0);
3234: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
3235: VecCopy(bb,bb1);
3236: VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);
3238: /* upper diagonal part: bb1 = bb1 - B*x */
3239: VecScale(mat->lvec,-1.0);
3240: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);
3242: VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);
3244: /* diagonal sweep */
3245: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
3246: }
3247: VecDestroy(&lvec1);
3248: VecDestroy(&bb1);
3249: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3250: return(0);
3251: }
3253: /*@
3254: MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
3255: in standard CSR format.
3257: Collective on MPI_Comm
3259: Input Parameters:
3260: + comm - MPI communicator
3261: . bs - the block size, only a block size of 1 is supported
3262: . m - number of local rows (Cannot be PETSC_DECIDE)
3263: . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
3264: This value should be the same as the local size used in creating the
3265: x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
3266: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3267: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3268: . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix
3269: . j - column indices
3270: - a - matrix values
3272: Output Parameter:
3273: . mat - the matrix
3275: Level: intermediate
3277: Notes:
3278: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3279: thus you CANNOT change the matrix entries by changing the values of a[] after you have
3280: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3282: The i and j indices are 0 based, and the entries of i are offsets into the local j array.
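
   A sketch for the rank owning global rows 0 and 1, with bs = 1 (the values and indices are
   illustrative, and only the upper triangular part of each row is listed, as assumed for the
   symmetric format):

.vb
   PetscInt    i[] = {0,2,3};          /* row pointers for the 2 local rows */
   PetscInt    j[] = {0,1,1};          /* global column indices             */
   PetscScalar a[] = {4.0,-1.0,4.0};   /* corresponding values              */
   Mat         A;
   MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve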
3284: .keywords: matrix, aij, compressed row, sparse, parallel
3286: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3287: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3288: @*/
3289: PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3290: {
3295: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3296: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3297: MatCreate(comm,mat);
3298: MatSetSizes(*mat,m,n,M,N);
3299: MatSetType(*mat,MATMPISBAIJ);
3300: MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);
3301: return(0);
3302: }
3305: /*@C
3306: MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
3307: (block compressed row).
3309: Collective on MPI_Comm
3311: Input Parameters:
3312: + B - the matrix
3313: . bs - the block size
3314: . i - the indices into j for the start of each local row (starts with zero)
3315: . j - the column indices for each local row (starts with zero) these must be sorted for each row
3316: - v - optional values in the matrix
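
   A sketch of how this routine is typically combined with MatCreate()/MatSetType(); the names
   m, n, bs, i, j, and v stand for the caller's own data and are placeholders here:

.vb
   MatCreate(comm,&B);
   MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(B,MATMPISBAIJ);
   MatMPISBAIJSetPreallocationCSR(B,bs,i,j,v);
.ve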
3318: Level: developer
3320: .keywords: matrix, aij, compressed row, sparse, parallel
3322: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3323: @*/
3324: PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3325: {
3329: PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3330: return(0);
3331: }
3333: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3334: {
3336: PetscInt m,N,i,rstart,nnz,Ii,bs,cbs;
3337: PetscInt *indx;
3338: PetscScalar *values;
3341: MatGetSize(inmat,&m,&N);
3342: if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3343: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
3344: PetscInt *dnz,*onz,sum,bs,cbs,mbs,Nbs;
3345: PetscInt *bindx,rmax=a->rmax,j;
3347: MatGetBlockSizes(inmat,&bs,&cbs);
3348: mbs = m/bs; Nbs = N/cbs;
3349: if (n == PETSC_DECIDE) {
3350: PetscSplitOwnership(comm,&n,&Nbs);
3351: }
3352: /* Check sum(n) = Nbs */
3353: MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
3354: if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns != global block columns %D",Nbs);
3356: MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);
3357: rstart -= mbs;
3359: PetscMalloc1(rmax,&bindx);
3360: MatPreallocateInitialize(comm,mbs,n,dnz,onz);
3361: MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);
3362: for (i=0; i<mbs; i++) {
3363: MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL); /* non-blocked nnz and indx */
3364: nnz = nnz/bs;
3365: for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3366: MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);
3367: MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);
3368: }
3369: MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);
3370: PetscFree(bindx);
3372: MatCreate(comm,outmat);
3373: MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);
3374: MatSetBlockSizes(*outmat,bs,cbs);
3375: MatSetType(*outmat,MATMPISBAIJ);
3376: MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);
3377: MatPreallocateFinalize(dnz,onz);
3378: }
3380: /* numeric phase */
3381: MatGetBlockSizes(inmat,&bs,&cbs);
3382: MatGetOwnershipRange(*outmat,&rstart,NULL);
3384: MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);
3385: for (i=0; i<m; i++) {
3386: MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);
3387: Ii = i + rstart;
3388: MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
3389: MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);
3390: }
3391: MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);
3392: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
3393: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
3394: return(0);
3395: }