Actual source code: mpisbaij.c
petsc-3.13.6 2020-09-29
2: #include <../src/mat/impls/baij/mpi/mpibaij.h>
3: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4: #include <../src/mat/impls/sbaij/seq/sbaij.h>
5: #include <petscblaslapack.h>
7: #if defined(PETSC_HAVE_ELEMENTAL)
8: PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9: #endif
11: /* This could be moved to matimpl.h */
12: static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
13: {
14: Mat preallocator;
15: PetscInt r,rstart,rend;
16: PetscInt bs,i,m,n,M,N;
17: PetscBool cong = PETSC_TRUE;
23: for (i = 0; i < nm; i++) {
25: PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);
26: if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
27: }
29: MatGetBlockSize(B,&bs);
30: MatGetSize(B,&M,&N);
31: MatGetLocalSize(B,&m,&n);
32: MatCreate(PetscObjectComm((PetscObject)B),&preallocator);
33: MatSetType(preallocator,MATPREALLOCATOR);
34: MatSetBlockSize(preallocator,bs);
35: MatSetSizes(preallocator,m,n,M,N);
36: MatSetUp(preallocator);
37: MatGetOwnershipRange(preallocator,&rstart,&rend);
38: for (r = rstart; r < rend; ++r) {
39: PetscInt ncols;
40: const PetscInt *row;
41: const PetscScalar *vals;
43: for (i = 0; i < nm; i++) {
44: MatGetRow(X[i],r,&ncols,&row,&vals);
45: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
46: if (symm && symm[i]) {
47: MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);
48: }
49: MatRestoreRow(X[i],r,&ncols,&row,&vals);
50: }
51: }
52: MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
53: MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);
54: MatPreallocatorPreallocate(preallocator,fill,B);
55: MatDestroy(&preallocator);
56: return(0);
57: }
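/* Editor's sketch (not part of the PETSc source): the MATPREALLOCATOR pattern
   used above, seen from the user's side. InsertPattern() is a hypothetical
   routine making the same MatSetValues() calls in both passes; m,n,M,N and
   the target matrix B are assumed to be in scope. */
#if 0
  Mat preall;
  MatCreate(PETSC_COMM_WORLD,&preall);
  MatSetSizes(preall,m,n,M,N);
  MatSetType(preall,MATPREALLOCATOR);
  MatSetUp(preall);
  InsertPattern(preall);                           /* first pass: pattern only */
  MatAssemblyBegin(preall,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(preall,MAT_FINAL_ASSEMBLY);
  MatPreallocatorPreallocate(preall,PETSC_TRUE,B); /* transfer the layout to B */
  MatDestroy(&preall);
  InsertPattern(B);                                /* second pass: real values */
#endif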
59: PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
60: {
61: Mat B;
63: PetscInt r;
66: if (reuse != MAT_REUSE_MATRIX) {
67: PetscBool symm = PETSC_TRUE,isdense;
68: PetscInt bs;
70: MatCreate(PetscObjectComm((PetscObject)A),&B);
71: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
72: MatSetType(B,newtype);
73: MatGetBlockSize(A,&bs);
74: MatSetBlockSize(B,bs);
75: PetscLayoutSetUp(B->rmap);
76: PetscLayoutSetUp(B->cmap);
77: PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");
78: if (!isdense) {
79: MatGetRowUpperTriangular(A);
80: MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);
81: MatRestoreRowUpperTriangular(A);
82: } else {
83: MatSetUp(B);
84: }
85: } else {
86: B = *newmat;
87: MatZeroEntries(B);
88: }
90: MatGetRowUpperTriangular(A);
91: for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
92: PetscInt ncols;
93: const PetscInt *row;
94: const PetscScalar *vals;
96: MatGetRow(A,r,&ncols,&row,&vals);
97: MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);
98: #if defined(PETSC_USE_COMPLEX)
99: if (A->hermitian) {
100: PetscInt i;
101: for (i = 0; i < ncols; i++) {
102: MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);
103: }
104: } else {
105: MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);
106: }
107: #else
108: MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);
109: #endif
110: MatRestoreRow(A,r,&ncols,&row,&vals);
111: }
112: MatRestoreRowUpperTriangular(A);
113: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
114: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
116: if (reuse == MAT_INPLACE_MATRIX) {
117: MatHeaderReplace(A,&B);
118: } else {
119: *newmat = B;
120: }
121: return(0);
122: }
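/* Editor's sketch (not part of the PETSc source): MatConvert_MPISBAIJ_Basic()
   above is the generic fallback behind MatConvert(); it expands the stored
   upper triangle into both triangles of the target type. */
#if 0
  Mat Aij;
  MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&Aij); /* creates a new matrix */
  MatDestroy(&Aij);
  MatConvert(A,MATMPIAIJ,MAT_INPLACE_MATRIX,&A);   /* or replace A in place */
#endif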
124: PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
125: {
126: Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;
130: MatStoreValues(aij->A);
131: MatStoreValues(aij->B);
132: return(0);
133: }
135: PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
136: {
137: Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;
141: MatRetrieveValues(aij->A);
142: MatRetrieveValues(aij->B);
143: return(0);
144: }
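/* Editor's note (not part of the PETSc source): the two macros below insert
   or add a single scalar into the local diagonal (A, SeqSBAIJ) or
   off-diagonal (B, SeqBAIJ) part. Each binary-searches the block row for
   block column bcol, updates in place if the block exists, and otherwise
   shifts the row and allocates a new zeroed bs-by-bs block (unless nonew
   forbids new nonzeros). They rely on the local variables declared in
   MatSetValues_MPISBAIJ() below. */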
146: #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
147: { \
148: brow = row/bs; \
149: rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
150: rmax = aimax[brow]; nrow = ailen[brow]; \
151: bcol = col/bs; \
152: ridx = row % bs; cidx = col % bs; \
153: low = 0; high = nrow; \
154: while (high-low > 3) { \
155: t = (low+high)/2; \
156: if (rp[t] > bcol) high = t; \
157: else low = t; \
158: } \
159: for (_i=low; _i<high; _i++) { \
160: if (rp[_i] > bcol) break; \
161: if (rp[_i] == bcol) { \
162: bap = ap + bs2*_i + bs*cidx + ridx; \
163: if (addv == ADD_VALUES) *bap += value; \
164: else *bap = value; \
165: goto a_noinsert; \
166: } \
167: } \
168: if (a->nonew == 1) goto a_noinsert; \
169: if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
170: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
171: N = nrow++ - 1; \
172: /* shift up all the later entries in this row */ \
173: PetscArraymove(rp+_i+1,rp+_i,N-_i+1); \
174: PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1)); \
175: PetscArrayzero(ap+bs2*_i,bs2); \
176: rp[_i] = bcol; \
177: ap[bs2*_i + bs*cidx + ridx] = value; \
178: A->nonzerostate++;\
179: a_noinsert:; \
180: ailen[brow] = nrow; \
181: }
183: #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
184: { \
185: brow = row/bs; \
186: rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
187: rmax = bimax[brow]; nrow = bilen[brow]; \
188: bcol = col/bs; \
189: ridx = row % bs; cidx = col % bs; \
190: low = 0; high = nrow; \
191: while (high-low > 3) { \
192: t = (low+high)/2; \
193: if (rp[t] > bcol) high = t; \
194: else low = t; \
195: } \
196: for (_i=low; _i<high; _i++) { \
197: if (rp[_i] > bcol) break; \
198: if (rp[_i] == bcol) { \
199: bap = ap + bs2*_i + bs*cidx + ridx; \
200: if (addv == ADD_VALUES) *bap += value; \
201: else *bap = value; \
202: goto b_noinsert; \
203: } \
204: } \
205: if (b->nonew == 1) goto b_noinsert; \
206: if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
207: MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
208: N = nrow++ - 1; \
209: /* shift up all the later entries in this row */ \
210: PetscArraymove(rp+_i+1,rp+_i,N-_i+1); \
211: PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1)); \
212: PetscArrayzero(ap+bs2*_i,bs2); \
213: rp[_i] = bcol; \
214: ap[bs2*_i + bs*cidx + ridx] = value; \
215: B->nonzerostate++;\
216: b_noinsert:; \
217: bilen[brow] = nrow; \
218: }
220: /* Only add/insert a(i,j) with i<=j (blocks).
221:    Any a(i,j) with i>j input by the user is ignored or generates an error
222: */
223: PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
224: {
225: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
226: MatScalar value;
227: PetscBool roworiented = baij->roworiented;
229: PetscInt i,j,row,col;
230: PetscInt rstart_orig=mat->rmap->rstart;
231: PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
232: PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs;
234: /* Some variables required by the macros below */
235: Mat A = baij->A;
236: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
237: PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
238: MatScalar *aa =a->a;
240: Mat B = baij->B;
241: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
242: PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
243: MatScalar *ba =b->a;
245: PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol;
246: PetscInt low,high,t,ridx,cidx,bs2=a->bs2;
247: MatScalar *ap,*bap;
249: /* for stash */
250: PetscInt n_loc, *in_loc = NULL;
251: MatScalar *v_loc = NULL;
254: if (!baij->donotstash) {
255: if (n > baij->n_loc) {
256: PetscFree(baij->in_loc);
257: PetscFree(baij->v_loc);
258: PetscMalloc1(n,&baij->in_loc);
259: PetscMalloc1(n,&baij->v_loc);
261: baij->n_loc = n;
262: }
263: in_loc = baij->in_loc;
264: v_loc = baij->v_loc;
265: }
267: for (i=0; i<m; i++) {
268: if (im[i] < 0) continue;
269: #if defined(PETSC_USE_DEBUG)
270: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
271: #endif
272: if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
273: row = im[i] - rstart_orig; /* local row index */
274: for (j=0; j<n; j++) {
275: if (im[i]/bs > in[j]/bs) {
276: if (a->ignore_ltriangular) {
277: continue; /* ignore lower triangular blocks */
278: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
279: }
280: if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
281: col = in[j] - cstart_orig; /* local col index */
282: brow = row/bs; bcol = col/bs;
283: if (brow > bcol) continue; /* ignore lower triangular blocks of A */
284: if (roworiented) value = v[i*n+j];
285: else value = v[i+j*m];
286: MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
287: /* MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv); */
288: } else if (in[j] < 0) continue;
289: #if defined(PETSC_USE_DEBUG)
290: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
291: #endif
292: else { /* off-diag entry (B) */
293: if (mat->was_assembled) {
294: if (!baij->colmap) {
295: MatCreateColmap_MPIBAIJ_Private(mat);
296: }
297: #if defined(PETSC_USE_CTABLE)
298: PetscTableFind(baij->colmap,in[j]/bs + 1,&col);
299: col = col - 1;
300: #else
301: col = baij->colmap[in[j]/bs] - 1;
302: #endif
303: if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
304: MatDisAssemble_MPISBAIJ(mat);
305: col = in[j];
306: /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
307: B = baij->B;
308: b = (Mat_SeqBAIJ*)(B)->data;
309: bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
310: ba = b->a;
311: } else col += in[j]%bs;
312: } else col = in[j];
313: if (roworiented) value = v[i*n+j];
314: else value = v[i+j*m];
315: MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
316: /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
317: }
318: }
319: } else { /* off processor entry */
320: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
321: if (!baij->donotstash) {
322: mat->assembled = PETSC_FALSE;
323: n_loc = 0;
324: for (j=0; j<n; j++) {
325: if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
326: in_loc[n_loc] = in[j];
327: if (roworiented) {
328: v_loc[n_loc] = v[i*n+j];
329: } else {
330: v_loc[n_loc] = v[j*m+i];
331: }
332: n_loc++;
333: }
334: MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);
335: }
336: }
337: }
338: return(0);
339: }
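/* Editor's sketch (not part of the PETSc source): point-wise insertion into
   an MPISBAIJ matrix; only entries whose block row <= block column are
   stored, and the option below makes lower-triangular input silently
   dropped instead of an error. The matrix A is assumed to be in scope. */
#if 0
  PetscInt    row = 3, cols[2] = {3,7};
  PetscScalar vals[2] = {4.0,-1.0};
  MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);
  MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES); /* upper triangle only */
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
#endif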
341: PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
342: {
343: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
344: PetscErrorCode ierr;
345: PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N;
346: PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen;
347: PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
348: PetscBool roworiented=a->roworiented;
349: const PetscScalar *value = v;
350: MatScalar *ap,*aa = a->a,*bap;
353: if (col < row) {
354: if (a->ignore_ltriangular) return(0); /* ignore lower triangular block */
355: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
356: }
357: rp = aj + ai[row];
358: ap = aa + bs2*ai[row];
359: rmax = imax[row];
360: nrow = ailen[row];
361: value = v;
362: low = 0;
363: high = nrow;
365: while (high-low > 7) {
366: t = (low+high)/2;
367: if (rp[t] > col) high = t;
368: else low = t;
369: }
370: for (i=low; i<high; i++) {
371: if (rp[i] > col) break;
372: if (rp[i] == col) {
373: bap = ap + bs2*i;
374: if (roworiented) {
375: if (is == ADD_VALUES) {
376: for (ii=0; ii<bs; ii++) {
377: for (jj=ii; jj<bs2; jj+=bs) {
378: bap[jj] += *value++;
379: }
380: }
381: } else {
382: for (ii=0; ii<bs; ii++) {
383: for (jj=ii; jj<bs2; jj+=bs) {
384: bap[jj] = *value++;
385: }
386: }
387: }
388: } else {
389: if (is == ADD_VALUES) {
390: for (ii=0; ii<bs; ii++) {
391: for (jj=0; jj<bs; jj++) {
392: *bap++ += *value++;
393: }
394: }
395: } else {
396: for (ii=0; ii<bs; ii++) {
397: for (jj=0; jj<bs; jj++) {
398: *bap++ = *value++;
399: }
400: }
401: }
402: }
403: goto noinsert2;
404: }
405: }
406: if (nonew == 1) goto noinsert2;
407: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
408: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
409: N = nrow++ - 1; high++;
410: /* shift up all the later entries in this row */
411: PetscArraymove(rp+i+1,rp+i,N-i+1);
412: PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
413: rp[i] = col;
414: bap = ap + bs2*i;
415: if (roworiented) {
416: for (ii=0; ii<bs; ii++) {
417: for (jj=ii; jj<bs2; jj+=bs) {
418: bap[jj] = *value++;
419: }
420: }
421: } else {
422: for (ii=0; ii<bs; ii++) {
423: for (jj=0; jj<bs; jj++) {
424: *bap++ = *value++;
425: }
426: }
427: }
428: noinsert2:;
429: ailen[row] = nrow;
430: return(0);
431: }
433: /*
434: This routine is exactly duplicated in mpibaij.c
435: */
436: PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
437: {
438: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
439: PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N;
440: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
441: PetscErrorCode ierr;
442: PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
443: PetscBool roworiented=a->roworiented;
444: const PetscScalar *value = v;
445: MatScalar *ap,*aa = a->a,*bap;
448: rp = aj + ai[row];
449: ap = aa + bs2*ai[row];
450: rmax = imax[row];
451: nrow = ailen[row];
452: low = 0;
453: high = nrow;
454: value = v;
455: while (high-low > 7) {
456: t = (low+high)/2;
457: if (rp[t] > col) high = t;
458: else low = t;
459: }
460: for (i=low; i<high; i++) {
461: if (rp[i] > col) break;
462: if (rp[i] == col) {
463: bap = ap + bs2*i;
464: if (roworiented) {
465: if (is == ADD_VALUES) {
466: for (ii=0; ii<bs; ii++) {
467: for (jj=ii; jj<bs2; jj+=bs) {
468: bap[jj] += *value++;
469: }
470: }
471: } else {
472: for (ii=0; ii<bs; ii++) {
473: for (jj=ii; jj<bs2; jj+=bs) {
474: bap[jj] = *value++;
475: }
476: }
477: }
478: } else {
479: if (is == ADD_VALUES) {
480: for (ii=0; ii<bs; ii++,value+=bs) {
481: for (jj=0; jj<bs; jj++) {
482: bap[jj] += value[jj];
483: }
484: bap += bs;
485: }
486: } else {
487: for (ii=0; ii<bs; ii++,value+=bs) {
488: for (jj=0; jj<bs; jj++) {
489: bap[jj] = value[jj];
490: }
491: bap += bs;
492: }
493: }
494: }
495: goto noinsert2;
496: }
497: }
498: if (nonew == 1) goto noinsert2;
499: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
500: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
501: N = nrow++ - 1; high++;
502: /* shift up all the later entries in this row */
503: PetscArraymove(rp+i+1,rp+i,N-i+1);
504: PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
505: rp[i] = col;
506: bap = ap + bs2*i;
507: if (roworiented) {
508: for (ii=0; ii<bs; ii++) {
509: for (jj=ii; jj<bs2; jj+=bs) {
510: bap[jj] = *value++;
511: }
512: }
513: } else {
514: for (ii=0; ii<bs; ii++) {
515: for (jj=0; jj<bs; jj++) {
516: *bap++ = *value++;
517: }
518: }
519: }
520: noinsert2:;
521: ailen[row] = nrow;
522: return(0);
523: }
525: /*
526: This routine could be optimized by removing the need for the block copy below and passing stride information
527: to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
528: */
529: PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
530: {
531: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
532: const MatScalar *value;
533: MatScalar *barray =baij->barray;
534: PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
535: PetscErrorCode ierr;
536: PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs;
537: PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval;
538: PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
541: if (!barray) {
542: PetscMalloc1(bs2,&barray);
543: baij->barray = barray;
544: }
546: if (roworiented) {
547: stepval = (n-1)*bs;
548: } else {
549: stepval = (m-1)*bs;
550: }
551: for (i=0; i<m; i++) {
552: if (im[i] < 0) continue;
553: #if defined(PETSC_USE_DEBUG)
554: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
555: #endif
556: if (im[i] >= rstart && im[i] < rend) {
557: row = im[i] - rstart;
558: for (j=0; j<n; j++) {
559: if (im[i] > in[j]) {
560: if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
561: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
562: }
563: /* If n == 1 (row-oriented) or m == 1 (column-oriented) the input block is contiguous, so no copy is required */
564: if ((roworiented) && (n == 1)) {
565: barray = (MatScalar*) v + i*bs2;
566: } else if ((!roworiented) && (m == 1)) {
567: barray = (MatScalar*) v + j*bs2;
568: } else { /* Here a copy is required */
569: if (roworiented) {
570: value = v + i*(stepval+bs)*bs + j*bs;
571: } else {
572: value = v + j*(stepval+bs)*bs + i*bs;
573: }
574: for (ii=0; ii<bs; ii++,value+=stepval) {
575: for (jj=0; jj<bs; jj++) {
576: *barray++ = *value++;
577: }
578: }
579: barray -=bs2;
580: }
582: if (in[j] >= cstart && in[j] < cend) {
583: col = in[j] - cstart;
584: MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);
585: } else if (in[j] < 0) continue;
586: #if defined(PETSC_USE_DEBUG)
587: else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
588: #endif
589: else {
590: if (mat->was_assembled) {
591: if (!baij->colmap) {
592: MatCreateColmap_MPIBAIJ_Private(mat);
593: }
595: #if defined(PETSC_USE_DEBUG)
596: #if defined(PETSC_USE_CTABLE)
597: { PetscInt data;
598: PetscTableFind(baij->colmap,in[j]+1,&data);
599: if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
600: }
601: #else
602: if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
603: #endif
604: #endif
605: #if defined(PETSC_USE_CTABLE)
606: PetscTableFind(baij->colmap,in[j]+1,&col);
607: col = (col - 1)/bs;
608: #else
609: col = (baij->colmap[in[j]] - 1)/bs;
610: #endif
611: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
612: MatDisAssemble_MPISBAIJ(mat);
613: col = in[j];
614: }
615: } else col = in[j];
616: MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);
617: }
618: }
619: } else {
620: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621: if (!baij->donotstash) {
622: if (roworiented) {
623: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
624: } else {
625: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
626: }
627: }
628: }
629: }
630: return(0);
631: }
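/* Editor's sketch (not part of the PETSc source): blocked insertion; im/in
   are block (not point) indices and v holds a dense bs-by-bs block, so for
   bs = 2 this sets point rows 8..9 and columns 12..13. */
#if 0
  PetscInt    im = 4, in = 6;            /* block row <= block column */
  PetscScalar v[4] = {1.0,2.0,3.0,4.0};  /* bs2 = 4 entries, row-oriented */
  MatSetValuesBlocked(A,1,&im,1,&in,v,INSERT_VALUES);
#endif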
633: PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
634: {
635: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
637: PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
638: PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
641: for (i=0; i<m; i++) {
642: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
643: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
644: if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
645: row = idxm[i] - bsrstart;
646: for (j=0; j<n; j++) {
647: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
648: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
649: if (idxn[j] >= bscstart && idxn[j] < bscend) {
650: col = idxn[j] - bscstart;
651: MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);
652: } else {
653: if (!baij->colmap) {
654: MatCreateColmap_MPIBAIJ_Private(mat);
655: }
656: #if defined(PETSC_USE_CTABLE)
657: PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);
658: data--;
659: #else
660: data = baij->colmap[idxn[j]/bs]-1;
661: #endif
662: if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
663: else {
664: col = data + idxn[j]%bs;
665: MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);
666: }
667: }
668: }
669: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
670: }
671: return(0);
672: }
674: PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
675: {
676: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
678: PetscReal sum[2],*lnorm2;
681: if (baij->size == 1) {
682: MatNorm(baij->A,type,norm);
683: } else {
684: if (type == NORM_FROBENIUS) {
685: PetscMalloc1(2,&lnorm2);
686: MatNorm(baij->A,type,lnorm2);
687: *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* square of norm(A) */
688: MatNorm(baij->B,type,lnorm2);
689: *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* square of norm(B) */
690: MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
691: *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
692: PetscFree(lnorm2);
693: } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
694: Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
695: Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data;
696: PetscReal *rsum,*rsum2,vabs;
697: PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
698: PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
699: MatScalar *v;
701: PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);
702: PetscArrayzero(rsum,mat->cmap->N);
703: /* Amat */
704: v = amat->a; jj = amat->j;
705: for (brow=0; brow<mbs; brow++) {
706: grow = bs*(rstart + brow);
707: nz = amat->i[brow+1] - amat->i[brow];
708: for (bcol=0; bcol<nz; bcol++) {
709: gcol = bs*(rstart + *jj); jj++;
710: for (col=0; col<bs; col++) {
711: for (row=0; row<bs; row++) {
712: vabs = PetscAbsScalar(*v); v++;
713: rsum[gcol+col] += vabs;
714: /* non-diagonal block */
715: if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
716: }
717: }
718: }
719: PetscLogFlops(nz*bs*bs);
720: }
721: /* Bmat */
722: v = bmat->a; jj = bmat->j;
723: for (brow=0; brow<mbs; brow++) {
724: grow = bs*(rstart + brow);
725: nz = bmat->i[brow+1] - bmat->i[brow];
726: for (bcol=0; bcol<nz; bcol++) {
727: gcol = bs*garray[*jj]; jj++;
728: for (col=0; col<bs; col++) {
729: for (row=0; row<bs; row++) {
730: vabs = PetscAbsScalar(*v); v++;
731: rsum[gcol+col] += vabs;
732: rsum[grow+row] += vabs;
733: }
734: }
735: }
736: PetscLogFlops(nz*bs*bs);
737: }
738: MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
739: *norm = 0.0;
740: for (col=0; col<mat->cmap->N; col++) {
741: if (rsum2[col] > *norm) *norm = rsum2[col];
742: }
743: PetscFree2(rsum,rsum2);
744: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
745: }
746: return(0);
747: }
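/* Editor's note (not part of the PETSc source): since only the upper
   triangle is stored, the Frobenius branch above computes
     ||A||_F = sqrt( sum_p ( ||A_p||_F^2 + 2*||B_p||_F^2 ) ),
   where A_p is the symmetric diagonal block and B_p the strictly
   off-diagonal block on rank p; the factor 2 accounts for the unstored
   transpose of each B_p. Usage sketch: */
#if 0
  PetscReal nrm;
  MatNorm(A,NORM_FROBENIUS,&nrm);
#endif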
749: PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
750: {
751: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
753: PetscInt nstash,reallocs;
756: if (baij->donotstash || mat->nooffprocentries) return(0);
758: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
759: MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);
760: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
761: PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
762: MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);
763: PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
764: return(0);
765: }
767: PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
768: {
769: Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
770: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data;
772: PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2;
773: PetscInt *row,*col;
774: PetscBool other_disassembled;
775: PetscMPIInt n;
776: PetscBool r1,r2,r3;
777: MatScalar *val;
779: /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
781: if (!baij->donotstash && !mat->nooffprocentries) {
782: while (1) {
783: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
784: if (!flg) break;
786: for (i=0; i<n;) {
787: /* Now identify the consecutive vals belonging to the same row */
788: for (j=i,rstart=row[j]; j<n; j++) {
789: if (row[j] != rstart) break;
790: }
791: if (j < n) ncols = j-i;
792: else ncols = n-i;
793: /* Now assemble all these values with a single function call */
794: MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
795: i = j;
796: }
797: }
798: MatStashScatterEnd_Private(&mat->stash);
799: /* Now process the block-stash. Since the values are stashed column-oriented,
800:    clear the row-oriented flags before calling MatSetValuesBlocked(), and
801:    restore the original flags afterwards */
802: r1 = baij->roworiented;
803: r2 = a->roworiented;
804: r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
806: baij->roworiented = PETSC_FALSE;
807: a->roworiented = PETSC_FALSE;
809: ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
810: while (1) {
811: MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);
812: if (!flg) break;
814: for (i=0; i<n;) {
815: /* Now identify the consecutive vals belonging to the same row */
816: for (j=i,rstart=row[j]; j<n; j++) {
817: if (row[j] != rstart) break;
818: }
819: if (j < n) ncols = j-i;
820: else ncols = n-i;
821: MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);
822: i = j;
823: }
824: }
825: MatStashScatterEnd_Private(&mat->bstash);
827: baij->roworiented = r1;
828: a->roworiented = r2;
830: ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
831: }
833: MatAssemblyBegin(baij->A,mode);
834: MatAssemblyEnd(baij->A,mode);
836: /* determine if any processor has disassembled; if so we must
837:    also disassemble ourselves, in order that we may reassemble. */
838: /*
839: if nonzero structure of submatrix B cannot change then we know that
840: no processor disassembled thus we can skip this stuff
841: */
842: if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
843: MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));
844: if (mat->was_assembled && !other_disassembled) {
845: MatDisAssemble_MPISBAIJ(mat);
846: }
847: }
849: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
850: MatSetUpMultiply_MPISBAIJ(mat); /* setup Mvctx and sMvctx */
851: }
852: MatAssemblyBegin(baij->B,mode);
853: MatAssemblyEnd(baij->B,mode);
855: PetscFree2(baij->rowvalues,baij->rowindices);
857: baij->rowvalues = 0;
859: /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
860: if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
861: PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
862: MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));
863: }
864: return(0);
865: }
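/* Editor's sketch (not part of the PETSc source): the scalar and block
   stashes drained above are filled by MatSetValues()/MatSetValuesBlocked()
   calls targeting rows owned by other ranks, so every rank must reach the
   collective assembly calls. Names rank and M are assumed to be in scope. */
#if 0
  if (rank == 0) {                        /* row M-1 may live on another rank */
    PetscInt    row = M-1, col = M-1;
    PetscScalar one = 1.0;
    MatSetValues(A,1,&row,1,&col,&one,ADD_VALUES);
  }
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); /* starts the stash scatter */
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);   /* receives and inserts stashed entries */
#endif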
867: extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
868: #include <petscdraw.h>
869: static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
870: {
871: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
872: PetscErrorCode ierr;
873: PetscInt bs = mat->rmap->bs;
874: PetscMPIInt rank = baij->rank;
875: PetscBool iascii,isdraw;
876: PetscViewer sviewer;
877: PetscViewerFormat format;
880: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
881: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
882: if (iascii) {
883: PetscViewerGetFormat(viewer,&format);
884: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
885: MatInfo info;
886: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
887: MatGetInfo(mat,MAT_LOCAL,&info);
888: PetscViewerASCIIPushSynchronized(viewer);
889: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);
890: MatGetInfo(baij->A,MAT_LOCAL,&info);
891: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
892: MatGetInfo(baij->B,MAT_LOCAL,&info);
893: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
894: PetscViewerFlush(viewer);
895: PetscViewerASCIIPopSynchronized(viewer);
896: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
897: VecScatterView(baij->Mvctx,viewer);
898: return(0);
899: } else if (format == PETSC_VIEWER_ASCII_INFO) {
900: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
901: return(0);
902: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
903: return(0);
904: }
905: }
907: if (isdraw) {
908: PetscDraw draw;
909: PetscBool isnull;
910: PetscViewerDrawGetDraw(viewer,0,&draw);
911: PetscDrawIsNull(draw,&isnull);
912: if (isnull) return(0);
913: }
915: {
916: /* assemble the entire matrix onto first processor. */
917: Mat A;
918: Mat_SeqSBAIJ *Aloc;
919: Mat_SeqBAIJ *Bloc;
920: PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
921: MatScalar *a;
922: const char *matname;
924: /* Should this be the same type as mat? */
925: MatCreate(PetscObjectComm((PetscObject)mat),&A);
926: if (!rank) {
927: MatSetSizes(A,M,N,M,N);
928: } else {
929: MatSetSizes(A,0,0,M,N);
930: }
931: MatSetType(A,MATMPISBAIJ);
932: MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);
933: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
934: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
936: /* copy over the A part */
937: Aloc = (Mat_SeqSBAIJ*)baij->A->data;
938: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
939: PetscMalloc1(bs,&rvals);
941: for (i=0; i<mbs; i++) {
942: rvals[0] = bs*(baij->rstartbs + i);
943: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
944: for (j=ai[i]; j<ai[i+1]; j++) {
945: col = (baij->cstartbs+aj[j])*bs;
946: for (k=0; k<bs; k++) {
947: MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
948: col++;
949: a += bs;
950: }
951: }
952: }
953: /* copy over the B part */
954: Bloc = (Mat_SeqBAIJ*)baij->B->data;
955: ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
956: for (i=0; i<mbs; i++) {
958: rvals[0] = bs*(baij->rstartbs + i);
959: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
960: for (j=ai[i]; j<ai[i+1]; j++) {
961: col = baij->garray[aj[j]]*bs;
962: for (k=0; k<bs; k++) {
963: MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
964: col++;
965: a += bs;
966: }
967: }
968: }
969: PetscFree(rvals);
970: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
971: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
972: /*
973: Everyone has to participate in the call that draws the matrix since the graphics waits are
974: synchronized across all processors that share the PetscDraw object
975: */
976: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
977: PetscObjectGetName((PetscObject)mat,&matname);
978: if (!rank) {
979: PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);
980: MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);
981: }
982: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
983: PetscViewerFlush(viewer);
984: MatDestroy(&A);
985: }
986: return(0);
987: }
989: /* Used for both MPIBAIJ and MPISBAIJ matrices */
990: #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
992: PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
993: {
995: PetscBool iascii,isdraw,issocket,isbinary;
998: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
999: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1000: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1001: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1002: if (iascii || isdraw || issocket) {
1003: MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);
1004: } else if (isbinary) {
1005: MatView_MPISBAIJ_Binary(mat,viewer);
1006: }
1007: return(0);
1008: }
1010: PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1011: {
1012: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1016: #if defined(PETSC_USE_LOG)
1017: PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1018: #endif
1019: MatStashDestroy_Private(&mat->stash);
1020: MatStashDestroy_Private(&mat->bstash);
1021: MatDestroy(&baij->A);
1022: MatDestroy(&baij->B);
1023: #if defined(PETSC_USE_CTABLE)
1024: PetscTableDestroy(&baij->colmap);
1025: #else
1026: PetscFree(baij->colmap);
1027: #endif
1028: PetscFree(baij->garray);
1029: VecDestroy(&baij->lvec);
1030: VecScatterDestroy(&baij->Mvctx);
1031: VecDestroy(&baij->slvec0);
1032: VecDestroy(&baij->slvec0b);
1033: VecDestroy(&baij->slvec1);
1034: VecDestroy(&baij->slvec1a);
1035: VecDestroy(&baij->slvec1b);
1036: VecScatterDestroy(&baij->sMvctx);
1037: PetscFree2(baij->rowvalues,baij->rowindices);
1038: PetscFree(baij->barray);
1039: PetscFree(baij->hd);
1040: VecDestroy(&baij->diag);
1041: VecDestroy(&baij->bb1);
1042: VecDestroy(&baij->xx1);
1043: #if defined(PETSC_USE_REAL_MAT_SINGLE)
1044: PetscFree(baij->setvaluescopy);
1045: #endif
1046: PetscFree(baij->in_loc);
1047: PetscFree(baij->v_loc);
1048: PetscFree(baij->rangebs);
1049: PetscFree(mat->data);
1051: PetscObjectChangeTypeName((PetscObject)mat,0);
1052: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);
1053: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);
1054: PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);
1055: PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);
1056: #if defined(PETSC_HAVE_ELEMENTAL)
1057: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);
1058: #endif
1059: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);
1060: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);
1061: return(0);
1062: }
1064: PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1065: {
1066: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1067: PetscErrorCode ierr;
1068: PetscInt mbs=a->mbs,bs=A->rmap->bs;
1069: PetscScalar *from;
1070: const PetscScalar *x;
1073: /* diagonal part */
1074: (*a->A->ops->mult)(a->A,xx,a->slvec1a);
1075: VecSet(a->slvec1b,0.0);
1077: /* subdiagonal part */
1078: (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);
1080: /* copy x into the vec slvec0 */
1081: VecGetArray(a->slvec0,&from);
1082: VecGetArrayRead(xx,&x);
1084: PetscArraycpy(from,x,bs*mbs);
1085: VecRestoreArray(a->slvec0,&from);
1086: VecRestoreArrayRead(xx,&x);
1088: VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1089: VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1090: /* superdiagonal part */
1091: (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);
1092: return(0);
1093: }
1095: PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1096: {
1097: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1098: PetscErrorCode ierr;
1099: PetscInt mbs=a->mbs,bs=A->rmap->bs;
1100: PetscScalar *from;
1101: const PetscScalar *x;
1104: /* diagonal part */
1105: (*a->A->ops->mult)(a->A,xx,a->slvec1a);
1106: VecSet(a->slvec1b,0.0);
1108: /* subdiagonal part */
1109: (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);
1111: /* copy x into the vec slvec0 */
1112: VecGetArray(a->slvec0,&from);
1113: VecGetArrayRead(xx,&x);
1115: PetscArraycpy(from,x,bs*mbs);
1116: VecRestoreArray(a->slvec0,&from);
1117: VecRestoreArrayRead(xx,&x);
1119: VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1120: VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1121: /* superdiagonal part */
1122: (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);
1123: return(0);
1124: }
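/* Editor's note (not part of the PETSc source): writing the owned part of
   the symmetric operator as D + B (D = baij->A, the symmetric diagonal
   block; B = baij->B, the strictly upper off-diagonal part), the two
   routines above compute
     y = D*x + B*x_ghost + (remote contributions of B^T*x),
   packing [x ; B^T*x] into slvec0 and unpacking [y-accumulator ; x_ghost]
   from slvec1, so a single forward scatter moves data in both directions. */
#if 0
  MatMult(A,x,y);  /* x, y assumed to be conforming parallel vectors */
#endif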
1126: PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1127: {
1128: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1130: PetscInt nt;
1133: VecGetLocalSize(xx,&nt);
1134: if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1136: VecGetLocalSize(yy,&nt);
1137: if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1139: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1140: /* do diagonal part */
1141: (*a->A->ops->mult)(a->A,xx,yy);
1142: /* do superdiagonal part */
1143: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1144: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1145: /* do subdiagonal part */
1146: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1147: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1148: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1149: return(0);
1150: }
1152: PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
1153: {
1154: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1155: PetscErrorCode ierr;
1156: PetscInt mbs=a->mbs,bs=A->rmap->bs;
1157: PetscScalar *from,zero=0.0;
1158: const PetscScalar *x;
1161: /* diagonal part */
1162: (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);
1163: VecSet(a->slvec1b,zero);
1165: /* subdiagonal part */
1166: (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);
1168: /* copy x into the vec slvec0 */
1169: VecGetArray(a->slvec0,&from);
1170: VecGetArrayRead(xx,&x);
1171: PetscArraycpy(from,x,bs*mbs);
1172: VecRestoreArray(a->slvec0,&from);
1174: VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1175: VecRestoreArrayRead(xx,&x);
1176: VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1178: /* superdiagonal part */
1179: (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);
1180: return(0);
1181: }
1183: PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1184: {
1185: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1186: PetscErrorCode ierr;
1187: PetscInt mbs=a->mbs,bs=A->rmap->bs;
1188: PetscScalar *from,zero=0.0;
1189: const PetscScalar *x;
1192: /* diagonal part */
1193: (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);
1194: VecSet(a->slvec1b,zero);
1196: /* subdiagonal part */
1197: (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);
1199: /* copy x into the vec slvec0 */
1200: VecGetArray(a->slvec0,&from);
1201: VecGetArrayRead(xx,&x);
1202: PetscArraycpy(from,x,bs*mbs);
1203: VecRestoreArray(a->slvec0,&from);
1205: VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1206: VecRestoreArrayRead(xx,&x);
1207: VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1209: /* superdiagonal part */
1210: (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);
1211: return(0);
1212: }
1214: PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1215: {
1216: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1220: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1221: /* do diagonal part */
1222: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1223: /* do superdiagonal part */
1224: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1225: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1227: /* do subdiagonal part */
1228: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1229: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1230: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1231: return(0);
1232: }
1234: /*
1235: This only works correctly for square matrices where the subblock A->A is the
1236: diagonal block
1237: */
1238: PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1239: {
1240: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1244: /* if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1245: MatGetDiagonal(a->A,v);
1246: return(0);
1247: }
1249: PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1250: {
1251: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1255: MatScale(a->A,aa);
1256: MatScale(a->B,aa);
1257: return(0);
1258: }
1260: PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1261: {
1262: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1263: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1265: PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1266: PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1267: PetscInt *cmap,*idx_p,cstart = mat->rstartbs;
1270: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1271: mat->getrowactive = PETSC_TRUE;
1273: if (!mat->rowvalues && (idx || v)) {
1274: /*
1275: allocate enough space to hold information from the longest row.
1276: */
1277: Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1278: Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data;
1279: PetscInt max = 1,mbs = mat->mbs,tmp;
1280: for (i=0; i<mbs; i++) {
1281: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1282: if (max < tmp) max = tmp;
1283: }
1284: PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);
1285: }
1287: if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1288: lrow = row - brstart; /* local row index */
1290: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1291: if (!v) {pvA = 0; pvB = 0;}
1292: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1293: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1294: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1295: nztot = nzA + nzB;
1297: cmap = mat->garray;
1298: if (v || idx) {
1299: if (nztot) {
1300: /* Sort by increasing column numbers, assuming A and B already sorted */
1301: PetscInt imark = -1;
1302: if (v) {
1303: *v = v_p = mat->rowvalues;
1304: for (i=0; i<nzB; i++) {
1305: if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1306: else break;
1307: }
1308: imark = i;
1309: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1310: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1311: }
1312: if (idx) {
1313: *idx = idx_p = mat->rowindices;
1314: if (imark > -1) {
1315: for (i=0; i<imark; i++) {
1316: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1317: }
1318: } else {
1319: for (i=0; i<nzB; i++) {
1320: if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1321: else break;
1322: }
1323: imark = i;
1324: }
1325: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i];
1326: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1327: }
1328: } else {
1329: if (idx) *idx = 0;
1330: if (v) *v = 0;
1331: }
1332: }
1333: *nz = nztot;
1334: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1335: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1336: return(0);
1337: }
1339: PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1340: {
1341: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1344: if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1345: baij->getrowactive = PETSC_FALSE;
1346: return(0);
1347: }
1349: PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1350: {
1351: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1352: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1355: aA->getrow_utriangular = PETSC_TRUE;
1356: return(0);
1357: }
1358: PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1359: {
1360: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1361: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1364: aA->getrow_utriangular = PETSC_FALSE;
1365: return(0);
1366: }
1368: PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1369: {
1370: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1374: MatRealPart(a->A);
1375: MatRealPart(a->B);
1376: return(0);
1377: }
1379: PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1380: {
1381: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1385: MatImaginaryPart(a->A);
1386: MatImaginaryPart(a->B);
1387: return(0);
1388: }
1390: /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1391: Input: isrow - distributed (parallel),
1392: iscol_local - locally owned (seq)
1393: */
1394: PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg)
1395: {
1397: PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch;
1398: const PetscInt *ptr1,*ptr2;
1401: ISGetLocalSize(isrow,&sz1);
1402: ISGetLocalSize(iscol_local,&sz2);
1403: if (sz1 > sz2) {
1404: *flg = PETSC_FALSE;
1405: return(0);
1406: }
1408: ISGetIndices(isrow,&ptr1);
1409: ISGetIndices(iscol_local,&ptr2);
1411: PetscMalloc1(sz1,&a1);
1412: PetscMalloc1(sz2,&a2);
1413: PetscArraycpy(a1,ptr1,sz1);
1414: PetscArraycpy(a2,ptr2,sz2);
1415: PetscSortInt(sz1,a1);
1416: PetscSortInt(sz2,a2);
1418: nmatch=0;
1419: k = 0;
1420: for (i=0; i<sz1; i++){
1421: for (j=k; j<sz2; j++){
1422: if (a1[i] == a2[j]) {
1423: k = j; nmatch++;
1424: break;
1425: }
1426: }
1427: }
1428: ISRestoreIndices(isrow,&ptr1);
1429: ISRestoreIndices(iscol_local,&ptr2);
1430: PetscFree(a1);
1431: PetscFree(a2);
1432: if (nmatch < sz1) {
1433: *flg = PETSC_FALSE;
1434: } else {
1435: *flg = PETSC_TRUE;
1436: }
1437: return(0);
1438: }
1440: PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1441: {
1443: IS iscol_local;
1444: PetscInt csize;
1445: PetscBool isequal;
1448: ISGetLocalSize(iscol,&csize);
1449: if (call == MAT_REUSE_MATRIX) {
1450: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
1451: if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1452: } else {
1453: ISAllGather(iscol,&iscol_local);
1454: ISEqual_private(isrow,iscol_local,&isequal);
1455: if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1456: }
1458: /* now call MatCreateSubMatrix_MPIBAIJ() */
1459: MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
1460: if (call == MAT_INITIAL_MATRIX) {
1461: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
1462: ISDestroy(&iscol_local);
1463: }
1464: return(0);
1465: }
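/* Editor's sketch (not part of the PETSc source): extracting a principal
   submatrix; for the symmetric format the row and column index sets must
   select the same global indices. Names nloc and first are hypothetical. */
#if 0
  IS  is;
  Mat Asub;
  ISCreateStride(PETSC_COMM_WORLD,nloc,first,1,&is);
  MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Asub);
  /* ... change A's values, then refill using the cached "ISAllGather" ... */
  MatCreateSubMatrix(A,is,is,MAT_REUSE_MATRIX,&Asub);
  MatDestroy(&Asub);
  ISDestroy(&is);
#endif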
1467: PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1468: {
1469: Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1473: MatZeroEntries(l->A);
1474: MatZeroEntries(l->B);
1475: return(0);
1476: }
1478: PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1479: {
1480: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1481: Mat A = a->A,B = a->B;
1483: PetscLogDouble isend[5],irecv[5];
1486: info->block_size = (PetscReal)matin->rmap->bs;
1488: MatGetInfo(A,MAT_LOCAL,info);
1490: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1491: isend[3] = info->memory; isend[4] = info->mallocs;
1493: MatGetInfo(B,MAT_LOCAL,info);
1495: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1496: isend[3] += info->memory; isend[4] += info->mallocs;
1497: if (flag == MAT_LOCAL) {
1498: info->nz_used = isend[0];
1499: info->nz_allocated = isend[1];
1500: info->nz_unneeded = isend[2];
1501: info->memory = isend[3];
1502: info->mallocs = isend[4];
1503: } else if (flag == MAT_GLOBAL_MAX) {
1504: MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));
1506: info->nz_used = irecv[0];
1507: info->nz_allocated = irecv[1];
1508: info->nz_unneeded = irecv[2];
1509: info->memory = irecv[3];
1510: info->mallocs = irecv[4];
1511: } else if (flag == MAT_GLOBAL_SUM) {
1512: MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));
1514: info->nz_used = irecv[0];
1515: info->nz_allocated = irecv[1];
1516: info->nz_unneeded = irecv[2];
1517: info->memory = irecv[3];
1518: info->mallocs = irecv[4];
1519: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1520: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1521: info->fill_ratio_needed = 0;
1522: info->factor_mallocs = 0;
1523: return(0);
1524: }
1526: PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1527: {
1528: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1529: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1533: switch (op) {
1534: case MAT_NEW_NONZERO_LOCATIONS:
1535: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1536: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1537: case MAT_KEEP_NONZERO_PATTERN:
1538: case MAT_SUBMAT_SINGLEIS:
1539: case MAT_NEW_NONZERO_LOCATION_ERR:
1540: MatCheckPreallocated(A,1);
1541: MatSetOption(a->A,op,flg);
1542: MatSetOption(a->B,op,flg);
1543: break;
1544: case MAT_ROW_ORIENTED:
1545: MatCheckPreallocated(A,1);
1546: a->roworiented = flg;
1548: MatSetOption(a->A,op,flg);
1549: MatSetOption(a->B,op,flg);
1550: break;
1551: case MAT_NEW_DIAGONALS:
1552: case MAT_SORTED_FULL:
1553: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1554: break;
1555: case MAT_IGNORE_OFF_PROC_ENTRIES:
1556: a->donotstash = flg;
1557: break;
1558: case MAT_USE_HASH_TABLE:
1559: a->ht_flag = flg;
1560: break;
1561: case MAT_HERMITIAN:
1562: MatCheckPreallocated(A,1);
1563: MatSetOption(a->A,op,flg);
1564: #if defined(PETSC_USE_COMPLEX)
1565: if (flg) { /* need different mat-vec ops */
1566: A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1567: A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian;
1568: A->ops->multtranspose = NULL;
1569: A->ops->multtransposeadd = NULL;
1570: A->symmetric = PETSC_FALSE;
1571: }
1572: #endif
1573: break;
1574: case MAT_SPD:
1575: case MAT_SYMMETRIC:
1576: MatCheckPreallocated(A,1);
1577: MatSetOption(a->A,op,flg);
1578: #if defined(PETSC_USE_COMPLEX)
1579: if (flg) { /* restore to use default mat-vec ops */
1580: A->ops->mult = MatMult_MPISBAIJ;
1581: A->ops->multadd = MatMultAdd_MPISBAIJ;
1582: A->ops->multtranspose = MatMult_MPISBAIJ;
1583: A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1584: }
1585: #endif
1586: break;
1587: case MAT_STRUCTURALLY_SYMMETRIC:
1588: MatCheckPreallocated(A,1);
1589: MatSetOption(a->A,op,flg);
1590: break;
1591: case MAT_SYMMETRY_ETERNAL:
1592: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1593: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1594: break;
1595: case MAT_IGNORE_LOWER_TRIANGULAR:
1596: aA->ignore_ltriangular = flg;
1597: break;
1598: case MAT_ERROR_LOWER_TRIANGULAR:
1599: aA->ignore_ltriangular = (PetscBool)!flg;
1600: break;
1601: case MAT_GETROW_UPPERTRIANGULAR:
1602: aA->getrow_utriangular = flg;
1603: break;
1604: default:
1605: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1606: }
1607: return(0);
1608: }
1610: PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1611: {
1615: if (reuse == MAT_INITIAL_MATRIX) {
1616: MatDuplicate(A,MAT_COPY_VALUES,B);
1617: } else if (reuse == MAT_REUSE_MATRIX) {
1618: MatCopy(A,*B,SAME_NONZERO_PATTERN);
1619: }
1620: return(0);
1621: }
1623: PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1624: {
1625: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1626: Mat a = baij->A, b=baij->B;
1628: PetscInt nv,m,n;
1629: PetscBool flg;
1632: if (ll != rr) {
1633: VecEqual(ll,rr,&flg);
1634: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1635: }
1636: if (!ll) return(0);
1638: MatGetLocalSize(mat,&m,&n);
1639: if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1641: VecGetLocalSize(rr,&nv);
1642: if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local size");
1644: VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1646: /* left diagonalscale the off-diagonal part */
1647: (*b->ops->diagonalscale)(b,ll,NULL);
1649: /* scale the diagonal part */
1650: (*a->ops->diagonalscale)(a,ll,rr);
1652: /* right diagonalscale the off-diagonal part */
1653: VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1654: (*b->ops->diagonalscale)(b,NULL,baij->lvec);
1655: return(0);
1656: }
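/* Editor's sketch (not part of the PETSc source): only symmetric scaling is
   possible, so the same vector goes on both sides and A stays symmetric. */
#if 0
  Vec d;
  MatCreateVecs(A,&d,NULL);
  VecSet(d,2.0);
  MatDiagonalScale(A,d,d);  /* A <- diag(d) A diag(d) */
  VecDestroy(&d);
#endif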
1658: PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1659: {
1660: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1664: MatSetUnfactored(a->A);
1665: return(0);
1666: }
1668: static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1670: PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag)
1671: {
1672: Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1673: Mat a,b,c,d;
1674: PetscBool flg;
1678: a = matA->A; b = matA->B;
1679: c = matB->A; d = matB->B;
1681: MatEqual(a,c,&flg);
1682: if (flg) {
1683: MatEqual(b,d,&flg);
1684: }
1685: MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1686: return(0);
1687: }
1689: PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1690: {
1692: PetscBool isbaij;
1695: PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");
1696: if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1697: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1698: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1699: MatGetRowUpperTriangular(A);
1700: MatCopy_Basic(A,B,str);
1701: MatRestoreRowUpperTriangular(A);
1702: } else {
1703: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1704: Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1706: MatCopy(a->A,b->A,str);
1707: MatCopy(a->B,b->B,str);
1708: }
1709: PetscObjectStateIncrease((PetscObject)B);
1710: return(0);
1711: }
1713: PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1714: {
1718: MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1719: return(0);
1720: }
1722: PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1723: {
1725: Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1726: PetscBLASInt bnz,one=1;
1727: Mat_SeqSBAIJ *xa,*ya;
1728: Mat_SeqBAIJ *xb,*yb;
1731: if (str == SAME_NONZERO_PATTERN) {
1732: PetscScalar alpha = a;
1733: xa = (Mat_SeqSBAIJ*)xx->A->data;
1734: ya = (Mat_SeqSBAIJ*)yy->A->data;
1735: PetscBLASIntCast(xa->nz,&bnz);
1736: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1737: xb = (Mat_SeqBAIJ*)xx->B->data;
1738: yb = (Mat_SeqBAIJ*)yy->B->data;
1739: PetscBLASIntCast(xb->nz,&bnz);
1740: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1741: PetscObjectStateIncrease((PetscObject)Y);
1742: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
1743: MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);
1744: MatAXPY_Basic(Y,a,X,str);
1745: MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);
1746: } else {
1747: Mat B;
1748: PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1749: if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1750: MatGetRowUpperTriangular(X);
1751: MatGetRowUpperTriangular(Y);
1752: PetscMalloc1(yy->A->rmap->N,&nnz_d);
1753: PetscMalloc1(yy->B->rmap->N,&nnz_o);
1754: MatCreate(PetscObjectComm((PetscObject)Y),&B);
1755: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
1756: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
1757: MatSetBlockSizesFromMats(B,Y,Y);
1758: MatSetType(B,MATMPISBAIJ);
1759: MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);
1760: MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);
1761: MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);
1762: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
1763: MatHeaderReplace(Y,&B);
1764: PetscFree(nnz_d);
1765: PetscFree(nnz_o);
1766: MatRestoreRowUpperTriangular(X);
1767: MatRestoreRowUpperTriangular(Y);
1768: }
1769: return(0);
1770: }
1772: PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1773: {
1775: PetscInt i;
1776: PetscBool flg;
1779: MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B); /* B[] are sbaij matrices */
1780: for (i=0; i<n; i++) {
1781: ISEqual(irow[i],icol[i],&flg);
1782: if (!flg) {
1783: MatSeqSBAIJZeroOps_Private(*B[i]);
1784: }
1785: }
1786: return(0);
1787: }
1789: PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1790: {
1792: Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data;
1793: Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data;
1796: if (!Y->preallocated) {
1797: MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);
1798: } else if (!aij->nz) {
1799: PetscInt nonew = aij->nonew;
1800: MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);
1801: aij->nonew = nonew;
1802: }
1803: MatShift_Basic(Y,a);
1804: return(0);
1805: }
1807: PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d)
1808: {
1809: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1813: if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1814: MatMissingDiagonal(a->A,missing,d);
1815: if (d) {
1816: PetscInt rstart;
1817: MatGetOwnershipRange(A,&rstart,NULL);
1818: *d += rstart/A->rmap->bs;
1820: }
1821: return(0);
1822: }
1824: PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1825: {
1827: *a = ((Mat_MPISBAIJ*)A->data)->A;
1828: return(0);
1829: }
1831: /* -------------------------------------------------------------------*/
1832: static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1833: MatGetRow_MPISBAIJ,
1834: MatRestoreRow_MPISBAIJ,
1835: MatMult_MPISBAIJ,
1836: /* 4*/ MatMultAdd_MPISBAIJ,
1837: MatMult_MPISBAIJ, /* transpose versions are the same as the non-transpose versions since the matrix is symmetric */
1838: MatMultAdd_MPISBAIJ,
1839: 0,
1840: 0,
1841: 0,
1842: /* 10*/ 0,
1843: 0,
1844: 0,
1845: MatSOR_MPISBAIJ,
1846: MatTranspose_MPISBAIJ,
1847: /* 15*/ MatGetInfo_MPISBAIJ,
1848: MatEqual_MPISBAIJ,
1849: MatGetDiagonal_MPISBAIJ,
1850: MatDiagonalScale_MPISBAIJ,
1851: MatNorm_MPISBAIJ,
1852: /* 20*/ MatAssemblyBegin_MPISBAIJ,
1853: MatAssemblyEnd_MPISBAIJ,
1854: MatSetOption_MPISBAIJ,
1855: MatZeroEntries_MPISBAIJ,
1856: /* 24*/ 0,
1857: 0,
1858: 0,
1859: 0,
1860: 0,
1861: /* 29*/ MatSetUp_MPISBAIJ,
1862: 0,
1863: 0,
1864: MatGetDiagonalBlock_MPISBAIJ,
1865: 0,
1866: /* 34*/ MatDuplicate_MPISBAIJ,
1867: 0,
1868: 0,
1869: 0,
1870: 0,
1871: /* 39*/ MatAXPY_MPISBAIJ,
1872: MatCreateSubMatrices_MPISBAIJ,
1873: MatIncreaseOverlap_MPISBAIJ,
1874: MatGetValues_MPISBAIJ,
1875: MatCopy_MPISBAIJ,
1876: /* 44*/ 0,
1877: MatScale_MPISBAIJ,
1878: MatShift_MPISBAIJ,
1879: 0,
1880: 0,
1881: /* 49*/ 0,
1882: 0,
1883: 0,
1884: 0,
1885: 0,
1886: /* 54*/ 0,
1887: 0,
1888: MatSetUnfactored_MPISBAIJ,
1889: 0,
1890: MatSetValuesBlocked_MPISBAIJ,
1891: /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1892: 0,
1893: 0,
1894: 0,
1895: 0,
1896: /* 64*/ 0,
1897: 0,
1898: 0,
1899: 0,
1900: 0,
1901: /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1902: 0,
1903: MatConvert_MPISBAIJ_Basic,
1904: 0,
1905: 0,
1906: /* 74*/ 0,
1907: 0,
1908: 0,
1909: 0,
1910: 0,
1911: /* 79*/ 0,
1912: 0,
1913: 0,
1914: 0,
1915: MatLoad_MPISBAIJ,
1916: /* 84*/ 0,
1917: 0,
1918: 0,
1919: 0,
1920: 0,
1921: /* 89*/ 0,
1922: 0,
1923: 0,
1924: 0,
1925: 0,
1926: /* 94*/ 0,
1927: 0,
1928: 0,
1929: 0,
1930: 0,
1931: /* 99*/ 0,
1932: 0,
1933: 0,
1934: 0,
1935: 0,
1936: /*104*/ 0,
1937: MatRealPart_MPISBAIJ,
1938: MatImaginaryPart_MPISBAIJ,
1939: MatGetRowUpperTriangular_MPISBAIJ,
1940: MatRestoreRowUpperTriangular_MPISBAIJ,
1941: /*109*/ 0,
1942: 0,
1943: 0,
1944: 0,
1945: MatMissingDiagonal_MPISBAIJ,
1946: /*114*/ 0,
1947: 0,
1948: 0,
1949: 0,
1950: 0,
1951: /*119*/ 0,
1952: 0,
1953: 0,
1954: 0,
1955: 0,
1956: /*124*/ 0,
1957: 0,
1958: 0,
1959: 0,
1960: 0,
1961: /*129*/ 0,
1962: 0,
1963: 0,
1964: 0,
1965: 0,
1966: /*134*/ 0,
1967: 0,
1968: 0,
1969: 0,
1970: 0,
1971: /*139*/ MatSetBlockSizes_Default,
1972: 0,
1973: 0,
1974: 0,
1975: 0,
1976: /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1977: };
1979: PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1980: {
1981: Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1983: PetscInt i,mbs,Mbs;
1984: PetscMPIInt size;
1987: MatSetBlockSize(B,PetscAbs(bs));
1988: PetscLayoutSetUp(B->rmap);
1989: PetscLayoutSetUp(B->cmap);
1990: PetscLayoutGetBlockSize(B->rmap,&bs);
1991: if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N);
1992: if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n);
1994: mbs = B->rmap->n/bs;
1995: Mbs = B->rmap->N/bs;
1996: if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No. of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1998: B->rmap->bs = bs;
1999: b->bs2 = bs*bs;
2000: b->mbs = mbs;
2001: b->Mbs = Mbs;
2002: b->nbs = B->cmap->n/bs;
2003: b->Nbs = B->cmap->N/bs;
2005: for (i=0; i<=b->size; i++) {
2006: b->rangebs[i] = B->rmap->range[i]/bs;
2007: }
2008: b->rstartbs = B->rmap->rstart/bs;
2009: b->rendbs = B->rmap->rend/bs;
2011: b->cstartbs = B->cmap->rstart/bs;
2012: b->cendbs = B->cmap->rend/bs;
2014: #if defined(PETSC_USE_CTABLE)
2015: PetscTableDestroy(&b->colmap);
2016: #else
2017: PetscFree(b->colmap);
2018: #endif
2019: PetscFree(b->garray);
2020: VecDestroy(&b->lvec);
2021: VecScatterDestroy(&b->Mvctx);
2022: VecDestroy(&b->slvec0);
2023: VecDestroy(&b->slvec0b);
2024: VecDestroy(&b->slvec1);
2025: VecDestroy(&b->slvec1a);
2026: VecDestroy(&b->slvec1b);
2027: VecScatterDestroy(&b->sMvctx);
2029: /* Because b->B may have been resized we simply destroy it and create a new one each time */
2030: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
2031: MatDestroy(&b->B);
2032: MatCreate(PETSC_COMM_SELF,&b->B);
2033: MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);
2034: MatSetType(b->B,MATSEQBAIJ);
2035: PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);
2037: if (!B->preallocated) {
2038: MatCreate(PETSC_COMM_SELF,&b->A);
2039: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
2040: MatSetType(b->A,MATSEQSBAIJ);
2041: PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);
2042: MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);
2043: }
2045: MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);
2046: MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);
2048: B->preallocated = PETSC_TRUE;
2049: B->was_assembled = PETSC_FALSE;
2050: B->assembled = PETSC_FALSE;
2051: return(0);
2052: }
2054: PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2055: {
2056: PetscInt m,rstart,cend;
2057: PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0;
2058: const PetscInt *JJ =0;
2059: PetscScalar *values=0;
2060: PetscBool roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
2062: PetscBool nooffprocentries;
2065: if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2066: PetscLayoutSetBlockSize(B->rmap,bs);
2067: PetscLayoutSetBlockSize(B->cmap,bs);
2068: PetscLayoutSetUp(B->rmap);
2069: PetscLayoutSetUp(B->cmap);
2070: PetscLayoutGetBlockSize(B->rmap,&bs);
2071: m = B->rmap->n/bs;
2072: rstart = B->rmap->rstart/bs;
2073: cend = B->cmap->rend/bs;
2075: if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2076: PetscMalloc2(m,&d_nnz,m,&o_nnz);
2077: for (i=0; i<m; i++) {
2078: nz = ii[i+1] - ii[i];
2079: if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2080: /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2081: JJ = jj + ii[i];
2082: bd = 0;
2083: for (j=0; j<nz; j++) {
2084: if (*JJ >= i + rstart) break;
2085: JJ++;
2086: bd++;
2087: }
2088: d = 0;
2089: for (; j<nz; j++) {
2090: if (*JJ++ >= cend) break;
2091: d++;
2092: }
2093: d_nnz[i] = d;
2094: o_nnz[i] = nz - d - bd;
2095: nz = nz - bd;
2096: nz_max = PetscMax(nz_max,nz);
2097: }
2098: MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);
2099: MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);
2100: PetscFree2(d_nnz,o_nnz);
2102: values = (PetscScalar*)V;
2103: if (!values) {
2104: PetscCalloc1(bs*bs*nz_max,&values);
2105: }
2106: for (i=0; i<m; i++) {
2107: PetscInt row = i + rstart;
2108: PetscInt ncols = ii[i+1] - ii[i];
2109: const PetscInt *icols = jj + ii[i];
2110: if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2111: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2112: MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);
2113: } else { /* block ordering does not match so we can only insert one block at a time. */
2114: PetscInt j;
2115: for (j=0; j<ncols; j++) {
2116: const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2117: MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);
2118: }
2119: }
2120: }
2122: if (!V) { PetscFree(values); }
2123: nooffprocentries = B->nooffprocentries;
2124: B->nooffprocentries = PETSC_TRUE;
2125: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2126: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2127: B->nooffprocentries = nooffprocentries;
2129: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2130: return(0);
2131: }
2133: /*MC
2134: MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2135: based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of
2136: the matrix is stored.
2138: For complex numbers, by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric
2139: call MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);
2141: Options Database Keys:
2142: . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2144: Notes:
2145: The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
2146: diagonal portion of the matrix on each process must be less than or equal to the number of columns.
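
   Example Usage (a minimal sketch; the global size M, the block size, and the
   preallocation counts below are illustrative assumptions, not requirements):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);     /* square global matrix */
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,2,5,NULL,2,NULL);   /* bs = 2, rough per-block-row guesses */
   /* insert entries on or above the block diagonal, then assemble */
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve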
2148: Level: beginner
2150: .seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType
2151: M*/
2153: PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2154: {
2155: Mat_MPISBAIJ *b;
2157: PetscBool flg = PETSC_FALSE;
2160: PetscNewLog(B,&b);
2161: B->data = (void*)b;
2162: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2164: B->ops->destroy = MatDestroy_MPISBAIJ;
2165: B->ops->view = MatView_MPISBAIJ;
2166: B->assembled = PETSC_FALSE;
2167: B->insertmode = NOT_SET_VALUES;
2169: MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);
2170: MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);
2172: /* build local table of row and column ownerships */
2173: PetscMalloc1(b->size+2,&b->rangebs);
2175: /* build cache for off array entries formed */
2176: MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);
2178: b->donotstash = PETSC_FALSE;
2179: b->colmap = NULL;
2180: b->garray = NULL;
2181: b->roworiented = PETSC_TRUE;
2183: /* stuff used in block assembly */
2184: b->barray = 0;
2186: /* stuff used for matrix vector multiply */
2187: b->lvec = 0;
2188: b->Mvctx = 0;
2189: b->slvec0 = 0;
2190: b->slvec0b = 0;
2191: b->slvec1 = 0;
2192: b->slvec1a = 0;
2193: b->slvec1b = 0;
2194: b->sMvctx = 0;
2196: /* stuff for MatGetRow() */
2197: b->rowindices = 0;
2198: b->rowvalues = 0;
2199: b->getrowactive = PETSC_FALSE;
2201: /* hash table stuff */
2202: b->ht = 0;
2203: b->hd = 0;
2204: b->ht_size = 0;
2205: b->ht_flag = PETSC_FALSE;
2206: b->ht_fact = 0;
2207: b->ht_total_ct = 0;
2208: b->ht_insert_ct = 0;
2210: /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2211: b->ijonly = PETSC_FALSE;
2213: b->in_loc = 0;
2214: b->v_loc = 0;
2215: b->n_loc = 0;
2217: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);
2218: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);
2219: PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);
2220: PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);
2221: #if defined(PETSC_HAVE_ELEMENTAL)
2222: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);
2223: #endif
2224: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);
2225: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);
2227: B->symmetric = PETSC_TRUE;
2228: B->structurally_symmetric = PETSC_TRUE;
2229: B->symmetric_set = PETSC_TRUE;
2230: B->structurally_symmetric_set = PETSC_TRUE;
2231: B->symmetric_eternal = PETSC_TRUE;
2232: #if defined(PETSC_USE_COMPLEX)
2233: B->hermitian = PETSC_FALSE;
2234: B->hermitian_set = PETSC_FALSE;
2235: #else
2236: B->hermitian = PETSC_TRUE;
2237: B->hermitian_set = PETSC_TRUE;
2238: #endif
2240: PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);
2241: PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix","Mat");
2242: PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);
2243: if (flg) {
2244: PetscReal fact = 1.39;
2245: MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);
2246: PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);
2247: if (fact <= 1.0) fact = 1.39;
2248: MatMPIBAIJSetHashTableFactor(B,fact);
2249: PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);
2250: }
2251: PetscOptionsEnd();
2252: return(0);
2253: }
2255: /*MC
2256: MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2258: This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2259: and MATMPISBAIJ otherwise.
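
   Example Usage (a minimal sketch; comm and M are assumptions supplied by the caller):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);
   MatSetType(A,MATSBAIJ);   /* resolves to MATSEQSBAIJ or MATMPISBAIJ based on the communicator size */
   MatSetUp(A);
.ve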
2261: Options Database Keys:
2262: . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2264: Level: beginner
2266: .seealso: MatCreateSBAIJ(), MATSEQSBAIJ, MATMPISBAIJ
2267: M*/
2269: /*@C
2270: MatMPISBAIJSetPreallocation - For good matrix assembly performance
2271: the user should preallocate the matrix storage by setting the parameters
2272: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2273: performance can be increased by more than a factor of 50.
2275: Collective on Mat
2277: Input Parameters:
2278: + B - the matrix
2279: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2280: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2281: . d_nz - number of block nonzeros per block row in diagonal portion of local
2282: submatrix (same for all local rows)
2283: . d_nnz - array containing the number of block nonzeros in the various block rows
2284: in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2285: (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room
2286: for the diagonal entry and set a value even if it is zero.
2287: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2288: submatrix (same for all local rows).
2289: - o_nnz - array containing the number of nonzeros in the various block rows of the
2290: off-diagonal portion of the local submatrix that is right of the diagonal
2291: (possibly different for each block row) or NULL.
2294: Options Database Keys:
2295: + -mat_no_unroll - uses code that does not unroll the loops in the
2296: block calculations (much slower)
2297: - -mat_block_size - size of the blocks to use
2299: Notes:
2301: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2302: then it must be used on all processors that share the object for that argument.
2304: If the *_nnz parameter is given then the *_nz parameter is ignored.
2306: Storage Information:
2307: For a square global matrix we define each processor's diagonal portion
2308: to be its local rows and the corresponding columns (a square submatrix);
2309: each processor's off-diagonal portion encompasses the remainder of the
2310: local matrix (a rectangular submatrix).
2312: The user can specify preallocated storage for the diagonal part of
2313: the local submatrix with either d_nz or d_nnz (not both). Set
2314: d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2315: memory allocation. Likewise, specify preallocated storage for the
2316: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2318: You can call MatGetInfo() to get information on how effective the preallocation was;
2319: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2320: You can also run with the option -info and look for messages with the string
2321: malloc in them to see if additional memory allocation was needed.
2323: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2324: the figure below we depict these three local rows and all columns (0-11).
2326: .vb
2327: 0 1 2 3 4 5 6 7 8 9 10 11
2328: --------------------------
2329: row 3 |. . . d d d o o o o o o
2330: row 4 |. . . d d d o o o o o o
2331: row 5 |. . . d d d o o o o o o
2332: --------------------------
2333: .ve
2335: Thus, any entries in the d locations are stored in the d (diagonal)
2336: submatrix, and any entries in the o locations are stored in the
2337: o (off-diagonal) submatrix. Note that the d matrix is stored in
2338: MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2340: Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2341: plus the diagonal part of the d matrix,
2342: and o_nz should indicate the number of block nonzeros per row in the o matrix.
2344: In general, for PDE problems in which most nonzeros are near the diagonal,
2345: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2346: or you will get TERRIBLE performance; see the users' manual chapter on
2347: matrices.
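
   As a sketch, per-block-row preallocation might look as follows (the block size and
   the counts stored in d_nnz/o_nnz here are illustrative assumptions; remember to count
   only the diagonal block and the blocks to its right in each block row):
.vb
   PetscInt bs = 2, m, mbs, i, *d_nnz, *o_nnz;
   MatGetLocalSize(B,&m,NULL);
   mbs = m/bs;
   PetscMalloc2(mbs,&d_nnz,mbs,&o_nnz);
   for (i=0; i<mbs; i++) { d_nnz[i] = 3; o_nnz[i] = 1; }   /* upper triangular counts only */
   MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);
   PetscFree2(d_nnz,o_nnz);
.ve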
2349: Level: intermediate
2351: .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2352: @*/
2353: PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2354: {
2361: PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
2362: return(0);
2363: }
2365: /*@C
2366: MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2367: (block compressed row). For good matrix assembly performance
2368: the user should preallocate the matrix storage by setting the parameters
2369: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2370: performance can be increased by more than a factor of 50.
2372: Collective
2374: Input Parameters:
2375: + comm - MPI communicator
2376: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2377: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2378: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2379: This value should be the same as the local size used in creating the
2380: y vector for the matrix-vector product y = Ax.
2381: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2382: This value should be the same as the local size used in creating the
2383: x vector for the matrix-vector product y = Ax.
2384: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2385: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2386: . d_nz - number of block nonzeros per block row in diagonal portion of local
2387: submatrix (same for all local rows)
2388: . d_nnz - array containing the number of block nonzeros in the various block rows
2389: in the upper triangular portion of the diagonal portion of the local submatrix
2390: (possibly different for each block row) or NULL.
2391: If you plan to factor the matrix you must leave room for the diagonal entry and
2392: set its value even if it is zero.
2393: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2394: submatrix (same for all local rows).
2395: - o_nnz - array containing the number of nonzeros in the various block rows of the
2396: off-diagonal portion of the local submatrix (possibly different for
2397: each block row) or NULL.
2399: Output Parameter:
2400: . A - the matrix
2402: Options Database Keys:
2403: + -mat_no_unroll - uses code that does not unroll the loops in the
2404: block calculations (much slower)
2405: . -mat_block_size - size of the blocks to use
2406: - -mat_mpi - use the parallel matrix data structures even on one processor
2407: (defaults to using SeqSBAIJ format on one processor)
2409: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2410: MatXXXXSetPreallocation() paradigm instead of this routine directly.
2411: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2413: Notes:
2414: The number of rows and columns must be divisible by blocksize.
2415: This matrix type does not support complex Hermitian operations.
2417: The user MUST specify either the local or global matrix dimensions
2418: (possibly both).
2420: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2421: then it must be used on all processors that share the object for that argument.
2423: If the *_nnz parameter is given then the *_nz parameter is ignored.
2425: Storage Information:
2426: For a square global matrix we define each processor's diagonal portion
2427: to be its local rows and the corresponding columns (a square submatrix);
2428: each processor's off-diagonal portion encompasses the remainder of the
2429: local matrix (a rectangular submatrix).
2431: The user can specify preallocated storage for the diagonal part of
2432: the local submatrix with either d_nz or d_nnz (not both). Set
2433: d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2434: memory allocation. Likewise, specify preallocated storage for the
2435: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2437: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2438: the figure below we depict these three local rows and all columns (0-11).
2440: .vb
2441: 0 1 2 3 4 5 6 7 8 9 10 11
2442: --------------------------
2443: row 3 |. . . d d d o o o o o o
2444: row 4 |. . . d d d o o o o o o
2445: row 5 |. . . d d d o o o o o o
2446: --------------------------
2447: .ve
2449: Thus, any entries in the d locations are stored in the d (diagonal)
2450: submatrix, and any entries in the o locations are stored in the
2451: o (off-diagonal) submatrix. Note that the d matrix is stored in
2452: MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2454: Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2455: plus the diagonal part of the d matrix,
2456: and o_nz should indicate the number of block nonzeros per row in the o matrix.
2457: In general, for PDE problems in which most nonzeros are near the diagonal,
2458: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2459: or you will get TERRIBLE performance; see the users' manual chapter on
2460: matrices.
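
   Example Usage (a minimal sketch; the global size M and the uniform per-block-row
   counts d_nz = 3 and o_nz = 1 are illustrative assumptions):
.vb
   Mat A;
   MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,3,NULL,1,NULL,&A);
   /* insert entries on or above the block diagonal with MatSetValuesBlocked(), then assemble */
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve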
2462: Level: intermediate
2464: .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2465: @*/
2467: PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2468: {
2470: PetscMPIInt size;
2473: MatCreate(comm,A);
2474: MatSetSizes(*A,m,n,M,N);
2475: MPI_Comm_size(comm,&size);
2476: if (size > 1) {
2477: MatSetType(*A,MATMPISBAIJ);
2478: MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
2479: } else {
2480: MatSetType(*A,MATSEQSBAIJ);
2481: MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
2482: }
2483: return(0);
2484: }
2487: static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2488: {
2489: Mat mat;
2490: Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2492: PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2493: PetscScalar *array;
2496: *newmat = 0;
2498: MatCreate(PetscObjectComm((PetscObject)matin),&mat);
2499: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
2500: MatSetType(mat,((PetscObject)matin)->type_name);
2501: PetscLayoutReference(matin->rmap,&mat->rmap);
2502: PetscLayoutReference(matin->cmap,&mat->cmap);
2504: mat->factortype = matin->factortype;
2505: mat->preallocated = PETSC_TRUE;
2506: mat->assembled = PETSC_TRUE;
2507: mat->insertmode = NOT_SET_VALUES;
2509: a = (Mat_MPISBAIJ*)mat->data;
2510: a->bs2 = oldmat->bs2;
2511: a->mbs = oldmat->mbs;
2512: a->nbs = oldmat->nbs;
2513: a->Mbs = oldmat->Mbs;
2514: a->Nbs = oldmat->Nbs;
2516: a->size = oldmat->size;
2517: a->rank = oldmat->rank;
2518: a->donotstash = oldmat->donotstash;
2519: a->roworiented = oldmat->roworiented;
2520: a->rowindices = 0;
2521: a->rowvalues = 0;
2522: a->getrowactive = PETSC_FALSE;
2523: a->barray = 0;
2524: a->rstartbs = oldmat->rstartbs;
2525: a->rendbs = oldmat->rendbs;
2526: a->cstartbs = oldmat->cstartbs;
2527: a->cendbs = oldmat->cendbs;
2529: /* hash table stuff */
2530: a->ht = 0;
2531: a->hd = 0;
2532: a->ht_size = 0;
2533: a->ht_flag = oldmat->ht_flag;
2534: a->ht_fact = oldmat->ht_fact;
2535: a->ht_total_ct = 0;
2536: a->ht_insert_ct = 0;
2538: PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);
2539: if (oldmat->colmap) {
2540: #if defined(PETSC_USE_CTABLE)
2541: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
2542: #else
2543: PetscMalloc1(a->Nbs,&a->colmap);
2544: PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));
2545: PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);
2546: #endif
2547: } else a->colmap = 0;
2549: if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2550: PetscMalloc1(len,&a->garray);
2551: PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));
2552: PetscArraycpy(a->garray,oldmat->garray,len);
2553: } else a->garray = 0;
2555: MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);
2556: VecDuplicate(oldmat->lvec,&a->lvec);
2557: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);
2558: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
2559: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);
2561: VecDuplicate(oldmat->slvec0,&a->slvec0);
2562: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);
2563: VecDuplicate(oldmat->slvec1,&a->slvec1);
2564: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);
2566: VecGetLocalSize(a->slvec1,&nt);
2567: VecGetArray(a->slvec1,&array);
2568: VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);
2569: VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);
2570: VecRestoreArray(a->slvec1,&array);
2571: VecGetArray(a->slvec0,&array);
2572: VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);
2573: VecRestoreArray(a->slvec0,&array);
2574: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);
2575: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);
2576: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);
2577: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);
2578: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);
2580: /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2581: PetscObjectReference((PetscObject)oldmat->sMvctx);
2582: a->sMvctx = oldmat->sMvctx;
2583: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);
2585: MatDuplicate(oldmat->A,cpvalues,&a->A);
2586: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
2587: MatDuplicate(oldmat->B,cpvalues,&a->B);
2588: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);
2589: PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
2590: *newmat = mat;
2591: return(0);
2592: }
2594: /* Used for both MPIBAIJ and MPISBAIJ matrices */
2595: #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2597: PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
2598: {
2600: PetscBool isbinary;
2603: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
2604: if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
2605: MatLoad_MPISBAIJ_Binary(mat,viewer);
2606: return(0);
2607: }
2609: /*XXXXX@
2610: MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2612: Input Parameters:
2613: . mat - the matrix
2614: . fact - factor
2616: Not Collective on Mat, each process can have a different hash factor
2618: Level: advanced
2620: Notes:
2621: This can also be set by the command line option: -mat_use_hash_table fact
2623: .seealso: MatSetOption()
2624: @XXXXX*/
2627: PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2628: {
2629: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2630: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data;
2631: PetscReal atmp;
2632: PetscReal *work,*svalues,*rvalues;
2634: PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2635: PetscMPIInt rank,size;
2636: PetscInt *rowners_bs,dest,count,source;
2637: PetscScalar *va;
2638: MatScalar *ba;
2639: MPI_Status stat;
2642: if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2643: MatGetRowMaxAbs(a->A,v,NULL);
2644: VecGetArray(v,&va);
2646: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
2647: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
2649: bs = A->rmap->bs;
2650: mbs = a->mbs;
2651: Mbs = a->Mbs;
2652: ba = b->a;
2653: bi = b->i;
2654: bj = b->j;
2656: /* find ownerships */
2657: rowners_bs = A->rmap->range;
2659: /* each proc creates an array to be distributed */
2660: PetscCalloc1(bs*Mbs,&work);
2662: /* row_max for B */
2663: if (rank != size-1) {
2664: for (i=0; i<mbs; i++) {
2665: ncols = bi[1] - bi[0]; bi++;
2666: brow = bs*i;
2667: for (j=0; j<ncols; j++) {
2668: bcol = bs*(*bj);
2669: for (kcol=0; kcol<bs; kcol++) {
2670: col = bcol + kcol; /* local col index */
2671: col += rowners_bs[rank+1]; /* global col index */
2672: for (krow=0; krow<bs; krow++) {
2673: atmp = PetscAbsScalar(*ba); ba++;
2674: row = brow + krow; /* local row index */
2675: if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2676: if (work[col] < atmp) work[col] = atmp;
2677: }
2678: }
2679: bj++;
2680: }
2681: }
2683: /* send values to its owners */
2684: for (dest=rank+1; dest<size; dest++) {
2685: svalues = work + rowners_bs[dest];
2686: count = rowners_bs[dest+1]-rowners_bs[dest];
2687: MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));
2688: }
2689: }
2691: /* receive values */
2692: if (rank) {
2693: rvalues = work;
2694: count = rowners_bs[rank+1]-rowners_bs[rank];
2695: for (source=0; source<rank; source++) {
2696: MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);
2697: /* process values */
2698: for (i=0; i<count; i++) {
2699: if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2700: }
2701: }
2702: }
2704: VecRestoreArray(v,&va);
2705: PetscFree(work);
2706: return(0);
2707: }
2709: PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2710: {
2711: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
2712: PetscErrorCode ierr;
2713: PetscInt mbs=mat->mbs,bs=matin->rmap->bs;
2714: PetscScalar *x,*ptr,*from;
2715: Vec bb1;
2716: const PetscScalar *b;
2719: if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2720: if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2722: if (flag == SOR_APPLY_UPPER) {
2723: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2724: return(0);
2725: }
2727: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2728: if (flag & SOR_ZERO_INITIAL_GUESS) {
2729: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
2730: its--;
2731: }
2733: VecDuplicate(bb,&bb1);
2734: while (its--) {
2736: /* lower triangular part: slvec0b = - B^T*xx */
2737: (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);
2739: /* copy xx into slvec0a */
2740: VecGetArray(mat->slvec0,&ptr);
2741: VecGetArray(xx,&x);
2742: PetscArraycpy(ptr,x,bs*mbs);
2743: VecRestoreArray(mat->slvec0,&ptr);
2745: VecScale(mat->slvec0,-1.0);
2747: /* copy bb into slvec1a */
2748: VecGetArray(mat->slvec1,&ptr);
2749: VecGetArrayRead(bb,&b);
2750: PetscArraycpy(ptr,b,bs*mbs);
2751: VecRestoreArray(mat->slvec1,&ptr);
2753: /* set slvec1b = 0 */
2754: VecSet(mat->slvec1b,0.0);
2756: VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
2757: VecRestoreArray(xx,&x);
2758: VecRestoreArrayRead(bb,&b);
2759: VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
2761: /* upper triangular part: bb1 = bb1 - B*x */
2762: (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);
2764: /* local diagonal sweep */
2765: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
2766: }
2767: VecDestroy(&bb1);
2768: } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2769: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2770: } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2771: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2772: } else if (flag & SOR_EISENSTAT) {
2773: Vec xx1;
2774: PetscBool hasop;
2775: const PetscScalar *diag;
2776: PetscScalar *sl,scale = (omega - 2.0)/omega;
2777: PetscInt i,n;
2779: if (!mat->xx1) {
2780: VecDuplicate(bb,&mat->xx1);
2781: VecDuplicate(bb,&mat->bb1);
2782: }
2783: xx1 = mat->xx1;
2784: bb1 = mat->bb1;
2786: (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);
2788: if (!mat->diag) {
2789: /* this is wrong for same matrix with new nonzero values */
2790: MatCreateVecs(matin,&mat->diag,NULL);
2791: MatGetDiagonal(matin,mat->diag);
2792: }
2793: MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);
2795: if (hasop) {
2796: MatMultDiagonalBlock(matin,xx,bb1);
2797: VecAYPX(mat->slvec1a,scale,bb);
2798: } else {
2799: /*
2800: These two lines are replaced by code that may be a bit faster for a good compiler
2801: VecPointwiseMult(mat->slvec1a,mat->diag,xx);
2802: VecAYPX(mat->slvec1a,scale,bb);
2803: */
2804: VecGetArray(mat->slvec1a,&sl);
2805: VecGetArrayRead(mat->diag,&diag);
2806: VecGetArrayRead(bb,&b);
2807: VecGetArray(xx,&x);
2808: VecGetLocalSize(xx,&n);
2809: if (omega == 1.0) {
2810: for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2811: PetscLogFlops(2.0*n);
2812: } else {
2813: for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2814: PetscLogFlops(3.0*n);
2815: }
2816: VecRestoreArray(mat->slvec1a,&sl);
2817: VecRestoreArrayRead(mat->diag,&diag);
2818: VecRestoreArrayRead(bb,&b);
2819: VecRestoreArray(xx,&x);
2820: }
2822: /* multiply off-diagonal portion of matrix */
2823: VecSet(mat->slvec1b,0.0);
2824: (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);
2825: VecGetArray(mat->slvec0,&from);
2826: VecGetArray(xx,&x);
2827: PetscArraycpy(from,x,bs*mbs);
2828: VecRestoreArray(mat->slvec0,&from);
2829: VecRestoreArray(xx,&x);
2830: VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
2831: VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
2832: (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);
2834: /* local sweep */
2835: (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);
2836: VecAXPY(xx,1.0,xx1);
2837: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2838: return(0);
2839: }
2841: PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2842: {
2843: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
2845: Vec lvec1,bb1;
2848: if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2849: if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2851: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2852: if (flag & SOR_ZERO_INITIAL_GUESS) {
2853: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
2854: its--;
2855: }
2857: VecDuplicate(mat->lvec,&lvec1);
2858: VecDuplicate(bb,&bb1);
2859: while (its--) {
2860: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2862: /* lower diagonal part: bb1 = bb - B^T*xx */
2863: (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);
2864: VecScale(lvec1,-1.0);
2866: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2867: VecCopy(bb,bb1);
2868: VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);
2870: /* upper diagonal part: bb1 = bb1 - B*x */
2871: VecScale(mat->lvec,-1.0);
2872: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);
2874: VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);
2876: /* diagonal sweep */
2877: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
2878: }
2879: VecDestroy(&lvec1);
2880: VecDestroy(&bb1);
2881: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2882: return(0);
2883: }
2885: /*@
2886: MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
2887: in standard CSR format.
2889: Collective
2891: Input Parameters:
2892: + comm - MPI communicator
2893: . bs - the block size, only a block size of 1 is supported
2894: . m - number of local rows (Cannot be PETSC_DECIDE)
2895: . n - This value should be the same as the local size used in creating the
2896: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2897: calculated if N is given) For square matrices n is almost always m.
2898: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2899: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2900: . i - row indices; that is, i[0] = 0 and i[row] = i[row-1] + number of block elements in that block row of the matrix
2901: . j - column indices
2902: - a - matrix values
2904: Output Parameter:
2905: . mat - the matrix
2907: Level: intermediate
2909: Notes:
2910: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2911: thus you CANNOT change the matrix entries by changing the values of a[] after you have
2912: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2914: The i and j indices are 0-based, and the i indices are offsets into the local j array.
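
   A uniprocessor sketch for clarity (bs = 1 as required; the 2x2 data below is an
   illustrative assumption, and only entries on or above the diagonal are supplied):
.vb
   PetscInt    i[] = {0,2,3};           /* row 0 has two entries, row 1 has one */
   PetscInt    j[] = {0,1,1};           /* global column indices, sorted within each row */
   PetscScalar a[] = {4.0,-1.0,4.0};
   Mat         A;
   MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve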
2916: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2917: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2918: @*/
2919: PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2920: {
2925: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2926: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2927: MatCreate(comm,mat);
2928: MatSetSizes(*mat,m,n,M,N);
2929: MatSetType(*mat,MATMPISBAIJ);
2930: MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);
2931: return(0);
2932: }
2935: /*@C
2936: MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2938: Collective
2940: Input Parameters:
2941: + B - the matrix
2942: . bs - the block size
2943: . i - the indices into j for the start of each local row (starts with zero)
2944: . j - the column indices for each local row (starts with zero) these must be sorted for each row
2945: - v - optional values in the matrix
2947: Level: advanced
2949: Notes:
2950: Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2951: and usually the numerical values as well
2953: Any entries below the diagonal are ignored.
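
   A minimal sketch (B and bs come from the caller; the CSR data below describes
   two local block rows and is an illustrative assumption):
.vb
   PetscInt    i[] = {0,2,3};
   PetscInt    j[] = {0,1,1};           /* global block column indices, sorted within each row */
   PetscScalar v[] = {4.0,-1.0,4.0};
   MatMPISBAIJSetPreallocationCSR(B,1,i,j,v);
.ve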
2955: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2956: @*/
2957: PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2958: {
2962: PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
2963: return(0);
2964: }
2966: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2967: {
2969: PetscInt m,N,i,rstart,nnz,Ii,bs,cbs;
2970: PetscInt *indx;
2971: PetscScalar *values;
2974: MatGetSize(inmat,&m,&N);
2975: if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2976: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
2977: PetscInt *dnz,*onz,mbs,Nbs,nbs;
2978: PetscInt *bindx,rmax=a->rmax,j;
2979: PetscMPIInt rank,size;
2981: MatGetBlockSizes(inmat,&bs,&cbs);
2982: mbs = m/bs; Nbs = N/cbs;
2983: if (n == PETSC_DECIDE) {
2984: PetscSplitOwnershipBlock(comm,cbs,&n,&N);
2985: }
2986: nbs = n/cbs;
2988: PetscMalloc1(rmax,&bindx);
2989: MatPreallocateInitialize(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */
2991: MPI_Comm_rank(comm,&rank);
2992: MPI_Comm_size(comm,&size);
2993: if (rank == size-1) {
2994: /* Check sum(nbs) = Nbs */
2995: if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
2996: }
2998: rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
2999: MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);
3000: for (i=0; i<mbs; i++) {
3001: MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL); /* non-blocked nnz and indx */
3002: nnz = nnz/bs;
3003: for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3004: MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);
3005: MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);
3006: }
3007: MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);
3008: PetscFree(bindx);
3010: MatCreate(comm,outmat);
3011: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
3012: MatSetBlockSizes(*outmat,bs,cbs);
3013: MatSetType(*outmat,MATSBAIJ);
3014: MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);
3015: MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);
3016: MatPreallocateFinalize(dnz,onz);
3017: }
3019: /* numeric phase */
3020: MatGetBlockSizes(inmat,&bs,&cbs);
3021: MatGetOwnershipRange(*outmat,&rstart,NULL);
3023: MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);
3024: for (i=0; i<m; i++) {
3025: MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);
3026: Ii = i + rstart;
3027: MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
3028: MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);
3029: }
3030: MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);
3031: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
3032: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
3033: return(0);
3034: }