Actual source code: mpisbaij.c
petsc-3.12.5 2020-03-29
2: #include <../src/mat/impls/baij/mpi/mpibaij.h>
3: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4: #include <../src/mat/impls/sbaij/seq/sbaij.h>
5: #include <petscblaslapack.h>
7: #if defined(PETSC_HAVE_ELEMENTAL)
8: PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9: #endif
11: /* This could be moved to matimpl.h */
12: static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
13: {
14: Mat preallocator;
15: PetscInt r,rstart,rend;
16: PetscInt bs,i,m,n,M,N;
17: PetscBool cong = PETSC_TRUE;
23: for (i = 0; i < nm; i++) {
25: PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);
26: if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
27: }
29: MatGetBlockSize(B,&bs);
30: MatGetSize(B,&M,&N);
31: MatGetLocalSize(B,&m,&n);
32: MatCreate(PetscObjectComm((PetscObject)B),&preallocator);
33: MatSetType(preallocator,MATPREALLOCATOR);
34: MatSetBlockSize(preallocator,bs);
35: MatSetSizes(preallocator,m,n,M,N);
36: MatSetUp(preallocator);
37: MatGetOwnershipRange(preallocator,&rstart,&rend);
38: for (r = rstart; r < rend; ++r) {
39: PetscInt ncols;
40: const PetscInt *row;
41: const PetscScalar *vals;
43: for (i = 0; i < nm; i++) {
44: MatGetRow(X[i],r,&ncols,&row,&vals);
45: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
46: if (symm && symm[i]) {
47: MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);
48: }
49: MatRestoreRow(X[i],r,&ncols,&row,&vals);
50: }
51: }
52: MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
53: MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);
54: MatPreallocatorPreallocate(preallocator,fill,B);
55: MatDestroy(&preallocator);
56: return(0);
57: }
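/*
   The helper above follows the usual MATPREALLOCATOR idiom.  A minimal caller-side
   sketch of the same idiom (B, m, n, M, N and the insertion loop are placeholders,
   not names used elsewhere in this file):

     Mat pre;
     MatCreate(PetscObjectComm((PetscObject)B),&pre);
     MatSetType(pre,MATPREALLOCATOR);
     MatSetSizes(pre,m,n,M,N);
     MatSetUp(pre);
     ... call MatSetValues(pre,...) with the intended nonzero pattern ...
     MatAssemblyBegin(pre,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(pre,MAT_FINAL_ASSEMBLY);
     MatPreallocatorPreallocate(pre,PETSC_TRUE,B);    preallocate B; PETSC_TRUE also fills it with zeros
     MatDestroy(&pre);
*/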
59: PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
60: {
61: Mat B;
63: PetscInt r;
66: if (reuse != MAT_REUSE_MATRIX) {
67: PetscBool symm = PETSC_TRUE,isdense;
68: PetscInt bs;
70: MatCreate(PetscObjectComm((PetscObject)A),&B);
71: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
72: MatSetType(B,newtype);
73: MatGetBlockSize(A,&bs);
74: MatSetBlockSize(B,bs);
75: PetscLayoutSetUp(B->rmap);
76: PetscLayoutSetUp(B->cmap);
77: PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");
78: if (!isdense) {
79: MatGetRowUpperTriangular(A);
80: MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);
81: MatRestoreRowUpperTriangular(A);
82: } else {
83: MatSetUp(B);
84: }
85: } else {
86: B = *newmat;
87: MatZeroEntries(B);
88: }
90: MatGetRowUpperTriangular(A);
91: for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
92: PetscInt ncols;
93: const PetscInt *row;
94: const PetscScalar *vals;
96: MatGetRow(A,r,&ncols,&row,&vals);
97: MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);
98: MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);
99: MatRestoreRow(A,r,&ncols,&row,&vals);
100: }
101: MatRestoreRowUpperTriangular(A);
102: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
103: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
105: if (reuse == MAT_INPLACE_MATRIX) {
106: MatHeaderReplace(A,&B);
107: } else {
108: *newmat = B;
109: }
110: return(0);
111: }
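/*
   Typical caller-side use of this basic conversion path (the target type below is
   only illustrative; any type that supports MatSetValues() works):

     Mat B;
     MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);    the stored upper triangle is expanded into a full matrix
     ...
     MatDestroy(&B);

   With MAT_INPLACE_MATRIX the converted matrix replaces A through MatHeaderReplace().
*/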
113: PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
114: {
115: Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;
119: MatStoreValues(aij->A);
120: MatStoreValues(aij->B);
121: return(0);
122: }
124: PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
125: {
126: Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;
130: MatRetrieveValues(aij->A);
131: MatRetrieveValues(aij->B);
132: return(0);
133: }
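/*
   The two macros below insert one scalar at local (row,col) into the diagonal part
   (A, SeqSBAIJ) or the off-diagonal part (B, SeqBAIJ): a short binary search followed
   by a linear scan locates the block column inside the block row; if the block is not
   present a new zeroed bs-by-bs block is inserted, unless nonew == 1 (value silently
   dropped) or nonew == -1 (error).
*/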
135: #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
136: { \
137: brow = row/bs; \
138: rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
139: rmax = aimax[brow]; nrow = ailen[brow]; \
140: bcol = col/bs; \
141: ridx = row % bs; cidx = col % bs; \
142: low = 0; high = nrow; \
143: while (high-low > 3) { \
144: t = (low+high)/2; \
145: if (rp[t] > bcol) high = t; \
146: else low = t; \
147: } \
148: for (_i=low; _i<high; _i++) { \
149: if (rp[_i] > bcol) break; \
150: if (rp[_i] == bcol) { \
151: bap = ap + bs2*_i + bs*cidx + ridx; \
152: if (addv == ADD_VALUES) *bap += value; \
153: else *bap = value; \
154: goto a_noinsert; \
155: } \
156: } \
157: if (a->nonew == 1) goto a_noinsert; \
158: if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
159: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
160: N = nrow++ - 1; \
161: /* shift up all the later entries in this row */ \
162: PetscArraymove(rp+_i+1,rp+_i,N-_i+1); \
163: PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1)); \
164: PetscArrayzero(ap+bs2*_i,bs2); \
165: rp[_i] = bcol; \
166: ap[bs2*_i + bs*cidx + ridx] = value; \
167: A->nonzerostate++;\
168: a_noinsert:; \
169: ailen[brow] = nrow; \
170: }
172: #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
173: { \
174: brow = row/bs; \
175: rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
176: rmax = bimax[brow]; nrow = bilen[brow]; \
177: bcol = col/bs; \
178: ridx = row % bs; cidx = col % bs; \
179: low = 0; high = nrow; \
180: while (high-low > 3) { \
181: t = (low+high)/2; \
182: if (rp[t] > bcol) high = t; \
183: else low = t; \
184: } \
185: for (_i=low; _i<high; _i++) { \
186: if (rp[_i] > bcol) break; \
187: if (rp[_i] == bcol) { \
188: bap = ap + bs2*_i + bs*cidx + ridx; \
189: if (addv == ADD_VALUES) *bap += value; \
190: else *bap = value; \
191: goto b_noinsert; \
192: } \
193: } \
194: if (b->nonew == 1) goto b_noinsert; \
195: if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
196: MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
197: N = nrow++ - 1; \
198: /* shift up all the later entries in this row */ \
199: PetscArraymove(rp+_i+1,rp+_i,N-_i+1); \
200: PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1)); \
201: PetscArrayzero(ap+bs2*_i,bs2); \
202: rp[_i] = bcol; \
203: ap[bs2*_i + bs*cidx + ridx] = value; \
204: B->nonzerostate++;\
205: b_noinsert:; \
206: bilen[brow] = nrow; \
207: }
209: /* Only add/insert a(i,j) with i<=j (blocks).
210:    Any a(i,j) with i>j input by the user is ignored or generates an error
211: */
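/*
   A minimal caller-side sketch (mat, N, rstart, rend and value are placeholders):
   insert only entries with j >= i, or request that lower-triangular input be dropped
   silently instead of raising an error:

     MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);
     MatGetOwnershipRange(mat,&rstart,&rend);
     for (i=rstart; i<rend; i++) {
       for (j=i; j<N; j++) {
         MatSetValue(mat,i,j,value,INSERT_VALUES);
       }
     }
     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/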
212: PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
213: {
214: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
215: MatScalar value;
216: PetscBool roworiented = baij->roworiented;
218: PetscInt i,j,row,col;
219: PetscInt rstart_orig=mat->rmap->rstart;
220: PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
221: PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs;
223: /* Some Variables required in the macro */
224: Mat A = baij->A;
225: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
226: PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
227: MatScalar *aa =a->a;
229: Mat B = baij->B;
230: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
231: PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
232: MatScalar *ba =b->a;
234: PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol;
235: PetscInt low,high,t,ridx,cidx,bs2=a->bs2;
236: MatScalar *ap,*bap;
238: /* for stash */
239: PetscInt n_loc, *in_loc = NULL;
240: MatScalar *v_loc = NULL;
243: if (!baij->donotstash) {
244: if (n > baij->n_loc) {
245: PetscFree(baij->in_loc);
246: PetscFree(baij->v_loc);
247: PetscMalloc1(n,&baij->in_loc);
248: PetscMalloc1(n,&baij->v_loc);
250: baij->n_loc = n;
251: }
252: in_loc = baij->in_loc;
253: v_loc = baij->v_loc;
254: }
256: for (i=0; i<m; i++) {
257: if (im[i] < 0) continue;
258: #if defined(PETSC_USE_DEBUG)
259: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
260: #endif
261: if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
262: row = im[i] - rstart_orig; /* local row index */
263: for (j=0; j<n; j++) {
264: if (im[i]/bs > in[j]/bs) {
265: if (a->ignore_ltriangular) {
266: continue; /* ignore lower triangular blocks */
267:           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
268: }
269: if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
270: col = in[j] - cstart_orig; /* local col index */
271: brow = row/bs; bcol = col/bs;
272: if (brow > bcol) continue; /* ignore lower triangular blocks of A */
273: if (roworiented) value = v[i*n+j];
274: else value = v[i+j*m];
275: MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
276: /* MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv); */
277: } else if (in[j] < 0) continue;
278: #if defined(PETSC_USE_DEBUG)
279: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
280: #endif
281: else { /* off-diag entry (B) */
282: if (mat->was_assembled) {
283: if (!baij->colmap) {
284: MatCreateColmap_MPIBAIJ_Private(mat);
285: }
286: #if defined(PETSC_USE_CTABLE)
287: PetscTableFind(baij->colmap,in[j]/bs + 1,&col);
288: col = col - 1;
289: #else
290: col = baij->colmap[in[j]/bs] - 1;
291: #endif
292: if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
293: MatDisAssemble_MPISBAIJ(mat);
294: col = in[j];
295:             /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
296: B = baij->B;
297: b = (Mat_SeqBAIJ*)(B)->data;
298: bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
299: ba = b->a;
300: } else col += in[j]%bs;
301: } else col = in[j];
302: if (roworiented) value = v[i*n+j];
303: else value = v[i+j*m];
304: MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
305: /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
306: }
307: }
308: } else { /* off processor entry */
309: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
310: if (!baij->donotstash) {
311: mat->assembled = PETSC_FALSE;
312: n_loc = 0;
313: for (j=0; j<n; j++) {
314: if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
315: in_loc[n_loc] = in[j];
316: if (roworiented) {
317: v_loc[n_loc] = v[i*n+j];
318: } else {
319: v_loc[n_loc] = v[j*m+i];
320: }
321: n_loc++;
322: }
323: MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);
324: }
325: }
326: }
327: return(0);
328: }
330: PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
331: {
332: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
333: PetscErrorCode ierr;
334: PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N;
335: PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen;
336: PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
337: PetscBool roworiented=a->roworiented;
338: const PetscScalar *value = v;
339: MatScalar *ap,*aa = a->a,*bap;
342: if (col < row) {
343: if (a->ignore_ltriangular) return(0); /* ignore lower triangular block */
344:     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
345: }
346: rp = aj + ai[row];
347: ap = aa + bs2*ai[row];
348: rmax = imax[row];
349: nrow = ailen[row];
350: value = v;
351: low = 0;
352: high = nrow;
354: while (high-low > 7) {
355: t = (low+high)/2;
356: if (rp[t] > col) high = t;
357: else low = t;
358: }
359: for (i=low; i<high; i++) {
360: if (rp[i] > col) break;
361: if (rp[i] == col) {
362: bap = ap + bs2*i;
363: if (roworiented) {
364: if (is == ADD_VALUES) {
365: for (ii=0; ii<bs; ii++) {
366: for (jj=ii; jj<bs2; jj+=bs) {
367: bap[jj] += *value++;
368: }
369: }
370: } else {
371: for (ii=0; ii<bs; ii++) {
372: for (jj=ii; jj<bs2; jj+=bs) {
373: bap[jj] = *value++;
374: }
375: }
376: }
377: } else {
378: if (is == ADD_VALUES) {
379: for (ii=0; ii<bs; ii++) {
380: for (jj=0; jj<bs; jj++) {
381: *bap++ += *value++;
382: }
383: }
384: } else {
385: for (ii=0; ii<bs; ii++) {
386: for (jj=0; jj<bs; jj++) {
387: *bap++ = *value++;
388: }
389: }
390: }
391: }
392: goto noinsert2;
393: }
394: }
395: if (nonew == 1) goto noinsert2;
396: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
397: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
398: N = nrow++ - 1; high++;
399: /* shift up all the later entries in this row */
400: PetscArraymove(rp+i+1,rp+i,N-i+1);
401: PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
402: rp[i] = col;
403: bap = ap + bs2*i;
404: if (roworiented) {
405: for (ii=0; ii<bs; ii++) {
406: for (jj=ii; jj<bs2; jj+=bs) {
407: bap[jj] = *value++;
408: }
409: }
410: } else {
411: for (ii=0; ii<bs; ii++) {
412: for (jj=0; jj<bs; jj++) {
413: *bap++ = *value++;
414: }
415: }
416: }
417: noinsert2:;
418: ailen[row] = nrow;
419: return(0);
420: }
422: /*
423: This routine is exactly duplicated in mpibaij.c
424: */
425: PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
426: {
427: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
428: PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N;
429: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
430: PetscErrorCode ierr;
431: PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
432: PetscBool roworiented=a->roworiented;
433: const PetscScalar *value = v;
434: MatScalar *ap,*aa = a->a,*bap;
437: rp = aj + ai[row];
438: ap = aa + bs2*ai[row];
439: rmax = imax[row];
440: nrow = ailen[row];
441: low = 0;
442: high = nrow;
443: value = v;
444: while (high-low > 7) {
445: t = (low+high)/2;
446: if (rp[t] > col) high = t;
447: else low = t;
448: }
449: for (i=low; i<high; i++) {
450: if (rp[i] > col) break;
451: if (rp[i] == col) {
452: bap = ap + bs2*i;
453: if (roworiented) {
454: if (is == ADD_VALUES) {
455: for (ii=0; ii<bs; ii++) {
456: for (jj=ii; jj<bs2; jj+=bs) {
457: bap[jj] += *value++;
458: }
459: }
460: } else {
461: for (ii=0; ii<bs; ii++) {
462: for (jj=ii; jj<bs2; jj+=bs) {
463: bap[jj] = *value++;
464: }
465: }
466: }
467: } else {
468: if (is == ADD_VALUES) {
469: for (ii=0; ii<bs; ii++,value+=bs) {
470: for (jj=0; jj<bs; jj++) {
471: bap[jj] += value[jj];
472: }
473: bap += bs;
474: }
475: } else {
476: for (ii=0; ii<bs; ii++,value+=bs) {
477: for (jj=0; jj<bs; jj++) {
478: bap[jj] = value[jj];
479: }
480: bap += bs;
481: }
482: }
483: }
484: goto noinsert2;
485: }
486: }
487: if (nonew == 1) goto noinsert2;
488: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
489: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
490: N = nrow++ - 1; high++;
491: /* shift up all the later entries in this row */
492: PetscArraymove(rp+i+1,rp+i,N-i+1);
493: PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
494: rp[i] = col;
495: bap = ap + bs2*i;
496: if (roworiented) {
497: for (ii=0; ii<bs; ii++) {
498: for (jj=ii; jj<bs2; jj+=bs) {
499: bap[jj] = *value++;
500: }
501: }
502: } else {
503: for (ii=0; ii<bs; ii++) {
504: for (jj=0; jj<bs; jj++) {
505: *bap++ = *value++;
506: }
507: }
508: }
509: noinsert2:;
510: ailen[row] = nrow;
511: return(0);
512: }
514: /*
515: This routine could be optimized by removing the need for the block copy below and passing stride information
516: to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
517: */
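/*
   A minimal caller-side sketch of blocked insertion (bs, brow, bcol and vals are
   placeholders; vals holds one bs*bs block, row-oriented by default):

     PetscScalar vals[bs*bs];
     ... fill vals ...
     MatSetValuesBlocked(mat,1,&brow,1,&bcol,vals,INSERT_VALUES);    requires brow <= bcol

   Block indices are in units of the block size, i.e. block row brow covers global
   rows bs*brow .. bs*brow+bs-1.
*/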
518: PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
519: {
520: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
521: const MatScalar *value;
522: MatScalar *barray =baij->barray;
523: PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
524: PetscErrorCode ierr;
525: PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs;
526: PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval;
527: PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
530: if (!barray) {
531: PetscMalloc1(bs2,&barray);
532: baij->barray = barray;
533: }
535: if (roworiented) {
536: stepval = (n-1)*bs;
537: } else {
538: stepval = (m-1)*bs;
539: }
540: for (i=0; i<m; i++) {
541: if (im[i] < 0) continue;
542: #if defined(PETSC_USE_DEBUG)
543: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
544: #endif
545: if (im[i] >= rstart && im[i] < rend) {
546: row = im[i] - rstart;
547: for (j=0; j<n; j++) {
548: if (im[i] > in[j]) {
549: if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
550:         else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
551: }
552:       /* If a single block column is set per call (n == 1, row-oriented) or a single block row (m == 1, column-oriented), the input block can be used in place and no copy is required */
553: if ((roworiented) && (n == 1)) {
554: barray = (MatScalar*) v + i*bs2;
555: } else if ((!roworiented) && (m == 1)) {
556: barray = (MatScalar*) v + j*bs2;
557: } else { /* Here a copy is required */
558: if (roworiented) {
559: value = v + i*(stepval+bs)*bs + j*bs;
560: } else {
561: value = v + j*(stepval+bs)*bs + i*bs;
562: }
563: for (ii=0; ii<bs; ii++,value+=stepval) {
564: for (jj=0; jj<bs; jj++) {
565: *barray++ = *value++;
566: }
567: }
568: barray -=bs2;
569: }
571: if (in[j] >= cstart && in[j] < cend) {
572: col = in[j] - cstart;
573: MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);
574: } else if (in[j] < 0) continue;
575: #if defined(PETSC_USE_DEBUG)
576: else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
577: #endif
578: else {
579: if (mat->was_assembled) {
580: if (!baij->colmap) {
581: MatCreateColmap_MPIBAIJ_Private(mat);
582: }
584: #if defined(PETSC_USE_DEBUG)
585: #if defined(PETSC_USE_CTABLE)
586: { PetscInt data;
587: PetscTableFind(baij->colmap,in[j]+1,&data);
588: if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
589: }
590: #else
591: if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
592: #endif
593: #endif
594: #if defined(PETSC_USE_CTABLE)
595: PetscTableFind(baij->colmap,in[j]+1,&col);
596: col = (col - 1)/bs;
597: #else
598: col = (baij->colmap[in[j]] - 1)/bs;
599: #endif
600: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
601: MatDisAssemble_MPISBAIJ(mat);
602: col = in[j];
603: }
604: } else col = in[j];
605: MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);
606: }
607: }
608: } else {
609: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
610: if (!baij->donotstash) {
611: if (roworiented) {
612: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
613: } else {
614: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
615: }
616: }
617: }
618: }
619: return(0);
620: }
622: PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
623: {
624: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
626: PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
627: PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
630: for (i=0; i<m; i++) {
631: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
632: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
633: if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
634: row = idxm[i] - bsrstart;
635: for (j=0; j<n; j++) {
636: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
637: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
638: if (idxn[j] >= bscstart && idxn[j] < bscend) {
639: col = idxn[j] - bscstart;
640: MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);
641: } else {
642: if (!baij->colmap) {
643: MatCreateColmap_MPIBAIJ_Private(mat);
644: }
645: #if defined(PETSC_USE_CTABLE)
646: PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);
647: data--;
648: #else
649: data = baij->colmap[idxn[j]/bs]-1;
650: #endif
651: if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
652: else {
653: col = data + idxn[j]%bs;
654: MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);
655: }
656: }
657: }
658: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
659: }
660: return(0);
661: }
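/*
   Only locally owned rows may be queried.  A minimal caller-side sketch (i, j and
   val are placeholders):

     MatGetOwnershipRange(mat,&rstart,&rend);
     if (i >= rstart && i < rend) {
       MatGetValues(mat,1,&i,1,&j,&val);
     }
*/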
663: PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
664: {
665: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
667: PetscReal sum[2],*lnorm2;
670: if (baij->size == 1) {
671: MatNorm(baij->A,type,norm);
672: } else {
673: if (type == NORM_FROBENIUS) {
674: PetscMalloc1(2,&lnorm2);
675: MatNorm(baij->A,type,lnorm2);
676:       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
677: MatNorm(baij->B,type,lnorm2);
678:       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
679: MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
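      /* entries of the off-diagonal part B represent both (i,j) and (j,i) of the
         symmetric matrix, so their contribution to the Frobenius norm is counted twice */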
680: *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
681: PetscFree(lnorm2);
682: } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
683: Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
684: Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data;
685: PetscReal *rsum,*rsum2,vabs;
686: PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
687: PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
688: MatScalar *v;
690: PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);
691: PetscArrayzero(rsum,mat->cmap->N);
692: /* Amat */
693: v = amat->a; jj = amat->j;
694: for (brow=0; brow<mbs; brow++) {
695: grow = bs*(rstart + brow);
696: nz = amat->i[brow+1] - amat->i[brow];
697: for (bcol=0; bcol<nz; bcol++) {
698: gcol = bs*(rstart + *jj); jj++;
699: for (col=0; col<bs; col++) {
700: for (row=0; row<bs; row++) {
701: vabs = PetscAbsScalar(*v); v++;
702: rsum[gcol+col] += vabs;
703: /* non-diagonal block */
704: if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
705: }
706: }
707: }
708: PetscLogFlops(nz*bs*bs);
709: }
710: /* Bmat */
711: v = bmat->a; jj = bmat->j;
712: for (brow=0; brow<mbs; brow++) {
713: grow = bs*(rstart + brow);
714: nz = bmat->i[brow+1] - bmat->i[brow];
715: for (bcol=0; bcol<nz; bcol++) {
716: gcol = bs*garray[*jj]; jj++;
717: for (col=0; col<bs; col++) {
718: for (row=0; row<bs; row++) {
719: vabs = PetscAbsScalar(*v); v++;
720: rsum[gcol+col] += vabs;
721: rsum[grow+row] += vabs;
722: }
723: }
724: }
725: PetscLogFlops(nz*bs*bs);
726: }
727: MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
728: *norm = 0.0;
729: for (col=0; col<mat->cmap->N; col++) {
730: if (rsum2[col] > *norm) *norm = rsum2[col];
731: }
732: PetscFree2(rsum,rsum2);
733: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
734: }
735: return(0);
736: }
738: PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
739: {
740: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
742: PetscInt nstash,reallocs;
745: if (baij->donotstash || mat->nooffprocentries) return(0);
747: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
748: MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);
749: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
750:   PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
751:   MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);
752: PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
753: return(0);
754: }
756: PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
757: {
758: Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
759: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data;
761: PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2;
762: PetscInt *row,*col;
763: PetscBool other_disassembled;
764: PetscMPIInt n;
765: PetscBool r1,r2,r3;
766: MatScalar *val;
768: /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
770: if (!baij->donotstash && !mat->nooffprocentries) {
771: while (1) {
772: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
773: if (!flg) break;
775: for (i=0; i<n;) {
776: /* Now identify the consecutive vals belonging to the same row */
777: for (j=i,rstart=row[j]; j<n; j++) {
778: if (row[j] != rstart) break;
779: }
780: if (j < n) ncols = j-i;
781: else ncols = n-i;
782: /* Now assemble all these values with a single function call */
783: MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
784: i = j;
785: }
786: }
787: MatStashScatterEnd_Private(&mat->stash);
788: /* Now process the block-stash. Since the values are stashed column-oriented,
789: set the roworiented flag to column oriented, and after MatSetValues()
790: restore the original flags */
791: r1 = baij->roworiented;
792: r2 = a->roworiented;
793: r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
795: baij->roworiented = PETSC_FALSE;
796: a->roworiented = PETSC_FALSE;
798:     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
799: while (1) {
800: MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);
801: if (!flg) break;
803: for (i=0; i<n;) {
804: /* Now identify the consecutive vals belonging to the same row */
805: for (j=i,rstart=row[j]; j<n; j++) {
806: if (row[j] != rstart) break;
807: }
808: if (j < n) ncols = j-i;
809: else ncols = n-i;
810: MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);
811: i = j;
812: }
813: }
814: MatStashScatterEnd_Private(&mat->bstash);
816: baij->roworiented = r1;
817: a->roworiented = r2;
819:     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
820: }
822: MatAssemblyBegin(baij->A,mode);
823: MatAssemblyEnd(baij->A,mode);
825: /* determine if any processor has disassembled, if so we must
826:      also disassemble ourselves, in order that we may reassemble. */
827: /*
828: if nonzero structure of submatrix B cannot change then we know that
829:      no processor disassembled, so this check can be skipped
830: */
831: if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
832: MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));
833: if (mat->was_assembled && !other_disassembled) {
834: MatDisAssemble_MPISBAIJ(mat);
835: }
836: }
838: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
839: MatSetUpMultiply_MPISBAIJ(mat); /* setup Mvctx and sMvctx */
840: }
841: MatAssemblyBegin(baij->B,mode);
842: MatAssemblyEnd(baij->B,mode);
844: PetscFree2(baij->rowvalues,baij->rowindices);
846: baij->rowvalues = 0;
848: /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
849: if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
850: PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
851: MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));
852: }
853: return(0);
854: }
856: extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
857: #include <petscdraw.h>
858: static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
859: {
860: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
861: PetscErrorCode ierr;
862: PetscInt bs = mat->rmap->bs;
863: PetscMPIInt rank = baij->rank;
864: PetscBool iascii,isdraw;
865: PetscViewer sviewer;
866: PetscViewerFormat format;
869: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
870: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
871: if (iascii) {
872: PetscViewerGetFormat(viewer,&format);
873: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
874: MatInfo info;
875: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
876: MatGetInfo(mat,MAT_LOCAL,&info);
877: PetscViewerASCIIPushSynchronized(viewer);
878: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);
879: MatGetInfo(baij->A,MAT_LOCAL,&info);
880: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
881: MatGetInfo(baij->B,MAT_LOCAL,&info);
882: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
883: PetscViewerFlush(viewer);
884: PetscViewerASCIIPopSynchronized(viewer);
885: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
886: VecScatterView(baij->Mvctx,viewer);
887: return(0);
888: } else if (format == PETSC_VIEWER_ASCII_INFO) {
889: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
890: return(0);
891: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
892: return(0);
893: }
894: }
896: if (isdraw) {
897: PetscDraw draw;
898: PetscBool isnull;
899: PetscViewerDrawGetDraw(viewer,0,&draw);
900: PetscDrawIsNull(draw,&isnull);
901: if (isnull) return(0);
902: }
904: {
905: /* assemble the entire matrix onto first processor. */
906: Mat A;
907: Mat_SeqSBAIJ *Aloc;
908: Mat_SeqBAIJ *Bloc;
909: PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
910: MatScalar *a;
911: const char *matname;
913: /* Should this be the same type as mat? */
914: MatCreate(PetscObjectComm((PetscObject)mat),&A);
915: if (!rank) {
916: MatSetSizes(A,M,N,M,N);
917: } else {
918: MatSetSizes(A,0,0,M,N);
919: }
920: MatSetType(A,MATMPISBAIJ);
921: MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);
922: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
923: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
925: /* copy over the A part */
926: Aloc = (Mat_SeqSBAIJ*)baij->A->data;
927: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
928: PetscMalloc1(bs,&rvals);
930: for (i=0; i<mbs; i++) {
931: rvals[0] = bs*(baij->rstartbs + i);
932: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
933: for (j=ai[i]; j<ai[i+1]; j++) {
934: col = (baij->cstartbs+aj[j])*bs;
935: for (k=0; k<bs; k++) {
936: MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
937: col++;
938: a += bs;
939: }
940: }
941: }
942: /* copy over the B part */
943: Bloc = (Mat_SeqBAIJ*)baij->B->data;
944: ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
945: for (i=0; i<mbs; i++) {
947: rvals[0] = bs*(baij->rstartbs + i);
948: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
949: for (j=ai[i]; j<ai[i+1]; j++) {
950: col = baij->garray[aj[j]]*bs;
951: for (k=0; k<bs; k++) {
952: MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
953: col++;
954: a += bs;
955: }
956: }
957: }
958: PetscFree(rvals);
959: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
960: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
961: /*
962: Everyone has to call to draw the matrix since the graphics waits are
963: synchronized across all processors that share the PetscDraw object
964: */
965: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
966: PetscObjectGetName((PetscObject)mat,&matname);
967: if (!rank) {
968: PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);
969: MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);
970: }
971: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
972: PetscViewerFlush(viewer);
973: MatDestroy(&A);
974: }
975: return(0);
976: }
978: static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
979: {
980: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data;
981: Mat_SeqSBAIJ *A = (Mat_SeqSBAIJ*)a->A->data;
982: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data;
984: PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
985: PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
986: int fd;
987: PetscScalar *column_values;
988: FILE *file;
989: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
990: PetscInt message_count,flowcontrolcount;
993: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
994: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
995: nz = bs2*(A->nz + B->nz);
996: rlen = mat->rmap->n;
997: PetscViewerBinaryGetDescriptor(viewer,&fd);
998: if (!rank) {
999: header[0] = MAT_FILE_CLASSID;
1000: header[1] = mat->rmap->N;
1001: header[2] = mat->cmap->N;
1003: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1004: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
1005: /* get largest number of rows any processor has */
1006: range = mat->rmap->range;
1007: for (i=1; i<size; i++) {
1008: rlen = PetscMax(rlen,range[i+1] - range[i]);
1009: }
1010: } else {
1011: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1012: }
1014: PetscMalloc1(rlen/bs,&crow_lens);
1015: /* compute lengths of each row */
1016: for (i=0; i<a->mbs; i++) {
1017: crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1018: }
1019: /* store the row lengths to the file */
1020: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1021: if (!rank) {
1022: MPI_Status status;
1023: PetscMalloc1(rlen,&row_lens);
1024: rlen = (range[1] - range[0])/bs;
1025: for (i=0; i<rlen; i++) {
1026: for (j=0; j<bs; j++) {
1027: row_lens[i*bs+j] = bs*crow_lens[i];
1028: }
1029: }
1030: PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);
1031: for (i=1; i<size; i++) {
1032: rlen = (range[i+1] - range[i])/bs;
1033: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1034: MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1035: for (k=0; k<rlen; k++) {
1036: for (j=0; j<bs; j++) {
1037: row_lens[k*bs+j] = bs*crow_lens[k];
1038: }
1039: }
1040: PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);
1041: }
1042: PetscViewerFlowControlEndMaster(viewer,&message_count);
1043: PetscFree(row_lens);
1044: } else {
1045: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1046: MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1047: PetscViewerFlowControlEndWorker(viewer,&message_count);
1048: }
1049: PetscFree(crow_lens);
1051:   /* load up the local column indices. Indices are stored for every row, not just once per block row, since process 0 does not have the
1052:      information needed to expand a block row into its individual rows. This requires more communication, but still no more than
1053:      the communication needed for the nonzero values */
1054: nzmax = nz; /* space a largest processor needs */
1055: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));
1056: PetscMalloc1(nzmax,&column_indices);
1057: cnt = 0;
1058: for (i=0; i<a->mbs; i++) {
1059: pcnt = cnt;
1060: for (j=B->i[i]; j<B->i[i+1]; j++) {
1061: if ((col = garray[B->j[j]]) > cstart) break;
1062: for (l=0; l<bs; l++) {
1063: column_indices[cnt++] = bs*col+l;
1064: }
1065: }
1066: for (k=A->i[i]; k<A->i[i+1]; k++) {
1067: for (l=0; l<bs; l++) {
1068: column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1069: }
1070: }
1071: for (; j<B->i[i+1]; j++) {
1072: for (l=0; l<bs; l++) {
1073: column_indices[cnt++] = bs*garray[B->j[j]]+l;
1074: }
1075: }
1076: len = cnt - pcnt;
1077: for (k=1; k<bs; k++) {
1078: PetscArraycpy(&column_indices[cnt],&column_indices[pcnt],len);
1079: cnt += len;
1080: }
1081: }
1082: if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1084: /* store the columns to the file */
1085: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1086: if (!rank) {
1087: MPI_Status status;
1088: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
1089: for (i=1; i<size; i++) {
1090: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1091: MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1092: MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1093: PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);
1094: }
1095: PetscViewerFlowControlEndMaster(viewer,&message_count);
1096: } else {
1097: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1098: MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1099: MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1100: PetscViewerFlowControlEndWorker(viewer,&message_count);
1101: }
1102: PetscFree(column_indices);
1104: /* load up the numerical values */
1105: PetscMalloc1(nzmax,&column_values);
1106: cnt = 0;
1107: for (i=0; i<a->mbs; i++) {
1108: rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1109: for (j=B->i[i]; j<B->i[i+1]; j++) {
1110: if (garray[B->j[j]] > cstart) break;
1111: for (l=0; l<bs; l++) {
1112: for (ll=0; ll<bs; ll++) {
1113: column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1114: }
1115: }
1116: cnt += bs;
1117: }
1118: for (k=A->i[i]; k<A->i[i+1]; k++) {
1119: for (l=0; l<bs; l++) {
1120: for (ll=0; ll<bs; ll++) {
1121: column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1122: }
1123: }
1124: cnt += bs;
1125: }
1126: for (; j<B->i[i+1]; j++) {
1127: for (l=0; l<bs; l++) {
1128: for (ll=0; ll<bs; ll++) {
1129: column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1130: }
1131: }
1132: cnt += bs;
1133: }
1134: cnt += (bs-1)*rlen;
1135: }
1136: if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1138: /* store the column values to the file */
1139: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1140: if (!rank) {
1141: MPI_Status status;
1142: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
1143: for (i=1; i<size; i++) {
1144: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1145: MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1146: MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);
1147: PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);
1148: }
1149: PetscViewerFlowControlEndMaster(viewer,&message_count);
1150: } else {
1151: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1152: MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1153: MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
1154: PetscViewerFlowControlEndWorker(viewer,&message_count);
1155: }
1156: PetscFree(column_values);
1158: PetscViewerBinaryGetInfoPointer(viewer,&file);
1159: if (file) {
1160: fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1161: }
1162: return(0);
1163: }
1165: PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
1166: {
1168: PetscBool iascii,isdraw,issocket,isbinary;
1171: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1172: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1173: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1174: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1175: if (iascii || isdraw || issocket) {
1176: MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);
1177: } else if (isbinary) {
1178: MatView_MPISBAIJ_Binary(mat,viewer);
1179: }
1180: return(0);
1181: }
1183: PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1184: {
1185: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1189: #if defined(PETSC_USE_LOG)
1190: PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1191: #endif
1192: MatStashDestroy_Private(&mat->stash);
1193: MatStashDestroy_Private(&mat->bstash);
1194: MatDestroy(&baij->A);
1195: MatDestroy(&baij->B);
1196: #if defined(PETSC_USE_CTABLE)
1197: PetscTableDestroy(&baij->colmap);
1198: #else
1199: PetscFree(baij->colmap);
1200: #endif
1201: PetscFree(baij->garray);
1202: VecDestroy(&baij->lvec);
1203: VecScatterDestroy(&baij->Mvctx);
1204: VecDestroy(&baij->slvec0);
1205: VecDestroy(&baij->slvec0b);
1206: VecDestroy(&baij->slvec1);
1207: VecDestroy(&baij->slvec1a);
1208: VecDestroy(&baij->slvec1b);
1209: VecScatterDestroy(&baij->sMvctx);
1210: PetscFree2(baij->rowvalues,baij->rowindices);
1211: PetscFree(baij->barray);
1212: PetscFree(baij->hd);
1213: VecDestroy(&baij->diag);
1214: VecDestroy(&baij->bb1);
1215: VecDestroy(&baij->xx1);
1216: #if defined(PETSC_USE_REAL_MAT_SINGLE)
1217: PetscFree(baij->setvaluescopy);
1218: #endif
1219: PetscFree(baij->in_loc);
1220: PetscFree(baij->v_loc);
1221: PetscFree(baij->rangebs);
1222: PetscFree(mat->data);
1224: PetscObjectChangeTypeName((PetscObject)mat,0);
1225: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);
1226: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);
1227: PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);
1228: PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);
1229: #if defined(PETSC_HAVE_ELEMENTAL)
1230: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);
1231: #endif
1232: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);
1233: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);
1234: return(0);
1235: }
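/*
   Matrix-vector product with only the (block) upper triangle stored, y = M*x, is
   assembled from three pieces: the diagonal block contributes A*x, the off-diagonal
   block contributes B*xghost (superdiagonal part) and B^T*x (subdiagonal part; B^H*x
   in the Hermitian variant).  slvec0 packs [x, B^T*x]; a single VecScatter with
   ADD_VALUES delivers x to the owners of the ghost columns and accumulates the
   remote B^T*x contributions, giving slvec1 = [A*x + gathered B^T*x, xghost], and the
   final MatMultAdd on B then adds B*xghost.
*/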
1237: PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1238: {
1239: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1240: PetscErrorCode ierr;
1241: PetscInt nt,mbs=a->mbs,bs=A->rmap->bs;
1242: PetscScalar *from;
1243: const PetscScalar *x;
1246: VecGetLocalSize(xx,&nt);
1247: if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1249: /* diagonal part */
1250: (*a->A->ops->mult)(a->A,xx,a->slvec1a);
1251: VecSet(a->slvec1b,0.0);
1253: /* subdiagonal part */
1254: (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);
1256: /* copy x into the vec slvec0 */
1257: VecGetArray(a->slvec0,&from);
1258: VecGetArrayRead(xx,&x);
1260: PetscArraycpy(from,x,bs*mbs);
1261: VecRestoreArray(a->slvec0,&from);
1262: VecRestoreArrayRead(xx,&x);
1264: VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1265: VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1266:   /* superdiagonal part */
1267: (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);
1268: return(0);
1269: }
1271: PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1272: {
1273: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1274: PetscErrorCode ierr;
1275: PetscInt nt,mbs=a->mbs,bs=A->rmap->bs;
1276: PetscScalar *from;
1277: const PetscScalar *x;
1280: VecGetLocalSize(xx,&nt);
1281: if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1283: /* diagonal part */
1284: (*a->A->ops->mult)(a->A,xx,a->slvec1a);
1285: VecSet(a->slvec1b,0.0);
1287: /* subdiagonal part */
1288: (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);
1290: /* copy x into the vec slvec0 */
1291: VecGetArray(a->slvec0,&from);
1292: VecGetArrayRead(xx,&x);
1294: PetscArraycpy(from,x,bs*mbs);
1295: VecRestoreArray(a->slvec0,&from);
1296: VecRestoreArrayRead(xx,&x);
1298: VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1299: VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1300:   /* superdiagonal part */
1301: (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);
1302: return(0);
1303: }
1305: PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1306: {
1307: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1309: PetscInt nt;
1312: VecGetLocalSize(xx,&nt);
1313: if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1315: VecGetLocalSize(yy,&nt);
1316:   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1318: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1319: /* do diagonal part */
1320: (*a->A->ops->mult)(a->A,xx,yy);
1321:   /* do superdiagonal part */
1322: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1323: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1324: /* do subdiagonal part */
1325: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1326: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1327: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1328: return(0);
1329: }
1331: PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1332: {
1333: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1334: PetscErrorCode ierr;
1335: PetscInt mbs=a->mbs,bs=A->rmap->bs;
1336: PetscScalar *from,zero=0.0;
1337: const PetscScalar *x;
1340: /*
1341: PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1342: PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1343: */
1344: /* diagonal part */
1345: (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);
1346: VecSet(a->slvec1b,zero);
1348: /* subdiagonal part */
1349: (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);
1351: /* copy x into the vec slvec0 */
1352: VecGetArray(a->slvec0,&from);
1353: VecGetArrayRead(xx,&x);
1354: PetscArraycpy(from,x,bs*mbs);
1355: VecRestoreArray(a->slvec0,&from);
1357: VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1358: VecRestoreArrayRead(xx,&x);
1359: VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1361:   /* superdiagonal part */
1362: (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);
1363: return(0);
1364: }
1366: PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1367: {
1368: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1372: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1373: /* do diagonal part */
1374: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1376:   /* do superdiagonal part */
1376: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1377: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1379: /* do subdiagonal part */
1380: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1381: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1382: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1383: return(0);
1384: }
1386: /*
1387: This only works correctly for square matrices where the subblock A->A is the
1388: diagonal block
1389: */
1390: PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1391: {
1392: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1396: /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1397: MatGetDiagonal(a->A,v);
1398: return(0);
1399: }
1401: PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1402: {
1403: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1407: MatScale(a->A,aa);
1408: MatScale(a->B,aa);
1409: return(0);
1410: }
1412: PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1413: {
1414: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1415: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1417: PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1418: PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1419: PetscInt *cmap,*idx_p,cstart = mat->rstartbs;
1422: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1423: mat->getrowactive = PETSC_TRUE;
1425: if (!mat->rowvalues && (idx || v)) {
1426: /*
1427: allocate enough space to hold information from the longest row.
1428: */
1429: Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1430: Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data;
1431: PetscInt max = 1,mbs = mat->mbs,tmp;
1432: for (i=0; i<mbs; i++) {
1433: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1434: if (max < tmp) max = tmp;
1435: }
1436: PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);
1437: }
1439: if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1440: lrow = row - brstart; /* local row index */
1442: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1443: if (!v) {pvA = 0; pvB = 0;}
1444: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1445: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1446: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1447: nztot = nzA + nzB;
1449: cmap = mat->garray;
1450: if (v || idx) {
1451: if (nztot) {
1452: /* Sort by increasing column numbers, assuming A and B already sorted */
1453: PetscInt imark = -1;
1454: if (v) {
1455: *v = v_p = mat->rowvalues;
1456: for (i=0; i<nzB; i++) {
1457: if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1458: else break;
1459: }
1460: imark = i;
1461: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1462: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1463: }
1464: if (idx) {
1465: *idx = idx_p = mat->rowindices;
1466: if (imark > -1) {
1467: for (i=0; i<imark; i++) {
1468: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1469: }
1470: } else {
1471: for (i=0; i<nzB; i++) {
1472: if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1473: else break;
1474: }
1475: imark = i;
1476: }
1477: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i];
1478: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1479: }
1480: } else {
1481: if (idx) *idx = 0;
1482: if (v) *v = 0;
1483: }
1484: }
1485: *nz = nztot;
1486: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1487: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1488: return(0);
1489: }
1491: PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1492: {
1493: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1496: if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1497: baij->getrowactive = PETSC_FALSE;
1498: return(0);
1499: }
1501: PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1502: {
1503: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1504: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1507: aA->getrow_utriangular = PETSC_TRUE;
1508: return(0);
1509: }
1510: PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1511: {
1512: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1513: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1516: aA->getrow_utriangular = PETSC_FALSE;
1517: return(0);
1518: }
1520: PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1521: {
1522: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1526: MatRealPart(a->A);
1527: MatRealPart(a->B);
1528: return(0);
1529: }
1531: PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1532: {
1533: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1537: MatImaginaryPart(a->A);
1538: MatImaginaryPart(a->B);
1539: return(0);
1540: }
1542: /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1543:    Input: isrow - distributed (parallel),
1544: iscol_local - locally owned (seq)
1545: */
1546: PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg)
1547: {
1549: PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch;
1550: const PetscInt *ptr1,*ptr2;
1553: ISGetLocalSize(isrow,&sz1);
1554: ISGetLocalSize(iscol_local,&sz2);
1555: if (sz1 > sz2) {
1556: *flg = PETSC_FALSE;
1557: return(0);
1558: }
1560: ISGetIndices(isrow,&ptr1);
1561: ISGetIndices(iscol_local,&ptr2);
1563: PetscMalloc1(sz1,&a1);
1564: PetscMalloc1(sz2,&a2);
1565: PetscArraycpy(a1,ptr1,sz1);
1566: PetscArraycpy(a2,ptr2,sz2);
1567: PetscSortInt(sz1,a1);
1568: PetscSortInt(sz2,a2);
1570: nmatch=0;
1571: k = 0;
1572: for (i=0; i<sz1; i++){
1573: for (j=k; j<sz2; j++){
1574: if (a1[i] == a2[j]) {
1575: k = j; nmatch++;
1576: break;
1577: }
1578: }
1579: }
1580: ISRestoreIndices(isrow,&ptr1);
1581: ISRestoreIndices(iscol_local,&ptr2);
1582: PetscFree(a1);
1583: PetscFree(a2);
1584: if (nmatch < sz1) {
1585: *flg = PETSC_FALSE;
1586: } else {
1587: *flg = PETSC_TRUE;
1588: }
1589: return(0);
1590: }
1592: PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1593: {
1595: IS iscol_local;
1596: PetscInt csize;
1597: PetscBool isequal;
1600: ISGetLocalSize(iscol,&csize);
1601: if (call == MAT_REUSE_MATRIX) {
1602: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
1603: if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1604: } else {
1605: ISAllGather(iscol,&iscol_local);
1606: ISEqual_private(isrow,iscol_local,&isequal);
1607: if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1608: }
1610: /* now call MatCreateSubMatrix_MPIBAIJ() */
1611: MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
1612: if (call == MAT_INITIAL_MATRIX) {
1613: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
1614: ISDestroy(&iscol_local);
1615: }
1616: return(0);
1617: }
1619: PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1620: {
1621: Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1625: MatZeroEntries(l->A);
1626: MatZeroEntries(l->B);
1627: return(0);
1628: }
1630: PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1631: {
1632: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1633: Mat A = a->A,B = a->B;
1635: PetscLogDouble isend[5],irecv[5];
1638: info->block_size = (PetscReal)matin->rmap->bs;
1640: MatGetInfo(A,MAT_LOCAL,info);
1642: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1643: isend[3] = info->memory; isend[4] = info->mallocs;
1645: MatGetInfo(B,MAT_LOCAL,info);
1647: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1648: isend[3] += info->memory; isend[4] += info->mallocs;
1649: if (flag == MAT_LOCAL) {
1650: info->nz_used = isend[0];
1651: info->nz_allocated = isend[1];
1652: info->nz_unneeded = isend[2];
1653: info->memory = isend[3];
1654: info->mallocs = isend[4];
1655: } else if (flag == MAT_GLOBAL_MAX) {
1656: MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));
1658: info->nz_used = irecv[0];
1659: info->nz_allocated = irecv[1];
1660: info->nz_unneeded = irecv[2];
1661: info->memory = irecv[3];
1662: info->mallocs = irecv[4];
1663: } else if (flag == MAT_GLOBAL_SUM) {
1664: MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));
1666: info->nz_used = irecv[0];
1667: info->nz_allocated = irecv[1];
1668: info->nz_unneeded = irecv[2];
1669: info->memory = irecv[3];
1670: info->mallocs = irecv[4];
1671: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1672: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1673: info->fill_ratio_needed = 0;
1674: info->factor_mallocs = 0;
1675: return(0);
1676: }
1678: PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1679: {
1680: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1681: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1685: switch (op) {
1686: case MAT_NEW_NONZERO_LOCATIONS:
1687: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1688: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1689: case MAT_KEEP_NONZERO_PATTERN:
1690: case MAT_SUBMAT_SINGLEIS:
1691: case MAT_NEW_NONZERO_LOCATION_ERR:
1692: MatCheckPreallocated(A,1);
1693: MatSetOption(a->A,op,flg);
1694: MatSetOption(a->B,op,flg);
1695: break;
1696: case MAT_ROW_ORIENTED:
1697: MatCheckPreallocated(A,1);
1698: a->roworiented = flg;
1700: MatSetOption(a->A,op,flg);
1701: MatSetOption(a->B,op,flg);
1702: break;
1703: case MAT_NEW_DIAGONALS:
1704: case MAT_SORTED_FULL:
1705: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1706: break;
1707: case MAT_IGNORE_OFF_PROC_ENTRIES:
1708: a->donotstash = flg;
1709: break;
1710: case MAT_USE_HASH_TABLE:
1711: a->ht_flag = flg;
1712: break;
1713: case MAT_HERMITIAN:
1714: MatCheckPreallocated(A,1);
1715: if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1716: MatSetOption(a->A,op,flg);
1717: #if defined(PETSC_USE_COMPLEX)
1718: A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1719: #endif
1720: break;
1721: case MAT_SPD:
1722: A->spd_set = PETSC_TRUE;
1723: A->spd = flg;
1724: if (flg) {
1725: A->symmetric = PETSC_TRUE;
1726: A->structurally_symmetric = PETSC_TRUE;
1727: A->symmetric_set = PETSC_TRUE;
1728: A->structurally_symmetric_set = PETSC_TRUE;
1729: }
1730: break;
1731: case MAT_SYMMETRIC:
1732: MatCheckPreallocated(A,1);
1733: MatSetOption(a->A,op,flg);
1734: break;
1735: case MAT_STRUCTURALLY_SYMMETRIC:
1736: MatCheckPreallocated(A,1);
1737: MatSetOption(a->A,op,flg);
1738: break;
1739: case MAT_SYMMETRY_ETERNAL:
1740: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1741: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1742: break;
1743: case MAT_IGNORE_LOWER_TRIANGULAR:
1744: aA->ignore_ltriangular = flg;
1745: break;
1746: case MAT_ERROR_LOWER_TRIANGULAR:
1747: aA->ignore_ltriangular = flg;
1748: break;
1749: case MAT_GETROW_UPPERTRIANGULAR:
1750: aA->getrow_utriangular = flg;
1751: break;
1752: default:
1753: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1754: }
1755: return(0);
1756: }
1758: PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1759: {
1763: if (reuse == MAT_INITIAL_MATRIX) {
1764: MatDuplicate(A,MAT_COPY_VALUES,B);
1765: } else if (reuse == MAT_REUSE_MATRIX) {
1766: MatCopy(A,*B,SAME_NONZERO_PATTERN);
1767: }
1768: return(0);
1769: }
1771: PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1772: {
1773: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1774: Mat a = baij->A, b=baij->B;
1776: PetscInt nv,m,n;
1777: PetscBool flg;
1780: if (ll != rr) {
1781: VecEqual(ll,rr,&flg);
1782: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1783: }
1784: if (!ll) return(0);
1786: MatGetLocalSize(mat,&m,&n);
1787: if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %d and %d must be the same",m,n);
1789: VecGetLocalSize(rr,&nv);
1790: if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1792: VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1794: /* left diagonalscale the off-diagonal part */
1795: (*b->ops->diagonalscale)(b,ll,NULL);
1797: /* scale the diagonal part */
1798: (*a->ops->diagonalscale)(a,ll,rr);
1800: /* right diagonalscale the off-diagonal part */
1801: VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1802: (*b->ops->diagonalscale)(b,NULL,baij->lvec);
1803: return(0);
1804: }
1806: PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1807: {
1808: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1812: MatSetUnfactored(a->A);
1813: return(0);
1814: }
1816: static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1818: PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag)
1819: {
1820: Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1821: Mat a,b,c,d;
1822: PetscBool flg;
1826: a = matA->A; b = matA->B;
1827: c = matB->A; d = matB->B;
1829: MatEqual(a,c,&flg);
1830: if (flg) {
1831: MatEqual(b,d,&flg);
1832: }
1833: MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1834: return(0);
1835: }
1837: PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1838: {
1840: PetscBool isbaij;
1843: PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");
1844: if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1845: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1846: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1847: MatGetRowUpperTriangular(A);
1848: MatCopy_Basic(A,B,str);
1849: MatRestoreRowUpperTriangular(A);
1850: } else {
1851: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1852: Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1854: MatCopy(a->A,b->A,str);
1855: MatCopy(a->B,b->B,str);
1856: }
1857: PetscObjectStateIncrease((PetscObject)B);
1858: return(0);
1859: }
1861: PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1862: {
1866: MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1867: return(0);
1868: }
1870: PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1871: {
1873: Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1874: PetscBLASInt bnz,one=1;
1875: Mat_SeqSBAIJ *xa,*ya;
1876: Mat_SeqBAIJ *xb,*yb;
1879: if (str == SAME_NONZERO_PATTERN) {
1880: PetscScalar alpha = a;
1881: xa = (Mat_SeqSBAIJ*)xx->A->data;
1882: ya = (Mat_SeqSBAIJ*)yy->A->data;
1883: PetscBLASIntCast(xa->nz,&bnz);
1884: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1885: xb = (Mat_SeqBAIJ*)xx->B->data;
1886: yb = (Mat_SeqBAIJ*)yy->B->data;
1887: PetscBLASIntCast(xb->nz,&bnz);
1888: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1889: PetscObjectStateIncrease((PetscObject)Y);
1890: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
1891: MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);
1892: MatAXPY_Basic(Y,a,X,str);
1893: MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);
1894: } else {
1895: Mat B;
1896: PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1897: if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1898: MatGetRowUpperTriangular(X);
1899: MatGetRowUpperTriangular(Y);
1900: PetscMalloc1(yy->A->rmap->N,&nnz_d);
1901: PetscMalloc1(yy->B->rmap->N,&nnz_o);
1902: MatCreate(PetscObjectComm((PetscObject)Y),&B);
1903: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
1904: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
1905: MatSetBlockSizesFromMats(B,Y,Y);
1906: MatSetType(B,MATMPISBAIJ);
1907: MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);
1908: MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);
1909: MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);
1910: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
1911: MatHeaderReplace(Y,&B);
1912: PetscFree(nnz_d);
1913: PetscFree(nnz_o);
1914: MatRestoreRowUpperTriangular(X);
1915: MatRestoreRowUpperTriangular(Y);
1916: }
1917: return(0);
1918: }
1920: PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1921: {
1923: PetscInt i;
1924: PetscBool flg;
1927: MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B); /* B[] are sbaij matrices */
1928: for (i=0; i<n; i++) {
1929: ISEqual(irow[i],icol[i],&flg);
1930: if (!flg) {
1931: MatSeqSBAIJZeroOps_Private(*B[i]);
1932: }
1933: }
1934: return(0);
1935: }
1937: PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1938: {
1940: Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data;
1941: Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data;
1944: if (!Y->preallocated) {
1945: MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);
1946: } else if (!aij->nz) {
1947: PetscInt nonew = aij->nonew;
1948: MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);
1949: aij->nonew = nonew;
1950: }
1951: MatShift_Basic(Y,a);
1952: return(0);
1953: }
1955: PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d)
1956: {
1957: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1961: if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1962: MatMissingDiagonal(a->A,missing,d);
1963: if (d) {
1964: PetscInt rstart;
1965: MatGetOwnershipRange(A,&rstart,NULL);
1966: *d += rstart/A->rmap->bs;
1968: }
1969: return(0);
1970: }
1972: PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1973: {
1975: *a = ((Mat_MPISBAIJ*)A->data)->A;
1976: return(0);
1977: }
1979: /* -------------------------------------------------------------------*/
1980: static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1981: MatGetRow_MPISBAIJ,
1982: MatRestoreRow_MPISBAIJ,
1983: MatMult_MPISBAIJ,
1984: /* 4*/ MatMultAdd_MPISBAIJ,
1985: MatMult_MPISBAIJ, /* transpose versions are the same as the non-transpose versions */
1986: MatMultAdd_MPISBAIJ,
1987: 0,
1988: 0,
1989: 0,
1990: /* 10*/ 0,
1991: 0,
1992: 0,
1993: MatSOR_MPISBAIJ,
1994: MatTranspose_MPISBAIJ,
1995: /* 15*/ MatGetInfo_MPISBAIJ,
1996: MatEqual_MPISBAIJ,
1997: MatGetDiagonal_MPISBAIJ,
1998: MatDiagonalScale_MPISBAIJ,
1999: MatNorm_MPISBAIJ,
2000: /* 20*/ MatAssemblyBegin_MPISBAIJ,
2001: MatAssemblyEnd_MPISBAIJ,
2002: MatSetOption_MPISBAIJ,
2003: MatZeroEntries_MPISBAIJ,
2004: /* 24*/ 0,
2005: 0,
2006: 0,
2007: 0,
2008: 0,
2009: /* 29*/ MatSetUp_MPISBAIJ,
2010: 0,
2011: 0,
2012: MatGetDiagonalBlock_MPISBAIJ,
2013: 0,
2014: /* 34*/ MatDuplicate_MPISBAIJ,
2015: 0,
2016: 0,
2017: 0,
2018: 0,
2019: /* 39*/ MatAXPY_MPISBAIJ,
2020: MatCreateSubMatrices_MPISBAIJ,
2021: MatIncreaseOverlap_MPISBAIJ,
2022: MatGetValues_MPISBAIJ,
2023: MatCopy_MPISBAIJ,
2024: /* 44*/ 0,
2025: MatScale_MPISBAIJ,
2026: MatShift_MPISBAIJ,
2027: 0,
2028: 0,
2029: /* 49*/ 0,
2030: 0,
2031: 0,
2032: 0,
2033: 0,
2034: /* 54*/ 0,
2035: 0,
2036: MatSetUnfactored_MPISBAIJ,
2037: 0,
2038: MatSetValuesBlocked_MPISBAIJ,
2039: /* 59*/ MatCreateSubMatrix_MPISBAIJ,
2040: 0,
2041: 0,
2042: 0,
2043: 0,
2044: /* 64*/ 0,
2045: 0,
2046: 0,
2047: 0,
2048: 0,
2049: /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
2050: 0,
2051: MatConvert_MPISBAIJ_Basic,
2052: 0,
2053: 0,
2054: /* 74*/ 0,
2055: 0,
2056: 0,
2057: 0,
2058: 0,
2059: /* 79*/ 0,
2060: 0,
2061: 0,
2062: 0,
2063: MatLoad_MPISBAIJ,
2064: /* 84*/ 0,
2065: 0,
2066: 0,
2067: 0,
2068: 0,
2069: /* 89*/ 0,
2070: 0,
2071: 0,
2072: 0,
2073: 0,
2074: /* 94*/ 0,
2075: 0,
2076: 0,
2077: 0,
2078: 0,
2079: /* 99*/ 0,
2080: 0,
2081: 0,
2082: 0,
2083: 0,
2084: /*104*/ 0,
2085: MatRealPart_MPISBAIJ,
2086: MatImaginaryPart_MPISBAIJ,
2087: MatGetRowUpperTriangular_MPISBAIJ,
2088: MatRestoreRowUpperTriangular_MPISBAIJ,
2089: /*109*/ 0,
2090: 0,
2091: 0,
2092: 0,
2093: MatMissingDiagonal_MPISBAIJ,
2094: /*114*/ 0,
2095: 0,
2096: 0,
2097: 0,
2098: 0,
2099: /*119*/ 0,
2100: 0,
2101: 0,
2102: 0,
2103: 0,
2104: /*124*/ 0,
2105: 0,
2106: 0,
2107: 0,
2108: 0,
2109: /*129*/ 0,
2110: 0,
2111: 0,
2112: 0,
2113: 0,
2114: /*134*/ 0,
2115: 0,
2116: 0,
2117: 0,
2118: 0,
2119: /*139*/ MatSetBlockSizes_Default,
2120: 0,
2121: 0,
2122: 0,
2123: 0,
2124: /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
2125: };
2127: PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2128: {
2129: Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
2131: PetscInt i,mbs,Mbs;
2132: PetscMPIInt size;
2135: MatSetBlockSize(B,PetscAbs(bs));
2136: PetscLayoutSetUp(B->rmap);
2137: PetscLayoutSetUp(B->cmap);
2138: PetscLayoutGetBlockSize(B->rmap,&bs);
2139: if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N);
2140: if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n);
2142: mbs = B->rmap->n/bs;
2143: Mbs = B->rmap->N/bs;
2144: if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
2146: B->rmap->bs = bs;
2147: b->bs2 = bs*bs;
2148: b->mbs = mbs;
2149: b->Mbs = Mbs;
2150: b->nbs = B->cmap->n/bs;
2151: b->Nbs = B->cmap->N/bs;
2153: for (i=0; i<=b->size; i++) {
2154: b->rangebs[i] = B->rmap->range[i]/bs;
2155: }
2156: b->rstartbs = B->rmap->rstart/bs;
2157: b->rendbs = B->rmap->rend/bs;
2159: b->cstartbs = B->cmap->rstart/bs;
2160: b->cendbs = B->cmap->rend/bs;
2162: #if defined(PETSC_USE_CTABLE)
2163: PetscTableDestroy(&b->colmap);
2164: #else
2165: PetscFree(b->colmap);
2166: #endif
2167: PetscFree(b->garray);
2168: VecDestroy(&b->lvec);
2169: VecScatterDestroy(&b->Mvctx);
2170: VecDestroy(&b->slvec0);
2171: VecDestroy(&b->slvec0b);
2172: VecDestroy(&b->slvec1);
2173: VecDestroy(&b->slvec1a);
2174: VecDestroy(&b->slvec1b);
2175: VecScatterDestroy(&b->sMvctx);
2177: /* Because b->B may have been resized we simply destroy it and create a new one each time */
2178: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
2179: MatDestroy(&b->B);
2180: MatCreate(PETSC_COMM_SELF,&b->B);
2181: MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);
2182: MatSetType(b->B,MATSEQBAIJ);
2183: PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);
2185: if (!B->preallocated) {
2186: MatCreate(PETSC_COMM_SELF,&b->A);
2187: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
2188: MatSetType(b->A,MATSEQSBAIJ);
2189: PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);
2190: MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);
2191: }
2193: MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);
2194: MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);
2196: B->preallocated = PETSC_TRUE;
2197: B->was_assembled = PETSC_FALSE;
2198: B->assembled = PETSC_FALSE;
2199: return(0);
2200: }
2202: PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2203: {
2204: PetscInt m,rstart,cend;
2205: PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0;
2206: const PetscInt *JJ =0;
2207: PetscScalar *values=0;
2208: PetscBool roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
2210: PetscBool nooffprocentries;
2213: if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2214: PetscLayoutSetBlockSize(B->rmap,bs);
2215: PetscLayoutSetBlockSize(B->cmap,bs);
2216: PetscLayoutSetUp(B->rmap);
2217: PetscLayoutSetUp(B->cmap);
2218: PetscLayoutGetBlockSize(B->rmap,&bs);
2219: m = B->rmap->n/bs;
2220: rstart = B->rmap->rstart/bs;
2221: cend = B->cmap->rend/bs;
2223: if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2224: PetscMalloc2(m,&d_nnz,m,&o_nnz);
2225: for (i=0; i<m; i++) {
2226: nz = ii[i+1] - ii[i];
2227: if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2228: /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2229: JJ = jj + ii[i];
2230: bd = 0;
2231: for (j=0; j<nz; j++) {
2232: if (*JJ >= i + rstart) break;
2233: JJ++;
2234: bd++;
2235: }
2236: d = 0;
2237: for (; j<nz; j++) {
2238: if (*JJ++ >= cend) break;
2239: d++;
2240: }
2241: d_nnz[i] = d;
2242: o_nnz[i] = nz - d - bd;
2243: nz = nz - bd;
2244: nz_max = PetscMax(nz_max,nz);
2245: }
2246: MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);
2247: MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);
2248: PetscFree2(d_nnz,o_nnz);
2250: values = (PetscScalar*)V;
2251: if (!values) {
2252: PetscCalloc1(bs*bs*nz_max,&values);
2253: }
2254: for (i=0; i<m; i++) {
2255: PetscInt row = i + rstart;
2256: PetscInt ncols = ii[i+1] - ii[i];
2257: const PetscInt *icols = jj + ii[i];
2258: if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2259: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2260: MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);
2261: } else { /* block ordering does not match so we can only insert one block at a time. */
2262: PetscInt j;
2263: for (j=0; j<ncols; j++) {
2264: const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2265: MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);
2266: }
2267: }
2268: }
2270: if (!V) { PetscFree(values); }
2271: nooffprocentries = B->nooffprocentries;
2272: B->nooffprocentries = PETSC_TRUE;
2273: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2274: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2275: B->nooffprocentries = nooffprocentries;
2277: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2278: return(0);
2279: }
2281: /*MC
2282: MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2283: based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of
2284: the matrix is stored.
2286: For complex numbers, by default this matrix is symmetric, NOT Hermitian. To make it Hermitian you
2287: can call MatSetOption(mat, MAT_HERMITIAN, PETSC_TRUE);
2289: Options Database Keys:
2290: . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2292: Notes:
2293: The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
2294: diagonal portion of the matrix of each process must be less than or equal to the number of columns.
2296: Level: beginner
2298: .seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MatType
2299: M*/
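/*
   Added usage sketch (illustrative only, not part of the original PETSc source): a typical sequence for
   building a MATMPISBAIJ matrix as described in the manual page above. The communicator, block size,
   local row count (a multiple of bs) and preallocation estimates below are hypothetical placeholders;
   only entries on or above the (block) diagonal are inserted.
*/
static PetscErrorCode ExampleCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt mlocal,Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);
  MatSetType(*A,MATMPISBAIJ);
  MatMPISBAIJSetPreallocation(*A,bs,5,NULL,2,NULL); /* ~5 block nonzeros per block row in the diagonal part, ~2 in the off-diagonal part */
  /* ... insert values with MatSetValuesBlocked(), using column indices >= row indices only ... */
  MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);
  MatSetOption(*A,MAT_HERMITIAN,PETSC_TRUE); /* optional, after assembly: use the Hermitian multiply for complex scalars */
  return(0);
}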
2301: PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2302: {
2303: Mat_MPISBAIJ *b;
2305: PetscBool flg = PETSC_FALSE;
2308: PetscNewLog(B,&b);
2309: B->data = (void*)b;
2310: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2312: B->ops->destroy = MatDestroy_MPISBAIJ;
2313: B->ops->view = MatView_MPISBAIJ;
2314: B->assembled = PETSC_FALSE;
2315: B->insertmode = NOT_SET_VALUES;
2317: MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);
2318: MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);
2320: /* build local table of row and column ownerships */
2321: PetscMalloc1(b->size+2,&b->rangebs);
2323: /* build cache for off array entries formed */
2324: MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);
2326: b->donotstash = PETSC_FALSE;
2327: b->colmap = NULL;
2328: b->garray = NULL;
2329: b->roworiented = PETSC_TRUE;
2331: /* stuff used in block assembly */
2332: b->barray = 0;
2334: /* stuff used for matrix vector multiply */
2335: b->lvec = 0;
2336: b->Mvctx = 0;
2337: b->slvec0 = 0;
2338: b->slvec0b = 0;
2339: b->slvec1 = 0;
2340: b->slvec1a = 0;
2341: b->slvec1b = 0;
2342: b->sMvctx = 0;
2344: /* stuff for MatGetRow() */
2345: b->rowindices = 0;
2346: b->rowvalues = 0;
2347: b->getrowactive = PETSC_FALSE;
2349: /* hash table stuff */
2350: b->ht = 0;
2351: b->hd = 0;
2352: b->ht_size = 0;
2353: b->ht_flag = PETSC_FALSE;
2354: b->ht_fact = 0;
2355: b->ht_total_ct = 0;
2356: b->ht_insert_ct = 0;
2358: /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2359: b->ijonly = PETSC_FALSE;
2361: b->in_loc = 0;
2362: b->v_loc = 0;
2363: b->n_loc = 0;
2365: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);
2366: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);
2367: PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);
2368: PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);
2369: #if defined(PETSC_HAVE_ELEMENTAL)
2370: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);
2371: #endif
2372: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);
2373: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);
2375: B->symmetric = PETSC_TRUE;
2376: B->structurally_symmetric = PETSC_TRUE;
2377: B->symmetric_set = PETSC_TRUE;
2378: B->structurally_symmetric_set = PETSC_TRUE;
2379: B->symmetric_eternal = PETSC_TRUE;
2381: B->hermitian = PETSC_FALSE;
2382: B->hermitian_set = PETSC_FALSE;
2384: PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);
2385: PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");
2386: PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);
2387: if (flg) {
2388: PetscReal fact = 1.39;
2389: MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);
2390: PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);
2391: if (fact <= 1.0) fact = 1.39;
2392: MatMPIBAIJSetHashTableFactor(B,fact);
2393: PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);
2394: }
2395: PetscOptionsEnd();
2396: return(0);
2397: }
2399: /*MC
2400: MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2402: This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2403: and MATMPISBAIJ otherwise.
2405: Options Database Keys:
2406: . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2408: Level: beginner
2410: .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2411: M*/
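/*
   Added usage sketch (illustrative only, not part of the original PETSc source): requesting the generic
   "sbaij" type and letting PETSc resolve it to MATSEQSBAIJ or MATMPISBAIJ from the communicator size.
   Calling both preallocation routines is safe; the one that does not match the resulting type is a no-op.
   All names below are hypothetical placeholders.
*/
static PetscErrorCode ExampleCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt mlocal,Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);
  MatSetType(*A,MATSBAIJ);
  MatSetFromOptions(*A);                            /* allow -mat_type and related options to override */
  MatSeqSBAIJSetPreallocation(*A,bs,5,NULL);        /* takes effect on a one-process communicator */
  MatMPISBAIJSetPreallocation(*A,bs,5,NULL,2,NULL); /* takes effect on a multi-process communicator */
  return(0);
}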
2413: /*@C
2414: MatMPISBAIJSetPreallocation - For good matrix assembly performance
2415: the user should preallocate the matrix storage by setting the parameters
2416: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2417: performance can be increased by more than a factor of 50.
2419: Collective on Mat
2421: Input Parameters:
2422: + B - the matrix
2423: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2424: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2425: . d_nz - number of block nonzeros per block row in diagonal portion of local
2426: submatrix (same for all local rows)
2427: . d_nnz - array containing the number of block nonzeros in the various block rows
2428: in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2429: (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room
2430: for the diagonal entry and set a value even if it is zero.
2431: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2432: submatrix (same for all local rows).
2433: - o_nnz - array containing the number of nonzeros in the various block rows of the
2434: off-diagonal portion of the local submatrix that is to the right of the diagonal
2435: (possibly different for each block row) or NULL.
2438: Options Database Keys:
2439: + -mat_no_unroll - uses code that does not unroll the loops in the
2440: block calculations (much slower)
2441: - -mat_block_size - size of the blocks to use
2443: Notes:
2445: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2446: then it must be used on all processors that share the object for that argument.
2448: If the *_nnz parameter is given then the *_nz parameter is ignored
2450: Storage Information:
2451: For a square global matrix we define each processor's diagonal portion
2452: to be its local rows and the corresponding columns (a square submatrix);
2453: each processor's off-diagonal portion encompasses the remainder of the
2454: local matrix (a rectangular submatrix).
2456: The user can specify preallocated storage for the diagonal part of
2457: the local submatrix with either d_nz or d_nnz (not both). Set
2458: d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2459: memory allocation. Likewise, specify preallocated storage for the
2460: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2462: You can call MatGetInfo() to get information on how effective the preallocation was;
2463: for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
2464: You can also run with the option -info and look for messages containing the string
2465: malloc to see if additional memory allocation was needed.
2467: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2468: the figure below we depict these three local rows and all columns (0-11).
2470: .vb
2471: 0 1 2 3 4 5 6 7 8 9 10 11
2472: --------------------------
2473: row 3 |. . . d d d o o o o o o
2474: row 4 |. . . d d d o o o o o o
2475: row 5 |. . . d d d o o o o o o
2476: --------------------------
2477: .ve
2479: Thus, any entries in the d locations are stored in the d (diagonal)
2480: submatrix, and any entries in the o locations are stored in the
2481: o (off-diagonal) submatrix. Note that the d matrix is stored in
2482: MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2484: Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2485: plus the diagonal part of the d matrix,
2486: and o_nz should indicate the number of block nonzeros per row in the o matrix.
2488: In general, for PDE problems in which most nonzeros are near the diagonal,
2489: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2490: or you will get TERRIBLE performance; see the users' manual chapter on
2491: matrices.
2493: Level: intermediate
2495: .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2496: @*/
2497: PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2498: {
2505: PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
2506: return(0);
2507: }
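/*
   Added worked example (illustrative only, not part of the original PETSc source): preallocation counts
   for the process owning rows 3, 4 and 5 in the figure above, with bs = 1. Because only the upper
   triangle of the diagonal block is stored, d_nnz shrinks by one for each successive row, while the
   off-diagonal columns 6..11 give o_nnz = 6 for every row.
*/
static PetscErrorCode ExamplePreallocateRows3to5(Mat B)
{
  const PetscInt d_nnz[3] = {3,2,1}; /* row 3: cols 3,4,5; row 4: cols 4,5; row 5: col 5 */
  const PetscInt o_nnz[3] = {6,6,6}; /* cols 6..11 for each of the three rows */

  MatMPISBAIJSetPreallocation(B,1,0,d_nnz,0,o_nnz);
  return(0);
}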
2509: /*@C
2510: MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2511: (block compressed row). For good matrix assembly performance
2512: the user should preallocate the matrix storage by setting the parameters
2513: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2514: performance can be increased by more than a factor of 50.
2516: Collective
2518: Input Parameters:
2519: + comm - MPI communicator
2520: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2521: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2522: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2523: This value should be the same as the local size used in creating the
2524: y vector for the matrix-vector product y = Ax.
2525: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2526: This value should be the same as the local size used in creating the
2527: x vector for the matrix-vector product y = Ax.
2528: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2529: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2530: . d_nz - number of block nonzeros per block row in diagonal portion of local
2531: submatrix (same for all local rows)
2532: . d_nnz - array containing the number of block nonzeros in the various block rows
2533: in the upper triangular portion of the diagonal portion of the local submatrix
2534: (possibly different for each block row) or NULL.
2535: If you plan to factor the matrix you must leave room for the diagonal entry and
2536: set its value even if it is zero.
2537: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2538: submatrix (same for all local rows).
2539: - o_nnz - array containing the number of nonzeros in the various block rows of the
2540: off-diagonal portion of the local submatrix (possibly different for
2541: each block row) or NULL.
2543: Output Parameter:
2544: . A - the matrix
2546: Options Database Keys:
2547: + -mat_no_unroll - uses code that does not unroll the loops in the
2548: block calculations (much slower)
2549: . -mat_block_size - size of the blocks to use
2550: - -mat_mpi - use the parallel matrix data structures even on one processor
2551: (defaults to using SeqSBAIJ format on one processor)
2553: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2554: MatXXXXSetPreallocation() paradigm instead of this routine directly.
2555: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2557: Notes:
2558: The number of rows and columns must be divisible by blocksize.
2559: This matrix type does not support complex Hermitian operations.
2561: The user MUST specify either the local or global matrix dimensions
2562: (possibly both).
2564: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2565: then it must be used on all processors that share the object for that argument.
2567: If the *_nnz parameter is given then the *_nz parameter is ignored
2569: Storage Information:
2570: For a square global matrix we define each processor's diagonal portion
2571: to be its local rows and the corresponding columns (a square submatrix);
2572: each processor's off-diagonal portion encompasses the remainder of the
2573: local matrix (a rectangular submatrix).
2575: The user can specify preallocated storage for the diagonal part of
2576: the local submatrix with either d_nz or d_nnz (not both). Set
2577: d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2578: memory allocation. Likewise, specify preallocated storage for the
2579: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2581: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2582: the figure below we depict these three local rows and all columns (0-11).
2584: .vb
2585: 0 1 2 3 4 5 6 7 8 9 10 11
2586: --------------------------
2587: row 3 |. . . d d d o o o o o o
2588: row 4 |. . . d d d o o o o o o
2589: row 5 |. . . d d d o o o o o o
2590: --------------------------
2591: .ve
2593: Thus, any entries in the d locations are stored in the d (diagonal)
2594: submatrix, and any entries in the o locations are stored in the
2595: o (off-diagonal) submatrix. Note that the d matrix is stored in
2596: MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2598: Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2599: plus the diagonal part of the d matrix,
2600: and o_nz should indicate the number of block nonzeros per row in the o matrix.
2601: In general, for PDE problems in which most nonzeros are near the diagonal,
2602: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2603: or you will get TERRIBLE performance; see the users' manual chapter on
2604: matrices.
2606: Level: intermediate
2608: .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2609: @*/
2611: PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2612: {
2614: PetscMPIInt size;
2617: MatCreate(comm,A);
2618: MatSetSizes(*A,m,n,M,N);
2619: MPI_Comm_size(comm,&size);
2620: if (size > 1) {
2621: MatSetType(*A,MATMPISBAIJ);
2622: MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
2623: } else {
2624: MatSetType(*A,MATSEQSBAIJ);
2625: MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
2626: }
2627: return(0);
2628: }
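/*
   Added usage sketch (illustrative only, not part of the original PETSc source): the one-call alternative
   to the MatCreate()/MatSetType()/MatMPISBAIJSetPreallocation() sequence. The block size, local size and
   per-block-row nonzero estimates are hypothetical placeholders.
*/
static PetscErrorCode ExampleMatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt mlocal,Mat *A)
{
  /* ~5 block nonzeros per block row expected in the (upper triangular plus diagonal) d part, ~2 in the o part */
  MatCreateSBAIJ(comm,bs,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE,5,NULL,2,NULL,A);
  return(0);
}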
2631: static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2632: {
2633: Mat mat;
2634: Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2636: PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2637: PetscScalar *array;
2640: *newmat = 0;
2642: MatCreate(PetscObjectComm((PetscObject)matin),&mat);
2643: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
2644: MatSetType(mat,((PetscObject)matin)->type_name);
2645: PetscLayoutReference(matin->rmap,&mat->rmap);
2646: PetscLayoutReference(matin->cmap,&mat->cmap);
2648: mat->factortype = matin->factortype;
2649: mat->preallocated = PETSC_TRUE;
2650: mat->assembled = PETSC_TRUE;
2651: mat->insertmode = NOT_SET_VALUES;
2653: a = (Mat_MPISBAIJ*)mat->data;
2654: a->bs2 = oldmat->bs2;
2655: a->mbs = oldmat->mbs;
2656: a->nbs = oldmat->nbs;
2657: a->Mbs = oldmat->Mbs;
2658: a->Nbs = oldmat->Nbs;
2660: a->size = oldmat->size;
2661: a->rank = oldmat->rank;
2662: a->donotstash = oldmat->donotstash;
2663: a->roworiented = oldmat->roworiented;
2664: a->rowindices = 0;
2665: a->rowvalues = 0;
2666: a->getrowactive = PETSC_FALSE;
2667: a->barray = 0;
2668: a->rstartbs = oldmat->rstartbs;
2669: a->rendbs = oldmat->rendbs;
2670: a->cstartbs = oldmat->cstartbs;
2671: a->cendbs = oldmat->cendbs;
2673: /* hash table stuff */
2674: a->ht = 0;
2675: a->hd = 0;
2676: a->ht_size = 0;
2677: a->ht_flag = oldmat->ht_flag;
2678: a->ht_fact = oldmat->ht_fact;
2679: a->ht_total_ct = 0;
2680: a->ht_insert_ct = 0;
2682: PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);
2683: if (oldmat->colmap) {
2684: #if defined(PETSC_USE_CTABLE)
2685: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
2686: #else
2687: PetscMalloc1(a->Nbs,&a->colmap);
2688: PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));
2689: PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);
2690: #endif
2691: } else a->colmap = 0;
2693: if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2694: PetscMalloc1(len,&a->garray);
2695: PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));
2696: PetscArraycpy(a->garray,oldmat->garray,len);
2697: } else a->garray = 0;
2699: MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);
2700: VecDuplicate(oldmat->lvec,&a->lvec);
2701: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);
2702: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
2703: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);
2705: VecDuplicate(oldmat->slvec0,&a->slvec0);
2706: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);
2707: VecDuplicate(oldmat->slvec1,&a->slvec1);
2708: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);
2710: VecGetLocalSize(a->slvec1,&nt);
2711: VecGetArray(a->slvec1,&array);
2712: VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);
2713: VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);
2714: VecRestoreArray(a->slvec1,&array);
2715: VecGetArray(a->slvec0,&array);
2716: VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);
2717: VecRestoreArray(a->slvec0,&array);
2718: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);
2719: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);
2720: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);
2721: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);
2722: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);
2724: /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2725: PetscObjectReference((PetscObject)oldmat->sMvctx);
2726: a->sMvctx = oldmat->sMvctx;
2727: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);
2729: MatDuplicate(oldmat->A,cpvalues,&a->A);
2730: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
2731: MatDuplicate(oldmat->B,cpvalues,&a->B);
2732: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);
2733: PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
2734: *newmat = mat;
2735: return(0);
2736: }
2738: PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2739: {
2741: PetscInt i,nz,j,rstart,rend;
2742: PetscScalar *vals,*buf;
2743: MPI_Comm comm;
2744: MPI_Status status;
2745: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2746: PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2747: PetscInt *procsnz = 0,jj,*mycols,*ibuf;
2748: PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2749: PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2750: PetscInt dcount,kmax,k,nzcount,tmp;
2751: int fd;
2752: PetscBool isbinary;
2755: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
2756: if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
2758: /* force binary viewer to load .info file if it has not yet done so */
2759: PetscViewerSetUp(viewer);
2760: PetscObjectGetComm((PetscObject)viewer,&comm);
2761: PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");
2762: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
2763: PetscOptionsEnd();
2764: if (bs < 0) bs = 1;
2766: MPI_Comm_size(comm,&size);
2767: MPI_Comm_rank(comm,&rank);
2768: PetscViewerBinaryGetDescriptor(viewer,&fd);
2769: if (!rank) {
2770: PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);
2771: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2772: if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2773: }
2775: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
2776: M = header[1];
2777: N = header[2];
2779: /* If global sizes are set, check if they are consistent with that given in the file */
2780: if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2781: if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2783: if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2785: /*
2786: This code adds extra rows to make sure the number of rows is
2787: divisible by the blocksize
2788: */
2789: Mbs = M/bs;
2790: extra_rows = bs - M + bs*(Mbs);
2791: if (extra_rows == bs) extra_rows = 0;
2792: else Mbs++;
2793: if (extra_rows && !rank) {
2794: PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
2795: }
2797: /* determine ownership of all rows */
2798: if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2799: mbs = Mbs/size + ((Mbs % size) > rank);
2800: m = mbs*bs;
2801: } else { /* User Set */
2802: m = newmat->rmap->n;
2803: mbs = m/bs;
2804: }
2805: PetscMalloc2(size+1,&rowners,size+1,&browners);
2806: PetscMPIIntCast(mbs,&mmbs);
2807: MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
2808: rowners[0] = 0;
2809: for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2810: for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2811: rstart = rowners[rank];
2812: rend = rowners[rank+1];
2814: /* distribute row lengths to all processors */
2815: PetscMalloc1((rend-rstart)*bs,&locrowlens);
2816: if (!rank) {
2817: PetscMalloc1(M+extra_rows,&rowlengths);
2818: PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);
2819: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2820: PetscMalloc1(size,&sndcounts);
2821: for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2822: MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);
2823: PetscFree(sndcounts);
2824: } else {
2825: MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);
2826: }
2828: if (!rank) { /* procs[0] */
2829: /* calculate the number of nonzeros on each processor */
2830: PetscCalloc1(size,&procsnz);
2831: for (i=0; i<size; i++) {
2832: for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2833: procsnz[i] += rowlengths[j];
2834: }
2835: }
2836: PetscFree(rowlengths);
2838: /* determine max buffer needed and allocate it */
2839: maxnz = 0;
2840: for (i=0; i<size; i++) {
2841: maxnz = PetscMax(maxnz,procsnz[i]);
2842: }
2843: PetscMalloc1(maxnz,&cols);
2845: /* read in my part of the matrix column indices */
2846: nz = procsnz[0];
2847: PetscMalloc1(nz,&ibuf);
2848: mycols = ibuf;
2849: if (size == 1) nz -= extra_rows;
2850: PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);
2851: if (size == 1) {
2852: for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2853: }
2855: /* read in each of the other processors' parts (except the last) and ship them off */
2856: for (i=1; i<size-1; i++) {
2857: nz = procsnz[i];
2858: PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);
2859: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
2860: }
2861: /* read in the stuff for the last proc */
2862: if (size != 1) {
2863: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
2864: PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);
2865: for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2866: MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);
2867: }
2868: PetscFree(cols);
2869: } else { /* procs[i], i>0 */
2870: /* determine buffer space needed for message */
2871: nz = 0;
2872: for (i=0; i<m; i++) nz += locrowlens[i];
2873: PetscMalloc1(nz,&ibuf);
2874: mycols = ibuf;
2875: /* receive message of column indices*/
2876: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
2877: MPI_Get_count(&status,MPIU_INT,&maxnz);
2878: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2879: }
2881: /* loop over local rows, determining number of off diagonal entries */
2882: PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);
2883: PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);
2884: rowcount = 0;
2885: nzcount = 0;
2886: for (i=0; i<mbs; i++) {
2887: dcount = 0;
2888: odcount = 0;
2889: for (j=0; j<bs; j++) {
2890: kmax = locrowlens[rowcount];
2891: for (k=0; k<kmax; k++) {
2892: tmp = mycols[nzcount++]/bs; /* block col. index */
2893: if (!mask[tmp]) {
2894: mask[tmp] = 1;
2895: if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2896: else masked1[dcount++] = tmp; /* entry in diag portion */
2897: }
2898: }
2899: rowcount++;
2900: }
2902: dlens[i] = dcount; /* d_nzz[i] */
2903: odlens[i] = odcount; /* o_nzz[i] */
2905: /* zero out the mask elements we set */
2906: for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2907: for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2908: }
2909: MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);
2910: MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);
2911: MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);
2913: if (!rank) {
2914: PetscMalloc1(maxnz,&buf);
2915: /* read in my part of the matrix numerical values */
2916: nz = procsnz[0];
2917: vals = buf;
2918: mycols = ibuf;
2919: if (size == 1) nz -= extra_rows;
2920: PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);
2921: if (size == 1) {
2922: for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2923: }
2925: /* insert into matrix */
2926: jj = rstart*bs;
2927: for (i=0; i<m; i++) {
2928: MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2929: mycols += locrowlens[i];
2930: vals += locrowlens[i];
2931: jj++;
2932: }
2934: /* read in other processors (except the last one) and ship out */
2935: for (i=1; i<size-1; i++) {
2936: nz = procsnz[i];
2937: vals = buf;
2938: PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);
2939: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
2940: }
2941: /* the last proc */
2942: if (size != 1) {
2943: nz = procsnz[i] - extra_rows;
2944: vals = buf;
2945: PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);
2946: for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2947: MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);
2948: }
2949: PetscFree(procsnz);
2951: } else {
2952: /* receive numeric values */
2953: PetscMalloc1(nz,&buf);
2955: /* receive message of values*/
2956: vals = buf;
2957: mycols = ibuf;
2958: MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
2959: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2960: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2962: /* insert into matrix */
2963: jj = rstart*bs;
2964: for (i=0; i<m; i++) {
2965: MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2966: mycols += locrowlens[i];
2967: vals += locrowlens[i];
2968: jj++;
2969: }
2970: }
2972: PetscFree(locrowlens);
2973: PetscFree(buf);
2974: PetscFree(ibuf);
2975: PetscFree2(rowners,browners);
2976: PetscFree2(dlens,odlens);
2977: PetscFree3(mask,masked1,masked2);
2978: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
2979: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
2980: return(0);
2981: }
2983: /*XXXXX@
2984: MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2986: Input Parameters:
2987: . mat - the matrix
2988: . fact - factor
2990: Not Collective on Mat, each process can have a different hash factor
2992: Level: advanced
2994: Notes:
2995: This can also be set by the command line option: -mat_use_hash_table fact
2997: .seealso: MatSetOption()
2998: @XXXXX*/
3001: PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
3002: {
3003: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
3004: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data;
3005: PetscReal atmp;
3006: PetscReal *work,*svalues,*rvalues;
3008: PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
3009: PetscMPIInt rank,size;
3010: PetscInt *rowners_bs,dest,count,source;
3011: PetscScalar *va;
3012: MatScalar *ba;
3013: MPI_Status stat;
3016: if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
3017: MatGetRowMaxAbs(a->A,v,NULL);
3018: VecGetArray(v,&va);
3020: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3021: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
3023: bs = A->rmap->bs;
3024: mbs = a->mbs;
3025: Mbs = a->Mbs;
3026: ba = b->a;
3027: bi = b->i;
3028: bj = b->j;
3030: /* find ownerships */
3031: rowners_bs = A->rmap->range;
3033: /* each proc creates an array to be distributed */
3034: PetscCalloc1(bs*Mbs,&work);
3036: /* row_max for B */
3037: if (rank != size-1) {
3038: for (i=0; i<mbs; i++) {
3039: ncols = bi[1] - bi[0]; bi++;
3040: brow = bs*i;
3041: for (j=0; j<ncols; j++) {
3042: bcol = bs*(*bj);
3043: for (kcol=0; kcol<bs; kcol++) {
3044: col = bcol + kcol; /* local col index */
3045: col += rowners_bs[rank+1]; /* global col index */
3046: for (krow=0; krow<bs; krow++) {
3047: atmp = PetscAbsScalar(*ba); ba++;
3048: row = brow + krow; /* local row index */
3049: if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
3050: if (work[col] < atmp) work[col] = atmp;
3051: }
3052: }
3053: bj++;
3054: }
3055: }
3057: /* send values to its owners */
3058: for (dest=rank+1; dest<size; dest++) {
3059: svalues = work + rowners_bs[dest];
3060: count = rowners_bs[dest+1]-rowners_bs[dest];
3061: MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));
3062: }
3063: }
3065: /* receive values */
3066: if (rank) {
3067: rvalues = work;
3068: count = rowners_bs[rank+1]-rowners_bs[rank];
3069: for (source=0; source<rank; source++) {
3070: MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);
3071: /* process values */
3072: for (i=0; i<count; i++) {
3073: if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
3074: }
3075: }
3076: }
3078: VecRestoreArray(v,&va);
3079: PetscFree(work);
3080: return(0);
3081: }
3083: PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3084: {
3085: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
3086: PetscErrorCode ierr;
3087: PetscInt mbs=mat->mbs,bs=matin->rmap->bs;
3088: PetscScalar *x,*ptr,*from;
3089: Vec bb1;
3090: const PetscScalar *b;
3093: if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3094: if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3096: if (flag == SOR_APPLY_UPPER) {
3097: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
3098: return(0);
3099: }
3101: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3102: if (flag & SOR_ZERO_INITIAL_GUESS) {
3103: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
3104: its--;
3105: }
3107: VecDuplicate(bb,&bb1);
3108: while (its--) {
3110: /* lower triangular part: slvec0b = - B^T*xx */
3111: (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);
3113: /* copy xx into slvec0a */
3114: VecGetArray(mat->slvec0,&ptr);
3115: VecGetArray(xx,&x);
3116: PetscArraycpy(ptr,x,bs*mbs);
3117: VecRestoreArray(mat->slvec0,&ptr);
3119: VecScale(mat->slvec0,-1.0);
3121: /* copy bb into slvec1a */
3122: VecGetArray(mat->slvec1,&ptr);
3123: VecGetArrayRead(bb,&b);
3124: PetscArraycpy(ptr,b,bs*mbs);
3125: VecRestoreArray(mat->slvec1,&ptr);
3127: /* set slvec1b = 0 */
3128: VecSet(mat->slvec1b,0.0);
3130: VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
3131: VecRestoreArray(xx,&x);
3132: VecRestoreArrayRead(bb,&b);
3133: VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
3135: /* upper triangular part: bb1 = bb1 - B*x */
3136: (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);
3138: /* local diagonal sweep */
3139: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
3140: }
3141: VecDestroy(&bb1);
3142: } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3143: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
3144: } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3145: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
3146: } else if (flag & SOR_EISENSTAT) {
3147: Vec xx1;
3148: PetscBool hasop;
3149: const PetscScalar *diag;
3150: PetscScalar *sl,scale = (omega - 2.0)/omega;
3151: PetscInt i,n;
3153: if (!mat->xx1) {
3154: VecDuplicate(bb,&mat->xx1);
3155: VecDuplicate(bb,&mat->bb1);
3156: }
3157: xx1 = mat->xx1;
3158: bb1 = mat->bb1;
3160: (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);
3162: if (!mat->diag) {
3163: /* this is wrong if the same matrix is reused with new nonzero values */
3164: MatCreateVecs(matin,&mat->diag,NULL);
3165: MatGetDiagonal(matin,mat->diag);
3166: }
3167: MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);
3169: if (hasop) {
3170: MatMultDiagonalBlock(matin,xx,bb1);
3171: VecAYPX(mat->slvec1a,scale,bb);
3172: } else {
3173: /*
3174: These two lines are replaced by code that may be a bit faster for a good compiler
3175: VecPointwiseMult(mat->slvec1a,mat->diag,xx);
3176: VecAYPX(mat->slvec1a,scale,bb);
3177: */
3178: VecGetArray(mat->slvec1a,&sl);
3179: VecGetArrayRead(mat->diag,&diag);
3180: VecGetArrayRead(bb,&b);
3181: VecGetArray(xx,&x);
3182: VecGetLocalSize(xx,&n);
3183: if (omega == 1.0) {
3184: for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
3185: PetscLogFlops(2.0*n);
3186: } else {
3187: for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
3188: PetscLogFlops(3.0*n);
3189: }
3190: VecRestoreArray(mat->slvec1a,&sl);
3191: VecRestoreArrayRead(mat->diag,&diag);
3192: VecRestoreArrayRead(bb,&b);
3193: VecRestoreArray(xx,&x);
3194: }
3196: /* multiply off-diagonal portion of matrix */
3197: VecSet(mat->slvec1b,0.0);
3198: (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);
3199: VecGetArray(mat->slvec0,&from);
3200: VecGetArray(xx,&x);
3201: PetscArraycpy(from,x,bs*mbs);
3202: VecRestoreArray(mat->slvec0,&from);
3203: VecRestoreArray(xx,&x);
3204: VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
3205: VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
3206: (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);
3208: /* local sweep */
3209: (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);
3210: VecAXPY(xx,1.0,xx1);
3211: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3212: return(0);
3213: }
3215: PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3216: {
3217: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
3219: Vec lvec1,bb1;
3222: if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3223: if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3225: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3226: if (flag & SOR_ZERO_INITIAL_GUESS) {
3227: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
3228: its--;
3229: }
3231: VecDuplicate(mat->lvec,&lvec1);
3232: VecDuplicate(bb,&bb1);
3233: while (its--) {
3234: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
3236: /* lower diagonal part: bb1 = bb - B^T*xx */
3237: (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);
3238: VecScale(lvec1,-1.0);
3240: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
3241: VecCopy(bb,bb1);
3242: VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);
3244: /* upper diagonal part: bb1 = bb1 - B*x */
3245: VecScale(mat->lvec,-1.0);
3246: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);
3248: VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);
3250: /* diagonal sweep */
3251: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
3252: }
3253: VecDestroy(&lvec1);
3254: VecDestroy(&bb1);
3255: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3256: return(0);
3257: }
3259: /*@
3260: MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
3261: in standard CSR format.
3263: Collective
3265: Input Parameters:
3266: + comm - MPI communicator
3267: . bs - the block size, only a block size of 1 is supported
3268: . m - number of local rows (Cannot be PETSC_DECIDE)
3269: . n - This value should be the same as the local size used in creating the
3270: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3271: calculated if N is given) For square matrices n is almost always m.
3272: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3273: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3274: . i - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of block elements in that block row of the matrix
3275: . j - column indices
3276: - a - matrix values
3278: Output Parameter:
3279: . mat - the matrix
3281: Level: intermediate
3283: Notes:
3284: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3285: thus you CANNOT change the matrix entries by changing the values of a[] after you have
3286: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3288: The i and j indices are 0-based, and the i indices are offsets into the local j array.
3290: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3291: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3292: @*/
3293: PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3294: {
3299: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3300: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3301: MatCreate(comm,mat);
3302: MatSetSizes(*mat,m,n,M,N);
3303: MatSetType(*mat,MATMPISBAIJ);
3304: MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);
3305: return(0);
3306: }
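/*
   Added usage sketch (illustrative only, not part of the original PETSc source): building a 3x3 SBAIJ
   matrix (bs = 1, the only block size supported here) from CSR arrays holding the upper triangle of a
   tridiagonal matrix. For simplicity the sketch assumes a one-process communicator, so local and global
   column indices coincide.
*/
static PetscErrorCode ExampleCreateMPISBAIJWithArrays(MPI_Comm comm,Mat *A)
{
  const PetscInt    i[4] = {0,2,4,5};               /* row 0 has 2 entries, row 1 has 2, row 2 has 1 */
  const PetscInt    j[5] = {0,1,1,2,2};             /* column indices on or above the diagonal */
  const PetscScalar v[5] = {2.0,-1.0,2.0,-1.0,2.0};

  MatCreateMPISBAIJWithArrays(comm,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,i,j,v,A);
  return(0);
}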
3309: /*@C
3310: MatMPISBAIJSetPreallocationCSR - Preallocates and fills a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
3312: Collective
3314: Input Parameters:
3315: + B - the matrix
3316: . bs - the block size
3317: . i - the indices into j for the start of each local row (starts with zero)
3318: . j - the column indices for each local row (starts with zero); these must be sorted within each row
3319: - v - optional values in the matrix
3321: Level: advanced
3323: Notes:
3324: Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
3325: and usually the numerical values as well
3327: Any entries below the diagonal are ignored
3329: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3330: @*/
3331: PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3332: {
3336: PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3337: return(0);
3338: }
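/*
   Added usage sketch (illustrative only, not part of the original PETSc source): the same CSR data as in
   the sketch above, supplied to a matrix that has already been created and typed. Any entries below the
   diagonal appearing in j[] would simply be ignored, and the routine assembles the matrix as well.
*/
static PetscErrorCode ExampleSetPreallocationCSR(MPI_Comm comm,Mat *B)
{
  const PetscInt    i[4] = {0,2,4,5};
  const PetscInt    j[5] = {0,1,1,2,2};
  const PetscScalar v[5] = {2.0,-1.0,2.0,-1.0,2.0};

  MatCreate(comm,B);
  MatSetSizes(*B,3,3,PETSC_DETERMINE,PETSC_DETERMINE);
  MatSetType(*B,MATMPISBAIJ);
  MatMPISBAIJSetPreallocationCSR(*B,1,i,j,v);
  return(0);
}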
3340: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3341: {
3343: PetscInt m,N,i,rstart,nnz,Ii,bs,cbs;
3344: PetscInt *indx;
3345: PetscScalar *values;
3348: MatGetSize(inmat,&m,&N);
3349: if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3350: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
3351: PetscInt *dnz,*onz,mbs,Nbs,nbs;
3352: PetscInt *bindx,rmax=a->rmax,j;
3353: PetscMPIInt rank,size;
3355: MatGetBlockSizes(inmat,&bs,&cbs);
3356: mbs = m/bs; Nbs = N/cbs;
3357: if (n == PETSC_DECIDE) {
3358: PetscSplitOwnershipBlock(comm,cbs,&n,&N);
3359: }
3360: nbs = n/cbs;
3362: PetscMalloc1(rmax,&bindx);
3363: MatPreallocateInitialize(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */
3365: MPI_Comm_rank(comm,&rank);
3366: MPI_Comm_size(comm,&size);
3367: if (rank == size-1) {
3368: /* Check sum(nbs) = Nbs */
3369: if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3370: }
3372: rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3373: MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);
3374: for (i=0; i<mbs; i++) {
3375: MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL); /* non-blocked nnz and indx */
3376: nnz = nnz/bs;
3377: for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3378: MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);
3379: MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);
3380: }
3381: MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);
3382: PetscFree(bindx);
3384: MatCreate(comm,outmat);
3385: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
3386: MatSetBlockSizes(*outmat,bs,cbs);
3387: MatSetType(*outmat,MATSBAIJ);
3388: MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);
3389: MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);
3390: MatPreallocateFinalize(dnz,onz);
3391: }
3393: /* numeric phase */
3394: MatGetBlockSizes(inmat,&bs,&cbs);
3395: MatGetOwnershipRange(*outmat,&rstart,NULL);
3397: MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);
3398: for (i=0; i<m; i++) {
3399: MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);
3400: Ii = i + rstart;
3401: MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
3402: MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);
3403: }
3404: MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);
3405: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
3406: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
3407: return(0);
3408: }