Actual source code: mpisbaij.c
2: #include <../src/mat/impls/baij/mpi/mpibaij.h>
3: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4: #include <../src/mat/impls/sbaij/seq/sbaij.h>
5: #include <petscblaslapack.h>
7: #if defined(PETSC_HAVE_ELEMENTAL)
8: PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9: #endif
10: #if defined(PETSC_HAVE_SCALAPACK)
11: PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
12: #endif
14: /* This could be moved to matimpl.h */
15: static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
16: {
17: Mat preallocator;
18: PetscInt r,rstart,rend;
19: PetscInt bs,i,m,n,M,N;
20: PetscBool cong = PETSC_TRUE;
24: for (i = 0; i < nm; i++) {
26: PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);
28: }
30: MatGetBlockSize(B,&bs);
31: MatGetSize(B,&M,&N);
32: MatGetLocalSize(B,&m,&n);
33: MatCreate(PetscObjectComm((PetscObject)B),&preallocator);
34: MatSetType(preallocator,MATPREALLOCATOR);
35: MatSetBlockSize(preallocator,bs);
36: MatSetSizes(preallocator,m,n,M,N);
37: MatSetUp(preallocator);
38: MatGetOwnershipRange(preallocator,&rstart,&rend);
39: for (r = rstart; r < rend; ++r) {
40: PetscInt ncols;
41: const PetscInt *row;
42: const PetscScalar *vals;
44: for (i = 0; i < nm; i++) {
45: MatGetRow(X[i],r,&ncols,&row,&vals);
46: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
47: if (symm && symm[i]) {
48: MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);
49: }
50: MatRestoreRow(X[i],r,&ncols,&row,&vals);
51: }
52: }
53: MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
54: MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);
55: MatPreallocatorPreallocate(preallocator,fill,B);
56: MatDestroy(&preallocator);
57: return 0;
58: }
60: PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
61: {
62: Mat B;
63: PetscInt r;
65: if (reuse != MAT_REUSE_MATRIX) {
66: PetscBool symm = PETSC_TRUE,isdense;
67: PetscInt bs;
69: MatCreate(PetscObjectComm((PetscObject)A),&B);
70: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
71: MatSetType(B,newtype);
72: MatGetBlockSize(A,&bs);
73: MatSetBlockSize(B,bs);
74: PetscLayoutSetUp(B->rmap);
75: PetscLayoutSetUp(B->cmap);
76: PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");
77: if (!isdense) {
78: MatGetRowUpperTriangular(A);
79: MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);
80: MatRestoreRowUpperTriangular(A);
81: } else {
82: MatSetUp(B);
83: }
84: } else {
85: B = *newmat;
86: MatZeroEntries(B);
87: }
89: MatGetRowUpperTriangular(A);
90: for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
91: PetscInt ncols;
92: const PetscInt *row;
93: const PetscScalar *vals;
95: MatGetRow(A,r,&ncols,&row,&vals);
96: MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);
97: #if defined(PETSC_USE_COMPLEX)
98: if (A->hermitian) {
99: PetscInt i;
100: for (i = 0; i < ncols; i++) {
101: MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);
102: }
103: } else {
104: MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);
105: }
106: #else
107: MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);
108: #endif
109: MatRestoreRow(A,r,&ncols,&row,&vals);
110: }
111: MatRestoreRowUpperTriangular(A);
112: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
113: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
115: if (reuse == MAT_INPLACE_MATRIX) {
116: MatHeaderReplace(A,&B);
117: } else {
118: *newmat = B;
119: }
120: return 0;
121: }
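/*
   A minimal usage sketch for the basic conversion above; the target type
   (MATMPIAIJ) is an illustrative choice, and A is assumed to be an assembled
   MATMPISBAIJ matrix:

     Mat B;
     MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);   expands storage to both triangles
     MatDestroy(&B);
*/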
123: PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
124: {
125: Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;
127: MatStoreValues(aij->A);
128: MatStoreValues(aij->B);
129: return 0;
130: }
132: PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
133: {
134: Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;
136: MatRetrieveValues(aij->A);
137: MatRetrieveValues(aij->B);
138: return 0;
139: }
141: #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
142: { \
143: brow = row/bs; \
144: rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
145: rmax = aimax[brow]; nrow = ailen[brow]; \
146: bcol = col/bs; \
147: ridx = row % bs; cidx = col % bs; \
148: low = 0; high = nrow; \
149: while (high-low > 3) { \
150: t = (low+high)/2; \
151: if (rp[t] > bcol) high = t; \
152: else low = t; \
153: } \
154: for (_i=low; _i<high; _i++) { \
155: if (rp[_i] > bcol) break; \
156: if (rp[_i] == bcol) { \
157: bap = ap + bs2*_i + bs*cidx + ridx; \
158: if (addv == ADD_VALUES) *bap += value; \
159: else *bap = value; \
160: goto a_noinsert; \
161: } \
162: } \
163: if (a->nonew == 1) goto a_noinsert; \
165: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
166: N = nrow++ - 1; \
167: /* shift up all the later entries in this row */ \
168: PetscArraymove(rp+_i+1,rp+_i,N-_i+1); \
169: PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1)); \
170: PetscArrayzero(ap+bs2*_i,bs2); \
171: rp[_i] = bcol; \
172: ap[bs2*_i + bs*cidx + ridx] = value; \
173: A->nonzerostate++;\
174: a_noinsert:; \
175: ailen[brow] = nrow; \
176: }
178: #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
179: { \
180: brow = row/bs; \
181: rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
182: rmax = bimax[brow]; nrow = bilen[brow]; \
183: bcol = col/bs; \
184: ridx = row % bs; cidx = col % bs; \
185: low = 0; high = nrow; \
186: while (high-low > 3) { \
187: t = (low+high)/2; \
188: if (rp[t] > bcol) high = t; \
189: else low = t; \
190: } \
191: for (_i=low; _i<high; _i++) { \
192: if (rp[_i] > bcol) break; \
193: if (rp[_i] == bcol) { \
194: bap = ap + bs2*_i + bs*cidx + ridx; \
195: if (addv == ADD_VALUES) *bap += value; \
196: else *bap = value; \
197: goto b_noinsert; \
198: } \
199: } \
200: if (b->nonew == 1) goto b_noinsert; \
202: MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
203: N = nrow++ - 1; \
204: /* shift up all the later entries in this row */ \
205: PetscArraymove(rp+_i+1,rp+_i,N-_i+1); \
206: PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1)); \
207: PetscArrayzero(ap+bs2*_i,bs2); \
208: rp[_i] = bcol; \
209: ap[bs2*_i + bs*cidx + ridx] = value; \
210: B->nonzerostate++;\
211: b_noinsert:; \
212: bilen[brow] = nrow; \
213: }
215: /* Only add/insert a(i,j) with i<=j (blocks).
216:    Any a(i,j) with i>j input by the user is ignored or generates an error
217: */
218: PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
219: {
220: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
221: MatScalar value;
222: PetscBool roworiented = baij->roworiented;
223: PetscInt i,j,row,col;
224: PetscInt rstart_orig=mat->rmap->rstart;
225: PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
226: PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs;
228: /* Some variables required in the macro */
229: Mat A = baij->A;
230: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
231: PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
232: MatScalar *aa =a->a;
234: Mat B = baij->B;
235: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
236: PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
237: MatScalar *ba =b->a;
239: PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol;
240: PetscInt low,high,t,ridx,cidx,bs2=a->bs2;
241: MatScalar *ap,*bap;
243: /* for stash */
244: PetscInt n_loc, *in_loc = NULL;
245: MatScalar *v_loc = NULL;
247: if (!baij->donotstash) {
248: if (n > baij->n_loc) {
249: PetscFree(baij->in_loc);
250: PetscFree(baij->v_loc);
251: PetscMalloc1(n,&baij->in_loc);
252: PetscMalloc1(n,&baij->v_loc);
254: baij->n_loc = n;
255: }
256: in_loc = baij->in_loc;
257: v_loc = baij->v_loc;
258: }
260: for (i=0; i<m; i++) {
261: if (im[i] < 0) continue;
263: if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
264: row = im[i] - rstart_orig; /* local row index */
265: for (j=0; j<n; j++) {
266: if (im[i]/bs > in[j]/bs) {
267: if (a->ignore_ltriangular) {
268: continue; /* ignore lower triangular blocks */
269: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for the SBAIJ format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
270: }
271: if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
272: col = in[j] - cstart_orig; /* local col index */
273: brow = row/bs; bcol = col/bs;
274: if (brow > bcol) continue; /* ignore lower triangular blocks of A */
275: if (roworiented) value = v[i*n+j];
276: else value = v[i+j*m];
277: MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
278: /* MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv); */
279: } else if (in[j] < 0) continue;
281: else { /* off-diag entry (B) */
282: if (mat->was_assembled) {
283: if (!baij->colmap) {
284: MatCreateColmap_MPIBAIJ_Private(mat);
285: }
286: #if defined(PETSC_USE_CTABLE)
287: PetscTableFind(baij->colmap,in[j]/bs + 1,&col);
288: col = col - 1;
289: #else
290: col = baij->colmap[in[j]/bs] - 1;
291: #endif
292: if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
293: MatDisAssemble_MPISBAIJ(mat);
294: col = in[j];
295: /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
296: B = baij->B;
297: b = (Mat_SeqBAIJ*)(B)->data;
298: bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
299: ba = b->a;
300: } else col += in[j]%bs;
301: } else col = in[j];
302: if (roworiented) value = v[i*n+j];
303: else value = v[i+j*m];
304: MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
305: /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
306: }
307: }
308: } else { /* off processor entry */
310: if (!baij->donotstash) {
311: mat->assembled = PETSC_FALSE;
312: n_loc = 0;
313: for (j=0; j<n; j++) {
314: if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
315: in_loc[n_loc] = in[j];
316: if (roworiented) {
317: v_loc[n_loc] = v[i*n+j];
318: } else {
319: v_loc[n_loc] = v[j*m+i];
320: }
321: n_loc++;
322: }
323: MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);
324: }
325: }
326: }
327: return 0;
328: }
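/*
   Caller-side sketch of the upper-triangular restriction enforced above
   (indices and values are illustrative assumptions; mat is the user's
   MPISBAIJ matrix):

     PetscInt    i = 1, j = 3;
     PetscScalar v = 2.0;
     MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);
     MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);   kept: block row <= block column
     MatSetValues(mat,1,&j,1,&i,&v,INSERT_VALUES);   silently ignored: lower triangle
*/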
330: static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
331: {
332: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
333: PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N;
334: PetscInt *imax =a->imax,*ai=a->i,*ailen=a->ilen;
335: PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
336: PetscBool roworiented=a->roworiented;
337: const PetscScalar *value = v;
338: MatScalar *ap,*aa = a->a,*bap;
340: if (col < row) {
341: if (a->ignore_ltriangular) return 0; /* ignore lower triangular block */
342: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for the SBAIJ format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
343: }
344: rp = aj + ai[row];
345: ap = aa + bs2*ai[row];
346: rmax = imax[row];
347: nrow = ailen[row];
348: value = v;
349: low = 0;
350: high = nrow;
352: while (high-low > 7) {
353: t = (low+high)/2;
354: if (rp[t] > col) high = t;
355: else low = t;
356: }
357: for (i=low; i<high; i++) {
358: if (rp[i] > col) break;
359: if (rp[i] == col) {
360: bap = ap + bs2*i;
361: if (roworiented) {
362: if (is == ADD_VALUES) {
363: for (ii=0; ii<bs; ii++) {
364: for (jj=ii; jj<bs2; jj+=bs) {
365: bap[jj] += *value++;
366: }
367: }
368: } else {
369: for (ii=0; ii<bs; ii++) {
370: for (jj=ii; jj<bs2; jj+=bs) {
371: bap[jj] = *value++;
372: }
373: }
374: }
375: } else {
376: if (is == ADD_VALUES) {
377: for (ii=0; ii<bs; ii++) {
378: for (jj=0; jj<bs; jj++) {
379: *bap++ += *value++;
380: }
381: }
382: } else {
383: for (ii=0; ii<bs; ii++) {
384: for (jj=0; jj<bs; jj++) {
385: *bap++ = *value++;
386: }
387: }
388: }
389: }
390: goto noinsert2;
391: }
392: }
393: if (nonew == 1) goto noinsert2;
395: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
396: N = nrow++ - 1; high++;
397: /* shift up all the later entries in this row */
398: PetscArraymove(rp+i+1,rp+i,N-i+1);
399: PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
400: rp[i] = col;
401: bap = ap + bs2*i;
402: if (roworiented) {
403: for (ii=0; ii<bs; ii++) {
404: for (jj=ii; jj<bs2; jj+=bs) {
405: bap[jj] = *value++;
406: }
407: }
408: } else {
409: for (ii=0; ii<bs; ii++) {
410: for (jj=0; jj<bs; jj++) {
411: *bap++ = *value++;
412: }
413: }
414: }
415: noinsert2:;
416: ailen[row] = nrow;
417: return 0;
418: }
420: /*
421: This routine is exactly duplicated in mpibaij.c
422: */
423: static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
424: {
425: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
426: PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N;
427: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
428: PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
429: PetscBool roworiented=a->roworiented;
430: const PetscScalar *value = v;
431: MatScalar *ap,*aa = a->a,*bap;
433: rp = aj + ai[row];
434: ap = aa + bs2*ai[row];
435: rmax = imax[row];
436: nrow = ailen[row];
437: low = 0;
438: high = nrow;
439: value = v;
440: while (high-low > 7) {
441: t = (low+high)/2;
442: if (rp[t] > col) high = t;
443: else low = t;
444: }
445: for (i=low; i<high; i++) {
446: if (rp[i] > col) break;
447: if (rp[i] == col) {
448: bap = ap + bs2*i;
449: if (roworiented) {
450: if (is == ADD_VALUES) {
451: for (ii=0; ii<bs; ii++) {
452: for (jj=ii; jj<bs2; jj+=bs) {
453: bap[jj] += *value++;
454: }
455: }
456: } else {
457: for (ii=0; ii<bs; ii++) {
458: for (jj=ii; jj<bs2; jj+=bs) {
459: bap[jj] = *value++;
460: }
461: }
462: }
463: } else {
464: if (is == ADD_VALUES) {
465: for (ii=0; ii<bs; ii++,value+=bs) {
466: for (jj=0; jj<bs; jj++) {
467: bap[jj] += value[jj];
468: }
469: bap += bs;
470: }
471: } else {
472: for (ii=0; ii<bs; ii++,value+=bs) {
473: for (jj=0; jj<bs; jj++) {
474: bap[jj] = value[jj];
475: }
476: bap += bs;
477: }
478: }
479: }
480: goto noinsert2;
481: }
482: }
483: if (nonew == 1) goto noinsert2;
485: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
486: N = nrow++ - 1; high++;
487: /* shift up all the later entries in this row */
488: PetscArraymove(rp+i+1,rp+i,N-i+1);
489: PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
490: rp[i] = col;
491: bap = ap + bs2*i;
492: if (roworiented) {
493: for (ii=0; ii<bs; ii++) {
494: for (jj=ii; jj<bs2; jj+=bs) {
495: bap[jj] = *value++;
496: }
497: }
498: } else {
499: for (ii=0; ii<bs; ii++) {
500: for (jj=0; jj<bs; jj++) {
501: *bap++ = *value++;
502: }
503: }
504: }
505: noinsert2:;
506: ailen[row] = nrow;
507: return 0;
508: }
510: /*
511: This routine could be optimized by removing the need for the block copy below and passing stride information
512: to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
513: */
514: PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
515: {
516: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
517: const MatScalar *value;
518: MatScalar *barray =baij->barray;
519: PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
520: PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs;
521: PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval;
522: PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
524: if (!barray) {
525: PetscMalloc1(bs2,&barray);
526: baij->barray = barray;
527: }
529: if (roworiented) {
530: stepval = (n-1)*bs;
531: } else {
532: stepval = (m-1)*bs;
533: }
534: for (i=0; i<m; i++) {
535: if (im[i] < 0) continue;
537: if (im[i] >= rstart && im[i] < rend) {
538: row = im[i] - rstart;
539: for (j=0; j<n; j++) {
540: if (im[i] > in[j]) {
541: if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
542: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for the SBAIJ format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
543: }
544: /* If n == 1 (row-oriented) or m == 1 (column-oriented) then a copy is not required */
545: if ((roworiented) && (n == 1)) {
546: barray = (MatScalar*) v + i*bs2;
547: } else if ((!roworiented) && (m == 1)) {
548: barray = (MatScalar*) v + j*bs2;
549: } else { /* Here a copy is required */
550: if (roworiented) {
551: value = v + i*(stepval+bs)*bs + j*bs;
552: } else {
553: value = v + j*(stepval+bs)*bs + i*bs;
554: }
555: for (ii=0; ii<bs; ii++,value+=stepval) {
556: for (jj=0; jj<bs; jj++) {
557: *barray++ = *value++;
558: }
559: }
560: barray -=bs2;
561: }
563: if (in[j] >= cstart && in[j] < cend) {
564: col = in[j] - cstart;
565: MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);
566: } else if (in[j] < 0) continue;
568: else {
569: if (mat->was_assembled) {
570: if (!baij->colmap) {
571: MatCreateColmap_MPIBAIJ_Private(mat);
572: }
574: #if defined(PETSC_USE_DEBUG)
575: #if defined(PETSC_USE_CTABLE)
576: { PetscInt data;
577: PetscTableFind(baij->colmap,in[j]+1,&data);
578: if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
579: }
580: #else
581: if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
582: #endif
583: #endif
584: #if defined(PETSC_USE_CTABLE)
585: PetscTableFind(baij->colmap,in[j]+1,&col);
586: col = (col - 1)/bs;
587: #else
588: col = (baij->colmap[in[j]] - 1)/bs;
589: #endif
590: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
591: MatDisAssemble_MPISBAIJ(mat);
592: col = in[j];
593: }
594: } else col = in[j];
595: MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);
596: }
597: }
598: } else {
600: if (!baij->donotstash) {
601: if (roworiented) {
602: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
603: } else {
604: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
605: }
606: }
607: }
608: }
609: return 0;
610: }
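/*
   Blocked insertion sketch for the routine above: indices are in block units
   and v holds a full bs-by-bs dense block. The values are illustrative
   assumptions with bs = 2:

     PetscInt    brow = 0, bcol = 2;                 block indices, brow <= bcol
     PetscScalar blk[4] = {1.0,2.0,2.0,4.0};         one dense 2x2 block
     MatSetValuesBlocked(mat,1,&brow,1,&bcol,blk,INSERT_VALUES);
*/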
612: PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
613: {
614: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
615: PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
616: PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
618: for (i=0; i<m; i++) {
619: if (idxm[i] < 0) continue; /* negative row */
621: if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
622: row = idxm[i] - bsrstart;
623: for (j=0; j<n; j++) {
624: if (idxn[j] < 0) continue; /* negative column */
626: if (idxn[j] >= bscstart && idxn[j] < bscend) {
627: col = idxn[j] - bscstart;
628: MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);
629: } else {
630: if (!baij->colmap) {
631: MatCreateColmap_MPIBAIJ_Private(mat);
632: }
633: #if defined(PETSC_USE_CTABLE)
634: PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);
635: data--;
636: #else
637: data = baij->colmap[idxn[j]/bs]-1;
638: #endif
639: if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
640: else {
641: col = data + idxn[j]%bs;
642: MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);
643: }
644: }
645: }
646: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
647: }
648: return 0;
649: }
651: PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
652: {
653: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
654: PetscReal sum[2],*lnorm2;
656: if (baij->size == 1) {
657: MatNorm(baij->A,type,norm);
658: } else {
659: if (type == NORM_FROBENIUS) {
660: PetscMalloc1(2,&lnorm2);
661: MatNorm(baij->A,type,lnorm2);
662: *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* square of norm(A) */
663: MatNorm(baij->B,type,lnorm2);
664: *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* square of norm(B) */
665: MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
666: *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
667: PetscFree(lnorm2);
668: } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
669: Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
670: Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data;
671: PetscReal *rsum,*rsum2,vabs;
672: PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
673: PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
674: MatScalar *v;
676: PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);
677: PetscArrayzero(rsum,mat->cmap->N);
678: /* Amat */
679: v = amat->a; jj = amat->j;
680: for (brow=0; brow<mbs; brow++) {
681: grow = bs*(rstart + brow);
682: nz = amat->i[brow+1] - amat->i[brow];
683: for (bcol=0; bcol<nz; bcol++) {
684: gcol = bs*(rstart + *jj); jj++;
685: for (col=0; col<bs; col++) {
686: for (row=0; row<bs; row++) {
687: vabs = PetscAbsScalar(*v); v++;
688: rsum[gcol+col] += vabs;
689: /* non-diagonal block */
690: if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
691: }
692: }
693: }
694: PetscLogFlops(nz*bs*bs);
695: }
696: /* Bmat */
697: v = bmat->a; jj = bmat->j;
698: for (brow=0; brow<mbs; brow++) {
699: grow = bs*(rstart + brow);
700: nz = bmat->i[brow+1] - bmat->i[brow];
701: for (bcol=0; bcol<nz; bcol++) {
702: gcol = bs*garray[*jj]; jj++;
703: for (col=0; col<bs; col++) {
704: for (row=0; row<bs; row++) {
705: vabs = PetscAbsScalar(*v); v++;
706: rsum[gcol+col] += vabs;
707: rsum[grow+row] += vabs;
708: }
709: }
710: }
711: PetscLogFlops(nz*bs*bs);
712: }
713: MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
714: *norm = 0.0;
715: for (col=0; col<mat->cmap->N; col++) {
716: if (rsum2[col] > *norm) *norm = rsum2[col];
717: }
718: PetscFree2(rsum,rsum2);
719: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
720: }
721: return 0;
722: }
724: PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
725: {
726: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
727: PetscInt nstash,reallocs;
729: if (baij->donotstash || mat->nooffprocentries) return 0;
731: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
732: MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);
733: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
734: PetscInfo(mat,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);
735: MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);
736: PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);
737: return 0;
738: }
740: PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
741: {
742: Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
743: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data;
744: PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2;
745: PetscInt *row,*col;
746: PetscBool other_disassembled;
747: PetscMPIInt n;
748: PetscBool r1,r2,r3;
749: MatScalar *val;
751: /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
752: if (!baij->donotstash && !mat->nooffprocentries) {
753: while (1) {
754: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
755: if (!flg) break;
757: for (i=0; i<n;) {
758: /* Now identify the consecutive vals belonging to the same row */
759: for (j=i,rstart=row[j]; j<n; j++) {
760: if (row[j] != rstart) break;
761: }
762: if (j < n) ncols = j-i;
763: else ncols = n-i;
764: /* Now assemble all these values with a single function call */
765: MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
766: i = j;
767: }
768: }
769: MatStashScatterEnd_Private(&mat->stash);
770: /* Now process the block-stash. Since the values are stashed column-oriented,
771: clear the row-oriented flags, and restore the original flags after the
772: MatSetValuesBlocked() calls */
773: r1 = baij->roworiented;
774: r2 = a->roworiented;
775: r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
777: baij->roworiented = PETSC_FALSE;
778: a->roworiented = PETSC_FALSE;
780: ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
781: while (1) {
782: MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);
783: if (!flg) break;
785: for (i=0; i<n;) {
786: /* Now identify the consecutive vals belonging to the same row */
787: for (j=i,rstart=row[j]; j<n; j++) {
788: if (row[j] != rstart) break;
789: }
790: if (j < n) ncols = j-i;
791: else ncols = n-i;
792: MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);
793: i = j;
794: }
795: }
796: MatStashScatterEnd_Private(&mat->bstash);
798: baij->roworiented = r1;
799: a->roworiented = r2;
801: ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
802: }
804: MatAssemblyBegin(baij->A,mode);
805: MatAssemblyEnd(baij->A,mode);
807: /* determine if any processor has disassembled, if so we must
808: also disassemble ourselves, in order that we may reassemble. */
809: /*
810: if nonzero structure of submatrix B cannot change then we know that
811: no processor disassembled thus we can skip this stuff
812: */
813: if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
814: MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));
815: if (mat->was_assembled && !other_disassembled) {
816: MatDisAssemble_MPISBAIJ(mat);
817: }
818: }
820: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
821: MatSetUpMultiply_MPISBAIJ(mat); /* setup Mvctx and sMvctx */
822: }
823: MatAssemblyBegin(baij->B,mode);
824: MatAssemblyEnd(baij->B,mode);
826: PetscFree2(baij->rowvalues,baij->rowindices);
828: baij->rowvalues = NULL;
830: /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
831: if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
832: PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
833: MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));
834: }
835: return 0;
836: }
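/*
   User-level call sequence that drives the two assembly routines above
   (standard PETSc usage):

     MatSetValues(mat,...);                       off-process entries go to the stashes
     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);    starts the stash communication
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);      drains the stashes via the loops above
*/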
838: extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
839: #include <petscdraw.h>
840: static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
841: {
842: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
843: PetscInt bs = mat->rmap->bs;
844: PetscMPIInt rank = baij->rank;
845: PetscBool iascii,isdraw;
846: PetscViewer sviewer;
847: PetscViewerFormat format;
849: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
850: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
851: if (iascii) {
852: PetscViewerGetFormat(viewer,&format);
853: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
854: MatInfo info;
855: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
856: MatGetInfo(mat,MAT_LOCAL,&info);
857: PetscViewerASCIIPushSynchronized(viewer);
858: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);
859: MatGetInfo(baij->A,MAT_LOCAL,&info);
860: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);
861: MatGetInfo(baij->B,MAT_LOCAL,&info);
862: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);
863: PetscViewerFlush(viewer);
864: PetscViewerASCIIPopSynchronized(viewer);
865: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
866: VecScatterView(baij->Mvctx,viewer);
867: return 0;
868: } else if (format == PETSC_VIEWER_ASCII_INFO) {
869: PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs);
870: return 0;
871: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
872: return 0;
873: }
874: }
876: if (isdraw) {
877: PetscDraw draw;
878: PetscBool isnull;
879: PetscViewerDrawGetDraw(viewer,0,&draw);
880: PetscDrawIsNull(draw,&isnull);
881: if (isnull) return 0;
882: }
884: {
885: /* assemble the entire matrix onto the first processor. */
886: Mat A;
887: Mat_SeqSBAIJ *Aloc;
888: Mat_SeqBAIJ *Bloc;
889: PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
890: MatScalar *a;
891: const char *matname;
893: /* Should this be the same type as mat? */
894: MatCreate(PetscObjectComm((PetscObject)mat),&A);
895: if (rank == 0) {
896: MatSetSizes(A,M,N,M,N);
897: } else {
898: MatSetSizes(A,0,0,M,N);
899: }
900: MatSetType(A,MATMPISBAIJ);
901: MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);
902: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
903: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
905: /* copy over the A part */
906: Aloc = (Mat_SeqSBAIJ*)baij->A->data;
907: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
908: PetscMalloc1(bs,&rvals);
910: for (i=0; i<mbs; i++) {
911: rvals[0] = bs*(baij->rstartbs + i);
912: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
913: for (j=ai[i]; j<ai[i+1]; j++) {
914: col = (baij->cstartbs+aj[j])*bs;
915: for (k=0; k<bs; k++) {
916: MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
917: col++;
918: a += bs;
919: }
920: }
921: }
922: /* copy over the B part */
923: Bloc = (Mat_SeqBAIJ*)baij->B->data;
924: ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
925: for (i=0; i<mbs; i++) {
927: rvals[0] = bs*(baij->rstartbs + i);
928: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
929: for (j=ai[i]; j<ai[i+1]; j++) {
930: col = baij->garray[aj[j]]*bs;
931: for (k=0; k<bs; k++) {
932: MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
933: col++;
934: a += bs;
935: }
936: }
937: }
938: PetscFree(rvals);
939: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
940: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
941: /*
942: Everyone has to participate in drawing the matrix since the graphics waits are
943: synchronized across all processors that share the PetscDraw object
944: */
945: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
946: PetscObjectGetName((PetscObject)mat,&matname);
947: if (rank == 0) {
948: PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);
949: MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);
950: }
951: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
952: PetscViewerFlush(viewer);
953: MatDestroy(&A);
954: }
955: return 0;
956: }
958: /* Used for both MPIBAIJ and MPISBAIJ matrices */
959: #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
961: PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
962: {
963: PetscBool iascii,isdraw,issocket,isbinary;
965: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
966: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
967: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
968: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
969: if (iascii || isdraw || issocket) {
970: MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);
971: } else if (isbinary) {
972: MatView_MPISBAIJ_Binary(mat,viewer);
973: }
974: return 0;
975: }
977: PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
978: {
979: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
981: #if defined(PETSC_USE_LOG)
982: PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
983: #endif
984: MatStashDestroy_Private(&mat->stash);
985: MatStashDestroy_Private(&mat->bstash);
986: MatDestroy(&baij->A);
987: MatDestroy(&baij->B);
988: #if defined(PETSC_USE_CTABLE)
989: PetscTableDestroy(&baij->colmap);
990: #else
991: PetscFree(baij->colmap);
992: #endif
993: PetscFree(baij->garray);
994: VecDestroy(&baij->lvec);
995: VecScatterDestroy(&baij->Mvctx);
996: VecDestroy(&baij->slvec0);
997: VecDestroy(&baij->slvec0b);
998: VecDestroy(&baij->slvec1);
999: VecDestroy(&baij->slvec1a);
1000: VecDestroy(&baij->slvec1b);
1001: VecScatterDestroy(&baij->sMvctx);
1002: PetscFree2(baij->rowvalues,baij->rowindices);
1003: PetscFree(baij->barray);
1004: PetscFree(baij->hd);
1005: VecDestroy(&baij->diag);
1006: VecDestroy(&baij->bb1);
1007: VecDestroy(&baij->xx1);
1008: #if defined(PETSC_USE_REAL_MAT_SINGLE)
1009: PetscFree(baij->setvaluescopy);
1010: #endif
1011: PetscFree(baij->in_loc);
1012: PetscFree(baij->v_loc);
1013: PetscFree(baij->rangebs);
1014: PetscFree(mat->data);
1016: PetscObjectChangeTypeName((PetscObject)mat,NULL);
1017: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);
1018: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);
1019: PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);
1020: PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);
1021: #if defined(PETSC_HAVE_ELEMENTAL)
1022: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);
1023: #endif
1024: #if defined(PETSC_HAVE_SCALAPACK)
1025: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL);
1026: #endif
1027: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);
1028: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);
1029: return 0;
1030: }
1032: PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1033: {
1034: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1035: PetscInt mbs=a->mbs,bs=A->rmap->bs;
1036: PetscScalar *from;
1037: const PetscScalar *x;
1039: /* diagonal part */
1040: (*a->A->ops->mult)(a->A,xx,a->slvec1a);
1041: VecSet(a->slvec1b,0.0);
1043: /* subdiagonal part */
1045: (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);
1047: /* copy x into the vec slvec0 */
1048: VecGetArray(a->slvec0,&from);
1049: VecGetArrayRead(xx,&x);
1051: PetscArraycpy(from,x,bs*mbs);
1052: VecRestoreArray(a->slvec0,&from);
1053: VecRestoreArrayRead(xx,&x);
1055: VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1056: VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1057: /* superdiagonal part */
1058: (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);
1059: return 0;
1060: }
1062: PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1063: {
1064: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1065: PetscInt mbs=a->mbs,bs=A->rmap->bs;
1066: PetscScalar *from;
1067: const PetscScalar *x;
1069: /* diagonal part */
1070: (*a->A->ops->mult)(a->A,xx,a->slvec1a);
1071: VecSet(a->slvec1b,0.0);
1073: /* subdiagonal part */
1074: (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);
1076: /* copy x into the vec slvec0 */
1077: VecGetArray(a->slvec0,&from);
1078: VecGetArrayRead(xx,&x);
1080: PetscArraycpy(from,x,bs*mbs);
1081: VecRestoreArray(a->slvec0,&from);
1082: VecRestoreArrayRead(xx,&x);
1084: VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1085: VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1086: /* superdiagonal part */
1087: (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);
1088: return 0;
1089: }
1091: PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
1092: {
1093: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1094: PetscInt mbs=a->mbs,bs=A->rmap->bs;
1095: PetscScalar *from,zero=0.0;
1096: const PetscScalar *x;
1098: /* diagonal part */
1099: (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);
1100: VecSet(a->slvec1b,zero);
1102: /* subdiagonal part */
1104: (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);
1106: /* copy x into the vec slvec0 */
1107: VecGetArray(a->slvec0,&from);
1108: VecGetArrayRead(xx,&x);
1109: PetscArraycpy(from,x,bs*mbs);
1110: VecRestoreArray(a->slvec0,&from);
1112: VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1113: VecRestoreArrayRead(xx,&x);
1114: VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1116: /* superdiagonal part */
1117: (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);
1118: return 0;
1119: }
1121: PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1122: {
1123: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1124: PetscInt mbs=a->mbs,bs=A->rmap->bs;
1125: PetscScalar *from,zero=0.0;
1126: const PetscScalar *x;
1128: /* diagonal part */
1129: (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);
1130: VecSet(a->slvec1b,zero);
1132: /* subdiagonal part */
1133: (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);
1135: /* copy x into the vec slvec0 */
1136: VecGetArray(a->slvec0,&from);
1137: VecGetArrayRead(xx,&x);
1138: PetscArraycpy(from,x,bs*mbs);
1139: VecRestoreArray(a->slvec0,&from);
1141: VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1142: VecRestoreArrayRead(xx,&x);
1143: VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
1145: /* superdiagonal part */
1146: (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);
1147: return 0;
1148: }
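/*
   All four products above share one communication scheme: slvec1a holds the
   local contribution A*x (plus yy for the *Add versions), slvec0 packs
   [x ; B^T x] (B^H x in the Hermitian variants), and a single VecScatter with
   ADD_VALUES both accumulates the transpose contributions on their owning
   ranks and gathers the ghost values consumed by the final multiply-add with B.
*/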
1150: /*
1151: This only works correctly for square matrices where the subblock A->A is the
1152: diagonal block
1153: */
1154: PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1155: {
1156: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1159: MatGetDiagonal(a->A,v);
1160: return 0;
1161: }
1163: PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1164: {
1165: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1167: MatScale(a->A,aa);
1168: MatScale(a->B,aa);
1169: return 0;
1170: }
1172: PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1173: {
1174: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1175: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1176: PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1177: PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1178: PetscInt *cmap,*idx_p,cstart = mat->rstartbs;
1181: mat->getrowactive = PETSC_TRUE;
1183: if (!mat->rowvalues && (idx || v)) {
1184: /*
1185: allocate enough space to hold information from the longest row.
1186: */
1187: Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1188: Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data;
1189: PetscInt max = 1,mbs = mat->mbs,tmp;
1190: for (i=0; i<mbs; i++) {
1191: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1192: if (max < tmp) max = tmp;
1193: }
1194: PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);
1195: }
1198: lrow = row - brstart; /* local row index */
1200: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1201: if (!v) {pvA = NULL; pvB = NULL;}
1202: if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1203: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1204: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1205: nztot = nzA + nzB;
1207: cmap = mat->garray;
1208: if (v || idx) {
1209: if (nztot) {
1210: /* Sort by increasing column numbers, assuming A and B already sorted */
1211: PetscInt imark = -1;
1212: if (v) {
1213: *v = v_p = mat->rowvalues;
1214: for (i=0; i<nzB; i++) {
1215: if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1216: else break;
1217: }
1218: imark = i;
1219: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1220: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1221: }
1222: if (idx) {
1223: *idx = idx_p = mat->rowindices;
1224: if (imark > -1) {
1225: for (i=0; i<imark; i++) {
1226: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1227: }
1228: } else {
1229: for (i=0; i<nzB; i++) {
1230: if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1231: else break;
1232: }
1233: imark = i;
1234: }
1235: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i];
1236: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1237: }
1238: } else {
1239: if (idx) *idx = NULL;
1240: if (v) *v = NULL;
1241: }
1242: }
1243: *nz = nztot;
1244: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1245: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1246: return 0;
1247: }
1249: PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1250: {
1251: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1254: baij->getrowactive = PETSC_FALSE;
1255: return 0;
1256: }
1258: PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1259: {
1260: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1261: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1263: aA->getrow_utriangular = PETSC_TRUE;
1264: return 0;
1265: }
1266: PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1267: {
1268: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1269: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1271: aA->getrow_utriangular = PETSC_FALSE;
1272: return 0;
1273: }
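/*
   These two routines must bracket row access on SBAIJ matrices, since only the
   upper triangle is stored; the same pattern is used in
   MatConvert_MPISBAIJ_Basic() above (r, ncols, cols, and vals are declared by
   the caller):

     MatGetRowUpperTriangular(A);
     MatGetRow(A,r,&ncols,&cols,&vals);
     ...
     MatRestoreRow(A,r,&ncols,&cols,&vals);
     MatRestoreRowUpperTriangular(A);
*/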
1275: PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1276: {
1277: if (PetscDefined(USE_COMPLEX)) {
1278: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data;
1280: MatConjugate(a->A);
1281: MatConjugate(a->B);
1282: }
1283: return 0;
1284: }
1286: PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1287: {
1288: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1290: MatRealPart(a->A);
1291: MatRealPart(a->B);
1292: return 0;
1293: }
1295: PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1296: {
1297: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1299: MatImaginaryPart(a->A);
1300: MatImaginaryPart(a->B);
1301: return 0;
1302: }
1304: /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1305: Input: isrow - distributed (parallel),
1306: iscol_local - locally owned (seq)
1307: */
1308: PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg)
1309: {
1310: PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch;
1311: const PetscInt *ptr1,*ptr2;
1313: ISGetLocalSize(isrow,&sz1);
1314: ISGetLocalSize(iscol_local,&sz2);
1315: if (sz1 > sz2) {
1316: *flg = PETSC_FALSE;
1317: return 0;
1318: }
1320: ISGetIndices(isrow,&ptr1);
1321: ISGetIndices(iscol_local,&ptr2);
1323: PetscMalloc1(sz1,&a1);
1324: PetscMalloc1(sz2,&a2);
1325: PetscArraycpy(a1,ptr1,sz1);
1326: PetscArraycpy(a2,ptr2,sz2);
1327: PetscSortInt(sz1,a1);
1328: PetscSortInt(sz2,a2);
1330: nmatch=0;
1331: k = 0;
1332: for (i=0; i<sz1; i++) {
1333: for (j=k; j<sz2; j++) {
1334: if (a1[i] == a2[j]) {
1335: k = j; nmatch++;
1336: break;
1337: }
1338: }
1339: }
1340: ISRestoreIndices(isrow,&ptr1);
1341: ISRestoreIndices(iscol_local,&ptr2);
1342: PetscFree(a1);
1343: PetscFree(a2);
1344: if (nmatch < sz1) {
1345: *flg = PETSC_FALSE;
1346: } else {
1347: *flg = PETSC_TRUE;
1348: }
1349: return 0;
1350: }
1352: PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1353: {
1354: IS iscol_local;
1355: PetscInt csize;
1356: PetscBool isequal;
1358: ISGetLocalSize(iscol,&csize);
1359: if (call == MAT_REUSE_MATRIX) {
1360: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
1362: } else {
1363: PetscBool issorted;
1365: ISAllGather(iscol,&iscol_local);
1366: ISEqual_private(isrow,iscol_local,&isequal);
1367: ISSorted(iscol_local, &issorted);
1369: }
1371: /* now call MatCreateSubMatrix_MPIBAIJ_Private() */
1372: MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
1373: if (call == MAT_INITIAL_MATRIX) {
1374: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
1375: ISDestroy(&iscol_local);
1376: }
1377: return 0;
1378: }
1380: PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1381: {
1382: Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1384: MatZeroEntries(l->A);
1385: MatZeroEntries(l->B);
1386: return 0;
1387: }
1389: PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1390: {
1391: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1392: Mat A = a->A,B = a->B;
1393: PetscLogDouble isend[5],irecv[5];
1395: info->block_size = (PetscReal)matin->rmap->bs;
1397: MatGetInfo(A,MAT_LOCAL,info);
1399: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1400: isend[3] = info->memory; isend[4] = info->mallocs;
1402: MatGetInfo(B,MAT_LOCAL,info);
1404: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1405: isend[3] += info->memory; isend[4] += info->mallocs;
1406: if (flag == MAT_LOCAL) {
1407: info->nz_used = isend[0];
1408: info->nz_allocated = isend[1];
1409: info->nz_unneeded = isend[2];
1410: info->memory = isend[3];
1411: info->mallocs = isend[4];
1412: } else if (flag == MAT_GLOBAL_MAX) {
1413: MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));
1415: info->nz_used = irecv[0];
1416: info->nz_allocated = irecv[1];
1417: info->nz_unneeded = irecv[2];
1418: info->memory = irecv[3];
1419: info->mallocs = irecv[4];
1420: } else if (flag == MAT_GLOBAL_SUM) {
1421: MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));
1423: info->nz_used = irecv[0];
1424: info->nz_allocated = irecv[1];
1425: info->nz_unneeded = irecv[2];
1426: info->memory = irecv[3];
1427: info->mallocs = irecv[4];
1428: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1429: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1430: info->fill_ratio_needed = 0;
1431: info->factor_mallocs = 0;
1432: return 0;
1433: }
1435: PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1436: {
1437: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1438: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1440: switch (op) {
1441: case MAT_NEW_NONZERO_LOCATIONS:
1442: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1443: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1444: case MAT_KEEP_NONZERO_PATTERN:
1445: case MAT_SUBMAT_SINGLEIS:
1446: case MAT_NEW_NONZERO_LOCATION_ERR:
1447: MatCheckPreallocated(A,1);
1448: MatSetOption(a->A,op,flg);
1449: MatSetOption(a->B,op,flg);
1450: break;
1451: case MAT_ROW_ORIENTED:
1452: MatCheckPreallocated(A,1);
1453: a->roworiented = flg;
1455: MatSetOption(a->A,op,flg);
1456: MatSetOption(a->B,op,flg);
1457: break;
1458: case MAT_FORCE_DIAGONAL_ENTRIES:
1459: case MAT_SORTED_FULL:
1460: PetscInfo(A,"Option %s ignored\n",MatOptions[op]);
1461: break;
1462: case MAT_IGNORE_OFF_PROC_ENTRIES:
1463: a->donotstash = flg;
1464: break;
1465: case MAT_USE_HASH_TABLE:
1466: a->ht_flag = flg;
1467: break;
1468: case MAT_HERMITIAN:
1469: MatCheckPreallocated(A,1);
1470: MatSetOption(a->A,op,flg);
1471: #if defined(PETSC_USE_COMPLEX)
1472: if (flg) { /* need different mat-vec ops */
1473: A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1474: A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian;
1475: A->ops->multtranspose = NULL;
1476: A->ops->multtransposeadd = NULL;
1477: A->symmetric = PETSC_FALSE;
1478: }
1479: #endif
1480: break;
1481: case MAT_SPD:
1482: case MAT_SYMMETRIC:
1483: MatCheckPreallocated(A,1);
1484: MatSetOption(a->A,op,flg);
1485: #if defined(PETSC_USE_COMPLEX)
1486: if (flg) { /* restore to use default mat-vec ops */
1487: A->ops->mult = MatMult_MPISBAIJ;
1488: A->ops->multadd = MatMultAdd_MPISBAIJ;
1489: A->ops->multtranspose = MatMult_MPISBAIJ;
1490: A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1491: }
1492: #endif
1493: break;
1494: case MAT_STRUCTURALLY_SYMMETRIC:
1495: MatCheckPreallocated(A,1);
1496: MatSetOption(a->A,op,flg);
1497: break;
1498: case MAT_SYMMETRY_ETERNAL:
1500: PetscInfo(A,"Option %s ignored\n",MatOptions[op]);
1501: break;
1502: case MAT_IGNORE_LOWER_TRIANGULAR:
1503: aA->ignore_ltriangular = flg;
1504: break;
1505: case MAT_ERROR_LOWER_TRIANGULAR:
1506: aA->ignore_ltriangular = flg;
1507: break;
1508: case MAT_GETROW_UPPERTRIANGULAR:
1509: aA->getrow_utriangular = flg;
1510: break;
1511: default:
1512: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1513: }
1514: return 0;
1515: }
1517: PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1518: {
1519: if (reuse == MAT_INITIAL_MATRIX) {
1520: MatDuplicate(A,MAT_COPY_VALUES,B);
1521: } else if (reuse == MAT_REUSE_MATRIX) {
1522: MatCopy(A,*B,SAME_NONZERO_PATTERN);
1523: }
1524: return 0;
1525: }
1527: PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1528: {
1529: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1530: Mat a = baij->A, b=baij->B;
1531: PetscInt nv,m,n;
1532: PetscBool flg;
1534: if (ll != rr) {
1535: VecEqual(ll,rr,&flg);
1537: }
1538: if (!ll) return 0;
1540: MatGetLocalSize(mat,&m,&n);
1543: VecGetLocalSize(rr,&nv);
1546: VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1548: /* left diagonalscale the off-diagonal part */
1549: (*b->ops->diagonalscale)(b,ll,NULL);
1551: /* scale the diagonal part */
1552: (*a->ops->diagonalscale)(a,ll,rr);
1554: /* right diagonalscale the off-diagonal part */
1555: VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1556: (*b->ops->diagonalscale)(b,NULL,baij->lvec);
1557: return 0;
1558: }
1560: PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1561: {
1562: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1564: MatSetUnfactored(a->A);
1565: return 0;
1566: }
1568: static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1570: PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag)
1571: {
1572: Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1573: Mat a,b,c,d;
1574: PetscBool flg;
1576: a = matA->A; b = matA->B;
1577: c = matB->A; d = matB->B;
1579: MatEqual(a,c,&flg);
1580: if (flg) {
1581: MatEqual(b,d,&flg);
1582: }
1583: MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1584: return 0;
1585: }
1587: PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1588: {
1589: PetscBool isbaij;
1591: PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");
1593: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1594: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1595: MatGetRowUpperTriangular(A);
1596: MatCopy_Basic(A,B,str);
1597: MatRestoreRowUpperTriangular(A);
1598: } else {
1599: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1600: Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1602: MatCopy(a->A,b->A,str);
1603: MatCopy(a->B,b->B,str);
1604: }
1605: PetscObjectStateIncrease((PetscObject)B);
1606: return 0;
1607: }
1609: PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1610: {
1611: MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);
1612: return 0;
1613: }
1615: PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1616: {
1617: Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1618: PetscBLASInt bnz,one=1;
1619: Mat_SeqSBAIJ *xa,*ya;
1620: Mat_SeqBAIJ *xb,*yb;
1622: if (str == SAME_NONZERO_PATTERN) {
1623: PetscScalar alpha = a;
1624: xa = (Mat_SeqSBAIJ*)xx->A->data;
1625: ya = (Mat_SeqSBAIJ*)yy->A->data;
1626: PetscBLASIntCast(xa->nz,&bnz);
1627: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1628: xb = (Mat_SeqBAIJ*)xx->B->data;
1629: yb = (Mat_SeqBAIJ*)yy->B->data;
1630: PetscBLASIntCast(xb->nz,&bnz);
1631: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1632: PetscObjectStateIncrease((PetscObject)Y);
1633: } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
1634: MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);
1635: MatAXPY_Basic(Y,a,X,str);
1636: MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);
1637: } else {
1638: Mat B;
1639: PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1641: MatGetRowUpperTriangular(X);
1642: MatGetRowUpperTriangular(Y);
1643: PetscMalloc1(yy->A->rmap->N,&nnz_d);
1644: PetscMalloc1(yy->B->rmap->N,&nnz_o);
1645: MatCreate(PetscObjectComm((PetscObject)Y),&B);
1646: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
1647: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
1648: MatSetBlockSizesFromMats(B,Y,Y);
1649: MatSetType(B,MATMPISBAIJ);
1650: MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);
1651: MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);
1652: MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);
1653: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
1654: MatHeaderMerge(Y,&B);
1655: PetscFree(nnz_d);
1656: PetscFree(nnz_o);
1657: MatRestoreRowUpperTriangular(X);
1658: MatRestoreRowUpperTriangular(Y);
1659: }
1660: return 0;
1661: }
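/*
   Usage sketch for the three branches above; the MatStructure argument is the
   caller's assertion about the nonzero patterns (the scalar 2.0 is an
   illustrative value):

     MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);         direct BLAS axpy on the stored arrays
     MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);       row-by-row basic implementation
     MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);    Y is rebuilt with merged preallocation
*/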
1663: PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1664: {
1665: PetscInt i;
1666: PetscBool flg;
1668: MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B); /* B[] are sbaij matrices */
1669: for (i=0; i<n; i++) {
1670: ISEqual(irow[i],icol[i],&flg);
1671: if (!flg) {
1672: MatSeqSBAIJZeroOps_Private(*B[i]);
1673: }
1674: }
1675: return 0;
1676: }
1678: PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1679: {
1680: Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data;
1681: Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data;
1683: if (!Y->preallocated) {
1684: MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);
1685: } else if (!aij->nz) {
1686: PetscInt nonew = aij->nonew;
1687: MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);
1688: aij->nonew = nonew;
1689: }
1690: MatShift_Basic(Y,a);
1691: return 0;
1692: }
1694: PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d)
1695: {
1696: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1699: MatMissingDiagonal(a->A,missing,d);
1700: if (d) {
1701: PetscInt rstart;
1702: MatGetOwnershipRange(A,&rstart,NULL);
1703: *d += rstart/A->rmap->bs;
1705: }
1706: return 0;
1707: }
1709: PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1710: {
1711: *a = ((Mat_MPISBAIJ*)A->data)->A;
1712: return 0;
1713: }
1715: /* -------------------------------------------------------------------*/
1716: static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1717: MatGetRow_MPISBAIJ,
1718: MatRestoreRow_MPISBAIJ,
1719: MatMult_MPISBAIJ,
1720: /* 4*/ MatMultAdd_MPISBAIJ,
1721: MatMult_MPISBAIJ, /* transpose versions are the same as non-transpose */
1722: MatMultAdd_MPISBAIJ,
1723: NULL,
1724: NULL,
1725: NULL,
1726: /* 10*/ NULL,
1727: NULL,
1728: NULL,
1729: MatSOR_MPISBAIJ,
1730: MatTranspose_MPISBAIJ,
1731: /* 15*/ MatGetInfo_MPISBAIJ,
1732: MatEqual_MPISBAIJ,
1733: MatGetDiagonal_MPISBAIJ,
1734: MatDiagonalScale_MPISBAIJ,
1735: MatNorm_MPISBAIJ,
1736: /* 20*/ MatAssemblyBegin_MPISBAIJ,
1737: MatAssemblyEnd_MPISBAIJ,
1738: MatSetOption_MPISBAIJ,
1739: MatZeroEntries_MPISBAIJ,
1740: /* 24*/ NULL,
1741: NULL,
1742: NULL,
1743: NULL,
1744: NULL,
1745: /* 29*/ MatSetUp_MPISBAIJ,
1746: NULL,
1747: NULL,
1748: MatGetDiagonalBlock_MPISBAIJ,
1749: NULL,
1750: /* 34*/ MatDuplicate_MPISBAIJ,
1751: NULL,
1752: NULL,
1753: NULL,
1754: NULL,
1755: /* 39*/ MatAXPY_MPISBAIJ,
1756: MatCreateSubMatrices_MPISBAIJ,
1757: MatIncreaseOverlap_MPISBAIJ,
1758: MatGetValues_MPISBAIJ,
1759: MatCopy_MPISBAIJ,
1760: /* 44*/ NULL,
1761: MatScale_MPISBAIJ,
1762: MatShift_MPISBAIJ,
1763: NULL,
1764: NULL,
1765: /* 49*/ NULL,
1766: NULL,
1767: NULL,
1768: NULL,
1769: NULL,
1770: /* 54*/ NULL,
1771: NULL,
1772: MatSetUnfactored_MPISBAIJ,
1773: NULL,
1774: MatSetValuesBlocked_MPISBAIJ,
1775: /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1776: NULL,
1777: NULL,
1778: NULL,
1779: NULL,
1780: /* 64*/ NULL,
1781: NULL,
1782: NULL,
1783: NULL,
1784: NULL,
1785: /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1786: NULL,
1787: MatConvert_MPISBAIJ_Basic,
1788: NULL,
1789: NULL,
1790: /* 74*/ NULL,
1791: NULL,
1792: NULL,
1793: NULL,
1794: NULL,
1795: /* 79*/ NULL,
1796: NULL,
1797: NULL,
1798: NULL,
1799: MatLoad_MPISBAIJ,
1800: /* 84*/ NULL,
1801: NULL,
1802: NULL,
1803: NULL,
1804: NULL,
1805: /* 89*/ NULL,
1806: NULL,
1807: NULL,
1808: NULL,
1809: NULL,
1810: /* 94*/ NULL,
1811: NULL,
1812: NULL,
1813: NULL,
1814: NULL,
1815: /* 99*/ NULL,
1816: NULL,
1817: NULL,
1818: MatConjugate_MPISBAIJ,
1819: NULL,
1820: /*104*/ NULL,
1821: MatRealPart_MPISBAIJ,
1822: MatImaginaryPart_MPISBAIJ,
1823: MatGetRowUpperTriangular_MPISBAIJ,
1824: MatRestoreRowUpperTriangular_MPISBAIJ,
1825: /*109*/ NULL,
1826: NULL,
1827: NULL,
1828: NULL,
1829: MatMissingDiagonal_MPISBAIJ,
1830: /*114*/ NULL,
1831: NULL,
1832: NULL,
1833: NULL,
1834: NULL,
1835: /*119*/ NULL,
1836: NULL,
1837: NULL,
1838: NULL,
1839: NULL,
1840: /*124*/ NULL,
1841: NULL,
1842: NULL,
1843: NULL,
1844: NULL,
1845: /*129*/ NULL,
1846: NULL,
1847: NULL,
1848: NULL,
1849: NULL,
1850: /*134*/ NULL,
1851: NULL,
1852: NULL,
1853: NULL,
1854: NULL,
1855: /*139*/ MatSetBlockSizes_Default,
1856: NULL,
1857: NULL,
1858: NULL,
1859: NULL,
1860: /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1861: NULL,
1862: NULL,
1863: NULL
1864: };
1866: PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1867: {
1868: Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1869: PetscInt i,mbs,Mbs;
1870: PetscMPIInt size;
1872: MatSetBlockSize(B,PetscAbs(bs));
1873: PetscLayoutSetUp(B->rmap);
1874: PetscLayoutSetUp(B->cmap);
1875: PetscLayoutGetBlockSize(B->rmap,&bs);
1879: mbs = B->rmap->n/bs;
1880: Mbs = B->rmap->N/bs;
1883: B->rmap->bs = bs;
1884: b->bs2 = bs*bs;
1885: b->mbs = mbs;
1886: b->Mbs = Mbs;
1887: b->nbs = B->cmap->n/bs;
1888: b->Nbs = B->cmap->N/bs;
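  /* convert the pointwise ownership ranges to block-row ranges */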
1890: for (i=0; i<=b->size; i++) {
1891: b->rangebs[i] = B->rmap->range[i]/bs;
1892: }
1893: b->rstartbs = B->rmap->rstart/bs;
1894: b->rendbs = B->rmap->rend/bs;
1896: b->cstartbs = B->cmap->rstart/bs;
1897: b->cendbs = B->cmap->rend/bs;
1899: #if defined(PETSC_USE_CTABLE)
1900: PetscTableDestroy(&b->colmap);
1901: #else
1902: PetscFree(b->colmap);
1903: #endif
1904: PetscFree(b->garray);
1905: VecDestroy(&b->lvec);
1906: VecScatterDestroy(&b->Mvctx);
1907: VecDestroy(&b->slvec0);
1908: VecDestroy(&b->slvec0b);
1909: VecDestroy(&b->slvec1);
1910: VecDestroy(&b->slvec1a);
1911: VecDestroy(&b->slvec1b);
1912: VecScatterDestroy(&b->sMvctx);
1914: /* Because b->B will have been resized we simply destroy it and create a new one each time */
1915: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
1916: MatDestroy(&b->B);
1917: MatCreate(PETSC_COMM_SELF,&b->B);
1918: MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);
1919: MatSetType(b->B,MATSEQBAIJ);
1920: PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);
1922: if (!B->preallocated) {
1923: MatCreate(PETSC_COMM_SELF,&b->A);
1924: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
1925: MatSetType(b->A,MATSEQSBAIJ);
1926: PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);
1927: MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);
1928: }
1930: MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);
1931: MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);
1933: B->preallocated = PETSC_TRUE;
1934: B->was_assembled = PETSC_FALSE;
1935: B->assembled = PETSC_FALSE;
1936: return 0;
1937: }
1939: PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1940: {
1941: PetscInt m,rstart,cend;
1942: PetscInt i,j,d,nz,bd, nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
1943: const PetscInt *JJ =NULL;
1944: PetscScalar *values=NULL;
1945: PetscBool roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
1946: PetscBool nooffprocentries;
1949: PetscLayoutSetBlockSize(B->rmap,bs);
1950: PetscLayoutSetBlockSize(B->cmap,bs);
1951: PetscLayoutSetUp(B->rmap);
1952: PetscLayoutSetUp(B->cmap);
1953: PetscLayoutGetBlockSize(B->rmap,&bs);
1954: m = B->rmap->n/bs;
1955: rstart = B->rmap->rstart/bs;
1956: cend = B->cmap->rend/bs;
1959: PetscMalloc2(m,&d_nnz,m,&o_nnz);
1960: for (i=0; i<m; i++) {
1961: nz = ii[i+1] - ii[i];
1963: /* count the entries on the block diagonal and above; the bd entries below the block diagonal are dropped, d land in the diagonal block, and the rest are off-diagonal */
1964: JJ = jj + ii[i];
1965: bd = 0;
1966: for (j=0; j<nz; j++) {
1967: if (*JJ >= i + rstart) break;
1968: JJ++;
1969: bd++;
1970: }
1971: d = 0;
1972: for (; j<nz; j++) {
1973: if (*JJ++ >= cend) break;
1974: d++;
1975: }
1976: d_nnz[i] = d;
1977: o_nnz[i] = nz - d - bd;
1978: nz = nz - bd;
1979: nz_max = PetscMax(nz_max,nz);
1980: }
1981: MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);
1982: MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);
1983: PetscFree2(d_nnz,o_nnz);
1985: values = (PetscScalar*)V;
1986: if (!values) {
1987: PetscCalloc1(bs*bs*nz_max,&values);
1988: }
1989: for (i=0; i<m; i++) {
1990: PetscInt row = i + rstart;
1991: PetscInt ncols = ii[i+1] - ii[i];
1992: const PetscInt *icols = jj + ii[i];
1993: if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
1994: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1995: MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);
1996: } else { /* block ordering does not match so we can only insert one block at a time. */
1997: PetscInt j;
1998: for (j=0; j<ncols; j++) {
1999: const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2000: MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);
2001: }
2002: }
2003: }
2005: if (!V) PetscFree(values);
2006: nooffprocentries = B->nooffprocentries;
2007: B->nooffprocentries = PETSC_TRUE;
2008: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2009: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2010: B->nooffprocentries = nooffprocentries;
2012: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2013: return 0;
2014: }
2016: /*MC
2017: MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2018: based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of
2019: the matrix is stored.
2021: For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2022: can call MatSetOption(mat, MAT_HERMITIAN, PETSC_TRUE);
2024: Options Database Keys:
2025: . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2027: Notes:
2028: The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2029: diagonal portion of the matrix on each process must be less than or equal to the number of columns.
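
   Example:
   A minimal sketch of assembling a matrix of this type; the sizes and preallocation
   counts are illustrative assumptions, not values taken from this file:
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,2,5,NULL,2,NULL);
   /* insert entries in the upper triangle only, then assemble */
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve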
2031: Level: beginner
2033: .seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType
2034: M*/
2036: PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2037: {
2038: Mat_MPISBAIJ *b;
2040: PetscBool flg = PETSC_FALSE;
2042: PetscNewLog(B,&b);
2043: B->data = (void*)b;
2044: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2046: B->ops->destroy = MatDestroy_MPISBAIJ;
2047: B->ops->view = MatView_MPISBAIJ;
2048: B->assembled = PETSC_FALSE;
2049: B->insertmode = NOT_SET_VALUES;
2051: MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);
2052: MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);
2054: /* build local table of row and column ownerships */
2055: PetscMalloc1(b->size+2,&b->rangebs);
2057: /* build cache for off array entries formed */
2058: MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);
2060: b->donotstash = PETSC_FALSE;
2061: b->colmap = NULL;
2062: b->garray = NULL;
2063: b->roworiented = PETSC_TRUE;
2065: /* stuff used in block assembly */
2066: b->barray = NULL;
2068: /* stuff used for matrix vector multiply */
2069: b->lvec = NULL;
2070: b->Mvctx = NULL;
2071: b->slvec0 = NULL;
2072: b->slvec0b = NULL;
2073: b->slvec1 = NULL;
2074: b->slvec1a = NULL;
2075: b->slvec1b = NULL;
2076: b->sMvctx = NULL;
2078: /* stuff for MatGetRow() */
2079: b->rowindices = NULL;
2080: b->rowvalues = NULL;
2081: b->getrowactive = PETSC_FALSE;
2083: /* hash table stuff */
2084: b->ht = NULL;
2085: b->hd = NULL;
2086: b->ht_size = 0;
2087: b->ht_flag = PETSC_FALSE;
2088: b->ht_fact = 0;
2089: b->ht_total_ct = 0;
2090: b->ht_insert_ct = 0;
2092: /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2093: b->ijonly = PETSC_FALSE;
2095: b->in_loc = NULL;
2096: b->v_loc = NULL;
2097: b->n_loc = 0;
2099: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);
2100: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);
2101: PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);
2102: PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);
2103: #if defined(PETSC_HAVE_ELEMENTAL)
2104: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);
2105: #endif
2106: #if defined(PETSC_HAVE_SCALAPACK)
2107: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK);
2108: #endif
2109: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);
2110: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);
2112: B->symmetric = PETSC_TRUE;
2113: B->structurally_symmetric = PETSC_TRUE;
2114: B->symmetric_set = PETSC_TRUE;
2115: B->structurally_symmetric_set = PETSC_TRUE;
2116: B->symmetric_eternal = PETSC_TRUE;
2117: #if defined(PETSC_USE_COMPLEX)
2118: B->hermitian = PETSC_FALSE;
2119: B->hermitian_set = PETSC_FALSE;
2120: #else
2121: B->hermitian = PETSC_TRUE;
2122: B->hermitian_set = PETSC_TRUE;
2123: #endif
2125: PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);
2126: PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");
2127: PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);
2128: if (flg) {
2129: PetscReal fact = 1.39;
2130: MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);
2131: PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);
2132: if (fact <= 1.0) fact = 1.39;
2133: MatMPIBAIJSetHashTableFactor(B,fact);
2134: PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact);
2135: }
2136: PetscOptionsEnd();
2137: return 0;
2138: }
2140: /*MC
2141: MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2143: This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2144: and MATMPISBAIJ otherwise.
2146: Options Database Keys:
2147: . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
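
   Example:
   A sketch of the recommended pattern; both preallocation calls are safe because only
   the one matching the runtime type takes effect (sizes here are placeholders):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATSBAIJ);
   MatSeqSBAIJSetPreallocation(A,bs,d_nz,NULL);
   MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
.ve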
2149: Level: beginner
2151: .seealso: MatCreateSBAIJ, MATSEQSBAIJ, MATMPISBAIJ
2152: M*/
2154: /*@C
2155: MatMPISBAIJSetPreallocation - For good matrix assembly performance
2156: the user should preallocate the matrix storage by setting the parameters
2157: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2158: performance can be increased by more than a factor of 50.
2160: Collective on Mat
2162: Input Parameters:
2163: + B - the matrix
2164: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2165: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2166: . d_nz - number of block nonzeros per block row in diagonal portion of local
2167: submatrix (same for all local rows)
2168: . d_nnz - array containing the number of block nonzeros in the various block rows
2169: in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2170: (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room
2171: for the diagonal entry and set a value even if it is zero.
2172: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2173: submatrix (same for all local rows).
2174: - o_nnz - array containing the number of nonzeros in the various block rows of the
2175: off-diagonal portion of the local submatrix that is right of the diagonal
2176: (possibly different for each block row) or NULL.
2178: Options Database Keys:
2179: + -mat_no_unroll - uses code that does not unroll the loops in the
2180: block calculations (much slower)
2181: - -mat_block_size - size of the blocks to use
2183: Notes:
2185: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2186: then it must be used on all processors that share the object for that argument.
2188: If the *_nnz parameter is given then the *_nz parameter is ignored
2190: Storage Information:
2191: For a square global matrix we define each processor's diagonal portion
2192: to be its local rows and the corresponding columns (a square submatrix);
2193: each processor's off-diagonal portion encompasses the remainder of the
2194: local matrix (a rectangular submatrix).
2196: The user can specify preallocated storage for the diagonal part of
2197: the local submatrix with either d_nz or d_nnz (not both). Set
2198: d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2199: memory allocation. Likewise, specify preallocated storage for the
2200: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2202: You can call MatGetInfo() to get information on how effective the preallocation was;
2203: for example the fields mallocs, nz_allocated, nz_used, nz_unneeded;
2204: You can also run with the option -info and look for messages with the string
2205: malloc in them to see if additional memory allocation was needed.
2207: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2208: the figure below we depict these three local rows and all columns (0-11).
2210: .vb
2211: 0 1 2 3 4 5 6 7 8 9 10 11
2212: --------------------------
2213: row 3 |. . . d d d o o o o o o
2214: row 4 |. . . d d d o o o o o o
2215: row 5 |. . . d d d o o o o o o
2216: --------------------------
2217: .ve
2219: Thus, any entries in the d locations are stored in the d (diagonal)
2220: submatrix, and any entries in the o locations are stored in the
2221: o (off-diagonal) submatrix. Note that the d matrix is stored in
2222: MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2224: Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2225: plus the diagonal part of the d matrix,
2226: and o_nz should indicate the number of block nonzeros per row in the o matrix.
2228: In general, for PDE problems in which most nonzeros are near the diagonal,
2229: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2230: or you will get TERRIBLE performance; see the users' manual chapter on
2231: matrices.
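
   Example:
   For the three block rows depicted above with bs=1, only the upper triangle of the d
   block is stored, so a sketch of exact preallocation on that process would be
.vb
   PetscInt d_nnz[3] = {3,2,1};  /* row 3 keeps cols 3,4,5; row 4 keeps 4,5; row 5 keeps 5 */
   PetscInt o_nnz[3] = {6,6,6};  /* cols 6..11 for every row */
   MatMPISBAIJSetPreallocation(B,1,0,d_nnz,0,o_nnz);
.ve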
2233: Level: intermediate
2235: .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2236: @*/
2237: PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2238: {
2242: PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
2243: return 0;
2244: }
2246: /*@C
2247: MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2248: (block compressed row). For good matrix assembly performance
2249: the user should preallocate the matrix storage by setting the parameters
2250: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2251: performance can be increased by more than a factor of 50.
2253: Collective
2255: Input Parameters:
2256: + comm - MPI communicator
2257: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2258: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2259: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2260: This value should be the same as the local size used in creating the
2261: y vector for the matrix-vector product y = Ax.
2262: . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2263: This value should be the same as the local size used in creating the
2264: x vector for the matrix-vector product y = Ax.
2265: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2266: . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2267: . d_nz - number of block nonzeros per block row in diagonal portion of local
2268: submatrix (same for all local rows)
2269: . d_nnz - array containing the number of block nonzeros in the various block rows
2270: in the upper triangular portion of the diagonal portion of the local submatrix
2271: (possibly different for each block row) or NULL.
2272: If you plan to factor the matrix you must leave room for the diagonal entry and
2273: set its value even if it is zero.
2274: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2275: submatrix (same for all local rows).
2276: - o_nnz - array containing the number of nonzeros in the various block rows of the
2277: off-diagonal portion of the local submatrix (possibly different for
2278: each block row) or NULL.
2280: Output Parameter:
2281: . A - the matrix
2283: Options Database Keys:
2284: + -mat_no_unroll - uses code that does not unroll the loops in the
2285: block calculations (much slower)
2286: . -mat_block_size - size of the blocks to use
2287: - -mat_mpi - use the parallel matrix data structures even on one processor
2288: (defaults to using SeqBAIJ format on one processor)
2290: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2291: MatXXXXSetPreallocation() paradigm instead of this routine directly.
2292: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2294: Notes:
2295: The number of rows and columns must be divisible by blocksize.
2296: This matrix type does not support complex Hermitian operations.
2298: The user MUST specify either the local or global matrix dimensions
2299: (possibly both).
2301: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2302: then it must be used on all processors that share the object for that argument.
2304: If the *_nnz parameter is given then the *_nz parameter is ignored
2306: Storage Information:
2307: For a square global matrix we define each processor's diagonal portion
2308: to be its local rows and the corresponding columns (a square submatrix);
2309: each processor's off-diagonal portion encompasses the remainder of the
2310: local matrix (a rectangular submatrix).
2312: The user can specify preallocated storage for the diagonal part of
2313: the local submatrix with either d_nz or d_nnz (not both). Set
2314: d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2315: memory allocation. Likewise, specify preallocated storage for the
2316: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2318: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2319: the figure below we depict these three local rows and all columns (0-11).
2321: .vb
2322: 0 1 2 3 4 5 6 7 8 9 10 11
2323: --------------------------
2324: row 3 |. . . d d d o o o o o o
2325: row 4 |. . . d d d o o o o o o
2326: row 5 |. . . d d d o o o o o o
2327: --------------------------
2328: .ve
2330: Thus, any entries in the d locations are stored in the d (diagonal)
2331: submatrix, and any entries in the o locations are stored in the
2332: o (off-diagonal) submatrix. Note that the d matrix is stored in
2333: MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2335: Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2336: plus the diagonal part of the d matrix,
2337: and o_nz should indicate the number of block nonzeros per row in the o matrix.
2338: In general, for PDE problems in which most nonzeros are near the diagonal,
2339: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2340: or you will get TERRIBLE performance; see the users' manual chapter on
2341: matrices.
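
   Example:
   A sketch of a direct call with 2x2 blocks; the global sizes M,N and the per-block-row
   counts (5 diagonal, 2 off-diagonal) are illustrative placeholders:
.vb
   Mat A;
   MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,NULL,2,NULL,&A);
.ve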
2343: Level: intermediate
2345: .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2346: @*/
2348: PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2349: {
2350: PetscMPIInt size;
2352: MatCreate(comm,A);
2353: MatSetSizes(*A,m,n,M,N);
2354: MPI_Comm_size(comm,&size);
2355: if (size > 1) {
2356: MatSetType(*A,MATMPISBAIJ);
2357: MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
2358: } else {
2359: MatSetType(*A,MATSEQSBAIJ);
2360: MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
2361: }
2362: return 0;
2363: }
2365: static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2366: {
2367: Mat mat;
2368: Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2369: PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2370: PetscScalar *array;
2372: *newmat = NULL;
2374: MatCreate(PetscObjectComm((PetscObject)matin),&mat);
2375: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
2376: MatSetType(mat,((PetscObject)matin)->type_name);
2377: PetscLayoutReference(matin->rmap,&mat->rmap);
2378: PetscLayoutReference(matin->cmap,&mat->cmap);
2380: mat->factortype = matin->factortype;
2381: mat->preallocated = PETSC_TRUE;
2382: mat->assembled = PETSC_TRUE;
2383: mat->insertmode = NOT_SET_VALUES;
2385: a = (Mat_MPISBAIJ*)mat->data;
2386: a->bs2 = oldmat->bs2;
2387: a->mbs = oldmat->mbs;
2388: a->nbs = oldmat->nbs;
2389: a->Mbs = oldmat->Mbs;
2390: a->Nbs = oldmat->Nbs;
2392: a->size = oldmat->size;
2393: a->rank = oldmat->rank;
2394: a->donotstash = oldmat->donotstash;
2395: a->roworiented = oldmat->roworiented;
2396: a->rowindices = NULL;
2397: a->rowvalues = NULL;
2398: a->getrowactive = PETSC_FALSE;
2399: a->barray = NULL;
2400: a->rstartbs = oldmat->rstartbs;
2401: a->rendbs = oldmat->rendbs;
2402: a->cstartbs = oldmat->cstartbs;
2403: a->cendbs = oldmat->cendbs;
2405: /* hash table stuff */
2406: a->ht = NULL;
2407: a->hd = NULL;
2408: a->ht_size = 0;
2409: a->ht_flag = oldmat->ht_flag;
2410: a->ht_fact = oldmat->ht_fact;
2411: a->ht_total_ct = 0;
2412: a->ht_insert_ct = 0;
2414: PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);
2415: if (oldmat->colmap) {
2416: #if defined(PETSC_USE_CTABLE)
2417: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
2418: #else
2419: PetscMalloc1(a->Nbs,&a->colmap);
2420: PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));
2421: PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);
2422: #endif
2423: } else a->colmap = NULL;
2425: if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2426: PetscMalloc1(len,&a->garray);
2427: PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));
2428: PetscArraycpy(a->garray,oldmat->garray,len);
2429: } else a->garray = NULL;
2431: MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);
2432: VecDuplicate(oldmat->lvec,&a->lvec);
2433: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);
2434: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
2435: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);
2437: VecDuplicate(oldmat->slvec0,&a->slvec0);
2438: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);
2439: VecDuplicate(oldmat->slvec1,&a->slvec1);
2440: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);
2442: VecGetLocalSize(a->slvec1,&nt);
2443: VecGetArray(a->slvec1,&array);
2444: VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);
2445: VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);
2446: VecRestoreArray(a->slvec1,&array);
2447: VecGetArray(a->slvec0,&array);
2448: VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);
2449: VecRestoreArray(a->slvec0,&array);
2450: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);
2451: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);
2452: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);
2453: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);
2454: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);
2456: /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet, so instead reference the old scatter (the lazy trick): */
2457: PetscObjectReference((PetscObject)oldmat->sMvctx);
2458: a->sMvctx = oldmat->sMvctx;
2459: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);
2461: MatDuplicate(oldmat->A,cpvalues,&a->A);
2462: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
2463: MatDuplicate(oldmat->B,cpvalues,&a->B);
2464: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);
2465: PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
2466: *newmat = mat;
2467: return 0;
2468: }
2470: /* Used for both MPIBAIJ and MPISBAIJ matrices */
2471: #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2473: PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
2474: {
2475: PetscBool isbinary;
2477: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
2479: MatLoad_MPISBAIJ_Binary(mat,viewer);
2480: return 0;
2481: }
2483: /*XXXXX@
2484: MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2486: Input Parameters:
2487: . mat - the matrix
2488: . fact - factor
2490: Not Collective on Mat, each process can have a different hash factor
2492: Level: advanced
2494: Notes:
2495: This can also be set by the command line option: -mat_use_hash_table fact
2497: .seealso: MatSetOption()
2498: @XXXXX*/
2500: PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2501: {
2502: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2503: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data;
2504: PetscReal atmp;
2505: PetscReal *work,*svalues,*rvalues;
2506: PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2507: PetscMPIInt rank,size;
2508: PetscInt *rowners_bs,dest,count,source;
2509: PetscScalar *va;
2510: MatScalar *ba;
2511: MPI_Status stat;
2514: MatGetRowMaxAbs(a->A,v,NULL);
2515: VecGetArray(v,&va);
2517: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
2518: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
2520: bs = A->rmap->bs;
2521: mbs = a->mbs;
2522: Mbs = a->Mbs;
2523: ba = b->a;
2524: bi = b->i;
2525: bj = b->j;
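  /* Only the upper triangle is stored locally, so by symmetry each entry of B at global
     column c also belongs to block row c, which is owned by a later process; the column
     maxima are accumulated in work[] and sent to those owning processes below. */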
2527: /* find ownerships */
2528: rowners_bs = A->rmap->range;
2530: /* each proc creates an array to be distributed */
2531: PetscCalloc1(bs*Mbs,&work);
2533: /* row_max for B */
2534: if (rank != size-1) {
2535: for (i=0; i<mbs; i++) {
2536: ncols = bi[1] - bi[0]; bi++;
2537: brow = bs*i;
2538: for (j=0; j<ncols; j++) {
2539: bcol = bs*(*bj);
2540: for (kcol=0; kcol<bs; kcol++) {
2541: col = bcol + kcol; /* local col index */
2542: col += rowners_bs[rank+1]; /* global col index */
2543: for (krow=0; krow<bs; krow++) {
2544: atmp = PetscAbsScalar(*ba); ba++;
2545: row = brow + krow; /* local row index */
2546: if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2547: if (work[col] < atmp) work[col] = atmp;
2548: }
2549: }
2550: bj++;
2551: }
2552: }
2554: /* send values to its owners */
2555: for (dest=rank+1; dest<size; dest++) {
2556: svalues = work + rowners_bs[dest];
2557: count = rowners_bs[dest+1]-rowners_bs[dest];
2558: MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));
2559: }
2560: }
2562: /* receive values */
2563: if (rank) {
2564: rvalues = work;
2565: count = rowners_bs[rank+1]-rowners_bs[rank];
2566: for (source=0; source<rank; source++) {
2567: MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);
2568: /* process values */
2569: for (i=0; i<count; i++) {
2570: if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2571: }
2572: }
2573: }
2575: VecRestoreArray(v,&va);
2576: PetscFree(work);
2577: return 0;
2578: }
2580: PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2581: {
2582: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
2583: PetscInt mbs=mat->mbs,bs=matin->rmap->bs;
2584: PetscScalar *x,*ptr,*from;
2585: Vec bb1;
2586: const PetscScalar *b;
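  /* Only the upper triangle is stored: in each sweep the lower-triangular off-process
     contribution comes from B^T (through the slvec0/slvec1 scatter) and the
     upper-triangular contribution from B itself. */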
2591: if (flag == SOR_APPLY_UPPER) {
2592: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2593: return 0;
2594: }
2596: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2597: if (flag & SOR_ZERO_INITIAL_GUESS) {
2598: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
2599: its--;
2600: }
2602: VecDuplicate(bb,&bb1);
2603: while (its--) {
2605: /* lower triangular part: slvec0b = - B^T*xx */
2606: (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);
2608: /* copy xx into slvec0a */
2609: VecGetArray(mat->slvec0,&ptr);
2610: VecGetArray(xx,&x);
2611: PetscArraycpy(ptr,x,bs*mbs);
2612: VecRestoreArray(mat->slvec0,&ptr);
2614: VecScale(mat->slvec0,-1.0);
2616: /* copy bb into slvec1a */
2617: VecGetArray(mat->slvec1,&ptr);
2618: VecGetArrayRead(bb,&b);
2619: PetscArraycpy(ptr,b,bs*mbs);
2620: VecRestoreArray(mat->slvec1,&ptr);
2622: /* set slvec1b = 0 */
2623: VecSet(mat->slvec1b,0.0);
2625: VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
2626: VecRestoreArray(xx,&x);
2627: VecRestoreArrayRead(bb,&b);
2628: VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
2630: /* upper triangular part: bb1 = bb1 - B*x */
2631: (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);
2633: /* local diagonal sweep */
2634: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
2635: }
2636: VecDestroy(&bb1);
2637: } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2638: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2639: } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2640: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2641: } else if (flag & SOR_EISENSTAT) {
2642: Vec xx1;
2643: PetscBool hasop;
2644: const PetscScalar *diag;
2645: PetscScalar *sl,scale = (omega - 2.0)/omega;
2646: PetscInt i,n;
2648: if (!mat->xx1) {
2649: VecDuplicate(bb,&mat->xx1);
2650: VecDuplicate(bb,&mat->bb1);
2651: }
2652: xx1 = mat->xx1;
2653: bb1 = mat->bb1;
2655: (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);
2657: if (!mat->diag) {
2658: /* this is wrong if the same matrix is reused with new nonzero values */
2659: MatCreateVecs(matin,&mat->diag,NULL);
2660: MatGetDiagonal(matin,mat->diag);
2661: }
2662: MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);
2664: if (hasop) {
2665: MatMultDiagonalBlock(matin,xx,bb1);
2666: VecAYPX(mat->slvec1a,scale,bb);
2667: } else {
2668: /*
2669: These two lines are replaced by code that may be a bit faster for a good compiler
2670: VecPointwiseMult(mat->slvec1a,mat->diag,xx);
2671: VecAYPX(mat->slvec1a,scale,bb);
2672: */
2673: VecGetArray(mat->slvec1a,&sl);
2674: VecGetArrayRead(mat->diag,&diag);
2675: VecGetArrayRead(bb,&b);
2676: VecGetArray(xx,&x);
2677: VecGetLocalSize(xx,&n);
2678: if (omega == 1.0) {
2679: for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2680: PetscLogFlops(2.0*n);
2681: } else {
2682: for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2683: PetscLogFlops(3.0*n);
2684: }
2685: VecRestoreArray(mat->slvec1a,&sl);
2686: VecRestoreArrayRead(mat->diag,&diag);
2687: VecRestoreArrayRead(bb,&b);
2688: VecRestoreArray(xx,&x);
2689: }
2691: /* multiply off-diagonal portion of matrix */
2692: VecSet(mat->slvec1b,0.0);
2693: (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);
2694: VecGetArray(mat->slvec0,&from);
2695: VecGetArray(xx,&x);
2696: PetscArraycpy(from,x,bs*mbs);
2697: VecRestoreArray(mat->slvec0,&from);
2698: VecRestoreArray(xx,&x);
2699: VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
2700: VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
2701: (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);
2703: /* local sweep */
2704: (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);
2705: VecAXPY(xx,1.0,xx1);
2706: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2707: return 0;
2708: }
2710: /*@
2711: MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
2712: in standard CSR format.
2714: Collective
2716: Input Parameters:
2717: + comm - MPI communicator
2718: . bs - the block size (only a block size of 1 is supported)
2719: . m - number of local rows (Cannot be PETSC_DECIDE)
2720: . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
2721: This value should be the same as the local size used in creating the
2722: x vector for the matrix-vector product y = Ax; for square matrices n is almost always m.
2723: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2724: . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2725: . i - row indices; that is i[0] = 0 and i[row] = i[row-1] + the number of block elements in block row row-1 of the matrix
2726: . j - column indices
2727: - a - matrix values
2729: Output Parameter:
2730: . mat - the matrix
2732: Level: intermediate
2734: Notes:
2735: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2736: thus you CANNOT change the matrix entries by changing the values of a[] after you have
2737: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2739: The i and j indices are 0 based, and the i indices are offsets into the local j array.
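
   Example:
   A sketch for a single process with bs = 1: the upper triangle of the 2x2 matrix
   [2 -1; -1 2] in CSR form (the arrays are illustrative):
.vb
   PetscInt    i[] = {0,2,3},j[] = {0,1,1};
   PetscScalar a[] = {2.0,-1.0,2.0};
   Mat         A;
   MatCreateMPISBAIJWithArrays(PETSC_COMM_SELF,1,2,2,2,2,i,j,a,&A);
.ve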
2741: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2742: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2743: @*/
2744: PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2745: {
2748: MatCreate(comm,mat);
2749: MatSetSizes(*mat,m,n,M,N);
2750: MatSetType(*mat,MATMPISBAIJ);
2751: MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);
2752: return 0;
2753: }
2755: /*@C
2756: MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2758: Collective
2760: Input Parameters:
2761: + B - the matrix
2762: . bs - the block size
2763: . i - the indices into j for the start of each local row (starts with zero)
2764: . j - the column indices for each local row (starts with zero) these must be sorted for each row
2765: - v - optional values in the matrix
2767: Level: advanced
2769: Notes:
2770: Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries
2771: and usually the numerical values as well
2773: Any entries below the diagonal are ignored
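
   Example:
   A sketch with bs = 1 on a single process; the CSR arrays below describe the upper
   triangle of a 2x2 matrix and are illustrative only:
.vb
   PetscInt    i[] = {0,2,3},j[] = {0,1,1};
   PetscScalar v[] = {2.0,-1.0,2.0};
   MatSetSizes(B,2,2,2,2);
   MatSetType(B,MATMPISBAIJ);
   MatMPISBAIJSetPreallocationCSR(B,1,i,j,v);
.ve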
2775: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2776: @*/
2777: PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2778: {
2779: PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
2780: return 0;
2781: }
2783: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2784: {
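  /* Concatenates the rows of the per-process sequential SBAIJ matrices inmat into one
     parallel SBAIJ matrix: a symbolic phase computes the block preallocation, then the
     numeric phase inserts the values row by row. */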
2786: PetscInt m,N,i,rstart,nnz,Ii,bs,cbs;
2787: PetscInt *indx;
2788: PetscScalar *values;
2790: MatGetSize(inmat,&m,&N);
2791: if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2792: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
2793: PetscInt *dnz,*onz,mbs,Nbs,nbs;
2794: PetscInt *bindx,rmax=a->rmax,j;
2795: PetscMPIInt rank,size;
2797: MatGetBlockSizes(inmat,&bs,&cbs);
2798: mbs = m/bs; Nbs = N/cbs;
2799: if (n == PETSC_DECIDE) {
2800: PetscSplitOwnershipBlock(comm,cbs,&n,&N);
2801: }
2802: nbs = n/cbs;
2804: PetscMalloc1(rmax,&bindx);
2805: MatPreallocateInitialize(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */
2807: MPI_Comm_rank(comm,&rank);
2808: MPI_Comm_size(comm,&size);
2809: if (rank == size-1) {
2810: /* Check sum(nbs) = Nbs */
2812: }
2814: rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
2815: MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);
2816: for (i=0; i<mbs; i++) {
2817: MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL); /* non-blocked nnz and indx */
2818: nnz = nnz/bs;
2819: for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2820: MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);
2821: MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);
2822: }
2823: MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);
2824: PetscFree(bindx);
2826: MatCreate(comm,outmat);
2827: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
2828: MatSetBlockSizes(*outmat,bs,cbs);
2829: MatSetType(*outmat,MATSBAIJ);
2830: MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);
2831: MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);
2832: MatPreallocateFinalize(dnz,onz);
2833: }
2835: /* numeric phase */
2836: MatGetBlockSizes(inmat,&bs,&cbs);
2837: MatGetOwnershipRange(*outmat,&rstart,NULL);
2839: MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);
2840: for (i=0; i<m; i++) {
2841: MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);
2842: Ii = i + rstart;
2843: MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
2844: MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);
2845: }
2846: MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);
2847: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
2848: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
2849: return 0;
2850: }