Actual source code: mpisbaij.c
petsc-3.5.3 2015-01-31
2: #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
3: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4: #include <../src/mat/impls/sbaij/seq/sbaij.h>
5: #include <petscblaslapack.h>
7: extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8: extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9: extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10: extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11: extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12: extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13: extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14: extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15: extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16: extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17: extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18: extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19: extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
20: extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21: extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
25: PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
26: {
27: Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;
31: MatStoreValues(aij->A);
32: MatStoreValues(aij->B);
33: return(0);
34: }
38: PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
39: {
40: Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;
44: MatRetrieveValues(aij->A);
45: MatRetrieveValues(aij->B);
46: return(0);
47: }
49: #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
50: { \
51: \
52: brow = row/bs; \
53: rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
54: rmax = aimax[brow]; nrow = ailen[brow]; \
55: bcol = col/bs; \
56: ridx = row % bs; cidx = col % bs; \
57: low = 0; high = nrow; \
58: while (high-low > 3) { \
59: t = (low+high)/2; \
60: if (rp[t] > bcol) high = t; \
61: else low = t; \
62: } \
63: for (_i=low; _i<high; _i++) { \
64: if (rp[_i] > bcol) break; \
65: if (rp[_i] == bcol) { \
66: bap = ap + bs2*_i + bs*cidx + ridx; \
67: if (addv == ADD_VALUES) *bap += value; \
68: else *bap = value; \
69: goto a_noinsert; \
70: } \
71: } \
72: if (a->nonew == 1) goto a_noinsert; \
73: if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
74: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
75: N = nrow++ - 1; \
76: /* shift up all the later entries in this row */ \
77: for (ii=N; ii>=_i; ii--) { \
78: rp[ii+1] = rp[ii]; \
79: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
80: } \
81: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar)); } \
82: rp[_i] = bcol; \
83: ap[bs2*_i + bs*cidx + ridx] = value; \
84: A->nonzerostate++;\
85: a_noinsert:; \
86: ailen[brow] = nrow; \
87: }
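/*
   A minimal standalone sketch (not part of PETSc) of the lookup strategy used by the two
   MatSetValues_SeqSBAIJ_*_Private macros in this file: bisect the sorted block-column
   array rp[0..nrow-1] until the window is small, then finish with a linear scan.  The
   function name and the plain int types are illustrative only.
*/
static int FindBlockColumn(const int *rp,int nrow,int bcol,int *found)
{
  int low = 0,high = nrow,t,i;
  while (high-low > 3) {          /* binary search while the window is large */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else low = t;
  }
  for (i=low; i<high; i++) {      /* short linear scan finishes the search */
    if (rp[i] > bcol) break;      /* bcol is absent; i is where it would be inserted */
    if (rp[i] == bcol) { *found = 1; return i; }
  }
  *found = 0;
  return i;                       /* insertion point that keeps the row sorted */
}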
89: #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
90: { \
91: brow = row/bs; \
92: rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
93: rmax = bimax[brow]; nrow = bilen[brow]; \
94: bcol = col/bs; \
95: ridx = row % bs; cidx = col % bs; \
96: low = 0; high = nrow; \
97: while (high-low > 3) { \
98: t = (low+high)/2; \
99: if (rp[t] > bcol) high = t; \
100: else low = t; \
101: } \
102: for (_i=low; _i<high; _i++) { \
103: if (rp[_i] > bcol) break; \
104: if (rp[_i] == bcol) { \
105: bap = ap + bs2*_i + bs*cidx + ridx; \
106: if (addv == ADD_VALUES) *bap += value; \
107: else *bap = value; \
108: goto b_noinsert; \
109: } \
110: } \
111: if (b->nonew == 1) goto b_noinsert; \
112: if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
113: MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
114: N = nrow++ - 1; \
115: /* shift up all the later entries in this row */ \
116: for (ii=N; ii>=_i; ii--) { \
117: rp[ii+1] = rp[ii]; \
118: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
119: } \
120: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));} \
121: rp[_i] = bcol; \
122: ap[bs2*_i + bs*cidx + ridx] = value; \
123: B->nonzerostate++;\
124: b_noinsert:; \
125: bilen[brow] = nrow; \
126: }
128: /* Only add/insert a(i,j) with i<=j (blocks).
129: Any a(i,j) with i>j input by the user is ignored.
130: */
133: PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
134: {
135: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
136: MatScalar value;
137: PetscBool roworiented = baij->roworiented;
139: PetscInt i,j,row,col;
140: PetscInt rstart_orig=mat->rmap->rstart;
141: PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
142: PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs;
144: /* Some Variables required in the macro */
145: Mat A = baij->A;
146: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
147: PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
148: MatScalar *aa =a->a;
150: Mat B = baij->B;
151: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
152: PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
153: MatScalar *ba =b->a;
155: PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol;
156: PetscInt low,high,t,ridx,cidx,bs2=a->bs2;
157: MatScalar *ap,*bap;
159: /* for stash */
160: PetscInt n_loc, *in_loc = NULL;
161: MatScalar *v_loc = NULL;
164: if (!baij->donotstash) {
165: if (n > baij->n_loc) {
166: PetscFree(baij->in_loc);
167: PetscFree(baij->v_loc);
168: PetscMalloc1(n,&baij->in_loc);
169: PetscMalloc1(n,&baij->v_loc);
171: baij->n_loc = n;
172: }
173: in_loc = baij->in_loc;
174: v_loc = baij->v_loc;
175: }
177: for (i=0; i<m; i++) {
178: if (im[i] < 0) continue;
179: #if defined(PETSC_USE_DEBUG)
180: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
181: #endif
182: if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
183: row = im[i] - rstart_orig; /* local row index */
184: for (j=0; j<n; j++) {
185: if (im[i]/bs > in[j]/bs) {
186: if (a->ignore_ltriangular) {
187: continue; /* ignore lower triangular blocks */
188: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
189: }
190: if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
191: col = in[j] - cstart_orig; /* local col index */
192: brow = row/bs; bcol = col/bs;
193: if (brow > bcol) continue; /* ignore lower triangular blocks of A */
194: if (roworiented) value = v[i*n+j];
195: else value = v[i+j*m];
196: MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
197: /* MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv); */
198: } else if (in[j] < 0) continue;
199: #if defined(PETSC_USE_DEBUG)
200: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
201: #endif
202: else { /* off-diag entry (B) */
203: if (mat->was_assembled) {
204: if (!baij->colmap) {
205: MatCreateColmap_MPIBAIJ_Private(mat);
206: }
207: #if defined(PETSC_USE_CTABLE)
208: PetscTableFind(baij->colmap,in[j]/bs + 1,&col);
209: col = col - 1;
210: #else
211: col = baij->colmap[in[j]/bs] - 1;
212: #endif
213: if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
214: MatDisAssemble_MPISBAIJ(mat);
215: col = in[j];
216: /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
217: B = baij->B;
218: b = (Mat_SeqBAIJ*)(B)->data;
219: bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
220: ba = b->a;
221: } else col += in[j]%bs;
222: } else col = in[j];
223: if (roworiented) value = v[i*n+j];
224: else value = v[i+j*m];
225: MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
226: /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
227: }
228: }
229: } else { /* off processor entry */
230: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
231: if (!baij->donotstash) {
232: mat->assembled = PETSC_FALSE;
233: n_loc = 0;
234: for (j=0; j<n; j++) {
235: if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
236: in_loc[n_loc] = in[j];
237: if (roworiented) {
238: v_loc[n_loc] = v[i*n+j];
239: } else {
240: v_loc[n_loc] = v[j*m+i];
241: }
242: n_loc++;
243: }
244: MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);
245: }
246: }
247: }
248: return(0);
249: }
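/*
   A minimal caller-side sketch (not part of this file) of the rule documented above
   MatSetValues_MPISBAIJ(): only entries with block row <= block column are stored, and
   lower-triangular input is either dropped or reported as an error depending on
   MAT_IGNORE_LOWER_TRIANGULAR.  The sizes and values are illustrative; error checking
   is omitted for brevity.
*/
#include <petscmat.h>
int main(int argc,char **argv)
{
  Mat S;

  PetscInitialize(&argc,&argv,NULL,NULL);
  MatCreate(PETSC_COMM_WORLD,&S);
  MatSetSizes(S,PETSC_DECIDE,PETSC_DECIDE,8,8);
  MatSetType(S,MATMPISBAIJ);
  MatMPISBAIJSetPreallocation(S,2,2,NULL,1,NULL);
  MatSetOption(S,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE); /* silently drop i > j input */
  MatSetValue(S,1,3,2.0,INSERT_VALUES);                   /* kept: block row 0 <= block column 1 */
  MatSetValue(S,3,1,2.0,INSERT_VALUES);                   /* ignored: block row 1 > block column 0 */
  MatAssemblyBegin(S,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(S,MAT_FINAL_ASSEMBLY);
  MatDestroy(&S);
  PetscFinalize();
  return 0;
}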
253: PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
254: {
255: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
256: const MatScalar *value;
257: MatScalar *barray =baij->barray;
258: PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
259: PetscErrorCode ierr;
260: PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs;
261: PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval;
262: PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
265: if (!barray) {
266: PetscMalloc1(bs2,&barray);
267: baij->barray = barray;
268: }
270: if (roworiented) {
271: stepval = (n-1)*bs;
272: } else {
273: stepval = (m-1)*bs;
274: }
275: for (i=0; i<m; i++) {
276: if (im[i] < 0) continue;
277: #if defined(PETSC_USE_DEBUG)
278: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
279: #endif
280: if (im[i] >= rstart && im[i] < rend) {
281: row = im[i] - rstart;
282: for (j=0; j<n; j++) {
283: if (im[i] > in[j]) {
284: if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
285: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
286: }
287: /* If NumCol = 1 then a copy is not required */
288: if ((roworiented) && (n == 1)) {
289: barray = (MatScalar*) v + i*bs2;
290: } else if ((!roworiented) && (m == 1)) {
291: barray = (MatScalar*) v + j*bs2;
292: } else { /* Here a copy is required */
293: if (roworiented) {
294: value = v + i*(stepval+bs)*bs + j*bs;
295: } else {
296: value = v + j*(stepval+bs)*bs + i*bs;
297: }
298: for (ii=0; ii<bs; ii++,value+=stepval) {
299: for (jj=0; jj<bs; jj++) {
300: *barray++ = *value++;
301: }
302: }
303: barray -=bs2;
304: }
306: if (in[j] >= cstart && in[j] < cend) {
307: col = in[j] - cstart;
308: MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);
309: } else if (in[j] < 0) continue;
310: #if defined(PETSC_USE_DEBUG)
311: else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
312: #endif
313: else {
314: if (mat->was_assembled) {
315: if (!baij->colmap) {
316: MatCreateColmap_MPIBAIJ_Private(mat);
317: }
319: #if defined(PETSC_USE_DEBUG)
320: #if defined(PETSC_USE_CTABLE)
321: { PetscInt data;
322: PetscTableFind(baij->colmap,in[j]+1,&data);
323: if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
324: }
325: #else
326: if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
327: #endif
328: #endif
329: #if defined(PETSC_USE_CTABLE)
330: PetscTableFind(baij->colmap,in[j]+1,&col);
331: col = (col - 1)/bs;
332: #else
333: col = (baij->colmap[in[j]] - 1)/bs;
334: #endif
335: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
336: MatDisAssemble_MPISBAIJ(mat);
337: col = in[j];
338: }
339: } else col = in[j];
340: MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);
341: }
342: }
343: } else {
344: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
345: if (!baij->donotstash) {
346: if (roworiented) {
347: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
348: } else {
349: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
350: }
351: }
352: }
353: }
354: return(0);
355: }
359: PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
360: {
361: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
363: PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
364: PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
367: for (i=0; i<m; i++) {
368: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
369: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
370: if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
371: row = idxm[i] - bsrstart;
372: for (j=0; j<n; j++) {
373: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
374: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
375: if (idxn[j] >= bscstart && idxn[j] < bscend) {
376: col = idxn[j] - bscstart;
377: MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);
378: } else {
379: if (!baij->colmap) {
380: MatCreateColmap_MPIBAIJ_Private(mat);
381: }
382: #if defined(PETSC_USE_CTABLE)
383: PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);
384: data--;
385: #else
386: data = baij->colmap[idxn[j]/bs]-1;
387: #endif
388: if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
389: else {
390: col = data + idxn[j]%bs;
391: MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);
392: }
393: }
394: }
395: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
396: }
397: return(0);
398: }
402: PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
403: {
404: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
406: PetscReal sum[2],*lnorm2;
409: if (baij->size == 1) {
410: MatNorm(baij->A,type,norm);
411: } else {
412: if (type == NORM_FROBENIUS) {
413: PetscMalloc1(2,&lnorm2);
414: MatNorm(baij->A,type,lnorm2);
415: *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* square of norm(A) */
416: MatNorm(baij->B,type,lnorm2);
417: *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* square of norm(B) */
418: MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
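/* B holds only the upper off-diagonal blocks of the symmetric matrix; their transposes
   below the diagonal are not stored, hence the factor of 2 on the norm(B) contribution */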
419: *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
420: PetscFree(lnorm2);
421: } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
422: Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
423: Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data;
424: PetscReal *rsum,*rsum2,vabs;
425: PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
426: PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
427: MatScalar *v;
429: PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);
430: PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));
431: /* Amat */
432: v = amat->a; jj = amat->j;
433: for (brow=0; brow<mbs; brow++) {
434: grow = bs*(rstart + brow);
435: nz = amat->i[brow+1] - amat->i[brow];
436: for (bcol=0; bcol<nz; bcol++) {
437: gcol = bs*(rstart + *jj); jj++;
438: for (col=0; col<bs; col++) {
439: for (row=0; row<bs; row++) {
440: vabs = PetscAbsScalar(*v); v++;
441: rsum[gcol+col] += vabs;
442: /* non-diagonal block */
443: if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
444: }
445: }
446: }
447: }
448: /* Bmat */
449: v = bmat->a; jj = bmat->j;
450: for (brow=0; brow<mbs; brow++) {
451: grow = bs*(rstart + brow);
452: nz = bmat->i[brow+1] - bmat->i[brow];
453: for (bcol=0; bcol<nz; bcol++) {
454: gcol = bs*garray[*jj]; jj++;
455: for (col=0; col<bs; col++) {
456: for (row=0; row<bs; row++) {
457: vabs = PetscAbsScalar(*v); v++;
458: rsum[gcol+col] += vabs;
459: rsum[grow+row] += vabs;
460: }
461: }
462: }
463: }
464: MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
465: *norm = 0.0;
466: for (col=0; col<mat->cmap->N; col++) {
467: if (rsum2[col] > *norm) *norm = rsum2[col];
468: }
469: PetscFree2(rsum,rsum2);
470: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
471: }
472: return(0);
473: }
477: PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
478: {
479: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
481: PetscInt nstash,reallocs;
482: InsertMode addv;
485: if (baij->donotstash || mat->nooffprocentries) return(0);
487: /* make sure all processors are either in INSERTMODE or ADDMODE */
488: MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
489: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
490: mat->insertmode = addv; /* in case this processor had no cache */
492: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
493: MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);
494: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
495: PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
496: MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);
497: PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
498: return(0);
499: }
503: PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
504: {
505: Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
506: Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data;
508: PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2;
509: PetscInt *row,*col;
510: PetscBool other_disassembled;
511: PetscMPIInt n;
512: PetscBool r1,r2,r3;
513: MatScalar *val;
514: InsertMode addv = mat->insertmode;
516: /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
518: if (!baij->donotstash && !mat->nooffprocentries) {
519: while (1) {
520: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
521: if (!flg) break;
523: for (i=0; i<n;) {
524: /* Now identify the consecutive vals belonging to the same row */
525: for (j=i,rstart=row[j]; j<n; j++) {
526: if (row[j] != rstart) break;
527: }
528: if (j < n) ncols = j-i;
529: else ncols = n-i;
530: /* Now assemble all these values with a single function call */
531: MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
532: i = j;
533: }
534: }
535: MatStashScatterEnd_Private(&mat->stash);
536: /* Now process the block-stash. Since the values are stashed column-oriented,
537: temporarily clear the row-oriented flags, and restore the original flags
538: after MatSetValuesBlocked() */
539: r1 = baij->roworiented;
540: r2 = a->roworiented;
541: r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
543: baij->roworiented = PETSC_FALSE;
544: a->roworiented = PETSC_FALSE;
546: ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
547: while (1) {
548: MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);
549: if (!flg) break;
551: for (i=0; i<n;) {
552: /* Now identify the consecutive vals belonging to the same row */
553: for (j=i,rstart=row[j]; j<n; j++) {
554: if (row[j] != rstart) break;
555: }
556: if (j < n) ncols = j-i;
557: else ncols = n-i;
558: MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);
559: i = j;
560: }
561: }
562: MatStashScatterEnd_Private(&mat->bstash);
564: baij->roworiented = r1;
565: a->roworiented = r2;
567: ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
568: }
570: MatAssemblyBegin(baij->A,mode);
571: MatAssemblyEnd(baij->A,mode);
573: /* determine if any processor has disassembled; if so, we must
574: also disassemble ourselves, in order that we may reassemble. */
575: /*
576: if nonzero structure of submatrix B cannot change then we know that
577: no processor disassembled thus we can skip this stuff
578: */
579: if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
580: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));
581: if (mat->was_assembled && !other_disassembled) {
582: MatDisAssemble_MPISBAIJ(mat);
583: }
584: }
586: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
587: MatSetUpMultiply_MPISBAIJ(mat); /* setup Mvctx and sMvctx */
588: }
589: MatAssemblyBegin(baij->B,mode);
590: MatAssemblyEnd(baij->B,mode);
592: PetscFree2(baij->rowvalues,baij->rowindices);
594: baij->rowvalues = 0;
596: /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
597: if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
598: PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
599: MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));
600: }
601: return(0);
602: }
604: extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
605: extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
606: #include <petscdraw.h>
609: static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
610: {
611: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
612: PetscErrorCode ierr;
613: PetscInt bs = mat->rmap->bs;
614: PetscMPIInt rank = baij->rank;
615: PetscBool iascii,isdraw;
616: PetscViewer sviewer;
617: PetscViewerFormat format;
620: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
621: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
622: if (iascii) {
623: PetscViewerGetFormat(viewer,&format);
624: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
625: MatInfo info;
626: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
627: MatGetInfo(mat,MAT_LOCAL,&info);
628: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
629: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);
630: MatGetInfo(baij->A,MAT_LOCAL,&info);
631: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
632: MatGetInfo(baij->B,MAT_LOCAL,&info);
633: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
634: PetscViewerFlush(viewer);
635: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
636: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
637: VecScatterView(baij->Mvctx,viewer);
638: return(0);
639: } else if (format == PETSC_VIEWER_ASCII_INFO) {
640: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
641: return(0);
642: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
643: return(0);
644: }
645: }
647: if (isdraw) {
648: PetscDraw draw;
649: PetscBool isnull;
650: PetscViewerDrawGetDraw(viewer,0,&draw);
651: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
652: }
654: {
655: /* assemble the entire matrix onto first processor. */
656: Mat A;
657: Mat_SeqSBAIJ *Aloc;
658: Mat_SeqBAIJ *Bloc;
659: PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
660: MatScalar *a;
661: const char *matname;
663: /* Should this be the same type as mat? */
664: MatCreate(PetscObjectComm((PetscObject)mat),&A);
665: if (!rank) {
666: MatSetSizes(A,M,N,M,N);
667: } else {
668: MatSetSizes(A,0,0,M,N);
669: }
670: MatSetType(A,MATMPISBAIJ);
671: MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);
672: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
673: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
675: /* copy over the A part */
676: Aloc = (Mat_SeqSBAIJ*)baij->A->data;
677: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
678: PetscMalloc1(bs,&rvals);
680: for (i=0; i<mbs; i++) {
681: rvals[0] = bs*(baij->rstartbs + i);
682: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
683: for (j=ai[i]; j<ai[i+1]; j++) {
684: col = (baij->cstartbs+aj[j])*bs;
685: for (k=0; k<bs; k++) {
686: MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
687: col++;
688: a += bs;
689: }
690: }
691: }
692: /* copy over the B part */
693: Bloc = (Mat_SeqBAIJ*)baij->B->data;
694: ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
695: for (i=0; i<mbs; i++) {
697: rvals[0] = bs*(baij->rstartbs + i);
698: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
699: for (j=ai[i]; j<ai[i+1]; j++) {
700: col = baij->garray[aj[j]]*bs;
701: for (k=0; k<bs; k++) {
702: MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
703: col++;
704: a += bs;
705: }
706: }
707: }
708: PetscFree(rvals);
709: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
710: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
711: /*
712: Everyone has to call to draw the matrix since the graphics waits are
713: synchronized across all processors that share the PetscDraw object
714: */
715: PetscViewerGetSingleton(viewer,&sviewer);
716: PetscObjectGetName((PetscObject)mat,&matname);
717: if (!rank) {
718: PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);
719: MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);
720: }
721: PetscViewerRestoreSingleton(viewer,&sviewer);
722: MatDestroy(&A);
723: }
724: return(0);
725: }
729: PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
730: {
732: PetscBool iascii,isdraw,issocket,isbinary;
735: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
736: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
737: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
738: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
739: if (iascii || isdraw || issocket || isbinary) {
740: MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);
741: }
742: return(0);
743: }
747: PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
748: {
749: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
753: #if defined(PETSC_USE_LOG)
754: PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
755: #endif
756: MatStashDestroy_Private(&mat->stash);
757: MatStashDestroy_Private(&mat->bstash);
758: MatDestroy(&baij->A);
759: MatDestroy(&baij->B);
760: #if defined(PETSC_USE_CTABLE)
761: PetscTableDestroy(&baij->colmap);
762: #else
763: PetscFree(baij->colmap);
764: #endif
765: PetscFree(baij->garray);
766: VecDestroy(&baij->lvec);
767: VecScatterDestroy(&baij->Mvctx);
768: VecDestroy(&baij->slvec0);
769: VecDestroy(&baij->slvec0b);
770: VecDestroy(&baij->slvec1);
771: VecDestroy(&baij->slvec1a);
772: VecDestroy(&baij->slvec1b);
773: VecScatterDestroy(&baij->sMvctx);
774: PetscFree2(baij->rowvalues,baij->rowindices);
775: PetscFree(baij->barray);
776: PetscFree(baij->hd);
777: VecDestroy(&baij->diag);
778: VecDestroy(&baij->bb1);
779: VecDestroy(&baij->xx1);
780: #if defined(PETSC_USE_REAL_MAT_SINGLE)
781: PetscFree(baij->setvaluescopy);
782: #endif
783: PetscFree(baij->in_loc);
784: PetscFree(baij->v_loc);
785: PetscFree(baij->rangebs);
786: PetscFree(mat->data);
788: PetscObjectChangeTypeName((PetscObject)mat,0);
789: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);
790: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);
791: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);
792: PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);
793: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);
794: return(0);
795: }
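/*
   The matrix-vector products below exploit the symmetric storage: only the diagonal block
   A and the upper off-diagonal block B are kept, so

       y = A*x_local + B*x_ghost + B^T*x_local

   (B^H in the Hermitian variant).  The work vector slvec0 = [x_local ; B^T*x_local] is
   scattered with ADD_VALUES into slvec1 = [A*x_local ; 0]; this single scatter both
   delivers the ghost values of x needed for the B*x_ghost term and routes each process's
   B^T*x_local contributions to the processes that own those rows.
*/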
799: PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
800: {
801: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
803: PetscInt nt,mbs=a->mbs,bs=A->rmap->bs;
804: PetscScalar *x,*from;
807: VecGetLocalSize(xx,&nt);
808: if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
810: /* diagonal part */
811: (*a->A->ops->mult)(a->A,xx,a->slvec1a);
812: VecSet(a->slvec1b,0.0);
814: /* subdiagonal part */
815: (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);
817: /* copy x into the vec slvec0 */
818: VecGetArray(a->slvec0,&from);
819: VecGetArray(xx,&x);
821: PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));
822: VecRestoreArray(a->slvec0,&from);
823: VecRestoreArray(xx,&x);
825: VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
826: VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
827: /* superdiagonal part */
828: (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);
829: return(0);
830: }
834: PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
835: {
836: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
838: PetscInt nt,mbs=a->mbs,bs=A->rmap->bs;
839: PetscScalar *x,*from;
842: VecGetLocalSize(xx,&nt);
843: if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
845: /* diagonal part */
846: (*a->A->ops->mult)(a->A,xx,a->slvec1a);
847: VecSet(a->slvec1b,0.0);
849: /* subdiagonal part */
850: (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);
852: /* copy x into the vec slvec0 */
853: VecGetArray(a->slvec0,&from);
854: VecGetArray(xx,&x);
856: PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));
857: VecRestoreArray(a->slvec0,&from);
858: VecRestoreArray(xx,&x);
860: VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
861: VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
862: /* superdiagonal part */
863: (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);
864: return(0);
865: }
869: PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
870: {
871: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
873: PetscInt nt;
876: VecGetLocalSize(xx,&nt);
877: if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
879: VecGetLocalSize(yy,&nt);
880: if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
882: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
883: /* do diagonal part */
884: (*a->A->ops->mult)(a->A,xx,yy);
885: /* do superdiagonal part */
886: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
887: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
888: /* do subdiagonal part */
889: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
890: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
891: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
892: return(0);
893: }
897: PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
898: {
899: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
901: PetscInt mbs=a->mbs,bs=A->rmap->bs;
902: PetscScalar *x,*from,zero=0.0;
905: /*
906: PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
907: PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
908: */
909: /* diagonal part */
910: (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);
911: VecSet(a->slvec1b,zero);
913: /* subdiagonal part */
914: (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);
916: /* copy x into the vec slvec0 */
917: VecGetArray(a->slvec0,&from);
918: VecGetArray(xx,&x);
919: PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));
920: VecRestoreArray(a->slvec0,&from);
922: VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
923: VecRestoreArray(xx,&x);
924: VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);
926: /* superdiagonal part */
927: (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);
928: return(0);
929: }
933: PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
934: {
935: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
939: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
940: /* do diagonal part */
941: (*a->A->ops->multadd)(a->A,xx,yy,zz);
942: /* do superdiagonal part */
943: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
944: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
946: /* do subdiagonal part */
947: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
948: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
949: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
950: return(0);
951: }
953: /*
954: This only works correctly for square matrices where the subblock A->A is the
955: diagonal block
956: */
959: PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
960: {
961: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
965: /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
966: MatGetDiagonal(a->A,v);
967: return(0);
968: }
972: PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
973: {
974: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
978: MatScale(a->A,aa);
979: MatScale(a->B,aa);
980: return(0);
981: }
985: PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
986: {
987: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
988: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
990: PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
991: PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
992: PetscInt *cmap,*idx_p,cstart = mat->rstartbs;
995: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
996: mat->getrowactive = PETSC_TRUE;
998: if (!mat->rowvalues && (idx || v)) {
999: /*
1000: allocate enough space to hold information from the longest row.
1001: */
1002: Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1003: Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data;
1004: PetscInt max = 1,mbs = mat->mbs,tmp;
1005: for (i=0; i<mbs; i++) {
1006: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1007: if (max < tmp) max = tmp;
1008: }
1009: PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);
1010: }
1012: if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1013: lrow = row - brstart; /* local row index */
1015: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1016: if (!v) {pvA = 0; pvB = 0;}
1017: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1018: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1019: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1020: nztot = nzA + nzB;
1022: cmap = mat->garray;
1023: if (v || idx) {
1024: if (nztot) {
1025: /* Sort by increasing column numbers, assuming A and B already sorted */
1026: PetscInt imark = -1;
1027: if (v) {
1028: *v = v_p = mat->rowvalues;
1029: for (i=0; i<nzB; i++) {
1030: if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1031: else break;
1032: }
1033: imark = i;
1034: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1035: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1036: }
1037: if (idx) {
1038: *idx = idx_p = mat->rowindices;
1039: if (imark > -1) {
1040: for (i=0; i<imark; i++) {
1041: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1042: }
1043: } else {
1044: for (i=0; i<nzB; i++) {
1045: if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1046: else break;
1047: }
1048: imark = i;
1049: }
1050: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i];
1051: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1052: }
1053: } else {
1054: if (idx) *idx = 0;
1055: if (v) *v = 0;
1056: }
1057: }
1058: *nz = nztot;
1059: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1060: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1061: return(0);
1062: }
1066: PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1067: {
1068: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1071: if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1072: baij->getrowactive = PETSC_FALSE;
1073: return(0);
1074: }
1078: PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1079: {
1080: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1081: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1084: aA->getrow_utriangular = PETSC_TRUE;
1085: return(0);
1086: }
1089: PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1090: {
1091: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1092: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1095: aA->getrow_utriangular = PETSC_FALSE;
1096: return(0);
1097: }
1101: PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1102: {
1103: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1107: MatRealPart(a->A);
1108: MatRealPart(a->B);
1109: return(0);
1110: }
1114: PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1115: {
1116: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1120: MatImaginaryPart(a->A);
1121: MatImaginaryPart(a->B);
1122: return(0);
1123: }
1127: PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1128: {
1129: Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1133: MatZeroEntries(l->A);
1134: MatZeroEntries(l->B);
1135: return(0);
1136: }
1140: PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1141: {
1142: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1143: Mat A = a->A,B = a->B;
1145: PetscReal isend[5],irecv[5];
1148: info->block_size = (PetscReal)matin->rmap->bs;
1150: MatGetInfo(A,MAT_LOCAL,info);
1152: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1153: isend[3] = info->memory; isend[4] = info->mallocs;
1155: MatGetInfo(B,MAT_LOCAL,info);
1157: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1158: isend[3] += info->memory; isend[4] += info->mallocs;
1159: if (flag == MAT_LOCAL) {
1160: info->nz_used = isend[0];
1161: info->nz_allocated = isend[1];
1162: info->nz_unneeded = isend[2];
1163: info->memory = isend[3];
1164: info->mallocs = isend[4];
1165: } else if (flag == MAT_GLOBAL_MAX) {
1166: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));
1168: info->nz_used = irecv[0];
1169: info->nz_allocated = irecv[1];
1170: info->nz_unneeded = irecv[2];
1171: info->memory = irecv[3];
1172: info->mallocs = irecv[4];
1173: } else if (flag == MAT_GLOBAL_SUM) {
1174: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));
1176: info->nz_used = irecv[0];
1177: info->nz_allocated = irecv[1];
1178: info->nz_unneeded = irecv[2];
1179: info->memory = irecv[3];
1180: info->mallocs = irecv[4];
1181: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1182: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1183: info->fill_ratio_needed = 0;
1184: info->factor_mallocs = 0;
1185: return(0);
1186: }
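/*
   A minimal caller-side sketch (not part of this file) of using the routine above through
   MatGetInfo() to check how effective a preallocation was; nz_unneeded and mallocs are the
   fields mentioned in the MatMPISBAIJSetPreallocation() notes further below.  Error
   checking is omitted.
*/
static PetscErrorCode ExampleReportPreallocation(Mat S)
{
  MatInfo info;

  MatGetInfo(S,MAT_GLOBAL_SUM,&info);
  PetscPrintf(PetscObjectComm((PetscObject)S),"nz used %g allocated %g unneeded %g mallocs %g\n",
              (double)info.nz_used,(double)info.nz_allocated,(double)info.nz_unneeded,(double)info.mallocs);
  return 0;
}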
1190: PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1191: {
1192: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1193: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1197: switch (op) {
1198: case MAT_NEW_NONZERO_LOCATIONS:
1199: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1200: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1201: case MAT_KEEP_NONZERO_PATTERN:
1202: case MAT_NEW_NONZERO_LOCATION_ERR:
1203: MatSetOption(a->A,op,flg);
1204: MatSetOption(a->B,op,flg);
1205: break;
1206: case MAT_ROW_ORIENTED:
1207: a->roworiented = flg;
1209: MatSetOption(a->A,op,flg);
1210: MatSetOption(a->B,op,flg);
1211: break;
1212: case MAT_NEW_DIAGONALS:
1213: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1214: break;
1215: case MAT_IGNORE_OFF_PROC_ENTRIES:
1216: a->donotstash = flg;
1217: break;
1218: case MAT_USE_HASH_TABLE:
1219: a->ht_flag = flg;
1220: break;
1221: case MAT_HERMITIAN:
1222: if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1223: MatSetOption(a->A,op,flg);
1225: A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1226: break;
1227: case MAT_SPD:
1228: A->spd_set = PETSC_TRUE;
1229: A->spd = flg;
1230: if (flg) {
1231: A->symmetric = PETSC_TRUE;
1232: A->structurally_symmetric = PETSC_TRUE;
1233: A->symmetric_set = PETSC_TRUE;
1234: A->structurally_symmetric_set = PETSC_TRUE;
1235: }
1236: break;
1237: case MAT_SYMMETRIC:
1238: MatSetOption(a->A,op,flg);
1239: break;
1240: case MAT_STRUCTURALLY_SYMMETRIC:
1241: MatSetOption(a->A,op,flg);
1242: break;
1243: case MAT_SYMMETRY_ETERNAL:
1244: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1245: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1246: break;
1247: case MAT_IGNORE_LOWER_TRIANGULAR:
1248: aA->ignore_ltriangular = flg;
1249: break;
1250: case MAT_ERROR_LOWER_TRIANGULAR:
1251: aA->ignore_ltriangular = flg;
1252: break;
1253: case MAT_GETROW_UPPERTRIANGULAR:
1254: aA->getrow_utriangular = flg;
1255: break;
1256: default:
1257: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1258: }
1259: return(0);
1260: }
1264: PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1265: {
1269: if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1270: MatDuplicate(A,MAT_COPY_VALUES,B);
1271: }
1272: return(0);
1273: }
1277: PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1278: {
1279: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1280: Mat a = baij->A, b=baij->B;
1282: PetscInt nv,m,n;
1283: PetscBool flg;
1286: if (ll != rr) {
1287: VecEqual(ll,rr,&flg);
1288: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1289: }
1290: if (!ll) return(0);
1292: MatGetLocalSize(mat,&m,&n);
1293: if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1295: VecGetLocalSize(rr,&nv);
1296: if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1298: VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1300: /* left diagonalscale the off-diagonal part */
1301: (*b->ops->diagonalscale)(b,ll,NULL);
1303: /* scale the diagonal part */
1304: (*a->ops->diagonalscale)(a,ll,rr);
1306: /* right diagonalscale the off-diagonal part */
1307: VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1308: (*b->ops->diagonalscale)(b,NULL,baij->lvec);
1309: return(0);
1310: }
1314: PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1315: {
1316: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1320: MatSetUnfactored(a->A);
1321: return(0);
1322: }
1324: static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1328: PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag)
1329: {
1330: Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1331: Mat a,b,c,d;
1332: PetscBool flg;
1336: a = matA->A; b = matA->B;
1337: c = matB->A; d = matB->B;
1339: MatEqual(a,c,&flg);
1340: if (flg) {
1341: MatEqual(b,d,&flg);
1342: }
1343: MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1344: return(0);
1345: }
1349: PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1350: {
1352: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1353: Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1356: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1357: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1358: MatGetRowUpperTriangular(A);
1359: MatCopy_Basic(A,B,str);
1360: MatRestoreRowUpperTriangular(A);
1361: } else {
1362: MatCopy(a->A,b->A,str);
1363: MatCopy(a->B,b->B,str);
1364: }
1365: return(0);
1366: }
1370: PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1371: {
1375: MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1376: return(0);
1377: }
1381: PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1382: {
1384: Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1385: PetscBLASInt bnz,one=1;
1386: Mat_SeqSBAIJ *xa,*ya;
1387: Mat_SeqBAIJ *xb,*yb;
1390: if (str == SAME_NONZERO_PATTERN) {
1391: PetscScalar alpha = a;
1392: xa = (Mat_SeqSBAIJ*)xx->A->data;
1393: ya = (Mat_SeqSBAIJ*)yy->A->data;
1394: PetscBLASIntCast(xa->nz,&bnz);
1395: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1396: xb = (Mat_SeqBAIJ*)xx->B->data;
1397: yb = (Mat_SeqBAIJ*)yy->B->data;
1398: PetscBLASIntCast(xb->nz,&bnz);
1399: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1400: PetscObjectStateIncrease((PetscObject)Y);
1401: } else {
1402: Mat B;
1403: PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1404: if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1405: MatGetRowUpperTriangular(X);
1406: MatGetRowUpperTriangular(Y);
1407: PetscMalloc1(yy->A->rmap->N,&nnz_d);
1408: PetscMalloc1(yy->B->rmap->N,&nnz_o);
1409: MatCreate(PetscObjectComm((PetscObject)Y),&B);
1410: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
1411: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
1412: MatSetBlockSizesFromMats(B,Y,Y);
1413: MatSetType(B,MATMPISBAIJ);
1414: MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);
1415: MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);
1416: MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);
1417: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
1418: MatHeaderReplace(Y,B);
1419: PetscFree(nnz_d);
1420: PetscFree(nnz_o);
1421: MatRestoreRowUpperTriangular(X);
1422: MatRestoreRowUpperTriangular(Y);
1423: }
1424: return(0);
1425: }
1429: PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1430: {
1432: PetscInt i;
1433: PetscBool flg;
1436: for (i=0; i<n; i++) {
1437: ISEqual(irow[i],icol[i],&flg);
1438: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1439: }
1440: MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);
1441: return(0);
1442: }
1445: /* -------------------------------------------------------------------*/
1446: static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1447: MatGetRow_MPISBAIJ,
1448: MatRestoreRow_MPISBAIJ,
1449: MatMult_MPISBAIJ,
1450: /* 4*/ MatMultAdd_MPISBAIJ,
1451: MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1452: MatMultAdd_MPISBAIJ,
1453: 0,
1454: 0,
1455: 0,
1456: /* 10*/ 0,
1457: 0,
1458: 0,
1459: MatSOR_MPISBAIJ,
1460: MatTranspose_MPISBAIJ,
1461: /* 15*/ MatGetInfo_MPISBAIJ,
1462: MatEqual_MPISBAIJ,
1463: MatGetDiagonal_MPISBAIJ,
1464: MatDiagonalScale_MPISBAIJ,
1465: MatNorm_MPISBAIJ,
1466: /* 20*/ MatAssemblyBegin_MPISBAIJ,
1467: MatAssemblyEnd_MPISBAIJ,
1468: MatSetOption_MPISBAIJ,
1469: MatZeroEntries_MPISBAIJ,
1470: /* 24*/ 0,
1471: 0,
1472: 0,
1473: 0,
1474: 0,
1475: /* 29*/ MatSetUp_MPISBAIJ,
1476: 0,
1477: 0,
1478: 0,
1479: 0,
1480: /* 34*/ MatDuplicate_MPISBAIJ,
1481: 0,
1482: 0,
1483: 0,
1484: 0,
1485: /* 39*/ MatAXPY_MPISBAIJ,
1486: MatGetSubMatrices_MPISBAIJ,
1487: MatIncreaseOverlap_MPISBAIJ,
1488: MatGetValues_MPISBAIJ,
1489: MatCopy_MPISBAIJ,
1490: /* 44*/ 0,
1491: MatScale_MPISBAIJ,
1492: 0,
1493: 0,
1494: 0,
1495: /* 49*/ 0,
1496: 0,
1497: 0,
1498: 0,
1499: 0,
1500: /* 54*/ 0,
1501: 0,
1502: MatSetUnfactored_MPISBAIJ,
1503: 0,
1504: MatSetValuesBlocked_MPISBAIJ,
1505: /* 59*/ 0,
1506: 0,
1507: 0,
1508: 0,
1509: 0,
1510: /* 64*/ 0,
1511: 0,
1512: 0,
1513: 0,
1514: 0,
1515: /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1516: 0,
1517: 0,
1518: 0,
1519: 0,
1520: /* 74*/ 0,
1521: 0,
1522: 0,
1523: 0,
1524: 0,
1525: /* 79*/ 0,
1526: 0,
1527: 0,
1528: 0,
1529: MatLoad_MPISBAIJ,
1530: /* 84*/ 0,
1531: 0,
1532: 0,
1533: 0,
1534: 0,
1535: /* 89*/ 0,
1536: 0,
1537: 0,
1538: 0,
1539: 0,
1540: /* 94*/ 0,
1541: 0,
1542: 0,
1543: 0,
1544: 0,
1545: /* 99*/ 0,
1546: 0,
1547: 0,
1548: 0,
1549: 0,
1550: /*104*/ 0,
1551: MatRealPart_MPISBAIJ,
1552: MatImaginaryPart_MPISBAIJ,
1553: MatGetRowUpperTriangular_MPISBAIJ,
1554: MatRestoreRowUpperTriangular_MPISBAIJ,
1555: /*109*/ 0,
1556: 0,
1557: 0,
1558: 0,
1559: 0,
1560: /*114*/ 0,
1561: 0,
1562: 0,
1563: 0,
1564: 0,
1565: /*119*/ 0,
1566: 0,
1567: 0,
1568: 0,
1569: 0,
1570: /*124*/ 0,
1571: 0,
1572: 0,
1573: 0,
1574: 0,
1575: /*129*/ 0,
1576: 0,
1577: 0,
1578: 0,
1579: 0,
1580: /*134*/ 0,
1581: 0,
1582: 0,
1583: 0,
1584: 0,
1585: /*139*/ 0,
1586: 0,
1587: 0
1588: };
1593: PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1594: {
1596: *a = ((Mat_MPISBAIJ*)A->data)->A;
1597: return(0);
1598: }
1602: PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1603: {
1604: Mat_MPISBAIJ *b;
1606: PetscInt i,mbs,Mbs;
1609: MatSetBlockSize(B,PetscAbs(bs));
1610: PetscLayoutSetUp(B->rmap);
1611: PetscLayoutSetUp(B->cmap);
1612: PetscLayoutGetBlockSize(B->rmap,&bs);
1614: b = (Mat_MPISBAIJ*)B->data;
1615: mbs = B->rmap->n/bs;
1616: Mbs = B->rmap->N/bs;
1617: if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by block size %D",B->rmap->n,bs);
1619: B->rmap->bs = bs;
1620: b->bs2 = bs*bs;
1621: b->mbs = mbs;
1622: b->nbs = mbs;
1623: b->Mbs = Mbs;
1624: b->Nbs = Mbs;
1626: for (i=0; i<=b->size; i++) {
1627: b->rangebs[i] = B->rmap->range[i]/bs;
1628: }
1629: b->rstartbs = B->rmap->rstart/bs;
1630: b->rendbs = B->rmap->rend/bs;
1632: b->cstartbs = B->cmap->rstart/bs;
1633: b->cendbs = B->cmap->rend/bs;
1635: if (!B->preallocated) {
1636: MatCreate(PETSC_COMM_SELF,&b->A);
1637: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
1638: MatSetType(b->A,MATSEQSBAIJ);
1639: PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);
1640: MatCreate(PETSC_COMM_SELF,&b->B);
1641: MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
1642: MatSetType(b->B,MATSEQBAIJ);
1643: PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);
1644: MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);
1645: }
1647: MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);
1648: MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);
1650: B->preallocated = PETSC_TRUE;
1651: return(0);
1652: }
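/*
   A minimal caller-side sketch (not part of this file) of per-block-row preallocation for
   the routine above, through the public MatMPISBAIJSetPreallocation() interface: bs = 2
   and four local block rows, with d_nnz counting only the upper triangle of the diagonal
   portion.  The counts and sizes are illustrative; error checking is omitted.
*/
static PetscErrorCode ExamplePreallocateSBAIJ(MPI_Comm comm,Mat *S)
{
  PetscInt d_nnz[4] = {3,2,2,1};  /* upper-triangular block nonzeros per local block row (diagonal portion) */
  PetscInt o_nnz[4] = {1,1,0,0};  /* block nonzeros per local block row (off-diagonal portion) */

  MatCreate(comm,S);
  MatSetSizes(*S,8,8,PETSC_DETERMINE,PETSC_DETERMINE); /* 8 local rows = 4 block rows of size 2 */
  MatSetType(*S,MATMPISBAIJ);
  MatMPISBAIJSetPreallocation(*S,2,0,d_nnz,0,o_nnz);
  return 0;
}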
1656: PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1657: {
1658: PetscInt m,rstart,cstart,cend;
1659: PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1660: const PetscInt *JJ =0;
1661: PetscScalar *values=0;
1665: if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1666: PetscLayoutSetBlockSize(B->rmap,bs);
1667: PetscLayoutSetBlockSize(B->cmap,bs);
1668: PetscLayoutSetUp(B->rmap);
1669: PetscLayoutSetUp(B->cmap);
1670: PetscLayoutGetBlockSize(B->rmap,&bs);
1671: m = B->rmap->n/bs;
1672: rstart = B->rmap->rstart/bs;
1673: cstart = B->cmap->rstart/bs;
1674: cend = B->cmap->rend/bs;
1676: if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1677: PetscMalloc2(m,&d_nnz,m,&o_nnz);
1678: for (i=0; i<m; i++) {
1679: nz = ii[i+1] - ii[i];
1680: if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1681: nz_max = PetscMax(nz_max,nz);
1682: JJ = jj + ii[i];
1683: for (j=0; j<nz; j++) {
1684: if (*JJ >= cstart) break;
1685: JJ++;
1686: }
1687: d = 0;
1688: for (; j<nz; j++) {
1689: if (*JJ++ >= cend) break;
1690: d++;
1691: }
1692: d_nnz[i] = d;
1693: o_nnz[i] = nz - d;
1694: }
1695: MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);
1696: PetscFree2(d_nnz,o_nnz);
1698: values = (PetscScalar*)V;
1699: if (!values) {
1700: PetscMalloc1(bs*bs*nz_max,&values);
1701: PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));
1702: }
1703: for (i=0; i<m; i++) {
1704: PetscInt row = i + rstart;
1705: PetscInt ncols = ii[i+1] - ii[i];
1706: const PetscInt *icols = jj + ii[i];
1707: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1708: MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);
1709: }
1711: if (!V) { PetscFree(values); }
1712: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1713: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1714: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1715: return(0);
1716: }
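/*
   A minimal caller-side sketch (not part of this file) of block-CSR preallocation for the
   routine above.  It assumes the public wrapper MatMPISBAIJSetPreallocationCSR() is
   available and dispatches here via the "MatMPISBAIJSetPreallocationCSR_C" composition
   made in MatCreate_MPISBAIJ() below, and that S was created as MATMPISBAIJ with two
   local block rows of block size 2.  ii/jj give local block rows and global block
   columns, upper triangle only (as seen from rank 0); passing NULL values only
   preallocates.  Error checking is omitted.
*/
static PetscErrorCode ExamplePreallocateSBAIJCSR(Mat S)
{
  PetscInt ii[] = {0,2,3};   /* block row 0 has 2 blocks, block row 1 has 1 block */
  PetscInt jj[] = {0,1,1};   /* global block columns, each >= its block row */

  MatMPISBAIJSetPreallocationCSR(S,2,ii,jj,NULL);
  return 0;
}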
1718: #if defined(PETSC_HAVE_MUMPS)
1719: PETSC_EXTERN PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1720: #endif
1721: #if defined(PETSC_HAVE_PASTIX)
1722: PETSC_EXTERN PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1723: #endif
1725: /*MC
1726: MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1727: based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of
1728: the matrix is stored.
1730: For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1731: can call MatSetOption(Mat, MAT_HERMITIAN);
1733: Options Database Keys:
1734: . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1736: Level: beginner
1738: .seealso: MatCreateMPISBAIJ
1739: M*/
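/*
   A minimal sketch (not part of this file) of the note above: with complex scalars an
   assembled MATMPISBAIJ matrix is treated as symmetric, not Hermitian; marking it
   Hermitian switches MatMult() to the Hermitian kernel (see MAT_HERMITIAN in
   MatSetOption_MPISBAIJ() above).  The matrix S is assumed to have been created,
   filled and assembled elsewhere; error checking is omitted.
*/
static PetscErrorCode ExampleMarkHermitian(Mat S)
{
  MatSetOption(S,MAT_HERMITIAN,PETSC_TRUE);  /* only valid after MatAssemblyEnd() */
  return 0;
}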
1741: PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1745: PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1746: {
1747: Mat_MPISBAIJ *b;
1749: PetscBool flg;
1752: PetscNewLog(B,&b);
1753: B->data = (void*)b;
1754: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1756: B->ops->destroy = MatDestroy_MPISBAIJ;
1757: B->ops->view = MatView_MPISBAIJ;
1758: B->assembled = PETSC_FALSE;
1759: B->insertmode = NOT_SET_VALUES;
1761: MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);
1762: MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);
1764: /* build local table of row and column ownerships */
1765: PetscMalloc1((b->size+2),&b->rangebs);
1767: /* build cache for off array entries formed */
1768: MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);
1770: b->donotstash = PETSC_FALSE;
1771: b->colmap = NULL;
1772: b->garray = NULL;
1773: b->roworiented = PETSC_TRUE;
1775: /* stuff used in block assembly */
1776: b->barray = 0;
1778: /* stuff used for matrix vector multiply */
1779: b->lvec = 0;
1780: b->Mvctx = 0;
1781: b->slvec0 = 0;
1782: b->slvec0b = 0;
1783: b->slvec1 = 0;
1784: b->slvec1a = 0;
1785: b->slvec1b = 0;
1786: b->sMvctx = 0;
1788: /* stuff for MatGetRow() */
1789: b->rowindices = 0;
1790: b->rowvalues = 0;
1791: b->getrowactive = PETSC_FALSE;
1793: /* hash table stuff */
1794: b->ht = 0;
1795: b->hd = 0;
1796: b->ht_size = 0;
1797: b->ht_flag = PETSC_FALSE;
1798: b->ht_fact = 0;
1799: b->ht_total_ct = 0;
1800: b->ht_insert_ct = 0;
1802: /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1803: b->ijonly = PETSC_FALSE;
1805: b->in_loc = 0;
1806: b->v_loc = 0;
1807: b->n_loc = 0;
1808: PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");
1809: PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);
1810: if (flg) {
1811: PetscReal fact = 1.39;
1812: MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);
1813: PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);
1814: if (fact <= 1.0) fact = 1.39;
1815: MatMPIBAIJSetHashTableFactor(B,fact);
1816: PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);
1817: }
1818: PetscOptionsEnd();
1820: #if defined(PETSC_HAVE_PASTIX)
1821: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpisbaij_pastix);
1822: #endif
1823: #if defined(PETSC_HAVE_MUMPS)
1824: PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_sbaij_mumps);
1825: #endif
1826: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);
1827: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);
1828: PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);
1829: PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);
1830: PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);
1831: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);
1833: B->symmetric = PETSC_TRUE;
1834: B->structurally_symmetric = PETSC_TRUE;
1835: B->symmetric_set = PETSC_TRUE;
1836: B->structurally_symmetric_set = PETSC_TRUE;
1838: PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);
1839: return(0);
1840: }
1842: /*MC
1843: MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1845: This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1846: and MATMPISBAIJ otherwise.
1848: Options Database Keys:
1849: . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1851: Level: beginner
1853: .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1854: M*/
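/*
   A minimal sketch (sizes illustrative only) of the type-independent creation pattern
   for MATSBAIJ: select the type, let -mat_type and the communicator size pick the
   concrete implementation, and call both preallocation routines; the one that does not
   match the runtime type is a no-op (MatMPISBAIJSetPreallocation() below, for example,
   dispatches through PetscTryMethod()).

      Mat      A;
      PetscInt bs = 1;

      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
      MatSetType(A,MATSBAIJ);
      MatSetFromOptions(A);
      MatSeqSBAIJSetPreallocation(A,bs,5,NULL);
      MatMPISBAIJSetPreallocation(A,bs,5,NULL,2,NULL);
*/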
1858: /*@C
1859: MatMPISBAIJSetPreallocation - For good matrix assembly performance
1860: the user should preallocate the matrix storage by setting the parameters
1861: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
1862: performance can be increased by more than a factor of 50.
1864: Collective on Mat
1866: Input Parameters:
1867: + B - the matrix
1868: . bs - size of block
1869: . d_nz - number of block nonzeros per block row in diagonal portion of local
1870: submatrix (same for all local rows)
1871: . d_nnz - array containing the number of block nonzeros in the various block rows
1872: in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1873: (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room
1874: for the diagonal entry and set a value even if it is zero.
1875: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
1876: submatrix (same for all local rows).
1877: - o_nnz - array containing the number of block nonzeros in the various block rows of the
1878: off-diagonal portion of the local submatrix that is right of the diagonal
1879: (possibly different for each block row) or NULL.
1882: Options Database Keys:
1883: . -mat_no_unroll - uses code that does not unroll the loops in the
1884: block calculations (much slower)
1885: . -mat_block_size - size of the blocks to use
1887: Notes:
1889: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1890: then it must be used on all processors that share the object for that argument.
1892: If the *_nnz parameter is given then the *_nz parameter is ignored
1894: Storage Information:
1895: For a square global matrix we define each processor's diagonal portion
1896: to be its local rows and the corresponding columns (a square submatrix);
1897: each processor's off-diagonal portion encompasses the remainder of the
1898: local matrix (a rectangular submatrix).
1900: The user can specify preallocated storage for the diagonal part of
1901: the local submatrix with either d_nz or d_nnz (not both). Set
1902: d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1903: memory allocation. Likewise, specify preallocated storage for the
1904: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1906: You can call MatGetInfo() to get information on how effective the preallocation was;
1907: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1908: You can also run with the option -info and look for messages with the string
1909: malloc in them to see if additional memory allocation was needed.
1911: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1912: the figure below we depict these three local rows and all columns (0-11).
1914: .vb
1915: 0 1 2 3 4 5 6 7 8 9 10 11
1916: --------------------------
1917: row 3 |. . . d d d o o o o o o
1918: row 4 |. . . d d d o o o o o o
1919: row 5 |. . . d d d o o o o o o
1920: --------------------------
1921: .ve
1923: Thus, any entries in the d locations are stored in the d (diagonal)
1924: submatrix, and any entries in the o locations are stored in the
1925: o (off-diagonal) submatrix. Note that the d matrix is stored in
1925: MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1928: Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1929: plus the diagonal part of the d matrix,
1930: and o_nz should indicate the number of block nonzeros per row in the o matrix
1932: In general, for PDE problems in which most nonzeros are near the diagonal,
1933: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1934: or you will get TERRIBLE performance; see the users' manual chapter on
1935: matrices.
1937: Level: intermediate
1939: .keywords: matrix, block, aij, compressed row, sparse, parallel
1941: .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1942: @*/
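/*
   A minimal sketch (block counts illustrative only) of preallocating with per-block-row
   arrays instead of the single d_nz/o_nz values; here d_nnz counts only the blocks on or
   to the right of the diagonal block, and each d_nnz[i] leaves room for the diagonal
   block in case the matrix is later factored.

      Mat      A;
      PetscInt bs = 2,mbs_local = 3;
      PetscInt d_nnz[3] = {3,2,1};
      PetscInt o_nnz[3] = {1,1,0};

      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,bs*mbs_local,bs*mbs_local,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(A,MATMPISBAIJ);
      MatMPISBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);
*/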
1943: PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1944: {
1951: PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
1952: return(0);
1953: }
1957: /*@C
1958: MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1959: (block compressed row). For good matrix assembly performance
1960: the user should preallocate the matrix storage by setting the parameters
1961: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
1962: performance can be increased by more than a factor of 50.
1964: Collective on MPI_Comm
1966: Input Parameters:
1967: + comm - MPI communicator
1968: . bs - size of block
1969: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1970: This value should be the same as the local size used in creating the
1971: y vector for the matrix-vector product y = Ax.
1972: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1973: This value should be the same as the local size used in creating the
1974: x vector for the matrix-vector product y = Ax.
1975: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1976: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1977: . d_nz - number of block nonzeros per block row in diagonal portion of local
1978: submatrix (same for all local rows)
1979: . d_nnz - array containing the number of block nonzeros in the various block rows
1980: in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1981: (possibly different for each block row) or NULL.
1982: If you plan to factor the matrix you must leave room for the diagonal entry and
1983: set its value even if it is zero.
1984: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
1985: submatrix (same for all local rows).
1986: - o_nnz - array containing the number of block nonzeros in the various block rows of the
1987: off-diagonal portion of the local submatrix (possibly different for
1988: each block row) or NULL.
1990: Output Parameter:
1991: . A - the matrix
1993: Options Database Keys:
1994: . -mat_no_unroll - uses code that does not unroll the loops in the
1995: block calculations (much slower)
1996: . -mat_block_size - size of the blocks to use
1997: . -mat_mpi - use the parallel matrix data structures even on one processor
1998: (defaults to using SeqBAIJ format on one processor)
2000: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2001: MatXXXXSetPreallocation() paradigm instead of this routine directly.
2002: [MatXXXXSetPreallocation() is, for example, MatMPISBAIJSetPreallocation()]
2004: Notes:
2005: The number of rows and columns must be divisible by the block size.
2006: This matrix type does not support complex Hermitian operations.
2008: The user MUST specify either the local or global matrix dimensions
2009: (possibly both).
2011: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2012: then it must be used on all processors that share the object for that argument.
2014: If the *_nnz parameter is given then the *_nz parameter is ignored
2016: Storage Information:
2017: For a square global matrix we define each processor's diagonal portion
2018: to be its local rows and the corresponding columns (a square submatrix);
2019: each processor's off-diagonal portion encompasses the remainder of the
2020: local matrix (a rectangular submatrix).
2022: The user can specify preallocated storage for the diagonal part of
2023: the local submatrix with either d_nz or d_nnz (not both). Set
2024: d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2025: memory allocation. Likewise, specify preallocated storage for the
2026: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2028: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2029: the figure below we depict these three local rows and all columns (0-11).
2031: .vb
2032: 0 1 2 3 4 5 6 7 8 9 10 11
2033: --------------------------
2034: row 3 |. . . d d d o o o o o o
2035: row 4 |. . . d d d o o o o o o
2036: row 5 |. . . d d d o o o o o o
2037: --------------------------
2038: .ve
2040: Thus, any entries in the d locations are stored in the d (diagonal)
2041: submatrix, and any entries in the o locations are stored in the
2042: o (off-diagonal) submatrix. Note that the d matrix is stored in
2043: MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2045: Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2046: plus the diagonal part of the d matrix,
2047: and o_nz should indicate the number of block nonzeros per row in the o matrix.
2048: In general, for PDE problems in which most nonzeros are near the diagonal,
2049: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2050: or you will get TERRIBLE performance; see the users' manual chapter on
2051: matrices.
2053: Level: intermediate
2055: .keywords: matrix, block, aij, compressed row, sparse, parallel
2057: .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2058: @*/
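/*
   A minimal usage sketch of this routine itself (sizes and preallocation counts
   illustrative only): each process contributes 4 local rows/columns of a 2x2-blocked
   matrix and lets PETSc compute the global sizes.

      Mat A;

      MatCreateSBAIJ(PETSC_COMM_WORLD,2,4,4,PETSC_DETERMINE,PETSC_DETERMINE,2,NULL,1,NULL,&A);
      ... set values in the upper triangle, then assemble ...
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/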
2060: PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2061: {
2063: PetscMPIInt size;
2066: MatCreate(comm,A);
2067: MatSetSizes(*A,m,n,M,N);
2068: MPI_Comm_size(comm,&size);
2069: if (size > 1) {
2070: MatSetType(*A,MATMPISBAIJ);
2071: MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
2072: } else {
2073: MatSetType(*A,MATSEQSBAIJ);
2074: MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
2075: }
2076: return(0);
2077: }
2082: static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2083: {
2084: Mat mat;
2085: Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2087: PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2088: PetscScalar *array;
2091: *newmat = 0;
2093: MatCreate(PetscObjectComm((PetscObject)matin),&mat);
2094: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
2095: MatSetType(mat,((PetscObject)matin)->type_name);
2096: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
2097: PetscLayoutReference(matin->rmap,&mat->rmap);
2098: PetscLayoutReference(matin->cmap,&mat->cmap);
2100: mat->factortype = matin->factortype;
2101: mat->preallocated = PETSC_TRUE;
2102: mat->assembled = PETSC_TRUE;
2103: mat->insertmode = NOT_SET_VALUES;
2105: a = (Mat_MPISBAIJ*)mat->data;
2106: a->bs2 = oldmat->bs2;
2107: a->mbs = oldmat->mbs;
2108: a->nbs = oldmat->nbs;
2109: a->Mbs = oldmat->Mbs;
2110: a->Nbs = oldmat->Nbs;
2113: a->size = oldmat->size;
2114: a->rank = oldmat->rank;
2115: a->donotstash = oldmat->donotstash;
2116: a->roworiented = oldmat->roworiented;
2117: a->rowindices = 0;
2118: a->rowvalues = 0;
2119: a->getrowactive = PETSC_FALSE;
2120: a->barray = 0;
2121: a->rstartbs = oldmat->rstartbs;
2122: a->rendbs = oldmat->rendbs;
2123: a->cstartbs = oldmat->cstartbs;
2124: a->cendbs = oldmat->cendbs;
2126: /* hash table stuff */
2127: a->ht = 0;
2128: a->hd = 0;
2129: a->ht_size = 0;
2130: a->ht_flag = oldmat->ht_flag;
2131: a->ht_fact = oldmat->ht_fact;
2132: a->ht_total_ct = 0;
2133: a->ht_insert_ct = 0;
2135: PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));
2136: if (oldmat->colmap) {
2137: #if defined(PETSC_USE_CTABLE)
2138: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
2139: #else
2140: PetscMalloc1((a->Nbs),&a->colmap);
2141: PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));
2142: PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));
2143: #endif
2144: } else a->colmap = 0;
2146: if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2147: PetscMalloc1(len,&a->garray);
2148: PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));
2149: PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));
2150: } else a->garray = 0;
2152: MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);
2153: VecDuplicate(oldmat->lvec,&a->lvec);
2154: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);
2155: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
2156: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);
2158: VecDuplicate(oldmat->slvec0,&a->slvec0);
2159: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);
2160: VecDuplicate(oldmat->slvec1,&a->slvec1);
2161: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);
2163: VecGetLocalSize(a->slvec1,&nt);
2164: VecGetArray(a->slvec1,&array);
2165: VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);
2166: VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);
2167: VecRestoreArray(a->slvec1,&array);
2168: VecGetArray(a->slvec0,&array);
2169: VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);
2170: VecRestoreArray(a->slvec0,&array);
2171: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);
2172: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);
2173: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);
2174: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);
2175: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);
2177: /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2178: PetscObjectReference((PetscObject)oldmat->sMvctx);
2179: a->sMvctx = oldmat->sMvctx;
2180: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);
2182: MatDuplicate(oldmat->A,cpvalues,&a->A);
2183: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
2184: MatDuplicate(oldmat->B,cpvalues,&a->B);
2185: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);
2186: PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
2187: *newmat = mat;
2188: return(0);
2189: }
2193: PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2194: {
2196: PetscInt i,nz,j,rstart,rend;
2197: PetscScalar *vals,*buf;
2198: MPI_Comm comm;
2199: MPI_Status status;
2200: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2201: PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2202: PetscInt *procsnz = 0,jj,*mycols,*ibuf;
2203: PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2204: PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2205: PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2206: int fd;
2209: PetscObjectGetComm((PetscObject)viewer,&comm);
2210: PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");
2211: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
2212: PetscOptionsEnd();
2213: if (bs < 0) bs = 1;
2215: MPI_Comm_size(comm,&size);
2216: MPI_Comm_rank(comm,&rank);
2217: if (!rank) {
2218: PetscViewerBinaryGetDescriptor(viewer,&fd);
2219: PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
2220: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2221: if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2222: }
2224: if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2226: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
2227: M = header[1];
2228: N = header[2];
2230: /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2231: if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2232: if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2234: /* If global sizes are set, check if they are consistent with that given in the file */
2235: if (sizesset) {
2236: MatGetSize(newmat,&grows,&gcols);
2237: }
2238: if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2239: if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2241: if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2243: /*
2244: This code adds extra rows to make sure the number of rows is
2245: divisible by the blocksize
2246: */
2247: Mbs = M/bs;
2248: extra_rows = bs - M + bs*(Mbs);
2249: if (extra_rows == bs) extra_rows = 0;
2250: else Mbs++;
2251: if (extra_rows && !rank) {
2252: PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
2253: }
2255: /* determine ownership of all rows */
2256: if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2257: mbs = Mbs/size + ((Mbs % size) > rank);
2258: m = mbs*bs;
2259: } else { /* User Set */
2260: m = newmat->rmap->n;
2261: mbs = m/bs;
2262: }
2263: PetscMalloc2(size+1,&rowners,size+1,&browners);
2264: PetscMPIIntCast(mbs,&mmbs);
2265: MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
2266: rowners[0] = 0;
2267: for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2268: for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2269: rstart = rowners[rank];
2270: rend = rowners[rank+1];
2272: /* distribute row lengths to all processors */
2273: PetscMalloc1((rend-rstart)*bs,&locrowlens);
2274: if (!rank) {
2275: PetscMalloc1((M+extra_rows),&rowlengths);
2276: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2277: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2278: PetscMalloc1(size,&sndcounts);
2279: for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2280: MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);
2281: PetscFree(sndcounts);
2282: } else {
2283: MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);
2284: }
2286: if (!rank) { /* procs[0] */
2287: /* calculate the number of nonzeros on each processor */
2288: PetscMalloc1(size,&procsnz);
2289: PetscMemzero(procsnz,size*sizeof(PetscInt));
2290: for (i=0; i<size; i++) {
2291: for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2292: procsnz[i] += rowlengths[j];
2293: }
2294: }
2295: PetscFree(rowlengths);
2297: /* determine max buffer needed and allocate it */
2298: maxnz = 0;
2299: for (i=0; i<size; i++) {
2300: maxnz = PetscMax(maxnz,procsnz[i]);
2301: }
2302: PetscMalloc1(maxnz,&cols);
2304: /* read in my part of the matrix column indices */
2305: nz = procsnz[0];
2306: PetscMalloc1(nz,&ibuf);
2307: mycols = ibuf;
2308: if (size == 1) nz -= extra_rows;
2309: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
2310: if (size == 1) {
2311: for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2312: }
2314: /* read in each other processor's part (except the last) and ship it off */
2315: for (i=1; i<size-1; i++) {
2316: nz = procsnz[i];
2317: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2318: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
2319: }
2320: /* read in the stuff for the last proc */
2321: if (size != 1) {
2322: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
2323: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2324: for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2325: MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);
2326: }
2327: PetscFree(cols);
2328: } else { /* procs[i], i>0 */
2329: /* determine buffer space needed for message */
2330: nz = 0;
2331: for (i=0; i<m; i++) nz += locrowlens[i];
2332: PetscMalloc1(nz,&ibuf);
2333: mycols = ibuf;
2334: /* receive message of column indices*/
2335: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
2336: MPI_Get_count(&status,MPIU_INT,&maxnz);
2337: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2338: }
2340: /* loop over local rows, determining number of off diagonal entries */
2341: PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);
2342: PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);
2343: PetscMemzero(mask,Mbs*sizeof(PetscInt));
2344: PetscMemzero(masked1,Mbs*sizeof(PetscInt));
2345: PetscMemzero(masked2,Mbs*sizeof(PetscInt));
2346: rowcount = 0;
2347: nzcount = 0;
2348: for (i=0; i<mbs; i++) {
2349: dcount = 0;
2350: odcount = 0;
2351: for (j=0; j<bs; j++) {
2352: kmax = locrowlens[rowcount];
2353: for (k=0; k<kmax; k++) {
2354: tmp = mycols[nzcount++]/bs; /* block col. index */
2355: if (!mask[tmp]) {
2356: mask[tmp] = 1;
2357: if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2358: else masked1[dcount++] = tmp; /* entry in diag portion */
2359: }
2360: }
2361: rowcount++;
2362: }
2364: dlens[i] = dcount; /* d_nzz[i] */
2365: odlens[i] = odcount; /* o_nzz[i] */
2367: /* zero out the mask elements we set */
2368: for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2369: for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2370: }
2371: if (!sizesset) {
2372: MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);
2373: }
2374: MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);
2375: MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);
2377: if (!rank) {
2378: PetscMalloc1(maxnz,&buf);
2379: /* read in my part of the matrix numerical values */
2380: nz = procsnz[0];
2381: vals = buf;
2382: mycols = ibuf;
2383: if (size == 1) nz -= extra_rows;
2384: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2385: if (size == 1) {
2386: for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2387: }
2389: /* insert into matrix */
2390: jj = rstart*bs;
2391: for (i=0; i<m; i++) {
2392: MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2393: mycols += locrowlens[i];
2394: vals += locrowlens[i];
2395: jj++;
2396: }
2398: /* read in other processors' parts (except the last one) and ship them out */
2399: for (i=1; i<size-1; i++) {
2400: nz = procsnz[i];
2401: vals = buf;
2402: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2403: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
2404: }
2405: /* the last proc */
2406: if (size != 1) {
2407: nz = procsnz[i] - extra_rows;
2408: vals = buf;
2409: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2410: for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2411: MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);
2412: }
2413: PetscFree(procsnz);
2415: } else {
2416: /* receive numeric values */
2417: PetscMalloc1(nz,&buf);
2419: /* receive message of values*/
2420: vals = buf;
2421: mycols = ibuf;
2422: MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
2423: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2424: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2426: /* insert into matrix */
2427: jj = rstart*bs;
2428: for (i=0; i<m; i++) {
2429: MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2430: mycols += locrowlens[i];
2431: vals += locrowlens[i];
2432: jj++;
2433: }
2434: }
2436: PetscFree(locrowlens);
2437: PetscFree(buf);
2438: PetscFree(ibuf);
2439: PetscFree2(rowners,browners);
2440: PetscFree2(dlens,odlens);
2441: PetscFree3(mask,masked1,masked2);
2442: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
2443: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
2444: return(0);
2445: }
2449: /*XXXXX@
2450: MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2452: Input Parameters:
2453: . mat - the matrix
2454: . fact - factor
2456: Not Collective on Mat, each process can have a different hash factor
2458: Level: advanced
2460: Notes:
2461: This can also be set by the command line option: -mat_use_hash_table fact
2463: .keywords: matrix, hashtable, factor, HT
2465: .seealso: MatSetOption()
2466: @XXXXX*/
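/*
   A minimal sketch of enabling the hash-table assembly path described in the (disabled)
   manual page above; both calls appear in MatCreate_MPISBAIJ() when -mat_use_hash_table
   is given, and the factor 1.39 mirrors the default used there.

      MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);
      MatMPIBAIJSetHashTableFactor(A,1.39);
*/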
2471: PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2472: {
2473: Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2474: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data;
2475: PetscReal atmp;
2476: PetscReal *work,*svalues,*rvalues;
2478: PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2479: PetscMPIInt rank,size;
2480: PetscInt *rowners_bs,dest,count,source;
2481: PetscScalar *va;
2482: MatScalar *ba;
2483: MPI_Status stat;
2486: if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2487: MatGetRowMaxAbs(a->A,v,NULL);
2488: VecGetArray(v,&va);
2490: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
2491: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
2493: bs = A->rmap->bs;
2494: mbs = a->mbs;
2495: Mbs = a->Mbs;
2496: ba = b->a;
2497: bi = b->i;
2498: bj = b->j;
2500: /* find ownerships */
2501: rowners_bs = A->rmap->range;
2503: /* each proc creates an array to be distributed */
2504: PetscMalloc1(bs*Mbs,&work);
2505: PetscMemzero(work,bs*Mbs*sizeof(PetscReal));
2507: /* row_max for B */
2508: if (rank != size-1) {
2509: for (i=0; i<mbs; i++) {
2510: ncols = bi[1] - bi[0]; bi++;
2511: brow = bs*i;
2512: for (j=0; j<ncols; j++) {
2513: bcol = bs*(*bj);
2514: for (kcol=0; kcol<bs; kcol++) {
2515: col = bcol + kcol; /* local col index */
2516: col += rowners_bs[rank+1]; /* global col index */
2517: for (krow=0; krow<bs; krow++) {
2518: atmp = PetscAbsScalar(*ba); ba++;
2519: row = brow + krow; /* local row index */
2520: if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2521: if (work[col] < atmp) work[col] = atmp;
2522: }
2523: }
2524: bj++;
2525: }
2526: }
2528: /* send values to its owners */
2529: for (dest=rank+1; dest<size; dest++) {
2530: svalues = work + rowners_bs[dest];
2531: count = rowners_bs[dest+1]-rowners_bs[dest];
2532: MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));
2533: }
2534: }
2536: /* receive values */
2537: if (rank) {
2538: rvalues = work;
2539: count = rowners_bs[rank+1]-rowners_bs[rank];
2540: for (source=0; source<rank; source++) {
2541: MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);
2542: /* process values */
2543: for (i=0; i<count; i++) {
2544: if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2545: }
2546: }
2547: }
2549: VecRestoreArray(v,&va);
2550: PetscFree(work);
2551: return(0);
2552: }
2556: PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2557: {
2558: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
2559: PetscErrorCode ierr;
2560: PetscInt mbs=mat->mbs,bs=matin->rmap->bs;
2561: PetscScalar *x,*ptr,*from;
2562: Vec bb1;
2563: const PetscScalar *b;
2566: if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2567: if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2569: if (flag == SOR_APPLY_UPPER) {
2570: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2571: return(0);
2572: }
2574: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2575: if (flag & SOR_ZERO_INITIAL_GUESS) {
2576: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
2577: its--;
2578: }
2580: VecDuplicate(bb,&bb1);
2581: while (its--) {
2583: /* lower triangular part: slvec0b = - B^T*xx */
2584: (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);
2586: /* copy xx into slvec0a */
2587: VecGetArray(mat->slvec0,&ptr);
2588: VecGetArray(xx,&x);
2589: PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));
2590: VecRestoreArray(mat->slvec0,&ptr);
2592: VecScale(mat->slvec0,-1.0);
2594: /* copy bb into slvec1a */
2595: VecGetArray(mat->slvec1,&ptr);
2596: VecGetArrayRead(bb,&b);
2597: PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));
2598: VecRestoreArray(mat->slvec1,&ptr);
2600: /* set slvec1b = 0 */
2601: VecSet(mat->slvec1b,0.0);
2603: VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
2604: VecRestoreArray(xx,&x);
2605: VecRestoreArrayRead(bb,&b);
2606: VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
2608: /* upper triangular part: bb1 = bb1 - B*x */
2609: (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);
2611: /* local diagonal sweep */
2612: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
2613: }
2614: VecDestroy(&bb1);
2615: } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2616: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2617: } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2618: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2619: } else if (flag & SOR_EISENSTAT) {
2620: Vec xx1;
2621: PetscBool hasop;
2622: const PetscScalar *diag;
2623: PetscScalar *sl,scale = (omega - 2.0)/omega;
2624: PetscInt i,n;
2626: if (!mat->xx1) {
2627: VecDuplicate(bb,&mat->xx1);
2628: VecDuplicate(bb,&mat->bb1);
2629: }
2630: xx1 = mat->xx1;
2631: bb1 = mat->bb1;
2633: (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);
2635: if (!mat->diag) {
2636: /* this is wrong for same matrix with new nonzero values */
2637: MatGetVecs(matin,&mat->diag,NULL);
2638: MatGetDiagonal(matin,mat->diag);
2639: }
2640: MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);
2642: if (hasop) {
2643: MatMultDiagonalBlock(matin,xx,bb1);
2644: VecAYPX(mat->slvec1a,scale,bb);
2645: } else {
2646: /*
2647: These two lines are replaced by code that may be a bit faster for a good compiler
2648: VecPointwiseMult(mat->slvec1a,mat->diag,xx);
2649: VecAYPX(mat->slvec1a,scale,bb);
2650: */
2651: VecGetArray(mat->slvec1a,&sl);
2652: VecGetArrayRead(mat->diag,&diag);
2653: VecGetArrayRead(bb,&b);
2654: VecGetArray(xx,&x);
2655: VecGetLocalSize(xx,&n);
2656: if (omega == 1.0) {
2657: for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2658: PetscLogFlops(2.0*n);
2659: } else {
2660: for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2661: PetscLogFlops(3.0*n);
2662: }
2663: VecRestoreArray(mat->slvec1a,&sl);
2664: VecRestoreArrayRead(mat->diag,&diag);
2665: VecRestoreArrayRead(bb,&b);
2666: VecRestoreArray(xx,&x);
2667: }
2669: /* multiply off-diagonal portion of matrix */
2670: VecSet(mat->slvec1b,0.0);
2671: (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);
2672: VecGetArray(mat->slvec0,&from);
2673: VecGetArray(xx,&x);
2674: PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));
2675: VecRestoreArray(mat->slvec0,&from);
2676: VecRestoreArray(xx,&x);
2677: VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
2678: VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);
2679: (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);
2681: /* local sweep */
2682: (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);
2683: VecAXPY(xx,1.0,xx1);
2684: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2685: return(0);
2686: }
2690: PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2691: {
2692: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
2694: Vec lvec1,bb1;
2697: if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2698: if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2700: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2701: if (flag & SOR_ZERO_INITIAL_GUESS) {
2702: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
2703: its--;
2704: }
2706: VecDuplicate(mat->lvec,&lvec1);
2707: VecDuplicate(bb,&bb1);
2708: while (its--) {
2709: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2711: /* lower diagonal part: bb1 = bb - B^T*xx */
2712: (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);
2713: VecScale(lvec1,-1.0);
2715: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2716: VecCopy(bb,bb1);
2717: VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);
2719: /* upper diagonal part: bb1 = bb1 - B*x */
2720: VecScale(mat->lvec,-1.0);
2721: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);
2723: VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);
2725: /* diagonal sweep */
2726: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
2727: }
2728: VecDestroy(&lvec1);
2729: VecDestroy(&bb1);
2730: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2731: return(0);
2732: }
2736: /*@
2737: MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
2738: in standard CSR format.
2740: Collective on MPI_Comm
2742: Input Parameters:
2743: + comm - MPI communicator
2744: . bs - the block size, only a block size of 1 is supported
2745: . m - number of local rows (Cannot be PETSC_DECIDE)
2746: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given).
2747: This value should be the same as the local size used in creating the
2748: x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
2749: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2750: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2751: . i - row indices
2752: . j - column indices
2753: - a - matrix values
2755: Output Parameter:
2756: . mat - the matrix
2758: Level: intermediate
2760: Notes:
2761: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2762: thus you CANNOT change the matrix entries by changing the values of a[] after you have
2763: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2765: The i and j indices are 0 based; the entries of i are offsets into the local j array.
2767: .keywords: matrix, aij, compressed row, sparse, parallel
2769: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2770: MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2771: @*/
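/*
   A minimal sketch (assuming a single process for simplicity, and assuming that each row
   lists only the column indices on or above the diagonal, as in the SBAIJ storage itself):
   build the upper triangle of the 3x3 tridiagonal matrix with 2 on the diagonal and -1 on
   the off-diagonals. The arrays i, j, a below are illustrative.

      Mat         A;
      PetscInt    i[4] = {0,2,4,5};
      PetscInt    j[5] = {0,1,1,2,2};
      PetscScalar a[5] = {2,-1,2,-1,2};

      MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
*/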
2772: PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2773: {
2778: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2779: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2780: MatCreate(comm,mat);
2781: MatSetSizes(*mat,m,n,M,N);
2782: MatSetType(*mat,MATMPISBAIJ);
2783: MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);
2784: return(0);
2785: }
2790: /*@C
2791: MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
2792: (block compressed row).
2794: Collective on MPI_Comm
2796: Input Parameters:
2797: + B - the matrix
2798: . bs - the block size
2799: . i - the indices into j for the start of each local row (starts with zero)
2800: . j - the column indices for each local row (starts with zero); these must be sorted for each row
2801: - v - optional values in the matrix
2803: Level: developer
2805: .keywords: matrix, aij, compressed row, sparse, parallel
2807: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2808: @*/
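/*
   A minimal sketch of using this routine directly (same i/j layout assumptions as in the
   MatCreateMPISBAIJWithArrays() example above; passing NULL for v preallocates the
   nonzero pattern without inserting numerical values).

      Mat      B;
      PetscInt bs = 1;
      PetscInt i[4] = {0,2,4,5};
      PetscInt j[5] = {0,1,1,2,2};

      MatCreate(PETSC_COMM_WORLD,&B);
      MatSetSizes(B,3,3,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(B,MATMPISBAIJ);
      MatMPISBAIJSetPreallocationCSR(B,bs,i,j,NULL);
*/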
2809: PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2810: {
2814: PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
2815: return(0);
2816: }