Actual source code: baij.c

petsc-3.3-p7 2013-05-11
  2: /*
  3:     Defines the basic matrix operations for the BAIJ (compressed row)
  4:   matrix storage format.
  5: */
  6: #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
  7: #include <petscblaslapack.h>
  8: #include <../src/mat/blockinvert.h>


 13: PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
 14: {
 15:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 17:   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
 18:   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
 19:   PetscReal      shift = 0.0;

 22:   if (a->idiagvalid) {
 23:     if (values)*values = a->idiag;
 24:     return(0);
 25:   }
 26:   MatMarkDiagonal_SeqBAIJ(A);
 27:   diag_offset = a->diag;
 28:   if (!a->idiag) {
 29:     PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);
 30:     PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));
 31:   }
 32:   diag    = a->idiag;
 33:   mdiag   = a->idiag+bs2*mbs;
 34:   if (values) *values = a->idiag;
 35:   /* factor and invert each block */
 36:   switch (bs){
 37:     case 1:
 38:       for (i=0; i<mbs; i++) {
 39:         odiag = v + 1*diag_offset[i];
 40:         diag[0]  = odiag[0];
 41:         mdiag[0] = odiag[0];
 42:         diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
 43:         diag    += 1;
 44:         mdiag   += 1;
 45:       }
 46:       break;
 47:     case 2:
 48:       for (i=0; i<mbs; i++) {
 49:         odiag   = v + 4*diag_offset[i];
 50:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 51:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 52:         PetscKernel_A_gets_inverse_A_2(diag,shift);
 53:         diag    += 4;
 54:         mdiag   += 4;
 55:       }
 56:       break;
 57:     case 3:
 58:       for (i=0; i<mbs; i++) {
 59:         odiag    = v + 9*diag_offset[i];
 60:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 61:         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 62:         diag[8]  = odiag[8];
 63:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 64:         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
 65:         mdiag[8] = odiag[8];
 66:         PetscKernel_A_gets_inverse_A_3(diag,shift);
 67:         diag    += 9;
 68:         mdiag   += 9;
 69:       }
 70:       break;
 71:     case 4:
 72:       for (i=0; i<mbs; i++) {
 73:         odiag  = v + 16*diag_offset[i];
 74:         PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
 75:         PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
 76:         PetscKernel_A_gets_inverse_A_4(diag,shift);
 77:         diag  += 16;
 78:         mdiag += 16;
 79:       }
 80:       break;
 81:     case 5:
 82:       for (i=0; i<mbs; i++) {
 83:         odiag = v + 25*diag_offset[i];
 84:         PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
 85:         PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
 86:         PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);
 87:         diag  += 25;
 88:         mdiag += 25;
 89:       }
 90:       break;
 91:     case 6:
 92:       for (i=0; i<mbs; i++) {
 93:         odiag = v + 36*diag_offset[i];
 94:         PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));
 95:         PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));
 96:         PetscKernel_A_gets_inverse_A_6(diag,shift);
 97:         diag  += 36;
 98:         mdiag += 36;
 99:       }
100:       break;
101:     case 7:
102:       for (i=0; i<mbs; i++) {
103:         odiag = v + 49*diag_offset[i];
104:         PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));
105:         PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));
106:         PetscKernel_A_gets_inverse_A_7(diag,shift);
107:         diag  += 49;
108:         mdiag += 49;
109:       }
110:       break;
111:     default:
112:       PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);
113:       for (i=0; i<mbs; i++) {
114:         odiag = v + bs2*diag_offset[i];
115:         PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));
116:         PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));
117:         PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);
118:         diag  += bs2;
119:         mdiag += bs2;
120:       }
121:       PetscFree2(v_work,v_pivots);
122:   }
123:   a->idiagvalid = PETSC_TRUE;
124:   return(0);
125: }

129: PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130: {
131:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
132:   PetscScalar        *x,x1,s1;
133:   const PetscScalar  *b;
134:   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
135:   PetscErrorCode     ierr;
136:   PetscInt           m = a->mbs,i,i2,nz,j;
137:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

140:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141:   its = its*lits;
142:   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

148:   if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}

150:   diag  = a->diag;
151:   idiag = a->idiag;
152:   VecGetArray(xx,&x);
153:   VecGetArrayRead(bb,&b);

155:   if (flag & SOR_ZERO_INITIAL_GUESS) {
156:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
157:       x[0] = b[0]*idiag[0];
158:       i2     = 1;
159:       idiag += 1;
160:       for (i=1; i<m; i++) {
161:         v     = aa + ai[i];
162:         vi    = aj + ai[i];
163:         nz    = diag[i] - ai[i];
164:         s1    = b[i2];
165:         for (j=0; j<nz; j++) {
166:           s1 -= v[j]*x[vi[j]];
167:         }
168:         x[i2]   = idiag[0]*s1;
169:         idiag   += 1;
170:         i2      += 1;
171:       }
172:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
173:       PetscLogFlops(a->nz);
174:     }
175:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
176:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
177:       i2    = 0;
178:       mdiag = a->idiag+a->mbs;
179:       for (i=0; i<m; i++) {
180:         x1      = x[i2];
181:         x[i2]   = mdiag[0]*x1;
182:         mdiag  += 1;
183:         i2     += 1;
184:       }
185:       PetscLogFlops(m);
186:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
187:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
188:     }
189:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
190:       idiag   = a->idiag+a->mbs - 1;
191:       i2      = m - 1;
192:       x1      = x[i2];
193:       x[i2]   = idiag[0]*x1;
194:       idiag -= 1;
195:       i2    -= 1;
196:       for (i=m-2; i>=0; i--) {
197:         v     = aa + (diag[i]+1);
198:         vi    = aj + diag[i] + 1;
199:         nz    = ai[i+1] - diag[i] - 1;
200:         s1    = x[i2];
201:         for (j=0; j<nz; j++) {
202:           s1 -= v[j]*x[vi[j]];
203:         }
204:         x[i2]   = idiag[0]*s1;
205:         idiag   -= 1;
206:         i2      -= 1;
207:       }
208:       PetscLogFlops(a->nz);
209:     }
210:   } else {
211:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212:   }
213:   VecRestoreArray(xx,&x);
214:   VecRestoreArrayRead(bb,&b);
215:   return(0);
216: }

220: PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221: {
222:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223:   PetscScalar        *x,x1,x2,s1,s2;
224:   const PetscScalar  *b;
225:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
226:   PetscErrorCode     ierr;
227:   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
228:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

231:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
232:   its = its*lits;
233:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

239:   if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}

241:   diag  = a->diag;
242:   idiag = a->idiag;
243:   VecGetArray(xx,&x);
244:   VecGetArrayRead(bb,&b);

246:   if (flag & SOR_ZERO_INITIAL_GUESS) {
247:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
248:       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
249:       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
250:       i2     = 2;
251:       idiag += 4;
252:       for (i=1; i<m; i++) {
253:         v     = aa + 4*ai[i];
254:         vi    = aj + ai[i];
255:         nz    = diag[i] - ai[i];
256:         s1    = b[i2]; s2 = b[i2+1];
257:         for (j=0; j<nz; j++) {
258:           idx  = 2*vi[j];
259:           it   = 4*j;
260:           x1   = x[idx]; x2 = x[1+idx];
261:           s1  -= v[it]*x1 + v[it+2]*x2;
262:           s2  -= v[it+1]*x1 + v[it+3]*x2;
263:         }
264:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
265:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
266:         idiag   += 4;
267:         i2      += 2;
268:       }
269:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
270:       PetscLogFlops(4.0*(a->nz));
271:     }
272:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
273:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
274:       i2    = 0;
275:       mdiag = a->idiag+4*a->mbs;
276:       for (i=0; i<m; i++) {
277:         x1      = x[i2]; x2 = x[i2+1];
278:         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
279:         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
280:         mdiag  += 4;
281:         i2     += 2;
282:       }
283:       PetscLogFlops(6.0*m);
284:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
285:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
286:     }
287:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
288:       idiag   = a->idiag+4*a->mbs - 4;
289:       i2      = 2*m - 2;
290:       x1      = x[i2]; x2 = x[i2+1];
291:       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
292:       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
293:       idiag -= 4;
294:       i2    -= 2;
295:       for (i=m-2; i>=0; i--) {
296:         v     = aa + 4*(diag[i]+1);
297:         vi    = aj + diag[i] + 1;
298:         nz    = ai[i+1] - diag[i] - 1;
299:         s1    = x[i2]; s2 = x[i2+1];
300:         for (j=0; j<nz; j++) {
301:            idx  = 2*vi[j];
302:           it   = 4*j;
303:           x1   = x[idx]; x2 = x[1+idx];
304:           s1  -= v[it]*x1 + v[it+2]*x2;
305:           s2  -= v[it+1]*x1 + v[it+3]*x2;
306:         }
307:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
308:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
309:         idiag   -= 4;
310:         i2      -= 2;
311:       }
312:       PetscLogFlops(4.0*(a->nz));
313:     }
314:   } else {
315:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
316:   }
317:   VecRestoreArray(xx,&x);
318:   VecRestoreArrayRead(bb,&b);
319:   return(0);
320: }

324: PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
325: {
326:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
327:   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
328:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
329:   const PetscScalar  *b;
330:   PetscErrorCode     ierr;
331:   PetscInt           m = a->mbs,i,i2,nz,idx;
332:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

335:   its = its*lits;
336:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
337:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
338:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
339:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
340:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
341:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

343:   if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}

345:   diag  = a->diag;
346:   idiag = a->idiag;
347:   VecGetArray(xx,&x);
348:   VecGetArrayRead(bb,&b);

350:   if (flag & SOR_ZERO_INITIAL_GUESS) {
351:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
352:       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
353:       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
354:       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
355:       i2     = 3;
356:       idiag += 9;
357:       for (i=1; i<m; i++) {
358:         v     = aa + 9*ai[i];
359:         vi    = aj + ai[i];
360:         nz    = diag[i] - ai[i];
361:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
362:         while (nz--) {
363:           idx  = 3*(*vi++);
364:           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
365:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
366:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
367:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
368:           v   += 9;
369:         }
370:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
371:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
372:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
373:         idiag   += 9;
374:         i2      += 3;
375:       }
376:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
377:       PetscLogFlops(9.0*(a->nz));
378:     }
379:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
380:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
381:       i2    = 0;
382:       mdiag = a->idiag+9*a->mbs;
383:       for (i=0; i<m; i++) {
384:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
385:         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
386:         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
387:         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
388:         mdiag  += 9;
389:         i2     += 3;
390:       }
391:       PetscLogFlops(15.0*m);
392:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
393:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
394:     }
395:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
396:       idiag   = a->idiag+9*a->mbs - 9;
397:       i2      = 3*m - 3;
398:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
399:       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
400:       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
401:       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
402:       idiag -= 9;
403:       i2    -= 3;
404:       for (i=m-2; i>=0; i--) {
405:         v     = aa + 9*(diag[i]+1);
406:         vi    = aj + diag[i] + 1;
407:         nz    = ai[i+1] - diag[i] - 1;
408:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
409:         while (nz--) {
410:           idx  = 3*(*vi++);
411:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
412:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
413:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
414:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
415:           v   += 9;
416:         }
417:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
418:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
419:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
420:         idiag   -= 9;
421:         i2      -= 3;
422:       }
423:       PetscLogFlops(9.0*(a->nz));
424:     }
425:   } else {
426:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
427:   }
428:   VecRestoreArray(xx,&x);
429:   VecRestoreArrayRead(bb,&b);
430:   return(0);
431: }

435: PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
436: {
437:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
438:   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
439:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
440:   const PetscScalar  *b;
441:   PetscErrorCode     ierr;
442:   PetscInt           m = a->mbs,i,i2,nz,idx;
443:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

446:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
447:   its = its*lits;
448:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
449:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
450:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
451:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
452:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

454:   if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}

456:   diag  = a->diag;
457:   idiag = a->idiag;
458:   VecGetArray(xx,&x);
459:   VecGetArrayRead(bb,&b);

461:   if (flag & SOR_ZERO_INITIAL_GUESS) {
462:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
463:       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
464:       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
465:       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
466:       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
467:       i2     = 4;
468:       idiag += 16;
469:       for (i=1; i<m; i++) {
470:         v     = aa + 16*ai[i];
471:         vi    = aj + ai[i];
472:         nz    = diag[i] - ai[i];
473:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
474:         while (nz--) {
475:           idx  = 4*(*vi++);
476:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
477:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
478:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
479:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
480:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
481:           v   += 16;
482:         }
483:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
484:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
485:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
486:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
487:         idiag   += 16;
488:         i2      += 4;
489:       }
490:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
491:       PetscLogFlops(16.0*(a->nz));
492:     }
493:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
494:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
495:       i2    = 0;
496:       mdiag = a->idiag+16*a->mbs;
497:       for (i=0; i<m; i++) {
498:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
499:         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
500:         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
501:         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
502:         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
503:         mdiag  += 16;
504:         i2     += 4;
505:       }
506:       PetscLogFlops(28.0*m);
507:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
508:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
509:     }
510:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
511:       idiag   = a->idiag+16*a->mbs - 16;
512:       i2      = 4*m - 4;
513:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514:       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
515:       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
516:       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
517:       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
518:       idiag -= 16;
519:       i2    -= 4;
520:       for (i=m-2; i>=0; i--) {
521:         v     = aa + 16*(diag[i]+1);
522:         vi    = aj + diag[i] + 1;
523:         nz    = ai[i+1] - diag[i] - 1;
524:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
525:         while (nz--) {
526:           idx  = 4*(*vi++);
527:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
528:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
529:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
530:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
531:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
532:           v   += 16;
533:         }
534:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
535:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
536:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
537:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
538:         idiag   -= 16;
539:         i2      -= 4;
540:       }
541:       PetscLogFlops(16.0*(a->nz));
542:     }
543:   } else {
544:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
545:   }
546:   VecRestoreArray(xx,&x);
547:   VecRestoreArrayRead(bb,&b);
548:   return(0);
549: }

553: PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
554: {
555:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
556:   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
557:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
558:   const PetscScalar  *b;
559:   PetscErrorCode     ierr;
560:   PetscInt           m = a->mbs,i,i2,nz,idx;
561:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

564:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
565:   its = its*lits;
566:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
567:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
568:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
569:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
570:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

572:   if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}

574:   diag  = a->diag;
575:   idiag = a->idiag;
576:   VecGetArray(xx,&x);
577:   VecGetArrayRead(bb,&b);

579:   if (flag & SOR_ZERO_INITIAL_GUESS) {
580:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
581:       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
582:       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
583:       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
584:       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
585:       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
586:       i2     = 5;
587:       idiag += 25;
588:       for (i=1; i<m; i++) {
589:         v     = aa + 25*ai[i];
590:         vi    = aj + ai[i];
591:         nz    = diag[i] - ai[i];
592:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
593:         while (nz--) {
594:           idx  = 5*(*vi++);
595:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
596:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
597:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
598:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
599:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
600:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
601:           v   += 25;
602:         }
603:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
604:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
605:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
606:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
607:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
608:         idiag   += 25;
609:         i2      += 5;
610:       }
611:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
612:       PetscLogFlops(25.0*(a->nz));
613:     }
614:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
615:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
616:       i2    = 0;
617:       mdiag = a->idiag+25*a->mbs;
618:       for (i=0; i<m; i++) {
619:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
620:         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
621:         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
622:         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
623:         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
624:         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
625:         mdiag  += 25;
626:         i2     += 5;
627:       }
628:       PetscLogFlops(45.0*m);
629:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
630:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
631:     }
632:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
633:       idiag   = a->idiag+25*a->mbs - 25;
634:       i2      = 5*m - 5;
635:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
636:       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
637:       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
638:       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
639:       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
640:       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
641:       idiag -= 25;
642:       i2    -= 5;
643:       for (i=m-2; i>=0; i--) {
644:         v     = aa + 25*(diag[i]+1);
645:         vi    = aj + diag[i] + 1;
646:         nz    = ai[i+1] - diag[i] - 1;
647:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
648:         while (nz--) {
649:           idx  = 5*(*vi++);
650:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
651:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
652:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
653:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
654:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
655:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
656:           v   += 25;
657:         }
658:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
659:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
660:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
661:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
662:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
663:         idiag   -= 25;
664:         i2      -= 5;
665:       }
666:       PetscLogFlops(25.0*(a->nz));
667:     }
668:   } else {
669:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
670:   }
671:   VecRestoreArray(xx,&x);
672:   VecRestoreArrayRead(bb,&b);
673:   return(0);
674: }

678: PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
679: {
680:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
681:   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
682:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
683:   const PetscScalar  *b;
684:   PetscErrorCode     ierr;
685:   PetscInt           m = a->mbs,i,i2,nz,idx;
686:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

689:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
690:   its = its*lits;
691:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
692:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
693:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
694:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
695:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

697:   if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}

699:   diag  = a->diag;
700:   idiag = a->idiag;
701:   VecGetArray(xx,&x);
702:   VecGetArrayRead(bb,&b);

704:   if (flag & SOR_ZERO_INITIAL_GUESS) {
705:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
706:       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
707:       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
708:       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
709:       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
710:       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
711:       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
712:       i2     = 6;
713:       idiag += 36;
714:       for (i=1; i<m; i++) {
715:         v     = aa + 36*ai[i];
716:         vi    = aj + ai[i];
717:         nz    = diag[i] - ai[i];
718:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
719:         while (nz--) {
720:           idx  = 6*(*vi++);
721:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
722:           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
723:           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
724:           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
725:           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
726:           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
727:           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
728:           v   += 36;
729:         }
730:         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
731:         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
732:         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
733:         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
734:         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
735:         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
736:         idiag   += 36;
737:         i2      += 6;
738:       }
739:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
740:       PetscLogFlops(36.0*(a->nz));
741:     }
742:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
743:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
744:       i2    = 0;
745:       mdiag = a->idiag+36*a->mbs;
746:       for (i=0; i<m; i++) {
747:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
748:         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
749:         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
750:         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
751:         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
752:         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
753:         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
754:         mdiag  += 36;
755:         i2     += 6;
756:       }
757:       PetscLogFlops(60.0*m);
758:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
759:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
760:     }
761:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
762:       idiag   = a->idiag+36*a->mbs - 36;
763:       i2      = 6*m - 6;
764:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
765:       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
766:       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
767:       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
768:       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
769:       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
770:       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
771:       idiag -= 36;
772:       i2    -= 6;
773:       for (i=m-2; i>=0; i--) {
774:         v     = aa + 36*(diag[i]+1);
775:         vi    = aj + diag[i] + 1;
776:         nz    = ai[i+1] - diag[i] - 1;
777:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
778:         while (nz--) {
779:           idx  = 6*(*vi++);
780:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
781:           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
782:           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
783:           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
784:           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
785:           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
786:           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
787:           v   += 36;
788:         }
789:         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
790:         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
791:         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
792:         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
793:         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
794:         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
795:         idiag   -= 36;
796:         i2      -= 6;
797:       }
798:       PetscLogFlops(36.0*(a->nz));
799:     }
800:   } else {
801:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
802:   }
803:   VecRestoreArray(xx,&x);
804:   VecRestoreArrayRead(bb,&b);
805:   return(0);
806: }

810: PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
811: {
812:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
813:   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
814:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
815:   const PetscScalar  *b;
816:   PetscErrorCode     ierr;
817:   PetscInt           m = a->mbs,i,i2,nz,idx;
818:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

821:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
822:   its = its*lits;
823:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
824:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
825:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
826:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
827:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

829:   if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}

831:   diag  = a->diag;
832:   idiag = a->idiag;
833:   VecGetArray(xx,&x);
834:   VecGetArrayRead(bb,&b);

836:   if (flag & SOR_ZERO_INITIAL_GUESS) {
837:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
838:       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
839:       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
840:       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
841:       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
842:       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
843:       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
844:       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
845:       i2     = 7;
846:       idiag += 49;
847:       for (i=1; i<m; i++) {
848:         v     = aa + 49*ai[i];
849:         vi    = aj + ai[i];
850:         nz    = diag[i] - ai[i];
851:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
852:         while (nz--) {
853:           idx  = 7*(*vi++);
854:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
855:           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
856:           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
857:           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
858:           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
859:           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
860:           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
861:           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
862:           v   += 49;
863:         }
864:         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
865:         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
866:         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
867:         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
868:         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
869:         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
870:         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
871:         idiag   += 49;
872:         i2      += 7;
873:       }
874:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
875:       PetscLogFlops(49.0*(a->nz));
876:     }
877:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
878:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
879:       i2    = 0;
880:       mdiag = a->idiag+49*a->mbs;
881:       for (i=0; i<m; i++) {
882:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
883:         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
884:         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
885:         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
886:         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
887:         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
888:         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
889:         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
890:         mdiag  += 49;
891:         i2     += 7;
892:       }
893:       PetscLogFlops(93.0*m);
894:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
895:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
896:     }
897:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
898:       idiag   = a->idiag+49*a->mbs - 49;
899:       i2      = 7*m - 7;
900:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
901:       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
902:       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
903:       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
904:       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
905:       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
906:       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
907:       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
908:       idiag -= 49;
909:       i2    -= 7;
910:       for (i=m-2; i>=0; i--) {
911:         v     = aa + 49*(diag[i]+1);
912:         vi    = aj + diag[i] + 1;
913:         nz    = ai[i+1] - diag[i] - 1;
914:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
915:         while (nz--) {
916:           idx  = 7*(*vi++);
917:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
918:           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
919:           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
920:           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
921:           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
922:           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
923:           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
924:           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
925:           v   += 49;
926:         }
927:         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
928:         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
929:         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
930:         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
931:         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
932:         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
933:         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
934:         idiag   -= 49;
935:         i2      -= 7;
936:       }
937:       PetscLogFlops(49.0*(a->nz));
938:     }
939:   } else {
940:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
941:   }
942:   VecRestoreArray(xx,&x);
943:   VecRestoreArrayRead(bb,&b);
944:   return(0);
945: }

949: PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
950: {
951:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
952:   PetscScalar        *x,*work,*w,*workt;
953:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
954:   const PetscScalar  *b;
955:   PetscErrorCode     ierr;
956:   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
957:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

960:   its = its*lits;
961:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
962:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
963:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
964:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
965:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
966:   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

968:   if (!a->idiagvalid){MatInvertBlockDiagonal(A,PETSC_NULL);}

970:   diag  = a->diag;
971:   idiag = a->idiag;
972:   if (!a->mult_work) {
973:     k    = PetscMax(A->rmap->n,A->cmap->n);
974:     PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);
975:   }
976:   work = a->mult_work;
977:   if (!a->sor_work) {
978:     PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);
979:   }
980:   w = a->sor_work;

982:   VecGetArray(xx,&x);
983:   VecGetArrayRead(bb,&b);

985:   if (flag & SOR_ZERO_INITIAL_GUESS) {
986:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
987:       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
988:       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
989:       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
990:       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
991:       i2     = bs;
992:       idiag += bs2;
993:       for (i=1; i<m; i++) {
994:         v     = aa + bs2*ai[i];
995:         vi    = aj + ai[i];
996:         nz    = diag[i] - ai[i];

998:         PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
999:         /* copy all rows of x that are needed into contiguous space */
1000:         workt = work;
1001:         for (j=0; j<nz; j++) {
1002:           PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1003:           workt += bs;
1004:         }
1005:         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1006:        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1007:         while (nz--) {
1008:           idx  = N*(*vi++);
1009:           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1010:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1011:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1012:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1013:           v   += N2;
1014:           } */

1016:         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1017:         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1018:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1019:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/

1021:         idiag   += bs2;
1022:         i2      += bs;
1023:       }
1024:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1025:       PetscLogFlops(1.0*bs2*(a->nz));
1026:     }
1027:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1028:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1029:       i2    = 0;
1030:       mdiag = a->idiag+bs2*a->mbs;
1031:       PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));
1032:       for (i=0; i<m; i++) {
1033:         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1034:         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1035:         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1036:         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1037:         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */

1039:         mdiag  += bs2;
1040:         i2     += bs;
1041:       }
1042:       PetscLogFlops(2.0*bs*(bs-1)*m);
1043:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1044:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
1045:     }
1046:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1047:       idiag   = a->idiag+bs2*a->mbs - bs2;
1048:       i2      = bs*m - bs;
1049:       PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));
1050:       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1051:       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1052:       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1053:       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1054:       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1055:       idiag -= bs2;
1056:       i2    -= bs;
1057:       for (i=m-2; i>=0; i--) {
1058:         v     = aa + bs2*(diag[i]+1);
1059:         vi    = aj + diag[i] + 1;
1060:         nz    = ai[i+1] - diag[i] - 1;

1062:         PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));
1063:         /* copy all rows of x that are needed into contiguous space */
1064:         workt = work;
1065:         for (j=0; j<nz; j++) {
1066:           PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
1067:           workt += bs;
1068:         }
1069:         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1070:         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 
1071:         while (nz--) {
1072:           idx  = N*(*vi++);
1073:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1074:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1075:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1076:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1077:           v   += N2;
1078:           } */

1080:         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1081:         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1082:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1083:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1084:         idiag   -= bs2;
1085:         i2      -= bs;
1086:       }
1087:       PetscLogFlops(1.0*bs2*(a->nz));
1088:     }
1089:   } else {
1090:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1091:   }
1092:   VecRestoreArray(xx,&x);
1093:   VecRestoreArrayRead(bb,&b);
1094:   return(0);
1095: }

1097: /*
1098:     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1099: */
1100: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1101: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1102: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1103: #define matsetvaluesblocked4_ matsetvaluesblocked4
1104: #endif

1106: EXTERN_C_BEGIN
1109: void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1110: {
1111:   Mat               A = *AA;
1112:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1113:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1114:   PetscInt          *ai=a->i,*ailen=a->ilen;
1115:   PetscInt          *aj=a->j,stepval,lastcol = -1;
1116:   const PetscScalar *value = v;
1117:   MatScalar         *ap,*aa = a->a,*bap;

1120:   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1121:   stepval = (n-1)*4;
1122:   for (k=0; k<m; k++) { /* loop over added rows */
1123:     row  = im[k];
1124:     rp   = aj + ai[row];
1125:     ap   = aa + 16*ai[row];
1126:     nrow = ailen[row];
1127:     low  = 0;
1128:     high = nrow;
1129:     for (l=0; l<n; l++) { /* loop over added columns */
1130:       col = in[l];
1131:       if (col <= lastcol) low = 0; else high = nrow;
1132:       lastcol = col;
1133:       value = v + k*(stepval+4 + l)*4;
1134:       while (high-low > 7) {
1135:         t = (low+high)/2;
1136:         if (rp[t] > col) high = t;
1137:         else             low  = t;
1138:       }
1139:       for (i=low; i<high; i++) {
1140:         if (rp[i] > col) break;
1141:         if (rp[i] == col) {
1142:           bap  = ap +  16*i;
1143:           for (ii=0; ii<4; ii++,value+=stepval) {
1144:             for (jj=ii; jj<16; jj+=4) {
1145:               bap[jj] += *value++;
1146:             }
1147:           }
1148:           goto noinsert2;
1149:         }
1150:       }
1151:       N = nrow++ - 1;
1152:       high++; /* added new column index thus must search to one higher than before */
1153:       /* shift up all the later entries in this row */
1154:       for (ii=N; ii>=i; ii--) {
1155:         rp[ii+1] = rp[ii];
1156:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1157:       }
1158:       if (N >= i) {
1159:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1160:       }
1161:       rp[i] = col;
1162:       bap   = ap +  16*i;
1163:       for (ii=0; ii<4; ii++,value+=stepval) {
1164:         for (jj=ii; jj<16; jj+=4) {
1165:           bap[jj] = *value++;
1166:         }
1167:       }
1168:       noinsert2:;
1169:       low = i;
1170:     }
1171:     ailen[row] = nrow;
1172:   }
1173:   PetscFunctionReturnVoid();
1174: }
1175: EXTERN_C_END

1177: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1178: #define matsetvalues4_ MATSETVALUES4
1179: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1180: #define matsetvalues4_ matsetvalues4
1181: #endif

1183: EXTERN_C_BEGIN
1186: void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1187: {
1188:   Mat         A = *AA;
1189:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1190:   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1191:   PetscInt    *ai=a->i,*ailen=a->ilen;
1192:   PetscInt    *aj=a->j,brow,bcol;
1193:   PetscInt    ridx,cidx,lastcol = -1;
1194:   MatScalar   *ap,value,*aa=a->a,*bap;
1195: 
1197:   for (k=0; k<m; k++) { /* loop over added rows */
1198:     row  = im[k]; brow = row/4;
1199:     rp   = aj + ai[brow];
1200:     ap   = aa + 16*ai[brow];
1201:     nrow = ailen[brow];
1202:     low  = 0;
1203:     high = nrow;
1204:     for (l=0; l<n; l++) { /* loop over added columns */
1205:       col = in[l]; bcol = col/4;
1206:       ridx = row % 4; cidx = col % 4;
1207:       value = v[l + k*n];
1208:       if (col <= lastcol) low = 0; else high = nrow;
1209:       lastcol = col;
1210:       while (high-low > 7) {
1211:         t = (low+high)/2;
1212:         if (rp[t] > bcol) high = t;
1213:         else              low  = t;
1214:       }
1215:       for (i=low; i<high; i++) {
1216:         if (rp[i] > bcol) break;
1217:         if (rp[i] == bcol) {
1218:           bap  = ap +  16*i + 4*cidx + ridx;
1219:           *bap += value;
1220:           goto noinsert1;
1221:         }
1222:       }
1223:       N = nrow++ - 1;
1224:       high++; /* added new column thus must search to one higher than before */
1225:       /* shift up all the later entries in this row */
1226:       for (ii=N; ii>=i; ii--) {
1227:         rp[ii+1] = rp[ii];
1228:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1229:       }
1230:       if (N>=i) {
1231:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1232:       }
1233:       rp[i]                    = bcol;
1234:       ap[16*i + 4*cidx + ridx] = value;
1235:       noinsert1:;
1236:       low = i;
1237:     }
1238:     ailen[brow] = nrow;
1239:   }
1240:   PetscFunctionReturnVoid();
1241: }
1242: EXTERN_C_END

1244: /*
1245:      Checks for missing diagonals
1246: */
1249: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1250: {
1251:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1253:   PetscInt       *diag,*jj = a->j,i;

1256:   MatMarkDiagonal_SeqBAIJ(A);
1257:   *missing = PETSC_FALSE;
1258:   if (A->rmap->n > 0 && !jj) {
1259:     *missing  = PETSC_TRUE;
1260:     if (d) *d = 0;
1261:     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1262:   } else {
1263:     diag     = a->diag;
1264:     for (i=0; i<a->mbs; i++) {
1265:       if (jj[diag[i]] != i) {
1266:         *missing  = PETSC_TRUE;
1267:         if (d) *d = i;
1268:         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1269:         break;
1270:       }
1271:     }
1272:   }
1273:   return(0);
1274: }

1278: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1279: {
1280:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1282:   PetscInt       i,j,m = a->mbs;

1285:   if (!a->diag) {
1286:     PetscMalloc(m*sizeof(PetscInt),&a->diag);
1287:     PetscLogObjectMemory(A,m*sizeof(PetscInt));
1288:     a->free_diag = PETSC_TRUE;
1289:   }
1290:   for (i=0; i<m; i++) {
1291:     a->diag[i] = a->i[i+1];
1292:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1293:       if (a->j[j] == i) {
1294:         a->diag[i] = j;
1295:         break;
1296:       }
1297:     }
1298:   }
1299:   return(0);
1300: }


1303: extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);

1307: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1308: {
1309:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1311:   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1312:   PetscInt       *tia, *tja;

1315:   *nn = n;
1316:   if (!ia) return(0);
1317:   if (symmetric) {
1318:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);
1319:     nz   = tia[n];
1320:   } else {
1321:     tia = a->i; tja = a->j;
1322:   }
1323: 
1324:   if (!blockcompressed && bs > 1) {
1325:     (*nn) *= bs;
1326:     /* malloc & create the natural set of indices */
1327:     PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);
1328:     if (n) {
1329:       (*ia)[0] = 0;
1330:       for (j=1; j<bs; j++) {
1331:         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1332:       }
1333:     }

1335:     for (i=1; i<n; i++) {
1336:       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1337:       for (j=1; j<bs; j++) {
1338:         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1339:       }
1340:     }
1341:     if (n) {
1342:       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1343:     }

1345:     if (ja) {
1346:       PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);
1347:       cnt = 0;
1348:       for (i=0; i<n; i++) {
1349:         for (j=0; j<bs; j++) {
1350:           for (k=tia[i]; k<tia[i+1]; k++) {
1351:             for (l=0; l<bs; l++) {
1352:               (*ja)[cnt++] = bs*tja[k] + l;
1353:             }
1354:           }
1355:         }
1356:       }
1357:     }

1359:     n     *= bs;
1360:     nz *= bs*bs;
1361:     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1362:       PetscFree(tia);
1363:       PetscFree(tja);
1364:     }
1365:   } else if (oshift == 1) {
1366:     if (symmetric) {
1367:       PetscInt nz = tia[A->rmap->n/bs];
1368:       /*  add 1 to i and j indices */
1369:       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1370:       *ia = tia;
1371:       if (ja) {
1372:         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1373:         *ja = tja;
1374:       }
1375:     } else {
1376:       PetscInt nz = a->i[A->rmap->n/bs];
1377:       /* malloc space and  add 1 to i and j indices */
1378:       PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);
1379:       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1380:       if (ja) {
1381:         PetscMalloc(nz*sizeof(PetscInt),ja);
1382:         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1383:       }
1384:     }
1385:   } else {
1386:     *ia = tia;
1387:     if (ja) *ja = tja;
1388:   }
1389: 
1390:   return(0);
1391: }

1395: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1396: {

1400:   if (!ia) return(0);
1401:   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1402:     PetscFree(*ia);
1403:     if (ja) {PetscFree(*ja);}
1404:   }
1405:   return(0);
1406: }

1410: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1411: {
1412:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1416: #if defined(PETSC_USE_LOG)
1417:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1418: #endif
1419:   MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1420:   ISDestroy(&a->row);
1421:   ISDestroy(&a->col);
1422:   if (a->free_diag) {PetscFree(a->diag);}
1423:   PetscFree(a->idiag);
1424:   if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1425:   PetscFree(a->solve_work);
1426:   PetscFree(a->mult_work);
1427:   PetscFree(a->sor_work);
1428:   ISDestroy(&a->icol);
1429:   PetscFree(a->saved_values);
1430:   PetscFree(a->xtoy);
1431:   PetscFree2(a->compressedrow.i,a->compressedrow.rindex);

1433:   MatDestroy(&a->sbaijMat);
1434:   MatDestroy(&a->parent);
1435:   PetscFree(A->data);

1437:   PetscObjectChangeTypeName((PetscObject)A,0);
1438:   PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);
1439:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
1440:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
1441:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
1442:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
1443:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
1444:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
1445:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);
1446:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);
1447:   PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);
1448:   return(0);
1449: }

1453: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1454: {
1455:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1459:   switch (op) {
1460:   case MAT_ROW_ORIENTED:
1461:     a->roworiented    = flg;
1462:     break;
1463:   case MAT_KEEP_NONZERO_PATTERN:
1464:     a->keepnonzeropattern = flg;
1465:     break;
1466:   case MAT_NEW_NONZERO_LOCATIONS:
1467:     a->nonew          = (flg ? 0 : 1);
1468:     break;
1469:   case MAT_NEW_NONZERO_LOCATION_ERR:
1470:     a->nonew          = (flg ? -1 : 0);
1471:     break;
1472:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1473:     a->nonew          = (flg ? -2 : 0);
1474:     break;
1475:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1476:     a->nounused       = (flg ? -1 : 0);
1477:     break;
1478:   case MAT_CHECK_COMPRESSED_ROW:
1479:     a->compressedrow.check = flg;
1480:     break;
1481:   case MAT_NEW_DIAGONALS:
1482:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1483:   case MAT_USE_HASH_TABLE:
1484:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1485:     break;
1486:   case MAT_SYMMETRIC:
1487:   case MAT_STRUCTURALLY_SYMMETRIC:
1488:   case MAT_HERMITIAN:
1489:   case MAT_SYMMETRY_ETERNAL:
1490:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1491:     break;
1492:   default:
1493:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1494:   }
1495:   return(0);
1496: }

1500: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1501: {
1502:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1504:   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1505:   MatScalar      *aa,*aa_i;
1506:   PetscScalar    *v_i;

1509:   bs  = A->rmap->bs;
1510:   ai  = a->i;
1511:   aj  = a->j;
1512:   aa  = a->a;
1513:   bs2 = a->bs2;
1514: 
1515:   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1516: 
1517:   bn  = row/bs;   /* Block number */
1518:   bp  = row % bs; /* Block Position */
1519:   M   = ai[bn+1] - ai[bn];
1520:   *nz = bs*M;
1521: 
1522:   if (v) {
1523:     *v = 0;
1524:     if (*nz) {
1525:       PetscMalloc((*nz)*sizeof(PetscScalar),v);
1526:       for (i=0; i<M; i++) { /* for each block in the block row */
1527:         v_i  = *v + i*bs;
1528:         aa_i = aa + bs2*(ai[bn] + i);
1529:         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1530:       }
1531:     }
1532:   }

1534:   if (idx) {
1535:     *idx = 0;
1536:     if (*nz) {
1537:       PetscMalloc((*nz)*sizeof(PetscInt),idx);
1538:       for (i=0; i<M; i++) { /* for each block in the block row */
1539:         idx_i = *idx + i*bs;
1540:         itmp  = bs*aj[ai[bn] + i];
1541:         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1542:       }
1543:     }
1544:   }
1545:   return(0);
1546: }

1550: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1551: {

1555:   if (idx) {PetscFree(*idx);}
1556:   if (v)   {PetscFree(*v);}
1557:   return(0);
1558: }

1560: extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);

1564: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1565: {
1566:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1567:   Mat            C;
1569:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1570:   PetscInt       *rows,*cols,bs2=a->bs2;
1571:   MatScalar      *array;

1574:   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1575:   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1576:     PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1577:     PetscMemzero(col,(1+nbs)*sizeof(PetscInt));

1579:     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1580:     MatCreate(((PetscObject)A)->comm,&C);
1581:     MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1582:     MatSetType(C,((PetscObject)A)->type_name);
1583:     MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);
1584:     PetscFree(col);
1585:   } else {
1586:     C = *B;
1587:   }

1589:   array = a->a;
1590:   PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);
1591:   for (i=0; i<mbs; i++) {
1592:     cols[0] = i*bs;
1593:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1594:     len = ai[i+1] - ai[i];
1595:     for (j=0; j<len; j++) {
1596:       rows[0] = (*aj++)*bs;
1597:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1598:       MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);
1599:       array += bs2;
1600:     }
1601:   }
1602:   PetscFree2(rows,cols);
1603: 
1604:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1605:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1606: 
1607:   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1608:     *B = C;
1609:   } else {
1610:     MatHeaderMerge(A,C);
1611:   }
1612:   return(0);
1613: }

1615: EXTERN_C_BEGIN
1618: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1619: {
1621:   Mat            Btrans;

1624:   *f = PETSC_FALSE;
1625:   MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1626:   MatEqual_SeqBAIJ(B,Btrans,f);
1627:   MatDestroy(&Btrans);
1628:   return(0);
1629: }
1630: EXTERN_C_END

1634: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1635: {
1636:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1638:   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1639:   int            fd;
1640:   PetscScalar    *aa;
1641:   FILE           *file;

1644:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1645:   PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);
1646:   col_lens[0] = MAT_FILE_CLASSID;

1648:   col_lens[1] = A->rmap->N;
1649:   col_lens[2] = A->cmap->n;
1650:   col_lens[3] = a->nz*bs2;

1652:   /* store lengths of each row and write (including header) to file */
1653:   count = 0;
1654:   for (i=0; i<a->mbs; i++) {
1655:     for (j=0; j<bs; j++) {
1656:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1657:     }
1658:   }
1659:   PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1660:   PetscFree(col_lens);

1662:   /* store column indices (zero start index) */
1663:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1664:   count = 0;
1665:   for (i=0; i<a->mbs; i++) {
1666:     for (j=0; j<bs; j++) {
1667:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1668:         for (l=0; l<bs; l++) {
1669:           jj[count++] = bs*a->j[k] + l;
1670:         }
1671:       }
1672:     }
1673:   }
1674:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1675:   PetscFree(jj);

1677:   /* store nonzero values */
1678:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1679:   count = 0;
1680:   for (i=0; i<a->mbs; i++) {
1681:     for (j=0; j<bs; j++) {
1682:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1683:         for (l=0; l<bs; l++) {
1684:           aa[count++] = a->a[bs2*k + l*bs + j];
1685:         }
1686:       }
1687:     }
1688:   }
1689:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1690:   PetscFree(aa);

1692:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1693:   if (file) {
1694:     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1695:   }
1696:   return(0);
1697: }

1701: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1702: {
1703:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1704:   PetscErrorCode    ierr;
1705:   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1706:   PetscViewerFormat format;

1709:   PetscViewerGetFormat(viewer,&format);
1710:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1711:     PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
1712:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1713:     Mat aij;
1714:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1715:     MatView(aij,viewer);
1716:     MatDestroy(&aij);
1717:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1718:      return(0);
1719:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1720:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1721:     PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");
1722:     for (i=0; i<a->mbs; i++) {
1723:       for (j=0; j<bs; j++) {
1724:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1725:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1726:           for (l=0; l<bs; l++) {
1727: #if defined(PETSC_USE_COMPLEX)
1728:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1729:               PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1730:                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1731:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1732:               PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1733:                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1734:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1735:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1736:             }
1737: #else
1738:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1739:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1740:             }
1741: #endif
1742:           }
1743:         }
1744:         PetscViewerASCIIPrintf(viewer,"\n");
1745:       }
1746:     }
1747:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1748:   } else {
1749:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1750:     PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");
1751:     for (i=0; i<a->mbs; i++) {
1752:       for (j=0; j<bs; j++) {
1753:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1754:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1755:           for (l=0; l<bs; l++) {
1756: #if defined(PETSC_USE_COMPLEX)
1757:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1758:               PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1759:                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1760:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1761:               PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1762:                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1763:             } else {
1764:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1765:             }
1766: #else
1767:             PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1768: #endif
1769:           }
1770:         }
1771:         PetscViewerASCIIPrintf(viewer,"\n");
1772:       }
1773:     }
1774:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1775:   }
1776:   PetscViewerFlush(viewer);
1777:   return(0);
1778: }

1782: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1783: {
1784:   Mat            A = (Mat) Aa;
1785:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1787:   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1788:   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1789:   MatScalar      *aa;
1790:   PetscViewer    viewer;
1791:   PetscViewerFormat format;

1794:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1795:   PetscViewerGetFormat(viewer,&format);

1797:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1799:   /* loop over matrix elements drawing boxes */

1801:   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1802:     color = PETSC_DRAW_BLUE;
1803:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1804:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1805:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1806:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1807:         aa = a->a + j*bs2;
1808:         for (k=0; k<bs; k++) {
1809:           for (l=0; l<bs; l++) {
1810:             if (PetscRealPart(*aa++) >=  0.) continue;
1811:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1812:           }
1813:         }
1814:       }
1815:     }
1816:     color = PETSC_DRAW_CYAN;
1817:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1818:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1819:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1820:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1821:         aa = a->a + j*bs2;
1822:         for (k=0; k<bs; k++) {
1823:           for (l=0; l<bs; l++) {
1824:             if (PetscRealPart(*aa++) != 0.) continue;
1825:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1826:           }
1827:         }
1828:       }
1829:     }
1830:     color = PETSC_DRAW_RED;
1831:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1832:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1833:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1834:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1835:         aa = a->a + j*bs2;
1836:         for (k=0; k<bs; k++) {
1837:           for (l=0; l<bs; l++) {
1838:             if (PetscRealPart(*aa++) <= 0.) continue;
1839:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1840:           }
1841:         }
1842:       }
1843:     }
1844:   } else {
1845:     /* use contour shading to indicate magnitude of values */
1846:     /* first determine max of all nonzero values */
1847:     PetscDraw   popup;
1848:     PetscReal scale,maxv = 0.0;

1850:     for (i=0; i<a->nz*a->bs2; i++) {
1851:       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1852:     }
1853:     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1854:     PetscDrawGetPopup(draw,&popup);
1855:     if (popup) {PetscDrawScalePopup(popup,0.0,maxv);}
1856:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1857:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1858:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1859:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1860:         aa = a->a + j*bs2;
1861:         for (k=0; k<bs; k++) {
1862:           for (l=0; l<bs; l++) {
1863:             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1864:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1865:           }
1866:         }
1867:       }
1868:     }
1869:   }
1870:   return(0);
1871: }

1875: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1876: {
1878:   PetscReal      xl,yl,xr,yr,w,h;
1879:   PetscDraw      draw;
1880:   PetscBool      isnull;


1884:   PetscViewerDrawGetDraw(viewer,0,&draw);
1885:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

1887:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1888:   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1889:   xr += w;    yr += h;  xl = -w;     yl = -h;
1890:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1891:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1892:   PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1893:   return(0);
1894: }

1898: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1899: {
1901:   PetscBool      iascii,isbinary,isdraw;

1904:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1905:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1906:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1907:   if (iascii){
1908:     MatView_SeqBAIJ_ASCII(A,viewer);
1909:   } else if (isbinary) {
1910:     MatView_SeqBAIJ_Binary(A,viewer);
1911:   } else if (isdraw) {
1912:     MatView_SeqBAIJ_Draw(A,viewer);
1913:   } else {
1914:     Mat B;
1915:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1916:     MatView(B,viewer);
1917:     MatDestroy(&B);
1918:   }
1919:   return(0);
1920: }


1925: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1926: {
1927:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1928:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1929:   PetscInt    *ai = a->i,*ailen = a->ilen;
1930:   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1931:   MatScalar   *ap,*aa = a->a;

1934:   for (k=0; k<m; k++) { /* loop over rows */
1935:     row  = im[k]; brow = row/bs;
1936:     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1937:     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1938:     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1939:     nrow = ailen[brow];
1940:     for (l=0; l<n; l++) { /* loop over columns */
1941:       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1942:       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1943:       col  = in[l] ;
1944:       bcol = col/bs;
1945:       cidx = col%bs;
1946:       ridx = row%bs;
1947:       high = nrow;
1948:       low  = 0; /* assume unsorted */
1949:       while (high-low > 5) {
1950:         t = (low+high)/2;
1951:         if (rp[t] > bcol) high = t;
1952:         else             low  = t;
1953:       }
1954:       for (i=low; i<high; i++) {
1955:         if (rp[i] > bcol) break;
1956:         if (rp[i] == bcol) {
1957:           *v++ = ap[bs2*i+bs*cidx+ridx];
1958:           goto finished;
1959:         }
1960:       }
1961:       *v++ = 0.0;
1962:       finished:;
1963:     }
1964:   }
1965:   return(0);
1966: }

1970: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1971: {
1972:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1973:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1974:   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1975:   PetscErrorCode    ierr;
1976:   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1977:   PetscBool         roworiented=a->roworiented;
1978:   const PetscScalar *value = v;
1979:   MatScalar         *ap,*aa = a->a,*bap;

1982:   if (roworiented) {
1983:     stepval = (n-1)*bs;
1984:   } else {
1985:     stepval = (m-1)*bs;
1986:   }
1987:   for (k=0; k<m; k++) { /* loop over added rows */
1988:     row  = im[k];
1989:     if (row < 0) continue;
1990: #if defined(PETSC_USE_DEBUG)  
1991:     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1992: #endif
1993:     rp   = aj + ai[row];
1994:     ap   = aa + bs2*ai[row];
1995:     rmax = imax[row];
1996:     nrow = ailen[row];
1997:     low  = 0;
1998:     high = nrow;
1999:     for (l=0; l<n; l++) { /* loop over added columns */
2000:       if (in[l] < 0) continue;
2001: #if defined(PETSC_USE_DEBUG)  
2002:       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
2003: #endif
2004:       col = in[l];
2005:       if (roworiented) {
2006:         value = v + (k*(stepval+bs) + l)*bs;
2007:       } else {
2008:         value = v + (l*(stepval+bs) + k)*bs;
2009:       }
2010:       if (col <= lastcol) low = 0; else high = nrow;
2011:       lastcol = col;
2012:       while (high-low > 7) {
2013:         t = (low+high)/2;
2014:         if (rp[t] > col) high = t;
2015:         else             low  = t;
2016:       }
2017:       for (i=low; i<high; i++) {
2018:         if (rp[i] > col) break;
2019:         if (rp[i] == col) {
2020:           bap  = ap +  bs2*i;
2021:           if (roworiented) {
2022:             if (is == ADD_VALUES) {
2023:               for (ii=0; ii<bs; ii++,value+=stepval) {
2024:                 for (jj=ii; jj<bs2; jj+=bs) {
2025:                   bap[jj] += *value++;
2026:                 }
2027:               }
2028:             } else {
2029:               for (ii=0; ii<bs; ii++,value+=stepval) {
2030:                 for (jj=ii; jj<bs2; jj+=bs) {
2031:                   bap[jj] = *value++;
2032:                 }
2033:               }
2034:             }
2035:           } else {
2036:             if (is == ADD_VALUES) {
2037:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2038:                 for (jj=0; jj<bs; jj++) {
2039:                   bap[jj] += value[jj];
2040:                 }
2041:                 bap += bs;
2042:               }
2043:             } else {
2044:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2045:                 for (jj=0; jj<bs; jj++) {
2046:                   bap[jj]  = value[jj];
2047:                 }
2048:                 bap += bs;
2049:               }
2050:             }
2051:           }
2052:           goto noinsert2;
2053:         }
2054:       }
2055:       if (nonew == 1) goto noinsert2;
2056:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2057:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2058:       N = nrow++ - 1; high++;
2059:       /* shift up all the later entries in this row */
2060:       for (ii=N; ii>=i; ii--) {
2061:         rp[ii+1] = rp[ii];
2062:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2063:       }
2064:       if (N >= i) {
2065:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2066:       }
2067:       rp[i] = col;
2068:       bap   = ap +  bs2*i;
2069:       if (roworiented) {
2070:         for (ii=0; ii<bs; ii++,value+=stepval) {
2071:           for (jj=ii; jj<bs2; jj+=bs) {
2072:             bap[jj] = *value++;
2073:           }
2074:         }
2075:       } else {
2076:         for (ii=0; ii<bs; ii++,value+=stepval) {
2077:           for (jj=0; jj<bs; jj++) {
2078:             *bap++  = *value++;
2079:           }
2080:         }
2081:       }
2082:       noinsert2:;
2083:       low = i;
2084:     }
2085:     ailen[row] = nrow;
2086:   }
2087:   return(0);
2088: }

2092: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2093: {
2094:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2095:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2096:   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2098:   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2099:   MatScalar      *aa = a->a,*ap;
2100:   PetscReal      ratio=0.6;

2103:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

2105:   if (m) rmax = ailen[0];
2106:   for (i=1; i<mbs; i++) {
2107:     /* move each row back by the amount of empty slots (fshift) before it*/
2108:     fshift += imax[i-1] - ailen[i-1];
2109:     rmax   = PetscMax(rmax,ailen[i]);
2110:     if (fshift) {
2111:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2112:       N = ailen[i];
2113:       for (j=0; j<N; j++) {
2114:         ip[j-fshift] = ip[j];
2115:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
2116:       }
2117:     }
2118:     ai[i] = ai[i-1] + ailen[i-1];
2119:   }
2120:   if (mbs) {
2121:     fshift += imax[mbs-1] - ailen[mbs-1];
2122:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2123:   }
2124:   /* reset ilen and imax for each row */
2125:   for (i=0; i<mbs; i++) {
2126:     ailen[i] = imax[i] = ai[i+1] - ai[i];
2127:   }
2128:   a->nz = ai[mbs];

2130:   /* diagonals may have moved, so kill the diagonal pointers */
2131:   a->idiagvalid = PETSC_FALSE;
2132:   if (fshift && a->diag) {
2133:     PetscFree(a->diag);
2134:     PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
2135:     a->diag = 0;
2136:   }
2137:   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2138:   PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
2139:   PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
2140:   PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
2141:   A->info.mallocs     += a->reallocs;
2142:   a->reallocs          = 0;
2143:   A->info.nz_unneeded  = (PetscReal)fshift*bs2;

2145:   MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);
2146:   A->same_nonzero = PETSC_TRUE;
2147:   return(0);
2148: }

2150: /* 
2151:    This function returns an array of flags which indicate the locations of contiguous
2152:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2153:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2154:    Assume: sizes should be long enough to hold all the values.
2155: */
2158: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2159: {
2160:   PetscInt   i,j,k,row;
2161:   PetscBool  flg;

2164:   for (i=0,j=0; i<n; j++) {
2165:     row = idx[i];
2166:     if (row%bs!=0) { /* Not the begining of a block */
2167:       sizes[j] = 1;
2168:       i++;
2169:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2170:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2171:       i++;
2172:     } else { /* Begining of the block, so check if the complete block exists */
2173:       flg = PETSC_TRUE;
2174:       for (k=1; k<bs; k++) {
2175:         if (row+k != idx[i+k]) { /* break in the block */
2176:           flg = PETSC_FALSE;
2177:           break;
2178:         }
2179:       }
2180:       if (flg) { /* No break in the bs */
2181:         sizes[j] = bs;
2182:         i+= bs;
2183:       } else {
2184:         sizes[j] = 1;
2185:         i++;
2186:       }
2187:     }
2188:   }
2189:   *bs_max = j;
2190:   return(0);
2191: }
2192: 
2195: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2196: {
2197:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2198:   PetscErrorCode    ierr;
2199:   PetscInt          i,j,k,count,*rows;
2200:   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2201:   PetscScalar       zero = 0.0;
2202:   MatScalar         *aa;
2203:   const PetscScalar *xx;
2204:   PetscScalar       *bb;

2207:   /* fix right hand side if needed */
2208:   if (x && b) {
2209:     VecGetArrayRead(x,&xx);
2210:     VecGetArray(b,&bb);
2211:     for (i=0; i<is_n; i++) {
2212:       bb[is_idx[i]] = diag*xx[is_idx[i]];
2213:     }
2214:     VecRestoreArrayRead(x,&xx);
2215:     VecRestoreArray(b,&bb);
2216:   }

2218:   /* Make a copy of the IS and  sort it */
2219:   /* allocate memory for rows,sizes */
2220:   PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);

2222:   /* copy IS values to rows, and sort them */
2223:   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2224:   PetscSortInt(is_n,rows);

2226:   if (baij->keepnonzeropattern) {
2227:     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2228:     bs_max = is_n;
2229:     A->same_nonzero = PETSC_TRUE;
2230:   } else {
2231:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2232:     A->same_nonzero = PETSC_FALSE;
2233:   }

2235:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2236:     row   = rows[j];
2237:     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2238:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2239:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2240:     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2241:       if (diag != (PetscScalar)0.0) {
2242:         if (baij->ilen[row/bs] > 0) {
2243:           baij->ilen[row/bs]       = 1;
2244:           baij->j[baij->i[row/bs]] = row/bs;
2245:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
2246:         }
2247:         /* Now insert all the diagonal values for this bs */
2248:         for (k=0; k<bs; k++) {
2249:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2250:         }
2251:       } else { /* (diag == 0.0) */
2252:         baij->ilen[row/bs] = 0;
2253:       } /* end (diag == 0.0) */
2254:     } else { /* (sizes[i] != bs) */
2255: #if defined (PETSC_USE_DEBUG)
2256:       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2257: #endif
2258:       for (k=0; k<count; k++) {
2259:         aa[0] =  zero;
2260:         aa    += bs;
2261:       }
2262:       if (diag != (PetscScalar)0.0) {
2263:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2264:       }
2265:     }
2266:   }

2268:   PetscFree2(rows,sizes);
2269:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2270:   return(0);
2271: }

2275: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2276: {
2277:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2278:   PetscErrorCode    ierr;
2279:   PetscInt          i,j,k,count;
2280:   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2281:   PetscScalar       zero = 0.0;
2282:   MatScalar         *aa;
2283:   const PetscScalar *xx;
2284:   PetscScalar       *bb;
2285:   PetscBool         *zeroed,vecs = PETSC_FALSE;

2288:   /* fix right hand side if needed */
2289:   if (x && b) {
2290:     VecGetArrayRead(x,&xx);
2291:     VecGetArray(b,&bb);
2292:     vecs = PETSC_TRUE;
2293:   }
2294:   A->same_nonzero = PETSC_TRUE;

2296:   /* zero the columns */
2297:   PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);
2298:   PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));
2299:   for (i=0; i<is_n; i++) {
2300:     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2301:     zeroed[is_idx[i]] = PETSC_TRUE;
2302:   }
2303:   for (i=0; i<A->rmap->N; i++) {
2304:     if (!zeroed[i]) {
2305:       row = i/bs;
2306:       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2307:         for (k=0; k<bs; k++) {
2308:           col = bs*baij->j[j] + k;
2309:           if (zeroed[col]) {
2310:             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2311:             if (vecs) bb[i] -= aa[0]*xx[col];
2312:             aa[0] = 0.0;
2313:           }
2314:         }
2315:       }
2316:     } else if (vecs) bb[i] = diag*xx[i];
2317:   }
2318:   PetscFree(zeroed);
2319:   if (vecs) {
2320:     VecRestoreArrayRead(x,&xx);
2321:     VecRestoreArray(b,&bb);
2322:   }

2324:   /* zero the rows */
2325:   for (i=0; i<is_n; i++) {
2326:     row   = is_idx[i];
2327:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2328:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2329:     for (k=0; k<count; k++) {
2330:       aa[0] =  zero;
2331:       aa    += bs;
2332:     }
2333:     if (diag != (PetscScalar)0.0) {
2334:       (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2335:     }
2336:   }
2337:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2338:   return(0);
2339: }

2343: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2344: {
2345:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2346:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2347:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2348:   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2350:   PetscInt       ridx,cidx,bs2=a->bs2;
2351:   PetscBool      roworiented=a->roworiented;
2352:   MatScalar      *ap,value,*aa=a->a,*bap;

2356:   for (k=0; k<m; k++) { /* loop over added rows */
2357:     row  = im[k];
2358:     brow = row/bs;
2359:     if (row < 0) continue;
2360: #if defined(PETSC_USE_DEBUG)  
2361:     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2362: #endif
2363:     rp   = aj + ai[brow];
2364:     ap   = aa + bs2*ai[brow];
2365:     rmax = imax[brow];
2366:     nrow = ailen[brow];
2367:     low  = 0;
2368:     high = nrow;
2369:     for (l=0; l<n; l++) { /* loop over added columns */
2370:       if (in[l] < 0) continue;
2371: #if defined(PETSC_USE_DEBUG)  
2372:       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2373: #endif
2374:       col = in[l]; bcol = col/bs;
2375:       ridx = row % bs; cidx = col % bs;
2376:       if (roworiented) {
2377:         value = v[l + k*n];
2378:       } else {
2379:         value = v[k + l*m];
2380:       }
2381:       if (col <= lastcol) low = 0; else high = nrow;
2382:       lastcol = col;
2383:       while (high-low > 7) {
2384:         t = (low+high)/2;
2385:         if (rp[t] > bcol) high = t;
2386:         else              low  = t;
2387:       }
2388:       for (i=low; i<high; i++) {
2389:         if (rp[i] > bcol) break;
2390:         if (rp[i] == bcol) {
2391:           bap  = ap +  bs2*i + bs*cidx + ridx;
2392:           if (is == ADD_VALUES) *bap += value;
2393:           else                  *bap  = value;
2394:           goto noinsert1;
2395:         }
2396:       }
2397:       if (nonew == 1) goto noinsert1;
2398:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2399:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2400:       N = nrow++ - 1; high++;
2401:       /* shift up all the later entries in this row */
2402:       for (ii=N; ii>=i; ii--) {
2403:         rp[ii+1] = rp[ii];
2404:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2405:       }
2406:       if (N>=i) {
2407:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2408:       }
2409:       rp[i]                      = bcol;
2410:       ap[bs2*i + bs*cidx + ridx] = value;
2411:       a->nz++;
2412:       noinsert1:;
2413:       low = i;
2414:     }
2415:     ailen[brow] = nrow;
2416:   }
2417:   A->same_nonzero = PETSC_FALSE;
2418:   return(0);
2419: }

2423: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2424: {
2425:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2426:   Mat            outA;
2428:   PetscBool      row_identity,col_identity;

2431:   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2432:   ISIdentity(row,&row_identity);
2433:   ISIdentity(col,&col_identity);
2434:   if (!row_identity || !col_identity) {
2435:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2436:   }

2438:   outA            = inA;
2439:   inA->factortype = MAT_FACTOR_LU;

2441:   MatMarkDiagonal_SeqBAIJ(inA);

2443:   PetscObjectReference((PetscObject)row);
2444:   ISDestroy(&a->row);
2445:   a->row = row;
2446:   PetscObjectReference((PetscObject)col);
2447:   ISDestroy(&a->col);
2448:   a->col = col;
2449: 
2450:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2451:   ISDestroy(&a->icol);
2452:    ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2453:   PetscLogObjectParent(inA,a->icol);
2454: 
2455:   MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2456:   if (!a->solve_work) {
2457:     PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);
2458:     PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2459:   }
2460:   MatLUFactorNumeric(outA,inA,info);

2462:   return(0);
2463: }

2465: EXTERN_C_BEGIN
2468: PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2469: {
2470:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2471:   PetscInt    i,nz,mbs;

2474:   nz  = baij->maxnz;
2475:   mbs = baij->mbs;
2476:   for (i=0; i<nz; i++) {
2477:     baij->j[i] = indices[i];
2478:   }
2479:   baij->nz = nz;
2480:   for (i=0; i<mbs; i++) {
2481:     baij->ilen[i] = baij->imax[i];
2482:   }
2483:   return(0);
2484: }
2485: EXTERN_C_END

2489: /*@
2490:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2491:        in the matrix.

2493:   Input Parameters:
2494: +  mat - the SeqBAIJ matrix
2495: -  indices - the column indices

2497:   Level: advanced

2499:   Notes:
2500:     This can be called if you have precomputed the nonzero structure of the 
2501:   matrix and want to provide it to the matrix object to improve the performance
2502:   of the MatSetValues() operation.

2504:     You MUST have set the correct numbers of nonzeros per row in the call to 
2505:   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.

2507:     MUST be called before any calls to MatSetValues();

2509: @*/
2510: PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2511: {

2517:   PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));
2518:   return(0);
2519: }

2523: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2524: {
2525:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2527:   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2528:   PetscReal      atmp;
2529:   PetscScalar    *x,zero = 0.0;
2530:   MatScalar      *aa;
2531:   PetscInt       ncols,brow,krow,kcol;

2534:   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2535:   bs   = A->rmap->bs;
2536:   aa   = a->a;
2537:   ai   = a->i;
2538:   aj   = a->j;
2539:   mbs  = a->mbs;

2541:   VecSet(v,zero);
2542:   VecGetArray(v,&x);
2543:   VecGetLocalSize(v,&n);
2544:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2545:   for (i=0; i<mbs; i++) {
2546:     ncols = ai[1] - ai[0]; ai++;
2547:     brow  = bs*i;
2548:     for (j=0; j<ncols; j++){
2549:       for (kcol=0; kcol<bs; kcol++){
2550:         for (krow=0; krow<bs; krow++){
2551:           atmp = PetscAbsScalar(*aa);aa++;
2552:           row = brow + krow;    /* row index */
2553:           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2554:           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2555:         }
2556:       }
2557:       aj++;
2558:     }
2559:   }
2560:   VecRestoreArray(v,&x);
2561:   return(0);
2562: }

2566: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2567: {

2571:   /* If the two matrices have the same copy implementation, use fast copy. */
2572:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2573:     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2574:     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2575:     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;

2577:     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2578:     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2579:     PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));
2580:   } else {
2581:     MatCopy_Basic(A,B,str);
2582:   }
2583:   return(0);
2584: }

2588: PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2589: {

2593:    MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);
2594:   return(0);
2595: }

2599: PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2600: {
2601:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2603:   *array = a->a;
2604:   return(0);
2605: }

2609: PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2610: {
2612:   return(0);
2613: }

2617: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2618: {
2619:   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2621:   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2622:   PetscBLASInt   one=1;

2625:   if (str == SAME_NONZERO_PATTERN) {
2626:     PetscScalar alpha = a;
2627:     PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2);
2628:     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2629:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2630:     if (y->xtoy && y->XtoY != X) {
2631:       PetscFree(y->xtoy);
2632:       MatDestroy(&y->XtoY);
2633:     }
2634:     if (!y->xtoy) { /* get xtoy */
2635:       MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
2636:       y->XtoY = X;
2637:       PetscObjectReference((PetscObject)X);
2638:     }
2639:     for (i=0; i<x->nz; i++) {
2640:       j = 0;
2641:       while (j < bs2){
2642:         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2643:         j++;
2644:       }
2645:     }
2646:     PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2647:   } else {
2648:     MatAXPY_Basic(Y,a,X,str);
2649:   }
2650:   return(0);
2651: }

2655: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2656: {
2657:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2658:   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2659:   MatScalar      *aa = a->a;

2662:   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2663:   return(0);
2664: }

2668: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2669: {
2670:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2671:   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2672:   MatScalar      *aa = a->a;

2675:   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2676:   return(0);
2677: }

2679: extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);

2683: /*
2684:     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2685: */
2686: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2687: {
2688:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2690:   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2691:   PetscInt       nz = a->i[m],row,*jj,mr,col;

2694:   *nn = n;
2695:   if (!ia) return(0);
2696:   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2697:   else {
2698:     PetscMalloc((n+1)*sizeof(PetscInt),&collengths);
2699:     PetscMemzero(collengths,n*sizeof(PetscInt));
2700:     PetscMalloc((n+1)*sizeof(PetscInt),&cia);
2701:     PetscMalloc((nz+1)*sizeof(PetscInt),&cja);
2702:     jj = a->j;
2703:     for (i=0; i<nz; i++) {
2704:       collengths[jj[i]]++;
2705:     }
2706:     cia[0] = oshift;
2707:     for (i=0; i<n; i++) {
2708:       cia[i+1] = cia[i] + collengths[i];
2709:     }
2710:     PetscMemzero(collengths,n*sizeof(PetscInt));
2711:     jj   = a->j;
2712:     for (row=0; row<m; row++) {
2713:       mr = a->i[row+1] - a->i[row];
2714:       for (i=0; i<mr; i++) {
2715:         col = *jj++;
2716:         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2717:       }
2718:     }
2719:     PetscFree(collengths);
2720:     *ia = cia; *ja = cja;
2721:   }
2722:   return(0);
2723: }

2727: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2728: {

2732:   if (!ia) return(0);
2733:   PetscFree(*ia);
2734:   PetscFree(*ja);
2735:   return(0);
2736: }

2740: PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2741: {
2742:   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2744:   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2745:   PetscScalar    dx,*y,*xx,*w3_array;
2746:   PetscScalar    *vscale_array;
2747:   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2748:   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2749:   void           *fctx = coloring->fctx;
2750:   PetscBool      flg = PETSC_FALSE;
2751:   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2752:   Vec            x1_tmp;

2758:   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");

2760:   PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);
2761:   MatSetUnfactored(J);
2762:   PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);
2763:   if (flg) {
2764:     PetscInfo(coloring,"Not calling MatZeroEntries()\n");
2765:   } else {
2766:     PetscBool  assembled;
2767:     MatAssembled(J,&assembled);
2768:     if (assembled) {
2769:       MatZeroEntries(J);
2770:     }
2771:   }

2773:   x1_tmp = x1;
2774:   if (!coloring->vscale){
2775:     VecDuplicate(x1_tmp,&coloring->vscale);
2776:   }
2777: 
2778:   /*
2779:     This is a horrible, horrible, hack. 
2780:   */
2781:   if (coloring->F) {
2782:     VecGetLocalSize(coloring->F,&m1);
2783:     VecGetLocalSize(w1,&m2);
2784:     if (m1 != m2) {
2785:       coloring->F = 0;
2786:       }
2787:     }

2789:   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2790:     VecNorm(x1_tmp,NORM_2,&unorm);
2791:   }
2792:   VecGetOwnershipRange(w1,&start,&end); /* OwnershipRange is used by ghosted x! */
2793: 
2794:   /* Set w1 = F(x1) */
2795:   if (coloring->F) {
2796:     w1          = coloring->F; /* use already computed value of function */
2797:     coloring->F = 0;
2798:   } else {
2799:     PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
2800:     (*f)(sctx,x1_tmp,w1,fctx);
2801:     PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
2802:   }
2803: 
2804:   if (!coloring->w3) {
2805:     VecDuplicate(x1_tmp,&coloring->w3);
2806:     PetscLogObjectParent(coloring,coloring->w3);
2807:   }
2808:   w3 = coloring->w3;

2810:     CHKMEMQ;
2811:     /* Compute all the local scale factors, including ghost points */
2812:   VecGetLocalSize(x1_tmp,&N);
2813:   VecGetArray(x1_tmp,&xx);
2814:   VecGetArray(coloring->vscale,&vscale_array);
2815:   if (ctype == IS_COLORING_GHOSTED){
2816:     col_start = 0; col_end = N;
2817:   } else if (ctype == IS_COLORING_GLOBAL){
2818:     xx = xx - start;
2819:     vscale_array = vscale_array - start;
2820:     col_start = start; col_end = N + start;
2821:   }    CHKMEMQ;
2822:   for (col=col_start; col<col_end; col++){
2823:     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2824:     if (coloring->htype[0] == 'w') {
2825:       dx = 1.0 + unorm;
2826:     } else {
2827:       dx  = xx[col];
2828:     }
2829:     if (dx == (PetscScalar)0.0) dx = 1.0;
2830: #if !defined(PETSC_USE_COMPLEX)
2831:     if (dx < umin && dx >= 0.0)      dx = umin;
2832:     else if (dx < 0.0 && dx > -umin) dx = -umin;
2833: #else
2834:     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2835:     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2836: #endif
2837:     dx               *= epsilon;
2838:     vscale_array[col] = (PetscScalar)1.0/dx;
2839:   }     CHKMEMQ;
2840:   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2841:   VecRestoreArray(coloring->vscale,&vscale_array);
2842:   if (ctype == IS_COLORING_GLOBAL){
2843:     VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);
2844:     VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);
2845:   }
2846:   CHKMEMQ;
2847:   if (coloring->vscaleforrow) {
2848:     vscaleforrow = coloring->vscaleforrow;
2849:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");

2851:   PetscMalloc(bs*sizeof(PetscInt),&srows);
2852:   /*
2853:     Loop over each color
2854:   */
2855:   VecGetArray(coloring->vscale,&vscale_array);
2856:   for (k=0; k<coloring->ncolors; k++) {
2857:     coloring->currentcolor = k;
2858:     for (i=0; i<bs; i++) {
2859:       VecCopy(x1_tmp,w3);
2860:       VecGetArray(w3,&w3_array);
2861:       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2862:       /*
2863:         Loop over each column associated with color 
2864:         adding the perturbation to the vector w3.
2865:       */
2866:       for (l=0; l<coloring->ncolumns[k]; l++) {
2867:         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2868:         if (coloring->htype[0] == 'w') {
2869:           dx = 1.0 + unorm;
2870:         } else {
2871:           dx  = xx[col];
2872:         }
2873:         if (dx == (PetscScalar)0.0) dx = 1.0;
2874: #if !defined(PETSC_USE_COMPLEX)
2875:         if (dx < umin && dx >= 0.0)      dx = umin;
2876:         else if (dx < 0.0 && dx > -umin) dx = -umin;
2877: #else
2878:         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2879:         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2880: #endif
2881:         dx            *= epsilon;
2882:         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2883:         w3_array[col] += dx;
2884:       }
2885:       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2886:       VecRestoreArray(w3,&w3_array);

2888:       /*
2889:         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2890:         w2 = F(x1 + dx) - F(x1)
2891:       */
2892:       PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
2893:       (*f)(sctx,w3,w2,fctx);
2894:       PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
2895:       VecAXPY(w2,-1.0,w1);
2896: 
2897:       /*
2898:         Loop over rows of vector, putting results into Jacobian matrix
2899:       */
2900:       VecGetArray(w2,&y);
2901:       for (l=0; l<coloring->nrows[k]; l++) {
2902:         row    = bs*coloring->rows[k][l];             /* local row index */
2903:         col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2904:         for (j=0; j<bs; j++) {
2905:             y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2906:           srows[j]  = row + start + j;
2907:         }
2908:         MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);
2909:       }
2910:       VecRestoreArray(w2,&y);
2911:     }
2912:   } /* endof for each color */
2913:   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2914:   VecRestoreArray(coloring->vscale,&vscale_array);
2915:   VecRestoreArray(x1_tmp,&xx);
2916:   PetscFree(srows);
2917: 
2918:   coloring->currentcolor = -1;
2919:   MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2920:   MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2921:   PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);
2922:   return(0);
2923: }

2925: /* -------------------------------------------------------------------*/
2926: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2927:        MatGetRow_SeqBAIJ,
2928:        MatRestoreRow_SeqBAIJ,
2929:        MatMult_SeqBAIJ_N,
2930: /* 4*/ MatMultAdd_SeqBAIJ_N,
2931:        MatMultTranspose_SeqBAIJ,
2932:        MatMultTransposeAdd_SeqBAIJ,
2933:        0,
2934:        0,
2935:        0,
2936: /*10*/ 0,
2937:        MatLUFactor_SeqBAIJ,
2938:        0,
2939:        0,
2940:        MatTranspose_SeqBAIJ,
2941: /*15*/ MatGetInfo_SeqBAIJ,
2942:        MatEqual_SeqBAIJ,
2943:        MatGetDiagonal_SeqBAIJ,
2944:        MatDiagonalScale_SeqBAIJ,
2945:        MatNorm_SeqBAIJ,
2946: /*20*/ 0,
2947:        MatAssemblyEnd_SeqBAIJ,
2948:        MatSetOption_SeqBAIJ,
2949:        MatZeroEntries_SeqBAIJ,
2950: /*24*/ MatZeroRows_SeqBAIJ,
2951:        0,
2952:        0,
2953:        0,
2954:        0,
2955: /*29*/ MatSetUp_SeqBAIJ,
2956:        0,
2957:        0,
2958:        MatGetArray_SeqBAIJ,
2959:        MatRestoreArray_SeqBAIJ,
2960: /*34*/ MatDuplicate_SeqBAIJ,
2961:        0,
2962:        0,
2963:        MatILUFactor_SeqBAIJ,
2964:        0,
2965: /*39*/ MatAXPY_SeqBAIJ,
2966:        MatGetSubMatrices_SeqBAIJ,
2967:        MatIncreaseOverlap_SeqBAIJ,
2968:        MatGetValues_SeqBAIJ,
2969:        MatCopy_SeqBAIJ,
2970: /*44*/ 0,
2971:        MatScale_SeqBAIJ,
2972:        0,
2973:        0,
2974:        MatZeroRowsColumns_SeqBAIJ,
2975: /*49*/ 0,
2976:        MatGetRowIJ_SeqBAIJ,
2977:        MatRestoreRowIJ_SeqBAIJ,
2978:        MatGetColumnIJ_SeqBAIJ,
2979:        MatRestoreColumnIJ_SeqBAIJ,
2980: /*54*/ MatFDColoringCreate_SeqAIJ,
2981:        0,
2982:        0,
2983:        0,
2984:        MatSetValuesBlocked_SeqBAIJ,
2985: /*59*/ MatGetSubMatrix_SeqBAIJ,
2986:        MatDestroy_SeqBAIJ,
2987:        MatView_SeqBAIJ,
2988:        0,
2989:        0,
2990: /*64*/ 0,
2991:        0,
2992:        0,
2993:        0,
2994:        0,
2995: /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2996:        0,
2997:        MatConvert_Basic,
2998:        0,
2999:        0,
3000: /*74*/ 0,
3001:        MatFDColoringApply_BAIJ,
3002:        0,
3003:        0,
3004:        0,
3005: /*79*/ 0,
3006:        0,
3007:        0,
3008:        0,
3009:        MatLoad_SeqBAIJ,
3010: /*84*/ 0,
3011:        0,
3012:        0,
3013:        0,
3014:        0,
3015: /*89*/ 0,
3016:        0,
3017:        0,
3018:        0,
3019:        0,
3020: /*94*/ 0,
3021:        0,
3022:        0,
3023:        0,
3024:        0,
3025: /*99*/0,
3026:        0,
3027:        0,
3028:        0,
3029:        0,
3030: /*104*/0,
3031:        MatRealPart_SeqBAIJ,
3032:        MatImaginaryPart_SeqBAIJ,
3033:        0,
3034:        0,
3035: /*109*/0,
3036:        0,
3037:        0,
3038:        0,
3039:        MatMissingDiagonal_SeqBAIJ,
3040: /*114*/0,
3041:        0,
3042:        0,
3043:        0,
3044:        0,
3045: /*119*/0,
3046:        0,
3047:        MatMultHermitianTranspose_SeqBAIJ,
3048:        MatMultHermitianTransposeAdd_SeqBAIJ,
3049:        0,
3050: /*124*/0,
3051:        0,
3052:        MatInvertBlockDiagonal_SeqBAIJ
3053: };

3055: EXTERN_C_BEGIN
3058: PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3059: {
3060:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3061:   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;

3065:   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");

3067:   /* allocate space for values if not already there */
3068:   if (!aij->saved_values) {
3069:     PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
3070:     PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));
3071:   }

3073:   /* copy values over */
3074:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
3075:   return(0);
3076: }
3077: EXTERN_C_END

3079: EXTERN_C_BEGIN
3082: PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3083: {
3084:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3086:   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;

3089:   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3090:   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");

3092:   /* copy values over */
3093:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
3094:   return(0);
3095: }
3096: EXTERN_C_END

3098: EXTERN_C_BEGIN
3099: extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3100: extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3101: EXTERN_C_END

3103: EXTERN_C_BEGIN
3106: PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3107: {
3108:   Mat_SeqBAIJ    *b;
3110:   PetscInt       i,mbs,nbs,bs2;
3111:   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;

3114:   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3115:   if (nz == MAT_SKIP_ALLOCATION) {
3116:     skipallocation = PETSC_TRUE;
3117:     nz             = 0;
3118:   }

3120:   PetscLayoutSetBlockSize(B->rmap,bs);
3121:   PetscLayoutSetBlockSize(B->cmap,bs);
3122:   PetscLayoutSetUp(B->rmap);
3123:   PetscLayoutSetUp(B->cmap);
3124:   PetscLayoutGetBlockSize(B->rmap,&bs);

3126:   B->preallocated = PETSC_TRUE;

3128:   mbs  = B->rmap->n/bs;
3129:   nbs  = B->cmap->n/bs;
3130:   bs2  = bs*bs;

3132:   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);

3134:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3135:   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3136:   if (nnz) {
3137:     for (i=0; i<mbs; i++) {
3138:       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3139:       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3140:     }
3141:   }

3143:   b       = (Mat_SeqBAIJ*)B->data;
3144:   PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
3145:     PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);
3146:   PetscOptionsEnd();

3148:   if (!flg) {
3149:     switch (bs) {
3150:     case 1:
3151:       B->ops->mult            = MatMult_SeqBAIJ_1;
3152:       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3153:       B->ops->sor             = MatSOR_SeqBAIJ_1;
3154:       break;
3155:     case 2:
3156:       B->ops->mult            = MatMult_SeqBAIJ_2;
3157:       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3158:       B->ops->sor             = MatSOR_SeqBAIJ_2;
3159:       break;
3160:     case 3:
3161:       B->ops->mult            = MatMult_SeqBAIJ_3;
3162:       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3163:       B->ops->sor             = MatSOR_SeqBAIJ_3;
3164:       break;
3165:     case 4:
3166:       B->ops->mult            = MatMult_SeqBAIJ_4;
3167:       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3168:       B->ops->sor             = MatSOR_SeqBAIJ_4;
3169:       break;
3170:     case 5:
3171:       B->ops->mult            = MatMult_SeqBAIJ_5;
3172:       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3173:       B->ops->sor             = MatSOR_SeqBAIJ_5;
3174:       break;
3175:     case 6:
3176:       B->ops->mult            = MatMult_SeqBAIJ_6;
3177:       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3178:       B->ops->sor             = MatSOR_SeqBAIJ_6;
3179:       break;
3180:     case 7:
3181:       B->ops->mult            = MatMult_SeqBAIJ_7;
3182:       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3183:       B->ops->sor             = MatSOR_SeqBAIJ_7;
3184:       break;
3185:     case 15:
3186:       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3187:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3188:       B->ops->sor             = MatSOR_SeqBAIJ_N;
3189:       break;
3190:     default:
3191:       B->ops->mult            = MatMult_SeqBAIJ_N;
3192:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3193:       B->ops->sor             = MatSOR_SeqBAIJ_N;
3194:       break;
3195:     }
3196:   }
3197:   b->mbs       = mbs;
3198:   b->nbs       = nbs;
3199:   if (!skipallocation) {
3200:     if (!b->imax) {
3201:       PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
3202:       PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3203:       b->free_imax_ilen = PETSC_TRUE;
3204:     }
3205:     /* b->ilen will count nonzeros in each block row so far. */
3206:     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3207:     if (!nnz) {
3208:       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3209:       else if (nz < 0) nz = 1;
3210:       for (i=0; i<mbs; i++) b->imax[i] = nz;
3211:       nz = nz*mbs;
3212:     } else {
3213:       nz = 0;
3214:       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3215:     }

3217:     /* allocate the matrix space */
3218:     MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
3219:     PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);
3220:     PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
3221:     PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
3222:     PetscMemzero(b->j,nz*sizeof(PetscInt));
3223:     b->singlemalloc = PETSC_TRUE;
3224:     b->i[0] = 0;
3225:     for (i=1; i<mbs+1; i++) {
3226:       b->i[i] = b->i[i-1] + b->imax[i-1];
3227:     }
3228:     b->free_a     = PETSC_TRUE;
3229:     b->free_ij    = PETSC_TRUE;
3230:   } else {
3231:     b->free_a     = PETSC_FALSE;
3232:     b->free_ij    = PETSC_FALSE;
3233:   }

3235:   b->bs2              = bs2;
3236:   b->mbs              = mbs;
3237:   b->nz               = 0;
3238:   b->maxnz            = nz;
3239:   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3240:   if (realalloc) {MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);}
3241:   return(0);
3242: }
3243: EXTERN_C_END

3245: EXTERN_C_BEGIN
3248: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3249: {
3250:   PetscInt       i,m,nz,nz_max=0,*nnz;
3251:   PetscScalar    *values=0;

3255:   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3256:   PetscLayoutSetBlockSize(B->rmap,bs);
3257:   PetscLayoutSetBlockSize(B->cmap,bs);
3258:   PetscLayoutSetUp(B->rmap);
3259:   PetscLayoutSetUp(B->cmap);
3260:   PetscLayoutGetBlockSize(B->rmap,&bs);
3261:   m = B->rmap->n/bs;

3263:   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3264:   PetscMalloc((m+1) * sizeof(PetscInt), &nnz);
3265:   for(i=0; i<m; i++) {
3266:     nz = ii[i+1]- ii[i];
3267:     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3268:     nz_max = PetscMax(nz_max, nz);
3269:     nnz[i] = nz;
3270:   }
3271:   MatSeqBAIJSetPreallocation(B,bs,0,nnz);
3272:   PetscFree(nnz);

3274:   values = (PetscScalar*)V;
3275:   if (!values) {
3276:     PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);
3277:     PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));
3278:   }
3279:   for (i=0; i<m; i++) {
3280:     PetscInt          ncols  = ii[i+1] - ii[i];
3281:     const PetscInt    *icols = jj + ii[i];
3282:     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3283:     MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
3284:   }
3285:   if (!V) { PetscFree(values); }
3286:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3287:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3288:   MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3289:   return(0);
3290: }
3291: EXTERN_C_END


3294: EXTERN_C_BEGIN
3295: extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3296: extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3297: #if defined(PETSC_HAVE_MUMPS)
3298: extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3299: #endif
3300: extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3301: EXTERN_C_END

3303: /*MC
3304:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 
3305:    block sparse compressed row format.

3307:    Options Database Keys:
3308: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()

3310:   Level: beginner

3312: .seealso: MatCreateSeqBAIJ()
3313: M*/

3315: EXTERN_C_BEGIN
3316: extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3317: EXTERN_C_END

3319: EXTERN_C_BEGIN
3322: PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3323: {
3325:   PetscMPIInt    size;
3326:   Mat_SeqBAIJ    *b;

3329:   MPI_Comm_size(((PetscObject)B)->comm,&size);
3330:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

3332:   PetscNewLog(B,Mat_SeqBAIJ,&b);
3333:   B->data = (void*)b;
3334:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
3335:   b->row                   = 0;
3336:   b->col                   = 0;
3337:   b->icol                  = 0;
3338:   b->reallocs              = 0;
3339:   b->saved_values          = 0;

3341:   b->roworiented           = PETSC_TRUE;
3342:   b->nonew                 = 0;
3343:   b->diag                  = 0;
3344:   b->solve_work            = 0;
3345:   b->mult_work             = 0;
3346:   B->spptr                 = 0;
3347:   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3348:   b->keepnonzeropattern    = PETSC_FALSE;
3349:   b->xtoy                  = 0;
3350:   b->XtoY                  = 0;
3351:   B->same_nonzero          = PETSC_FALSE;

3353:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3354:                                      "MatGetFactorAvailable_seqbaij_petsc",
3355:                                      MatGetFactorAvailable_seqbaij_petsc);
3356:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3357:                                      "MatGetFactor_seqbaij_petsc",
3358:                                      MatGetFactor_seqbaij_petsc);
3359:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3360:                                      "MatGetFactor_seqbaij_bstrm",
3361:                                      MatGetFactor_seqbaij_bstrm);
3362: #if defined(PETSC_HAVE_MUMPS)
3363:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);
3364: #endif
3365:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3366:                                      "MatInvertBlockDiagonal_SeqBAIJ",
3367:                                       MatInvertBlockDiagonal_SeqBAIJ);
3368:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3369:                                      "MatStoreValues_SeqBAIJ",
3370:                                       MatStoreValues_SeqBAIJ);
3371:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3372:                                      "MatRetrieveValues_SeqBAIJ",
3373:                                       MatRetrieveValues_SeqBAIJ);
3374:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3375:                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3376:                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);
3377:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3378:                                      "MatConvert_SeqBAIJ_SeqAIJ",
3379:                                       MatConvert_SeqBAIJ_SeqAIJ);
3380:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3381:                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3382:                                       MatConvert_SeqBAIJ_SeqSBAIJ);
3383:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3384:                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3385:                                       MatSeqBAIJSetPreallocation_SeqBAIJ);
3386:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3387:                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3388:                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3389:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3390:                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3391:                                       MatConvert_SeqBAIJ_SeqBSTRM);
3392:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3393:                                      "MatIsTranspose_SeqBAIJ",
3394:                                       MatIsTranspose_SeqBAIJ);
3395:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3396:   return(0);
3397: }
3398: EXTERN_C_END

3402: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3403: {
3404:   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3406:   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;

3409:   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");

3411:   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3412:     c->imax = a->imax;
3413:     c->ilen = a->ilen;
3414:     c->free_imax_ilen = PETSC_FALSE;
3415:   } else {
3416:     PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);
3417:     PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));
3418:     for (i=0; i<mbs; i++) {
3419:       c->imax[i] = a->imax[i];
3420:       c->ilen[i] = a->ilen[i];
3421:     }
3422:     c->free_imax_ilen = PETSC_TRUE;
3423:   }

3425:   /* allocate the matrix space */
3426:   if (mallocmatspace){
3427:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3428:       PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);
3429:       PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));
3430:       PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));
3431:       c->i            = a->i;
3432:       c->j            = a->j;
3433:       c->singlemalloc = PETSC_FALSE;
3434:       c->free_a       = PETSC_TRUE;
3435:       c->free_ij      = PETSC_FALSE;
3436:       c->parent       = A;
3437:       C->preallocated = PETSC_TRUE;
3438:       C->assembled    = PETSC_TRUE;
3439:       PetscObjectReference((PetscObject)A);
3440:       MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3441:       MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3442:     } else {
3443:       PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
3444:       PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));
3445:       c->singlemalloc = PETSC_TRUE;
3446:       c->free_a       = PETSC_TRUE;
3447:       c->free_ij      = PETSC_TRUE;
3448:       PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
3449:       if (mbs > 0) {
3450:         PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
3451:         if (cpvalues == MAT_COPY_VALUES) {
3452:           PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
3453:         } else {
3454:           PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
3455:         }
3456:       }
3457:       C->preallocated = PETSC_TRUE;
3458:       C->assembled    = PETSC_TRUE;
3459:     }
3460:   }

3462:   c->roworiented = a->roworiented;
3463:   c->nonew       = a->nonew;
3464:   PetscLayoutReference(A->rmap,&C->rmap);
3465:   PetscLayoutReference(A->cmap,&C->cmap);
3466:   c->bs2         = a->bs2;
3467:   c->mbs         = a->mbs;
3468:   c->nbs         = a->nbs;

3470:   if (a->diag) {
3471:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3472:       c->diag      = a->diag;
3473:       c->free_diag = PETSC_FALSE;
3474:     } else {
3475:       PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
3476:       PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
3477:       for (i=0; i<mbs; i++) {
3478:         c->diag[i] = a->diag[i];
3479:       }
3480:       c->free_diag = PETSC_TRUE;
3481:     }
3482:   } else c->diag        = 0;
3483:   c->nz                 = a->nz;
3484:   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3485:   c->solve_work         = 0;
3486:   c->mult_work          = 0;

3488:   c->compressedrow.use     = a->compressedrow.use;
3489:   c->compressedrow.nrows   = a->compressedrow.nrows;
3490:   c->compressedrow.check   = a->compressedrow.check;
3491:   if (a->compressedrow.use){
3492:     i = a->compressedrow.nrows;
3493:     PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);
3494:     PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));
3495:     PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
3496:     PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
3497:   } else {
3498:     c->compressedrow.use    = PETSC_FALSE;
3499:     c->compressedrow.i      = PETSC_NULL;
3500:     c->compressedrow.rindex = PETSC_NULL;
3501:   }
3502:   C->same_nonzero = A->same_nonzero;
3503:   PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3504:   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
3505:   return(0);
3506: }

3510: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3511: {

3515:   MatCreate(((PetscObject)A)->comm,B);
3516:   MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3517:   MatSetType(*B,MATSEQBAIJ);
3518:   MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3519:   return(0);
3520: }

3524: PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3525: {
3526:   Mat_SeqBAIJ    *a;
3528:   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3529:   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3530:   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3531:   PetscInt       *masked,nmask,tmp,bs2,ishift;
3532:   PetscMPIInt    size;
3533:   int            fd;
3534:   PetscScalar    *aa;
3535:   MPI_Comm       comm = ((PetscObject)viewer)->comm;

3538:   PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");
3539:   PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
3540:   PetscOptionsEnd();
3541:   bs2  = bs*bs;

3543:   MPI_Comm_size(comm,&size);
3544:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3545:   PetscViewerBinaryGetDescriptor(viewer,&fd);
3546:   PetscBinaryRead(fd,header,4,PETSC_INT);
3547:   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3548:   M = header[1]; N = header[2]; nz = header[3];

3550:   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3551:   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");

3553:   /* 
3554:      This code adds extra rows to make sure the number of rows is 
3555:     divisible by the blocksize
3556:   */
3557:   mbs        = M/bs;
3558:   extra_rows = bs - M + bs*(mbs);
3559:   if (extra_rows == bs) extra_rows = 0;
3560:   else                  mbs++;
3561:   if (extra_rows) {
3562:     PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3563:   }

3565:   /* Set global sizes if not already set */
3566:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3567:     MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3568:   } else { /* Check if the matrix global sizes are correct */
3569:     MatGetSize(newmat,&rows,&cols);
3570:     if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */
3571:       MatGetLocalSize(newmat,&rows,&cols);
3572:     }
3573:     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3574:   }
3575: 
3576:   /* read in row lengths */
3577:   PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
3578:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
3579:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

3581:   /* read in column indices */
3582:   PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
3583:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
3584:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

3586:   /* loop over row lengths determining block row lengths */
3587:   PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
3588:   PetscMemzero(browlengths,mbs*sizeof(PetscInt));
3589:   PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);
3590:   PetscMemzero(mask,mbs*sizeof(PetscInt));
3591:   rowcount = 0;
3592:   nzcount = 0;
3593:   for (i=0; i<mbs; i++) {
3594:     nmask = 0;
3595:     for (j=0; j<bs; j++) {
3596:       kmax = rowlengths[rowcount];
3597:       for (k=0; k<kmax; k++) {
3598:         tmp = jj[nzcount++]/bs;
3599:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3600:       }
3601:       rowcount++;
3602:     }
3603:     browlengths[i] += nmask;
3604:     /* zero out the mask elements we set */
3605:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3606:   }

3608:   /* Do preallocation  */
3609:   MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);
3610:   a = (Mat_SeqBAIJ*)newmat->data;

3612:   /* set matrix "i" values */
3613:   a->i[0] = 0;
3614:   for (i=1; i<= mbs; i++) {
3615:     a->i[i]      = a->i[i-1] + browlengths[i-1];
3616:     a->ilen[i-1] = browlengths[i-1];
3617:   }
3618:   a->nz         = 0;
3619:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

3621:   /* read in nonzero values */
3622:   PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
3623:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3624:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

3626:   /* set "a" and "j" values into matrix */
3627:   nzcount = 0; jcount = 0;
3628:   for (i=0; i<mbs; i++) {
3629:     nzcountb = nzcount;
3630:     nmask    = 0;
3631:     for (j=0; j<bs; j++) {
3632:       kmax = rowlengths[i*bs+j];
3633:       for (k=0; k<kmax; k++) {
3634:         tmp = jj[nzcount++]/bs;
3635:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3636:       }
3637:     }
3638:     /* sort the masked values */
3639:     PetscSortInt(nmask,masked);

3641:     /* set "j" values into matrix */
3642:     maskcount = 1;
3643:     for (j=0; j<nmask; j++) {
3644:       a->j[jcount++]  = masked[j];
3645:       mask[masked[j]] = maskcount++;
3646:     }
3647:     /* set "a" values into matrix */
3648:     ishift = bs2*a->i[i];
3649:     for (j=0; j<bs; j++) {
3650:       kmax = rowlengths[i*bs+j];
3651:       for (k=0; k<kmax; k++) {
3652:         tmp       = jj[nzcountb]/bs ;
3653:         block     = mask[tmp] - 1;
3654:         point     = jj[nzcountb] - bs*tmp;
3655:         idx       = ishift + bs2*block + j + bs*point;
3656:         a->a[idx] = (MatScalar)aa[nzcountb++];
3657:       }
3658:     }
3659:     /* zero out the mask elements we set */
3660:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3661:   }
3662:   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

3664:   PetscFree(rowlengths);
3665:   PetscFree(browlengths);
3666:   PetscFree(aa);
3667:   PetscFree(jj);
3668:   PetscFree2(mask,masked);

3670:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
3671:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
3672:   MatView_Private(newmat);
3673:   return(0);
3674: }

3678: /*@C
3679:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3680:    compressed row) format.  For good matrix assembly performance the
3681:    user should preallocate the matrix storage by setting the parameter nz
3682:    (or the array nnz).  By setting these parameters accurately, performance
3683:    during matrix assembly can be increased by more than a factor of 50.

3685:    Collective on MPI_Comm

3687:    Input Parameters:
3688: +  comm - MPI communicator, set to PETSC_COMM_SELF
3689: .  bs - size of block
3690: .  m - number of rows
3691: .  n - number of columns
3692: .  nz - number of nonzero blocks  per block row (same for all rows)
3693: -  nnz - array containing the number of nonzero blocks in the various block rows 
3694:          (possibly different for each block row) or PETSC_NULL

3696:    Output Parameter:
3697: .  A - the matrix 

3699:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3700:    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3701:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

3703:    Options Database Keys:
3704: .   -mat_no_unroll - uses code that does not unroll the loops in the 
3705:                      block calculations (much slower)
3706: .    -mat_block_size - size of the blocks to use

3708:    Level: intermediate

3710:    Notes:
3711:    The number of rows and columns must be divisible by blocksize.

3713:    If the nnz parameter is given then the nz parameter is ignored

3715:    A nonzero block is any block that as 1 or more nonzeros in it

3717:    The block AIJ format is fully compatible with standard Fortran 77
3718:    storage.  That is, the stored row and column indices can begin at
3719:    either one (as in Fortran) or zero.  See the users' manual for details.

3721:    Specify the preallocated storage with either nz or nnz (not both).
3722:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
3723:    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3724:    matrices.

3726: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3727: @*/
3728: PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3729: {
3731: 
3733:   MatCreate(comm,A);
3734:   MatSetSizes(*A,m,n,m,n);
3735:   MatSetType(*A,MATSEQBAIJ);
3736:   MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
3737:   return(0);
3738: }

3742: /*@C
3743:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3744:    per row in the matrix. For good matrix assembly performance the
3745:    user should preallocate the matrix storage by setting the parameter nz
3746:    (or the array nnz).  By setting these parameters accurately, performance
3747:    during matrix assembly can be increased by more than a factor of 50.

3749:    Collective on MPI_Comm

3751:    Input Parameters:
3752: +  A - the matrix
3753: .  bs - size of block
3754: .  nz - number of block nonzeros per block row (same for all rows)
3755: -  nnz - array containing the number of block nonzeros in the various block rows 
3756:          (possibly different for each block row) or PETSC_NULL

3758:    Options Database Keys:
3759: .   -mat_no_unroll - uses code that does not unroll the loops in the 
3760:                      block calculations (much slower)
3761: .    -mat_block_size - size of the blocks to use

3763:    Level: intermediate

3765:    Notes:
3766:    If the nnz parameter is given then the nz parameter is ignored

3768:    You can call MatGetInfo() to get information on how effective the preallocation was;
3769:    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3770:    You can also run with the option -info and look for messages with the string 
3771:    malloc in them to see if additional memory allocation was needed.

3773:    The block AIJ format is fully compatible with standard Fortran 77
3774:    storage.  That is, the stored row and column indices can begin at
3775:    either one (as in Fortran) or zero.  See the users' manual for details.

3777:    Specify the preallocated storage with either nz or nnz (not both).
3778:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
3779:    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.

3781: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3782: @*/
3783: PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3784: {

3791:   PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3792:   return(0);
3793: }

3797: /*@C
3798:    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3799:    (the default sequential PETSc format).  

3801:    Collective on MPI_Comm

3803:    Input Parameters:
3804: +  A - the matrix 
3805: .  i - the indices into j for the start of each local row (starts with zero)
3806: .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3807: -  v - optional values in the matrix

3809:    Level: developer

3811: .keywords: matrix, aij, compressed row, sparse

3813: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3814: @*/
3815: PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3816: {

3823:   PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3824:   return(0);
3825: }


3830: /*@
3831:      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.

3833:      Collective on MPI_Comm

3835:    Input Parameters:
3836: +  comm - must be an MPI communicator of size 1
3837: .  bs - size of block
3838: .  m - number of rows
3839: .  n - number of columns
3840: .  i - row indices
3841: .  j - column indices
3842: -  a - matrix values

3844:    Output Parameter:
3845: .  mat - the matrix

3847:    Level: advanced

3849:    Notes:
3850:        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3851:     once the matrix is destroyed

3853:        You cannot set new nonzero locations into this matrix, that will generate an error.

3855:        The i and j indices are 0 based

3857:        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).


3860: .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()

3862: @*/
3863: PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3864: {
3866:   PetscInt       ii;
3867:   Mat_SeqBAIJ    *baij;

3870:   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3871:   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3872: 
3873:   MatCreate(comm,mat);
3874:   MatSetSizes(*mat,m,n,m,n);
3875:   MatSetType(*mat,MATSEQBAIJ);
3876:   MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
3877:   baij = (Mat_SeqBAIJ*)(*mat)->data;
3878:   PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);
3879:   PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));

3881:   baij->i = i;
3882:   baij->j = j;
3883:   baij->a = a;
3884:   baij->singlemalloc = PETSC_FALSE;
3885:   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3886:   baij->free_a       = PETSC_FALSE;
3887:   baij->free_ij      = PETSC_FALSE;

3889:   for (ii=0; ii<m; ii++) {
3890:     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3891: #if defined(PETSC_USE_DEBUG)
3892:     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3893: #endif    
3894:   }
3895: #if defined(PETSC_USE_DEBUG)
3896:   for (ii=0; ii<baij->i[m]; ii++) {
3897:     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3898:     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3899:   }
3900: #endif    

3902:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3903:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3904:   return(0);
3905: }