Actual source code: baij.c

petsc-3.7.3 2016-08-01
Report Typos and Errors
  2: /*
  3:     Defines the basic matrix operations for the BAIJ (compressed row)
  4:   matrix storage format.
  5: */
  6: #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
  7: #include <petscblaslapack.h>
  8: #include <petsc/private/kernels/blockinvert.h>
  9: #include <petsc/private/kernels/blockmatmult.h>

 13: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
 14: {
 15:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 17:   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
 18:   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
 19:   PetscReal      shift = 0.0;
 20:   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;

 23:   allowzeropivot = PetscNot(A->erroriffailure);

 25:   if (a->idiagvalid) {
 26:     if (values) *values = a->idiag;
 27:     return(0);
 28:   }
 29:   MatMarkDiagonal_SeqBAIJ(A);
 30:   diag_offset = a->diag;
 31:   if (!a->idiag) {
 32:     PetscMalloc1(2*bs2*mbs,&a->idiag);
 33:     PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));
 34:   }
 35:   diag  = a->idiag;
 36:   mdiag = a->idiag+bs2*mbs;
 37:   if (values) *values = a->idiag;
 38:   /* factor and invert each block */
 39:   switch (bs) {
 40:   case 1:
 41:     for (i=0; i<mbs; i++) {
 42:       odiag    = v + 1*diag_offset[i];
 43:       diag[0]  = odiag[0];
 44:       mdiag[0] = odiag[0];
 45: 
 46:       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
 47:         if (allowzeropivot) {
 48:           A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 49:           PetscInfo1(A,"Zero pivot, row %D\n",i);
 50:         } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",i);
 51:       }
 52: 
 53:       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
 54:       diag    += 1;
 55:       mdiag   += 1;
 56:     }
 57:     break;
 58:   case 2:
 59:     for (i=0; i<mbs; i++) {
 60:       odiag    = v + 4*diag_offset[i];
 61:       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 62:       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 63:       PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);
 64:       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 65:       diag    += 4;
 66:       mdiag   += 4;
 67:     }
 68:     break;
 69:   case 3:
 70:     for (i=0; i<mbs; i++) {
 71:       odiag    = v + 9*diag_offset[i];
 72:       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 73:       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 74:       diag[8]  = odiag[8];
 75:       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 76:       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
 77:       mdiag[8] = odiag[8];
 78:       PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);
 79:       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 80:       diag    += 9;
 81:       mdiag   += 9;
 82:     }
 83:     break;
 84:   case 4:
 85:     for (i=0; i<mbs; i++) {
 86:       odiag  = v + 16*diag_offset[i];
 87:       PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
 88:       PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
 89:       PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);
 90:       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 91:       diag  += 16;
 92:       mdiag += 16;
 93:     }
 94:     break;
 95:   case 5:
 96:     for (i=0; i<mbs; i++) {
 97:       odiag  = v + 25*diag_offset[i];
 98:       PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
 99:       PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
100:       PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);
101:       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
102:       diag  += 25;
103:       mdiag += 25;
104:     }
105:     break;
106:   case 6:
107:     for (i=0; i<mbs; i++) {
108:       odiag  = v + 36*diag_offset[i];
109:       PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));
110:       PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));
111:       PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);
112:       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
113:       diag  += 36;
114:       mdiag += 36;
115:     }
116:     break;
117:   case 7:
118:     for (i=0; i<mbs; i++) {
119:       odiag  = v + 49*diag_offset[i];
120:       PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));
121:       PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));
122:       PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);
123:       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
124:       diag  += 49;
125:       mdiag += 49;
126:     }
127:     break;
128:   default:
129:     PetscMalloc2(bs,&v_work,bs,&v_pivots);
130:     for (i=0; i<mbs; i++) {
131:       odiag  = v + bs2*diag_offset[i];
132:       PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));
133:       PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));
134:       PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);
135:       if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
136:       diag  += bs2;
137:       mdiag += bs2;
138:     }
139:     PetscFree2(v_work,v_pivots);
140:   }
141:   a->idiagvalid = PETSC_TRUE;
142:   return(0);
143: }

147: PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
148: {
149:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
150:   PetscScalar       *x,*work,*w,*workt,*t;
151:   const MatScalar   *v,*aa = a->a, *idiag;
152:   const PetscScalar *b,*xb;
153:   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
154:   PetscErrorCode    ierr;
155:   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
156:   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;

159:   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
160:   its = its*lits;
161:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
162:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
163:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
164:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
165:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");

167:   if (!a->idiagvalid) {MatInvertBlockDiagonal(A,NULL);}

169:   if (!m) return(0);
170:   diag  = a->diag;
171:   idiag = a->idiag;
172:   k    = PetscMax(A->rmap->n,A->cmap->n);
173:   if (!a->mult_work) {
174:     PetscMalloc1(k+1,&a->mult_work);
175:   }
176:   if (!a->sor_workt) {
177:     PetscMalloc1(k,&a->sor_workt);
178:   }
179:   if (!a->sor_work) {
180:     PetscMalloc1(bs,&a->sor_work);
181:   }
182:   work = a->mult_work;
183:   t    = a->sor_workt;
184:   w    = a->sor_work;

186:   VecGetArray(xx,&x);
187:   VecGetArrayRead(bb,&b);

189:   if (flag & SOR_ZERO_INITIAL_GUESS) {
190:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
191:       switch (bs) {
192:       case 1:
193:         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
194:         t[0] = b[0];
195:         i2     = 1;
196:         idiag += 1;
197:         for (i=1; i<m; i++) {
198:           v  = aa + ai[i];
199:           vi = aj + ai[i];
200:           nz = diag[i] - ai[i];
201:           s[0] = b[i2];
202:           for (j=0; j<nz; j++) {
203:             xw[0] = x[vi[j]];
204:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
205:           }
206:           t[i2] = s[0];
207:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
208:           x[i2]  = xw[0];
209:           idiag += 1;
210:           i2    += 1;
211:         }
212:         break;
213:       case 2:
214:         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
215:         t[0] = b[0]; t[1] = b[1];
216:         i2     = 2;
217:         idiag += 4;
218:         for (i=1; i<m; i++) {
219:           v  = aa + 4*ai[i];
220:           vi = aj + ai[i];
221:           nz = diag[i] - ai[i];
222:           s[0] = b[i2]; s[1] = b[i2+1];
223:           for (j=0; j<nz; j++) {
224:             idx = 2*vi[j];
225:             it  = 4*j;
226:             xw[0] = x[idx]; xw[1] = x[1+idx];
227:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
228:           }
229:           t[i2] = s[0]; t[i2+1] = s[1];
230:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
231:           x[i2]   = xw[0]; x[i2+1] = xw[1];
232:           idiag  += 4;
233:           i2     += 2;
234:         }
235:         break;
236:       case 3:
237:         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
238:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
239:         i2     = 3;
240:         idiag += 9;
241:         for (i=1; i<m; i++) {
242:           v  = aa + 9*ai[i];
243:           vi = aj + ai[i];
244:           nz = diag[i] - ai[i];
245:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
246:           while (nz--) {
247:             idx = 3*(*vi++);
248:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
249:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
250:             v  += 9;
251:           }
252:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
253:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
254:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
255:           idiag  += 9;
256:           i2     += 3;
257:         }
258:         break;
259:       case 4:
260:         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
261:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
262:         i2     = 4;
263:         idiag += 16;
264:         for (i=1; i<m; i++) {
265:           v  = aa + 16*ai[i];
266:           vi = aj + ai[i];
267:           nz = diag[i] - ai[i];
268:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
269:           while (nz--) {
270:             idx = 4*(*vi++);
271:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
272:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
273:             v  += 16;
274:           }
275:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
276:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
277:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
278:           idiag  += 16;
279:           i2     += 4;
280:         }
281:         break;
282:       case 5:
283:         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
284:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
285:         i2     = 5;
286:         idiag += 25;
287:         for (i=1; i<m; i++) {
288:           v  = aa + 25*ai[i];
289:           vi = aj + ai[i];
290:           nz = diag[i] - ai[i];
291:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
292:           while (nz--) {
293:             idx = 5*(*vi++);
294:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
295:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
296:             v  += 25;
297:           }
298:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
299:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
300:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
301:           idiag  += 25;
302:           i2     += 5;
303:         }
304:         break;
305:       case 6:
306:         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
307:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
308:         i2     = 6;
309:         idiag += 36;
310:         for (i=1; i<m; i++) {
311:           v  = aa + 36*ai[i];
312:           vi = aj + ai[i];
313:           nz = diag[i] - ai[i];
314:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
315:           while (nz--) {
316:             idx = 6*(*vi++);
317:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
318:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
319:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
320:             v  += 36;
321:           }
322:           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
323:           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
324:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
325:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
326:           idiag  += 36;
327:           i2     += 6;
328:         }
329:         break;
330:       case 7:
331:         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
332:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
333:         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
334:         i2     = 7;
335:         idiag += 49;
336:         for (i=1; i<m; i++) {
337:           v  = aa + 49*ai[i];
338:           vi = aj + ai[i];
339:           nz = diag[i] - ai[i];
340:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
341:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
342:           while (nz--) {
343:             idx = 7*(*vi++);
344:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
345:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
346:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
347:             v  += 49;
348:           }
349:           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
350:           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
351:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
352:           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
353:           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
354:           idiag  += 49;
355:           i2     += 7;
356:         }
357:         break;
358:       default:
359:         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
360:         PetscMemcpy(t,b,bs*sizeof(PetscScalar));
361:         i2     = bs;
362:         idiag += bs2;
363:         for (i=1; i<m; i++) {
364:           v  = aa + bs2*ai[i];
365:           vi = aj + ai[i];
366:           nz = diag[i] - ai[i];

368:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
369:           /* copy all rows of x that are needed into contiguous space */
370:           workt = work;
371:           for (j=0; j<nz; j++) {
372:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
373:             workt += bs;
374:           }
375:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
376:           PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));
377:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);

379:           idiag += bs2;
380:           i2    += bs;
381:         }
382:         break;
383:       }
384:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
385:       PetscLogFlops(1.0*bs2*a->nz);
386:       xb = t;
387:     }
388:     else xb = b;
389:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
390:       idiag = a->idiag+bs2*(a->mbs-1);
391:       i2 = bs * (m-1);
392:       switch (bs) {
393:       case 1:
394:         s[0]  = xb[i2];
395:         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396:         x[i2] = xw[0];
397:         i2   -= 1;
398:         for (i=m-2; i>=0; i--) {
399:           v  = aa + (diag[i]+1);
400:           vi = aj + diag[i] + 1;
401:           nz = ai[i+1] - diag[i] - 1;
402:           s[0] = xb[i2];
403:           for (j=0; j<nz; j++) {
404:             xw[0] = x[vi[j]];
405:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
406:           }
407:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
408:           x[i2]  = xw[0];
409:           idiag -= 1;
410:           i2    -= 1;
411:         }
412:         break;
413:       case 2:
414:         s[0]  = xb[i2]; s[1] = xb[i2+1];
415:         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
416:         x[i2] = xw[0]; x[i2+1] = xw[1];
417:         i2    -= 2;
418:         idiag -= 4;
419:         for (i=m-2; i>=0; i--) {
420:           v  = aa + 4*(diag[i] + 1);
421:           vi = aj + diag[i] + 1;
422:           nz = ai[i+1] - diag[i] - 1;
423:           s[0] = xb[i2]; s[1] = xb[i2+1];
424:           for (j=0; j<nz; j++) {
425:             idx = 2*vi[j];
426:             it  = 4*j;
427:             xw[0] = x[idx]; xw[1] = x[1+idx];
428:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
429:           }
430:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
431:           x[i2]   = xw[0]; x[i2+1] = xw[1];
432:           idiag  -= 4;
433:           i2     -= 2;
434:         }
435:         break;
436:       case 3:
437:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
438:         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
439:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
440:         i2    -= 3;
441:         idiag -= 9;
442:         for (i=m-2; i>=0; i--) {
443:           v  = aa + 9*(diag[i]+1);
444:           vi = aj + diag[i] + 1;
445:           nz = ai[i+1] - diag[i] - 1;
446:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
447:           while (nz--) {
448:             idx = 3*(*vi++);
449:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
450:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
451:             v  += 9;
452:           }
453:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
454:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
455:           idiag  -= 9;
456:           i2     -= 3;
457:         }
458:         break;
459:       case 4:
460:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
461:         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
462:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
463:         i2    -= 4;
464:         idiag -= 16;
465:         for (i=m-2; i>=0; i--) {
466:           v  = aa + 16*(diag[i]+1);
467:           vi = aj + diag[i] + 1;
468:           nz = ai[i+1] - diag[i] - 1;
469:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
470:           while (nz--) {
471:             idx = 4*(*vi++);
472:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
473:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
474:             v  += 16;
475:           }
476:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
477:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
478:           idiag  -= 16;
479:           i2     -= 4;
480:         }
481:         break;
482:       case 5:
483:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
484:         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
485:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
486:         i2    -= 5;
487:         idiag -= 25;
488:         for (i=m-2; i>=0; i--) {
489:           v  = aa + 25*(diag[i]+1);
490:           vi = aj + diag[i] + 1;
491:           nz = ai[i+1] - diag[i] - 1;
492:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
493:           while (nz--) {
494:             idx = 5*(*vi++);
495:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
496:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
497:             v  += 25;
498:           }
499:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
500:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
501:           idiag  -= 25;
502:           i2     -= 5;
503:         }
504:         break;
505:       case 6:
506:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
507:         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
508:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
509:         i2    -= 6;
510:         idiag -= 36;
511:         for (i=m-2; i>=0; i--) {
512:           v  = aa + 36*(diag[i]+1);
513:           vi = aj + diag[i] + 1;
514:           nz = ai[i+1] - diag[i] - 1;
515:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
516:           while (nz--) {
517:             idx = 6*(*vi++);
518:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
519:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
520:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
521:             v  += 36;
522:           }
523:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
524:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
525:           idiag  -= 36;
526:           i2     -= 6;
527:         }
528:         break;
529:       case 7:
530:         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
531:         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
532:         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
533:         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
534:         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
535:         i2    -= 7;
536:         idiag -= 49;
537:         for (i=m-2; i>=0; i--) {
538:           v  = aa + 49*(diag[i]+1);
539:           vi = aj + diag[i] + 1;
540:           nz = ai[i+1] - diag[i] - 1;
541:           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
542:           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
543:           while (nz--) {
544:             idx = 7*(*vi++);
545:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
546:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
547:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
548:             v  += 49;
549:           }
550:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
551:           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
552:           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
553:           idiag  -= 49;
554:           i2     -= 7;
555:         }
556:         break;
557:       default:
558:         PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
559:         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
560:         i2    -= bs;
561:         idiag -= bs2;
562:         for (i=m-2; i>=0; i--) {
563:           v  = aa + bs2*(diag[i]+1);
564:           vi = aj + diag[i] + 1;
565:           nz = ai[i+1] - diag[i] - 1;

567:           PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
568:           /* copy all rows of x that are needed into contiguous space */
569:           workt = work;
570:           for (j=0; j<nz; j++) {
571:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
572:             workt += bs;
573:           }
574:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
575:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);

577:           idiag -= bs2;
578:           i2    -= bs;
579:         }
580:         break;
581:       }
582:       PetscLogFlops(1.0*bs2*(a->nz));
583:     }
584:     its--;
585:   }
586:   while (its--) {
587:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
588:       idiag = a->idiag;
589:       i2 = 0;
590:       switch (bs) {
591:       case 1:
592:         for (i=0; i<m; i++) {
593:           v  = aa + ai[i];
594:           vi = aj + ai[i];
595:           nz = ai[i+1] - ai[i];
596:           s[0] = b[i2];
597:           for (j=0; j<nz; j++) {
598:             xw[0] = x[vi[j]];
599:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
600:           }
601:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
602:           x[i2] += xw[0];
603:           idiag += 1;
604:           i2    += 1;
605:         }
606:         break;
607:       case 2:
608:         for (i=0; i<m; i++) {
609:           v  = aa + 4*ai[i];
610:           vi = aj + ai[i];
611:           nz = ai[i+1] - ai[i];
612:           s[0] = b[i2]; s[1] = b[i2+1];
613:           for (j=0; j<nz; j++) {
614:             idx = 2*vi[j];
615:             it  = 4*j;
616:             xw[0] = x[idx]; xw[1] = x[1+idx];
617:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
618:           }
619:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
620:           x[i2]  += xw[0]; x[i2+1] += xw[1];
621:           idiag  += 4;
622:           i2     += 2;
623:         }
624:         break;
625:       case 3:
626:         for (i=0; i<m; i++) {
627:           v  = aa + 9*ai[i];
628:           vi = aj + ai[i];
629:           nz = ai[i+1] - ai[i];
630:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
631:           while (nz--) {
632:             idx = 3*(*vi++);
633:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
634:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
635:             v  += 9;
636:           }
637:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
638:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
639:           idiag  += 9;
640:           i2     += 3;
641:         }
642:         break;
643:       case 4:
644:         for (i=0; i<m; i++) {
645:           v  = aa + 16*ai[i];
646:           vi = aj + ai[i];
647:           nz = ai[i+1] - ai[i];
648:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
649:           while (nz--) {
650:             idx = 4*(*vi++);
651:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
652:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
653:             v  += 16;
654:           }
655:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
656:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
657:           idiag  += 16;
658:           i2     += 4;
659:         }
660:         break;
661:       case 5:
662:         for (i=0; i<m; i++) {
663:           v  = aa + 25*ai[i];
664:           vi = aj + ai[i];
665:           nz = ai[i+1] - ai[i];
666:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
667:           while (nz--) {
668:             idx = 5*(*vi++);
669:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
670:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
671:             v  += 25;
672:           }
673:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
674:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
675:           idiag  += 25;
676:           i2     += 5;
677:         }
678:         break;
679:       case 6:
680:         for (i=0; i<m; i++) {
681:           v  = aa + 36*ai[i];
682:           vi = aj + ai[i];
683:           nz = ai[i+1] - ai[i];
684:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
685:           while (nz--) {
686:             idx = 6*(*vi++);
687:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
688:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
689:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
690:             v  += 36;
691:           }
692:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
693:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
694:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
695:           idiag  += 36;
696:           i2     += 6;
697:         }
698:         break;
699:       case 7:
700:         for (i=0; i<m; i++) {
701:           v  = aa + 49*ai[i];
702:           vi = aj + ai[i];
703:           nz = ai[i+1] - ai[i];
704:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
705:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
706:           while (nz--) {
707:             idx = 7*(*vi++);
708:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
709:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
710:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
711:             v  += 49;
712:           }
713:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
714:           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
715:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
716:           idiag  += 49;
717:           i2     += 7;
718:         }
719:         break;
720:       default:
721:         for (i=0; i<m; i++) {
722:           v  = aa + bs2*ai[i];
723:           vi = aj + ai[i];
724:           nz = ai[i+1] - ai[i];

726:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
727:           /* copy all rows of x that are needed into contiguous space */
728:           workt = work;
729:           for (j=0; j<nz; j++) {
730:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
731:             workt += bs;
732:           }
733:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
734:           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);

736:           idiag += bs2;
737:           i2    += bs;
738:         }
739:         break;
740:       }
741:       PetscLogFlops(2.0*bs2*a->nz);
742:     }
743:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
744:       idiag = a->idiag+bs2*(a->mbs-1);
745:       i2 = bs * (m-1);
746:       switch (bs) {
747:       case 1:
748:         for (i=m-1; i>=0; i--) {
749:           v  = aa + ai[i];
750:           vi = aj + ai[i];
751:           nz = ai[i+1] - ai[i];
752:           s[0] = b[i2];
753:           for (j=0; j<nz; j++) {
754:             xw[0] = x[vi[j]];
755:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
756:           }
757:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
758:           x[i2] += xw[0];
759:           idiag -= 1;
760:           i2    -= 1;
761:         }
762:         break;
763:       case 2:
764:         for (i=m-1; i>=0; i--) {
765:           v  = aa + 4*ai[i];
766:           vi = aj + ai[i];
767:           nz = ai[i+1] - ai[i];
768:           s[0] = b[i2]; s[1] = b[i2+1];
769:           for (j=0; j<nz; j++) {
770:             idx = 2*vi[j];
771:             it  = 4*j;
772:             xw[0] = x[idx]; xw[1] = x[1+idx];
773:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
774:           }
775:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
776:           x[i2]  += xw[0]; x[i2+1] += xw[1];
777:           idiag  -= 4;
778:           i2     -= 2;
779:         }
780:         break;
781:       case 3:
782:         for (i=m-1; i>=0; i--) {
783:           v  = aa + 9*ai[i];
784:           vi = aj + ai[i];
785:           nz = ai[i+1] - ai[i];
786:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
787:           while (nz--) {
788:             idx = 3*(*vi++);
789:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
790:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
791:             v  += 9;
792:           }
793:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
794:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
795:           idiag  -= 9;
796:           i2     -= 3;
797:         }
798:         break;
799:       case 4:
800:         for (i=m-1; i>=0; i--) {
801:           v  = aa + 16*ai[i];
802:           vi = aj + ai[i];
803:           nz = ai[i+1] - ai[i];
804:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
805:           while (nz--) {
806:             idx = 4*(*vi++);
807:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
808:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
809:             v  += 16;
810:           }
811:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
812:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
813:           idiag  -= 16;
814:           i2     -= 4;
815:         }
816:         break;
817:       case 5:
818:         for (i=m-1; i>=0; i--) {
819:           v  = aa + 25*ai[i];
820:           vi = aj + ai[i];
821:           nz = ai[i+1] - ai[i];
822:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
823:           while (nz--) {
824:             idx = 5*(*vi++);
825:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
826:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
827:             v  += 25;
828:           }
829:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
830:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
831:           idiag  -= 25;
832:           i2     -= 5;
833:         }
834:         break;
835:       case 6:
836:         for (i=m-1; i>=0; i--) {
837:           v  = aa + 36*ai[i];
838:           vi = aj + ai[i];
839:           nz = ai[i+1] - ai[i];
840:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
841:           while (nz--) {
842:             idx = 6*(*vi++);
843:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
844:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
845:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
846:             v  += 36;
847:           }
848:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
849:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
850:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
851:           idiag  -= 36;
852:           i2     -= 6;
853:         }
854:         break;
855:       case 7:
856:         for (i=m-1; i>=0; i--) {
857:           v  = aa + 49*ai[i];
858:           vi = aj + ai[i];
859:           nz = ai[i+1] - ai[i];
860:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
861:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
862:           while (nz--) {
863:             idx = 7*(*vi++);
864:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
865:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
866:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
867:             v  += 49;
868:           }
869:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
870:           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
871:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
872:           idiag  -= 49;
873:           i2     -= 7;
874:         }
875:         break;
876:       default:
877:         for (i=m-1; i>=0; i--) {
878:           v  = aa + bs2*ai[i];
879:           vi = aj + ai[i];
880:           nz = ai[i+1] - ai[i];

882:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
883:           /* copy all rows of x that are needed into contiguous space */
884:           workt = work;
885:           for (j=0; j<nz; j++) {
886:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
887:             workt += bs;
888:           }
889:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
890:           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);

892:           idiag -= bs2;
893:           i2    -= bs;
894:         }
895:         break;
896:       }
897:       PetscLogFlops(2.0*bs2*(a->nz));
898:     }
899:   }
900:   VecRestoreArray(xx,&x);
901:   VecRestoreArrayRead(bb,&b);
902:   return(0);
903: }


906: /*
907:     Special version for direct calls from Fortran (Used in PETSc-fun3d)
908: */
909: #if defined(PETSC_HAVE_FORTRAN_CAPS)
910: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
911: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
912: #define matsetvaluesblocked4_ matsetvaluesblocked4
913: #endif

917: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
918: {
919:   Mat               A  = *AA;
920:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
921:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
922:   PetscInt          *ai    =a->i,*ailen=a->ilen;
923:   PetscInt          *aj    =a->j,stepval,lastcol = -1;
924:   const PetscScalar *value = v;
925:   MatScalar         *ap,*aa = a->a,*bap;

928:   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
929:   stepval = (n-1)*4;
930:   for (k=0; k<m; k++) { /* loop over added rows */
931:     row  = im[k];
932:     rp   = aj + ai[row];
933:     ap   = aa + 16*ai[row];
934:     nrow = ailen[row];
935:     low  = 0;
936:     high = nrow;
937:     for (l=0; l<n; l++) { /* loop over added columns */
938:       col = in[l];
939:       if (col <= lastcol)  low = 0;
940:       else                high = nrow;
941:       lastcol = col;
942:       value   = v + k*(stepval+4 + l)*4;
943:       while (high-low > 7) {
944:         t = (low+high)/2;
945:         if (rp[t] > col) high = t;
946:         else             low  = t;
947:       }
948:       for (i=low; i<high; i++) {
949:         if (rp[i] > col) break;
950:         if (rp[i] == col) {
951:           bap = ap +  16*i;
952:           for (ii=0; ii<4; ii++,value+=stepval) {
953:             for (jj=ii; jj<16; jj+=4) {
954:               bap[jj] += *value++;
955:             }
956:           }
957:           goto noinsert2;
958:         }
959:       }
960:       N = nrow++ - 1;
961:       high++; /* added new column index thus must search to one higher than before */
962:       /* shift up all the later entries in this row */
963:       for (ii=N; ii>=i; ii--) {
964:         rp[ii+1] = rp[ii];
965:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
966:       }
967:       if (N >= i) {
968:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
969:       }
970:       rp[i] = col;
971:       bap   = ap +  16*i;
972:       for (ii=0; ii<4; ii++,value+=stepval) {
973:         for (jj=ii; jj<16; jj+=4) {
974:           bap[jj] = *value++;
975:         }
976:       }
977:       noinsert2:;
978:       low = i;
979:     }
980:     ailen[row] = nrow;
981:   }
982:   PetscFunctionReturnVoid();
983: }

985: #if defined(PETSC_HAVE_FORTRAN_CAPS)
986: #define matsetvalues4_ MATSETVALUES4
987: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
988: #define matsetvalues4_ matsetvalues4
989: #endif

993: PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
994: {
995:   Mat         A  = *AA;
996:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
997:   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
998:   PetscInt    *ai=a->i,*ailen=a->ilen;
999:   PetscInt    *aj=a->j,brow,bcol;
1000:   PetscInt    ridx,cidx,lastcol = -1;
1001:   MatScalar   *ap,value,*aa=a->a,*bap;

1004:   for (k=0; k<m; k++) { /* loop over added rows */
1005:     row  = im[k]; brow = row/4;
1006:     rp   = aj + ai[brow];
1007:     ap   = aa + 16*ai[brow];
1008:     nrow = ailen[brow];
1009:     low  = 0;
1010:     high = nrow;
1011:     for (l=0; l<n; l++) { /* loop over added columns */
1012:       col   = in[l]; bcol = col/4;
1013:       ridx  = row % 4; cidx = col % 4;
1014:       value = v[l + k*n];
1015:       if (col <= lastcol)  low = 0;
1016:       else                high = nrow;
1017:       lastcol = col;
1018:       while (high-low > 7) {
1019:         t = (low+high)/2;
1020:         if (rp[t] > bcol) high = t;
1021:         else              low  = t;
1022:       }
1023:       for (i=low; i<high; i++) {
1024:         if (rp[i] > bcol) break;
1025:         if (rp[i] == bcol) {
1026:           bap   = ap +  16*i + 4*cidx + ridx;
1027:           *bap += value;
1028:           goto noinsert1;
1029:         }
1030:       }
1031:       N = nrow++ - 1;
1032:       high++; /* added new column thus must search to one higher than before */
1033:       /* shift up all the later entries in this row */
1034:       for (ii=N; ii>=i; ii--) {
1035:         rp[ii+1] = rp[ii];
1036:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1037:       }
1038:       if (N>=i) {
1039:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1040:       }
1041:       rp[i]                    = bcol;
1042:       ap[16*i + 4*cidx + ridx] = value;
1043: noinsert1:;
1044:       low = i;
1045:     }
1046:     ailen[brow] = nrow;
1047:   }
1048:   PetscFunctionReturnVoid();
1049: }

1051: /*
1052:      Checks for missing diagonals
1053: */
1056: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1057: {
1058:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1060:   PetscInt       *diag,*ii = a->i,i;

1063:   MatMarkDiagonal_SeqBAIJ(A);
1064:   *missing = PETSC_FALSE;
1065:   if (A->rmap->n > 0 && !ii) {
1066:     *missing = PETSC_TRUE;
1067:     if (d) *d = 0;
1068:     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1069:   } else {
1070:     diag = a->diag;
1071:     for (i=0; i<a->mbs; i++) {
1072:       if (diag[i] >= ii[i+1]) {
1073:         *missing = PETSC_TRUE;
1074:         if (d) *d = i;
1075:         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1076:         break;
1077:       }
1078:     }
1079:   }
1080:   return(0);
1081: }

1085: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1086: {
1087:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1089:   PetscInt       i,j,m = a->mbs;

1092:   if (!a->diag) {
1093:     PetscMalloc1(m,&a->diag);
1094:     PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));
1095:     a->free_diag = PETSC_TRUE;
1096:   }
1097:   for (i=0; i<m; i++) {
1098:     a->diag[i] = a->i[i+1];
1099:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1100:       if (a->j[j] == i) {
1101:         a->diag[i] = j;
1102:         break;
1103:       }
1104:     }
1105:   }
1106:   return(0);
1107: }


1112: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1113: {
1114:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1116:   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1117:   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;

1120:   *nn = n;
1121:   if (!ia) return(0);
1122:   if (symmetric) {
1123:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);
1124:     nz   = tia[n];
1125:   } else {
1126:     tia = a->i; tja = a->j;
1127:   }

1129:   if (!blockcompressed && bs > 1) {
1130:     (*nn) *= bs;
1131:     /* malloc & create the natural set of indices */
1132:     PetscMalloc1((n+1)*bs,ia);
1133:     if (n) {
1134:       (*ia)[0] = oshift;
1135:       for (j=1; j<bs; j++) {
1136:         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1137:       }
1138:     }

1140:     for (i=1; i<n; i++) {
1141:       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1142:       for (j=1; j<bs; j++) {
1143:         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1144:       }
1145:     }
1146:     if (n) {
1147:       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1148:     }

1150:     if (inja) {
1151:       PetscMalloc1(nz*bs*bs,ja);
1152:       cnt = 0;
1153:       for (i=0; i<n; i++) {
1154:         for (j=0; j<bs; j++) {
1155:           for (k=tia[i]; k<tia[i+1]; k++) {
1156:             for (l=0; l<bs; l++) {
1157:               (*ja)[cnt++] = bs*tja[k] + l;
1158:             }
1159:           }
1160:         }
1161:       }
1162:     }

1164:     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1165:       PetscFree(tia);
1166:       PetscFree(tja);
1167:     }
1168:   } else if (oshift == 1) {
1169:     if (symmetric) {
1170:       nz = tia[A->rmap->n/bs];
1171:       /*  add 1 to i and j indices */
1172:       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1173:       *ia = tia;
1174:       if (ja) {
1175:         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1176:         *ja = tja;
1177:       }
1178:     } else {
1179:       nz = a->i[A->rmap->n/bs];
1180:       /* malloc space and  add 1 to i and j indices */
1181:       PetscMalloc1(A->rmap->n/bs+1,ia);
1182:       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1183:       if (ja) {
1184:         PetscMalloc1(nz,ja);
1185:         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1186:       }
1187:     }
1188:   } else {
1189:     *ia = tia;
1190:     if (ja) *ja = tja;
1191:   }
1192:   return(0);
1193: }

1197: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1198: {

1202:   if (!ia) return(0);
1203:   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1204:     PetscFree(*ia);
1205:     if (ja) {PetscFree(*ja);}
1206:   }
1207:   return(0);
1208: }

1212: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1213: {
1214:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1218: #if defined(PETSC_USE_LOG)
1219:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1220: #endif
1221:   MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1222:   ISDestroy(&a->row);
1223:   ISDestroy(&a->col);
1224:   if (a->free_diag) {PetscFree(a->diag);}
1225:   PetscFree(a->idiag);
1226:   if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1227:   PetscFree(a->solve_work);
1228:   PetscFree(a->mult_work);
1229:   PetscFree(a->sor_workt);
1230:   PetscFree(a->sor_work);
1231:   ISDestroy(&a->icol);
1232:   PetscFree(a->saved_values);
1233:   PetscFree2(a->compressedrow.i,a->compressedrow.rindex);

1235:   MatDestroy(&a->sbaijMat);
1236:   MatDestroy(&a->parent);
1237:   PetscFree(A->data);

1239:   PetscObjectChangeTypeName((PetscObject)A,0);
1240:   PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);
1241:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
1242:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
1243:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);
1244:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);
1245:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);
1246:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);
1247:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);
1248:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);
1249:   PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);
1250:   return(0);
1251: }

1255: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1256: {
1257:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1261:   switch (op) {
1262:   case MAT_ROW_ORIENTED:
1263:     a->roworiented = flg;
1264:     break;
1265:   case MAT_KEEP_NONZERO_PATTERN:
1266:     a->keepnonzeropattern = flg;
1267:     break;
1268:   case MAT_NEW_NONZERO_LOCATIONS:
1269:     a->nonew = (flg ? 0 : 1);
1270:     break;
1271:   case MAT_NEW_NONZERO_LOCATION_ERR:
1272:     a->nonew = (flg ? -1 : 0);
1273:     break;
1274:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1275:     a->nonew = (flg ? -2 : 0);
1276:     break;
1277:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1278:     a->nounused = (flg ? -1 : 0);
1279:     break;
1280:   case MAT_NEW_DIAGONALS:
1281:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1282:   case MAT_USE_HASH_TABLE:
1283:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1284:     break;
1285:   case MAT_SPD:
1286:   case MAT_SYMMETRIC:
1287:   case MAT_STRUCTURALLY_SYMMETRIC:
1288:   case MAT_HERMITIAN:
1289:   case MAT_SYMMETRY_ETERNAL:
1290:     /* These options are handled directly by MatSetOption() */
1291:     break;
1292:   default:
1293:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1294:   }
1295:   return(0);
1296: }

1298: /* used for both SeqBAIJ and SeqSBAIJ matrices */
1301: PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1302: {
1304:   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1305:   MatScalar      *aa_i;
1306:   PetscScalar    *v_i;

1309:   bs  = A->rmap->bs;
1310:   bs2 = bs*bs;
1311:   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);

1313:   bn  = row/bs;   /* Block number */
1314:   bp  = row % bs; /* Block Position */
1315:   M   = ai[bn+1] - ai[bn];
1316:   *nz = bs*M;

1318:   if (v) {
1319:     *v = 0;
1320:     if (*nz) {
1321:       PetscMalloc1(*nz,v);
1322:       for (i=0; i<M; i++) { /* for each block in the block row */
1323:         v_i  = *v + i*bs;
1324:         aa_i = aa + bs2*(ai[bn] + i);
1325:         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1326:       }
1327:     }
1328:   }

1330:   if (idx) {
1331:     *idx = 0;
1332:     if (*nz) {
1333:       PetscMalloc1(*nz,idx);
1334:       for (i=0; i<M; i++) { /* for each block in the block row */
1335:         idx_i = *idx + i*bs;
1336:         itmp  = bs*aj[ai[bn] + i];
1337:         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1338:       }
1339:     }
1340:   }
1341:   return(0);
1342: }

1346: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1347: {
1348:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1352:   MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);
1353:   return(0);
1354: }

1358: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1359: {

1363:   if (idx) {PetscFree(*idx);}
1364:   if (v)   {PetscFree(*v);}
1365:   return(0);
1366: }

1368: extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);

1372: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1373: {
1374:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1375:   Mat            C;
1377:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1378:   PetscInt       *rows,*cols,bs2=a->bs2;
1379:   MatScalar      *array;

1382:   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1383:   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1384:     PetscCalloc1(1+nbs,&col);

1386:     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1387:     MatCreate(PetscObjectComm((PetscObject)A),&C);
1388:     MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1389:     MatSetType(C,((PetscObject)A)->type_name);
1390:     MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);
1391:     PetscFree(col);
1392:   } else {
1393:     C = *B;
1394:   }

1396:   array = a->a;
1397:   PetscMalloc2(bs,&rows,bs,&cols);
1398:   for (i=0; i<mbs; i++) {
1399:     cols[0] = i*bs;
1400:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1401:     len = ai[i+1] - ai[i];
1402:     for (j=0; j<len; j++) {
1403:       rows[0] = (*aj++)*bs;
1404:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1405:       MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);
1406:       array += bs2;
1407:     }
1408:   }
1409:   PetscFree2(rows,cols);

1411:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1412:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

1414:   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1415:     *B = C;
1416:   } else {
1417:     MatHeaderMerge(A,&C);
1418:   }
1419:   return(0);
1420: }

1424: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1425: {
1427:   Mat            Btrans;

1430:   *f   = PETSC_FALSE;
1431:   MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1432:   MatEqual_SeqBAIJ(B,Btrans,f);
1433:   MatDestroy(&Btrans);
1434:   return(0);
1435: }

1439: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1440: {
1441:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1443:   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1444:   int            fd;
1445:   PetscScalar    *aa;
1446:   FILE           *file;

1449:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1450:   PetscMalloc1(4+A->rmap->N,&col_lens);
1451:   col_lens[0] = MAT_FILE_CLASSID;

1453:   col_lens[1] = A->rmap->N;
1454:   col_lens[2] = A->cmap->n;
1455:   col_lens[3] = a->nz*bs2;

1457:   /* store lengths of each row and write (including header) to file */
1458:   count = 0;
1459:   for (i=0; i<a->mbs; i++) {
1460:     for (j=0; j<bs; j++) {
1461:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1462:     }
1463:   }
1464:   PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1465:   PetscFree(col_lens);

1467:   /* store column indices (zero start index) */
1468:   PetscMalloc1((a->nz+1)*bs2,&jj);
1469:   count = 0;
1470:   for (i=0; i<a->mbs; i++) {
1471:     for (j=0; j<bs; j++) {
1472:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1473:         for (l=0; l<bs; l++) {
1474:           jj[count++] = bs*a->j[k] + l;
1475:         }
1476:       }
1477:     }
1478:   }
1479:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1480:   PetscFree(jj);

1482:   /* store nonzero values */
1483:   PetscMalloc1((a->nz+1)*bs2,&aa);
1484:   count = 0;
1485:   for (i=0; i<a->mbs; i++) {
1486:     for (j=0; j<bs; j++) {
1487:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1488:         for (l=0; l<bs; l++) {
1489:           aa[count++] = a->a[bs2*k + l*bs + j];
1490:         }
1491:       }
1492:     }
1493:   }
1494:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1495:   PetscFree(aa);

1497:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1498:   if (file) {
1499:     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1500:   }
1501:   return(0);
1502: }

1506: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1507: {
1508:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1509:   PetscErrorCode    ierr;
1510:   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1511:   PetscViewerFormat format;

1514:   PetscViewerGetFormat(viewer,&format);
1515:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1516:     PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
1517:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1518:     const char *matname;
1519:     Mat        aij;
1520:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1521:     PetscObjectGetName((PetscObject)A,&matname);
1522:     PetscObjectSetName((PetscObject)aij,matname);
1523:     MatView(aij,viewer);
1524:     MatDestroy(&aij);
1525:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1526:       return(0);
1527:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1528:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1529:     for (i=0; i<a->mbs; i++) {
1530:       for (j=0; j<bs; j++) {
1531:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1532:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1533:           for (l=0; l<bs; l++) {
1534: #if defined(PETSC_USE_COMPLEX)
1535:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1536:               PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1537:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1538:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1539:               PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1540:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1541:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1542:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1543:             }
1544: #else
1545:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1546:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1547:             }
1548: #endif
1549:           }
1550:         }
1551:         PetscViewerASCIIPrintf(viewer,"\n");
1552:       }
1553:     }
1554:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1555:   } else {
1556:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1557:     for (i=0; i<a->mbs; i++) {
1558:       for (j=0; j<bs; j++) {
1559:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1560:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1561:           for (l=0; l<bs; l++) {
1562: #if defined(PETSC_USE_COMPLEX)
1563:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1564:               PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1565:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1566:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1567:               PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1568:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1569:             } else {
1570:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1571:             }
1572: #else
1573:             PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1574: #endif
1575:           }
1576:         }
1577:         PetscViewerASCIIPrintf(viewer,"\n");
1578:       }
1579:     }
1580:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1581:   }
1582:   PetscViewerFlush(viewer);
1583:   return(0);
1584: }

1586: #include <petscdraw.h>
1589: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1590: {
1591:   Mat               A = (Mat) Aa;
1592:   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1593:   PetscErrorCode    ierr;
1594:   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1595:   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1596:   MatScalar         *aa;
1597:   PetscViewer       viewer;
1598:   PetscViewerFormat format;

1601:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1602:   PetscViewerGetFormat(viewer,&format);
1603:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1605:   /* loop over matrix elements drawing boxes */

1607:   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1608:     PetscDrawCollectiveBegin(draw);
1609:     /* Blue for negative, Cyan for zero and  Red for positive */
1610:     color = PETSC_DRAW_BLUE;
1611:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1612:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1613:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1614:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1615:         aa  = a->a + j*bs2;
1616:         for (k=0; k<bs; k++) {
1617:           for (l=0; l<bs; l++) {
1618:             if (PetscRealPart(*aa++) >=  0.) continue;
1619:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1620:           }
1621:         }
1622:       }
1623:     }
1624:     color = PETSC_DRAW_CYAN;
1625:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1626:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1627:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1628:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1629:         aa  = a->a + j*bs2;
1630:         for (k=0; k<bs; k++) {
1631:           for (l=0; l<bs; l++) {
1632:             if (PetscRealPart(*aa++) != 0.) continue;
1633:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1634:           }
1635:         }
1636:       }
1637:     }
1638:     color = PETSC_DRAW_RED;
1639:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1640:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1641:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1642:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1643:         aa  = a->a + j*bs2;
1644:         for (k=0; k<bs; k++) {
1645:           for (l=0; l<bs; l++) {
1646:             if (PetscRealPart(*aa++) <= 0.) continue;
1647:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1648:           }
1649:         }
1650:       }
1651:     }
1652:     PetscDrawCollectiveEnd(draw);
1653:   } else {
1654:     /* use contour shading to indicate magnitude of values */
1655:     /* first determine max of all nonzero values */
1656:     PetscReal minv = 0.0, maxv = 0.0;
1657:     PetscDraw popup;

1659:     for (i=0; i<a->nz*a->bs2; i++) {
1660:       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1661:     }
1662:     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1663:     PetscDrawGetPopup(draw,&popup);
1664:     PetscDrawScalePopup(popup,0.0,maxv);

1666:     PetscDrawCollectiveBegin(draw);
1667:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1668:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1669:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1670:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1671:         aa  = a->a + j*bs2;
1672:         for (k=0; k<bs; k++) {
1673:           for (l=0; l<bs; l++) {
1674:             MatScalar v = *aa++;
1675:             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1676:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1677:           }
1678:         }
1679:       }
1680:     }
1681:     PetscDrawCollectiveEnd(draw);
1682:   }
1683:   return(0);
1684: }

1688: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1689: {
1691:   PetscReal      xl,yl,xr,yr,w,h;
1692:   PetscDraw      draw;
1693:   PetscBool      isnull;

1696:   PetscViewerDrawGetDraw(viewer,0,&draw);
1697:   PetscDrawIsNull(draw,&isnull);
1698:   if (isnull) return(0);

1700:   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1701:   xr  += w;          yr += h;        xl = -w;     yl = -h;
1702:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1703:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1704:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1705:   PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1706:   PetscDrawSave(draw);
1707:   return(0);
1708: }

1712: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1713: {
1715:   PetscBool      iascii,isbinary,isdraw;

1718:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1719:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1720:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1721:   if (iascii) {
1722:     MatView_SeqBAIJ_ASCII(A,viewer);
1723:   } else if (isbinary) {
1724:     MatView_SeqBAIJ_Binary(A,viewer);
1725:   } else if (isdraw) {
1726:     MatView_SeqBAIJ_Draw(A,viewer);
1727:   } else {
1728:     Mat B;
1729:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1730:     MatView(B,viewer);
1731:     MatDestroy(&B);
1732:   }
1733:   return(0);
1734: }


1739: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1740: {
1741:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1742:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1743:   PetscInt    *ai = a->i,*ailen = a->ilen;
1744:   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1745:   MatScalar   *ap,*aa = a->a;

1748:   for (k=0; k<m; k++) { /* loop over rows */
1749:     row = im[k]; brow = row/bs;
1750:     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1751:     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1752:     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1753:     nrow = ailen[brow];
1754:     for (l=0; l<n; l++) { /* loop over columns */
1755:       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1756:       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1757:       col  = in[l];
1758:       bcol = col/bs;
1759:       cidx = col%bs;
1760:       ridx = row%bs;
1761:       high = nrow;
1762:       low  = 0; /* assume unsorted */
1763:       while (high-low > 5) {
1764:         t = (low+high)/2;
1765:         if (rp[t] > bcol) high = t;
1766:         else             low  = t;
1767:       }
1768:       for (i=low; i<high; i++) {
1769:         if (rp[i] > bcol) break;
1770:         if (rp[i] == bcol) {
1771:           *v++ = ap[bs2*i+bs*cidx+ridx];
1772:           goto finished;
1773:         }
1774:       }
1775:       *v++ = 0.0;
1776: finished:;
1777:     }
1778:   }
1779:   return(0);
1780: }

1784: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1785: {
1786:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1787:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1788:   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1789:   PetscErrorCode    ierr;
1790:   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1791:   PetscBool         roworiented=a->roworiented;
1792:   const PetscScalar *value     = v;
1793:   MatScalar         *ap,*aa = a->a,*bap;

1796:   if (roworiented) {
1797:     stepval = (n-1)*bs;
1798:   } else {
1799:     stepval = (m-1)*bs;
1800:   }
1801:   for (k=0; k<m; k++) { /* loop over added rows */
1802:     row = im[k];
1803:     if (row < 0) continue;
1804: #if defined(PETSC_USE_DEBUG)
1805:     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1806: #endif
1807:     rp   = aj + ai[row];
1808:     ap   = aa + bs2*ai[row];
1809:     rmax = imax[row];
1810:     nrow = ailen[row];
1811:     low  = 0;
1812:     high = nrow;
1813:     for (l=0; l<n; l++) { /* loop over added columns */
1814:       if (in[l] < 0) continue;
1815: #if defined(PETSC_USE_DEBUG)
1816:       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1817: #endif
1818:       col = in[l];
1819:       if (roworiented) {
1820:         value = v + (k*(stepval+bs) + l)*bs;
1821:       } else {
1822:         value = v + (l*(stepval+bs) + k)*bs;
1823:       }
1824:       if (col <= lastcol) low = 0;
1825:       else high = nrow;
1826:       lastcol = col;
1827:       while (high-low > 7) {
1828:         t = (low+high)/2;
1829:         if (rp[t] > col) high = t;
1830:         else             low  = t;
1831:       }
1832:       for (i=low; i<high; i++) {
1833:         if (rp[i] > col) break;
1834:         if (rp[i] == col) {
1835:           bap = ap +  bs2*i;
1836:           if (roworiented) {
1837:             if (is == ADD_VALUES) {
1838:               for (ii=0; ii<bs; ii++,value+=stepval) {
1839:                 for (jj=ii; jj<bs2; jj+=bs) {
1840:                   bap[jj] += *value++;
1841:                 }
1842:               }
1843:             } else {
1844:               for (ii=0; ii<bs; ii++,value+=stepval) {
1845:                 for (jj=ii; jj<bs2; jj+=bs) {
1846:                   bap[jj] = *value++;
1847:                 }
1848:               }
1849:             }
1850:           } else {
1851:             if (is == ADD_VALUES) {
1852:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1853:                 for (jj=0; jj<bs; jj++) {
1854:                   bap[jj] += value[jj];
1855:                 }
1856:                 bap += bs;
1857:               }
1858:             } else {
1859:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1860:                 for (jj=0; jj<bs; jj++) {
1861:                   bap[jj]  = value[jj];
1862:                 }
1863:                 bap += bs;
1864:               }
1865:             }
1866:           }
1867:           goto noinsert2;
1868:         }
1869:       }
1870:       if (nonew == 1) goto noinsert2;
1871:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1872:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1873:       N = nrow++ - 1; high++;
1874:       /* shift up all the later entries in this row */
1875:       for (ii=N; ii>=i; ii--) {
1876:         rp[ii+1] = rp[ii];
1877:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1878:       }
1879:       if (N >= i) {
1880:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1881:       }
1882:       rp[i] = col;
1883:       bap   = ap +  bs2*i;
1884:       if (roworiented) {
1885:         for (ii=0; ii<bs; ii++,value+=stepval) {
1886:           for (jj=ii; jj<bs2; jj+=bs) {
1887:             bap[jj] = *value++;
1888:           }
1889:         }
1890:       } else {
1891:         for (ii=0; ii<bs; ii++,value+=stepval) {
1892:           for (jj=0; jj<bs; jj++) {
1893:             *bap++ = *value++;
1894:           }
1895:         }
1896:       }
1897: noinsert2:;
1898:       low = i;
1899:     }
1900:     ailen[row] = nrow;
1901:   }
1902:   return(0);
1903: }

1907: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1908: {
1909:   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1910:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1911:   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1913:   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1914:   MatScalar      *aa  = a->a,*ap;
1915:   PetscReal      ratio=0.6;

1918:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

1920:   if (m) rmax = ailen[0];
1921:   for (i=1; i<mbs; i++) {
1922:     /* move each row back by the amount of empty slots (fshift) before it*/
1923:     fshift += imax[i-1] - ailen[i-1];
1924:     rmax    = PetscMax(rmax,ailen[i]);
1925:     if (fshift) {
1926:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1927:       N  = ailen[i];
1928:       for (j=0; j<N; j++) {
1929:         ip[j-fshift] = ip[j];

1931:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1932:       }
1933:     }
1934:     ai[i] = ai[i-1] + ailen[i-1];
1935:   }
1936:   if (mbs) {
1937:     fshift += imax[mbs-1] - ailen[mbs-1];
1938:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1939:   }

1941:   /* reset ilen and imax for each row */
1942:   a->nonzerorowcnt = 0;
1943:   for (i=0; i<mbs; i++) {
1944:     ailen[i] = imax[i] = ai[i+1] - ai[i];
1945:     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1946:   }
1947:   a->nz = ai[mbs];

1949:   /* diagonals may have moved, so kill the diagonal pointers */
1950:   a->idiagvalid = PETSC_FALSE;
1951:   if (fshift && a->diag) {
1952:     PetscFree(a->diag);
1953:     PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));
1954:     a->diag = 0;
1955:   }
1956:   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1957:   PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1958:   PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1959:   PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);

1961:   A->info.mallocs    += a->reallocs;
1962:   a->reallocs         = 0;
1963:   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1964:   a->rmax             = rmax;

1966:   MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);
1967:   return(0);
1968: }

1970: /*
1971:    This function returns an array of flags which indicate the locations of contiguous
1972:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1973:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1974:    Assume: sizes should be long enough to hold all the values.
1975: */
1978: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1979: {
1980:   PetscInt  i,j,k,row;
1981:   PetscBool flg;

1984:   for (i=0,j=0; i<n; j++) {
1985:     row = idx[i];
1986:     if (row%bs!=0) { /* Not the begining of a block */
1987:       sizes[j] = 1;
1988:       i++;
1989:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1990:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1991:       i++;
1992:     } else { /* Begining of the block, so check if the complete block exists */
1993:       flg = PETSC_TRUE;
1994:       for (k=1; k<bs; k++) {
1995:         if (row+k != idx[i+k]) { /* break in the block */
1996:           flg = PETSC_FALSE;
1997:           break;
1998:         }
1999:       }
2000:       if (flg) { /* No break in the bs */
2001:         sizes[j] = bs;
2002:         i       += bs;
2003:       } else {
2004:         sizes[j] = 1;
2005:         i++;
2006:       }
2007:     }
2008:   }
2009:   *bs_max = j;
2010:   return(0);
2011: }

2015: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2016: {
2017:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2018:   PetscErrorCode    ierr;
2019:   PetscInt          i,j,k,count,*rows;
2020:   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2021:   PetscScalar       zero = 0.0;
2022:   MatScalar         *aa;
2023:   const PetscScalar *xx;
2024:   PetscScalar       *bb;

2027:   /* fix right hand side if needed */
2028:   if (x && b) {
2029:     VecGetArrayRead(x,&xx);
2030:     VecGetArray(b,&bb);
2031:     for (i=0; i<is_n; i++) {
2032:       bb[is_idx[i]] = diag*xx[is_idx[i]];
2033:     }
2034:     VecRestoreArrayRead(x,&xx);
2035:     VecRestoreArray(b,&bb);
2036:   }

2038:   /* Make a copy of the IS and  sort it */
2039:   /* allocate memory for rows,sizes */
2040:   PetscMalloc2(is_n,&rows,2*is_n,&sizes);

2042:   /* copy IS values to rows, and sort them */
2043:   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2044:   PetscSortInt(is_n,rows);

2046:   if (baij->keepnonzeropattern) {
2047:     for (i=0; i<is_n; i++) sizes[i] = 1;
2048:     bs_max          = is_n;
2049:   } else {
2050:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2051:     A->nonzerostate++;
2052:   }

2054:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2055:     row = rows[j];
2056:     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2057:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2058:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2059:     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2060:       if (diag != (PetscScalar)0.0) {
2061:         if (baij->ilen[row/bs] > 0) {
2062:           baij->ilen[row/bs]       = 1;
2063:           baij->j[baij->i[row/bs]] = row/bs;

2065:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
2066:         }
2067:         /* Now insert all the diagonal values for this bs */
2068:         for (k=0; k<bs; k++) {
2069:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2070:         }
2071:       } else { /* (diag == 0.0) */
2072:         baij->ilen[row/bs] = 0;
2073:       } /* end (diag == 0.0) */
2074:     } else { /* (sizes[i] != bs) */
2075: #if defined(PETSC_USE_DEBUG)
2076:       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2077: #endif
2078:       for (k=0; k<count; k++) {
2079:         aa[0] =  zero;
2080:         aa   += bs;
2081:       }
2082:       if (diag != (PetscScalar)0.0) {
2083:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2084:       }
2085:     }
2086:   }

2088:   PetscFree2(rows,sizes);
2089:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2090:   return(0);
2091: }

2095: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2096: {
2097:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2098:   PetscErrorCode    ierr;
2099:   PetscInt          i,j,k,count;
2100:   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2101:   PetscScalar       zero = 0.0;
2102:   MatScalar         *aa;
2103:   const PetscScalar *xx;
2104:   PetscScalar       *bb;
2105:   PetscBool         *zeroed,vecs = PETSC_FALSE;

2108:   /* fix right hand side if needed */
2109:   if (x && b) {
2110:     VecGetArrayRead(x,&xx);
2111:     VecGetArray(b,&bb);
2112:     vecs = PETSC_TRUE;
2113:   }

2115:   /* zero the columns */
2116:   PetscCalloc1(A->rmap->n,&zeroed);
2117:   for (i=0; i<is_n; i++) {
2118:     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2119:     zeroed[is_idx[i]] = PETSC_TRUE;
2120:   }
2121:   for (i=0; i<A->rmap->N; i++) {
2122:     if (!zeroed[i]) {
2123:       row = i/bs;
2124:       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2125:         for (k=0; k<bs; k++) {
2126:           col = bs*baij->j[j] + k;
2127:           if (zeroed[col]) {
2128:             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2129:             if (vecs) bb[i] -= aa[0]*xx[col];
2130:             aa[0] = 0.0;
2131:           }
2132:         }
2133:       }
2134:     } else if (vecs) bb[i] = diag*xx[i];
2135:   }
2136:   PetscFree(zeroed);
2137:   if (vecs) {
2138:     VecRestoreArrayRead(x,&xx);
2139:     VecRestoreArray(b,&bb);
2140:   }

2142:   /* zero the rows */
2143:   for (i=0; i<is_n; i++) {
2144:     row   = is_idx[i];
2145:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2146:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2147:     for (k=0; k<count; k++) {
2148:       aa[0] =  zero;
2149:       aa   += bs;
2150:     }
2151:     if (diag != (PetscScalar)0.0) {
2152:       (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2153:     }
2154:   }
2155:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2156:   return(0);
2157: }

2161: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2162: {
2163:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2164:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2165:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2166:   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2168:   PetscInt       ridx,cidx,bs2=a->bs2;
2169:   PetscBool      roworiented=a->roworiented;
2170:   MatScalar      *ap,value,*aa=a->a,*bap;

2173:   for (k=0; k<m; k++) { /* loop over added rows */
2174:     row  = im[k];
2175:     brow = row/bs;
2176:     if (row < 0) continue;
2177: #if defined(PETSC_USE_DEBUG)
2178:     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2179: #endif
2180:     rp   = aj + ai[brow];
2181:     ap   = aa + bs2*ai[brow];
2182:     rmax = imax[brow];
2183:     nrow = ailen[brow];
2184:     low  = 0;
2185:     high = nrow;
2186:     for (l=0; l<n; l++) { /* loop over added columns */
2187:       if (in[l] < 0) continue;
2188: #if defined(PETSC_USE_DEBUG)
2189:       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2190: #endif
2191:       col  = in[l]; bcol = col/bs;
2192:       ridx = row % bs; cidx = col % bs;
2193:       if (roworiented) {
2194:         value = v[l + k*n];
2195:       } else {
2196:         value = v[k + l*m];
2197:       }
2198:       if (col <= lastcol) low = 0; else high = nrow;
2199:       lastcol = col;
2200:       while (high-low > 7) {
2201:         t = (low+high)/2;
2202:         if (rp[t] > bcol) high = t;
2203:         else              low  = t;
2204:       }
2205:       for (i=low; i<high; i++) {
2206:         if (rp[i] > bcol) break;
2207:         if (rp[i] == bcol) {
2208:           bap = ap +  bs2*i + bs*cidx + ridx;
2209:           if (is == ADD_VALUES) *bap += value;
2210:           else                  *bap  = value;
2211:           goto noinsert1;
2212:         }
2213:       }
2214:       if (nonew == 1) goto noinsert1;
2215:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2216:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2217:       N = nrow++ - 1; high++;
2218:       /* shift up all the later entries in this row */
2219:       for (ii=N; ii>=i; ii--) {
2220:         rp[ii+1] = rp[ii];
2221:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2222:       }
2223:       if (N>=i) {
2224:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2225:       }
2226:       rp[i]                      = bcol;
2227:       ap[bs2*i + bs*cidx + ridx] = value;
2228:       a->nz++;
2229:       A->nonzerostate++;
2230: noinsert1:;
2231:       low = i;
2232:     }
2233:     ailen[brow] = nrow;
2234:   }
2235:   return(0);
2236: }

2240: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2241: {
2242:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2243:   Mat            outA;
2245:   PetscBool      row_identity,col_identity;

2248:   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2249:   ISIdentity(row,&row_identity);
2250:   ISIdentity(col,&col_identity);
2251:   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");

2253:   outA            = inA;
2254:   inA->factortype = MAT_FACTOR_LU;
2255:   PetscFree(inA->solvertype);
2256:   PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);

2258:   MatMarkDiagonal_SeqBAIJ(inA);

2260:   PetscObjectReference((PetscObject)row);
2261:   ISDestroy(&a->row);
2262:   a->row = row;
2263:   PetscObjectReference((PetscObject)col);
2264:   ISDestroy(&a->col);
2265:   a->col = col;

2267:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2268:   ISDestroy(&a->icol);
2269:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2270:   PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);

2272:   MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2273:   if (!a->solve_work) {
2274:     PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);
2275:     PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2276:   }
2277:   MatLUFactorNumeric(outA,inA,info);
2278:   return(0);
2279: }

2283: PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2284: {
2285:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2286:   PetscInt    i,nz,mbs;

2289:   nz  = baij->maxnz;
2290:   mbs = baij->mbs;
2291:   for (i=0; i<nz; i++) {
2292:     baij->j[i] = indices[i];
2293:   }
2294:   baij->nz = nz;
2295:   for (i=0; i<mbs; i++) {
2296:     baij->ilen[i] = baij->imax[i];
2297:   }
2298:   return(0);
2299: }

2303: /*@
2304:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2305:        in the matrix.

2307:   Input Parameters:
2308: +  mat - the SeqBAIJ matrix
2309: -  indices - the column indices

2311:   Level: advanced

2313:   Notes:
2314:     This can be called if you have precomputed the nonzero structure of the
2315:   matrix and want to provide it to the matrix object to improve the performance
2316:   of the MatSetValues() operation.

2318:     You MUST have set the correct numbers of nonzeros per row in the call to
2319:   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.

2321:     MUST be called before any calls to MatSetValues();

2323: @*/
2324: PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2325: {

2331:   PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2332:   return(0);
2333: }

2337: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2338: {
2339:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2341:   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2342:   PetscReal      atmp;
2343:   PetscScalar    *x,zero = 0.0;
2344:   MatScalar      *aa;
2345:   PetscInt       ncols,brow,krow,kcol;

2348:   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2349:   bs  = A->rmap->bs;
2350:   aa  = a->a;
2351:   ai  = a->i;
2352:   aj  = a->j;
2353:   mbs = a->mbs;

2355:   VecSet(v,zero);
2356:   VecGetArray(v,&x);
2357:   VecGetLocalSize(v,&n);
2358:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2359:   for (i=0; i<mbs; i++) {
2360:     ncols = ai[1] - ai[0]; ai++;
2361:     brow  = bs*i;
2362:     for (j=0; j<ncols; j++) {
2363:       for (kcol=0; kcol<bs; kcol++) {
2364:         for (krow=0; krow<bs; krow++) {
2365:           atmp = PetscAbsScalar(*aa);aa++;
2366:           row  = brow + krow;   /* row index */
2367:           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2368:         }
2369:       }
2370:       aj++;
2371:     }
2372:   }
2373:   VecRestoreArray(v,&x);
2374:   return(0);
2375: }

2379: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2380: {

2384:   /* If the two matrices have the same copy implementation, use fast copy. */
2385:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2386:     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2387:     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2388:     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;

2390:     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2391:     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2392:     PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));
2393:   } else {
2394:     MatCopy_Basic(A,B,str);
2395:   }
2396:   return(0);
2397: }

2401: PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2402: {

2406:   MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);
2407:   return(0);
2408: }

2412: PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2413: {
2414:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;

2417:   *array = a->a;
2418:   return(0);
2419: }

2423: PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2424: {
2426:   return(0);
2427: }

2431: PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2432: {
2433:   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2434:   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2435:   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;

2439:   /* Set the number of nonzeros in the new matrix */
2440:   MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);
2441:   return(0);
2442: }

2446: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2447: {
2448:   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2450:   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2451:   PetscBLASInt   one=1;

2454:   if (str == SAME_NONZERO_PATTERN) {
2455:     PetscScalar  alpha = a;
2456:     PetscBLASInt bnz;
2457:     PetscBLASIntCast(x->nz*bs2,&bnz);
2458:     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2459:     PetscObjectStateIncrease((PetscObject)Y);
2460:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2461:     MatAXPY_Basic(Y,a,X,str);
2462:   } else {
2463:     Mat      B;
2464:     PetscInt *nnz;
2465:     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2466:     PetscMalloc1(Y->rmap->N,&nnz);
2467:     MatCreate(PetscObjectComm((PetscObject)Y),&B);
2468:     PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2469:     MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2470:     MatSetBlockSizesFromMats(B,Y,Y);
2471:     MatSetType(B,(MatType) ((PetscObject)Y)->type_name);
2472:     MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);
2473:     MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2474:     MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2475:     MatHeaderReplace(Y,&B);
2476:     PetscFree(nnz);
2477:   }
2478:   return(0);
2479: }

2483: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2484: {
2485:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2486:   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2487:   MatScalar   *aa = a->a;

2490:   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2491:   return(0);
2492: }

2496: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2497: {
2498:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2499:   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2500:   MatScalar   *aa = a->a;

2503:   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2504:   return(0);
2505: }

2509: /*
2510:     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2511: */
2512: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2513: {
2514:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2516:   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2517:   PetscInt       nz = a->i[m],row,*jj,mr,col;

2520:   *nn = n;
2521:   if (!ia) return(0);
2522:   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2523:   else {
2524:     PetscCalloc1(n+1,&collengths);
2525:     PetscMalloc1(n+1,&cia);
2526:     PetscMalloc1(nz+1,&cja);
2527:     jj   = a->j;
2528:     for (i=0; i<nz; i++) {
2529:       collengths[jj[i]]++;
2530:     }
2531:     cia[0] = oshift;
2532:     for (i=0; i<n; i++) {
2533:       cia[i+1] = cia[i] + collengths[i];
2534:     }
2535:     PetscMemzero(collengths,n*sizeof(PetscInt));
2536:     jj   = a->j;
2537:     for (row=0; row<m; row++) {
2538:       mr = a->i[row+1] - a->i[row];
2539:       for (i=0; i<mr; i++) {
2540:         col = *jj++;

2542:         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2543:       }
2544:     }
2545:     PetscFree(collengths);
2546:     *ia  = cia; *ja = cja;
2547:   }
2548:   return(0);
2549: }

2553: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2554: {

2558:   if (!ia) return(0);
2559:   PetscFree(*ia);
2560:   PetscFree(*ja);
2561:   return(0);
2562: }

2564: /*
2565:  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2566:  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2567:  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2568:  */
2571: PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2572: {
2573:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2575:   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2576:   PetscInt       nz = a->i[m],row,*jj,mr,col;
2577:   PetscInt       *cspidx;

2580:   *nn = n;
2581:   if (!ia) return(0);

2583:   PetscCalloc1(n+1,&collengths);
2584:   PetscMalloc1(n+1,&cia);
2585:   PetscMalloc1(nz+1,&cja);
2586:   PetscMalloc1(nz+1,&cspidx);
2587:   jj   = a->j;
2588:   for (i=0; i<nz; i++) {
2589:     collengths[jj[i]]++;
2590:   }
2591:   cia[0] = oshift;
2592:   for (i=0; i<n; i++) {
2593:     cia[i+1] = cia[i] + collengths[i];
2594:   }
2595:   PetscMemzero(collengths,n*sizeof(PetscInt));
2596:   jj   = a->j;
2597:   for (row=0; row<m; row++) {
2598:     mr = a->i[row+1] - a->i[row];
2599:     for (i=0; i<mr; i++) {
2600:       col = *jj++;
2601:       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2602:       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2603:     }
2604:   }
2605:   PetscFree(collengths);
2606:   *ia    = cia; *ja = cja;
2607:   *spidx = cspidx;
2608:   return(0);
2609: }

2613: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2614: {

2618:   MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);
2619:   PetscFree(*spidx);
2620:   return(0);
2621: }

2625: PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2626: {
2628:   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;

2631:   if (!Y->preallocated || !aij->nz) {
2632:     MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);
2633:   }
2634:   MatShift_Basic(Y,a);
2635:   return(0);
2636: }

2638: /* -------------------------------------------------------------------*/
2639: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2640:                                        MatGetRow_SeqBAIJ,
2641:                                        MatRestoreRow_SeqBAIJ,
2642:                                        MatMult_SeqBAIJ_N,
2643:                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2644:                                        MatMultTranspose_SeqBAIJ,
2645:                                        MatMultTransposeAdd_SeqBAIJ,
2646:                                        0,
2647:                                        0,
2648:                                        0,
2649:                                /* 10*/ 0,
2650:                                        MatLUFactor_SeqBAIJ,
2651:                                        0,
2652:                                        0,
2653:                                        MatTranspose_SeqBAIJ,
2654:                                /* 15*/ MatGetInfo_SeqBAIJ,
2655:                                        MatEqual_SeqBAIJ,
2656:                                        MatGetDiagonal_SeqBAIJ,
2657:                                        MatDiagonalScale_SeqBAIJ,
2658:                                        MatNorm_SeqBAIJ,
2659:                                /* 20*/ 0,
2660:                                        MatAssemblyEnd_SeqBAIJ,
2661:                                        MatSetOption_SeqBAIJ,
2662:                                        MatZeroEntries_SeqBAIJ,
2663:                                /* 24*/ MatZeroRows_SeqBAIJ,
2664:                                        0,
2665:                                        0,
2666:                                        0,
2667:                                        0,
2668:                                /* 29*/ MatSetUp_SeqBAIJ,
2669:                                        0,
2670:                                        0,
2671:                                        0,
2672:                                        0,
2673:                                /* 34*/ MatDuplicate_SeqBAIJ,
2674:                                        0,
2675:                                        0,
2676:                                        MatILUFactor_SeqBAIJ,
2677:                                        0,
2678:                                /* 39*/ MatAXPY_SeqBAIJ,
2679:                                        MatGetSubMatrices_SeqBAIJ,
2680:                                        MatIncreaseOverlap_SeqBAIJ,
2681:                                        MatGetValues_SeqBAIJ,
2682:                                        MatCopy_SeqBAIJ,
2683:                                /* 44*/ 0,
2684:                                        MatScale_SeqBAIJ,
2685:                                        MatShift_SeqBAIJ,
2686:                                        0,
2687:                                        MatZeroRowsColumns_SeqBAIJ,
2688:                                /* 49*/ 0,
2689:                                        MatGetRowIJ_SeqBAIJ,
2690:                                        MatRestoreRowIJ_SeqBAIJ,
2691:                                        MatGetColumnIJ_SeqBAIJ,
2692:                                        MatRestoreColumnIJ_SeqBAIJ,
2693:                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2694:                                        0,
2695:                                        0,
2696:                                        0,
2697:                                        MatSetValuesBlocked_SeqBAIJ,
2698:                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2699:                                        MatDestroy_SeqBAIJ,
2700:                                        MatView_SeqBAIJ,
2701:                                        0,
2702:                                        0,
2703:                                /* 64*/ 0,
2704:                                        0,
2705:                                        0,
2706:                                        0,
2707:                                        0,
2708:                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2709:                                        0,
2710:                                        MatConvert_Basic,
2711:                                        0,
2712:                                        0,
2713:                                /* 74*/ 0,
2714:                                        MatFDColoringApply_BAIJ,
2715:                                        0,
2716:                                        0,
2717:                                        0,
2718:                                /* 79*/ 0,
2719:                                        0,
2720:                                        0,
2721:                                        0,
2722:                                        MatLoad_SeqBAIJ,
2723:                                /* 84*/ 0,
2724:                                        0,
2725:                                        0,
2726:                                        0,
2727:                                        0,
2728:                                /* 89*/ 0,
2729:                                        0,
2730:                                        0,
2731:                                        0,
2732:                                        0,
2733:                                /* 94*/ 0,
2734:                                        0,
2735:                                        0,
2736:                                        0,
2737:                                        0,
2738:                                /* 99*/ 0,
2739:                                        0,
2740:                                        0,
2741:                                        0,
2742:                                        0,
2743:                                /*104*/ 0,
2744:                                        MatRealPart_SeqBAIJ,
2745:                                        MatImaginaryPart_SeqBAIJ,
2746:                                        0,
2747:                                        0,
2748:                                /*109*/ 0,
2749:                                        0,
2750:                                        0,
2751:                                        0,
2752:                                        MatMissingDiagonal_SeqBAIJ,
2753:                                /*114*/ 0,
2754:                                        0,
2755:                                        0,
2756:                                        0,
2757:                                        0,
2758:                                /*119*/ 0,
2759:                                        0,
2760:                                        MatMultHermitianTranspose_SeqBAIJ,
2761:                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2762:                                        0,
2763:                                /*124*/ 0,
2764:                                        0,
2765:                                        MatInvertBlockDiagonal_SeqBAIJ,
2766:                                        0,
2767:                                        0,
2768:                                /*129*/ 0,
2769:                                        0,
2770:                                        0,
2771:                                        0,
2772:                                        0,
2773:                                /*134*/ 0,
2774:                                        0,
2775:                                        0,
2776:                                        0,
2777:                                        0,
2778:                                /*139*/ 0,
2779:                                        0,
2780:                                        0,
2781:                                        MatFDColoringSetUp_SeqXAIJ,
2782:                                        0,
2783:                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2784: };

2788: PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2789: {
2790:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2791:   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;

2795:   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");

2797:   /* allocate space for values if not already there */
2798:   if (!aij->saved_values) {
2799:     PetscMalloc1(nz+1,&aij->saved_values);
2800:     PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));
2801:   }

2803:   /* copy values over */
2804:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2805:   return(0);
2806: }

2810: PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2811: {
2812:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2814:   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;

2817:   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2818:   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");

2820:   /* copy values over */
2821:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2822:   return(0);
2823: }

2825: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2826: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);

2830: PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2831: {
2832:   Mat_SeqBAIJ    *b;
2834:   PetscInt       i,mbs,nbs,bs2;
2835:   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;

2838:   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2839:   if (nz == MAT_SKIP_ALLOCATION) {
2840:     skipallocation = PETSC_TRUE;
2841:     nz             = 0;
2842:   }

2844:   MatSetBlockSize(B,PetscAbs(bs));
2845:   PetscLayoutSetUp(B->rmap);
2846:   PetscLayoutSetUp(B->cmap);
2847:   PetscLayoutGetBlockSize(B->rmap,&bs);

2849:   B->preallocated = PETSC_TRUE;

2851:   mbs = B->rmap->n/bs;
2852:   nbs = B->cmap->n/bs;
2853:   bs2 = bs*bs;

2855:   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);

2857:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2858:   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2859:   if (nnz) {
2860:     for (i=0; i<mbs; i++) {
2861:       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2862:       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2863:     }
2864:   }

2866:   b    = (Mat_SeqBAIJ*)B->data;
2867:   PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2868:   PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);
2869:   PetscOptionsEnd();

2871:   if (!flg) {
2872:     switch (bs) {
2873:     case 1:
2874:       B->ops->mult    = MatMult_SeqBAIJ_1;
2875:       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2876:       break;
2877:     case 2:
2878:       B->ops->mult    = MatMult_SeqBAIJ_2;
2879:       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2880:       break;
2881:     case 3:
2882:       B->ops->mult    = MatMult_SeqBAIJ_3;
2883:       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2884:       break;
2885:     case 4:
2886:       B->ops->mult    = MatMult_SeqBAIJ_4;
2887:       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2888:       break;
2889:     case 5:
2890:       B->ops->mult    = MatMult_SeqBAIJ_5;
2891:       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2892:       break;
2893:     case 6:
2894:       B->ops->mult    = MatMult_SeqBAIJ_6;
2895:       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2896:       break;
2897:     case 7:
2898:       B->ops->mult    = MatMult_SeqBAIJ_7;
2899:       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2900:       break;
2901:     case 15:
2902:       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2903:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2904:       break;
2905:     default:
2906:       B->ops->mult    = MatMult_SeqBAIJ_N;
2907:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2908:       break;
2909:     }
2910:   }
2911:   B->ops->sor = MatSOR_SeqBAIJ;
2912:   b->mbs = mbs;
2913:   b->nbs = nbs;
2914:   if (!skipallocation) {
2915:     if (!b->imax) {
2916:       PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);
2917:       PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));

2919:       b->free_imax_ilen = PETSC_TRUE;
2920:     }
2921:     /* b->ilen will count nonzeros in each block row so far. */
2922:     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2923:     if (!nnz) {
2924:       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2925:       else if (nz < 0) nz = 1;
2926:       for (i=0; i<mbs; i++) b->imax[i] = nz;
2927:       nz = nz*mbs;
2928:     } else {
2929:       nz = 0;
2930:       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2931:     }

2933:     /* allocate the matrix space */
2934:     MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2935:     PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);
2936:     PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2937:     PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2938:     PetscMemzero(b->j,nz*sizeof(PetscInt));

2940:     b->singlemalloc = PETSC_TRUE;
2941:     b->i[0]         = 0;
2942:     for (i=1; i<mbs+1; i++) {
2943:       b->i[i] = b->i[i-1] + b->imax[i-1];
2944:     }
2945:     b->free_a  = PETSC_TRUE;
2946:     b->free_ij = PETSC_TRUE;
2947:   } else {
2948:     b->free_a  = PETSC_FALSE;
2949:     b->free_ij = PETSC_FALSE;
2950:   }

2952:   b->bs2              = bs2;
2953:   b->mbs              = mbs;
2954:   b->nz               = 0;
2955:   b->maxnz            = nz;
2956:   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2957:   if (realalloc) {MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);}
2958:   return(0);
2959: }

2963: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2964: {
2965:   PetscInt       i,m,nz,nz_max=0,*nnz;
2966:   PetscScalar    *values=0;
2967:   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;

2971:   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2972:   PetscLayoutSetBlockSize(B->rmap,bs);
2973:   PetscLayoutSetBlockSize(B->cmap,bs);
2974:   PetscLayoutSetUp(B->rmap);
2975:   PetscLayoutSetUp(B->cmap);
2976:   PetscLayoutGetBlockSize(B->rmap,&bs);
2977:   m    = B->rmap->n/bs;

2979:   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2980:   PetscMalloc1(m+1, &nnz);
2981:   for (i=0; i<m; i++) {
2982:     nz = ii[i+1]- ii[i];
2983:     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2984:     nz_max = PetscMax(nz_max, nz);
2985:     nnz[i] = nz;
2986:   }
2987:   MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2988:   PetscFree(nnz);

2990:   values = (PetscScalar*)V;
2991:   if (!values) {
2992:     PetscCalloc1(bs*bs*(nz_max+1),&values);
2993:   }
2994:   for (i=0; i<m; i++) {
2995:     PetscInt          ncols  = ii[i+1] - ii[i];
2996:     const PetscInt    *icols = jj + ii[i];
2997:     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2998:     if (!roworiented) {
2999:       MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
3000:     } else {
3001:       PetscInt j;
3002:       for (j=0; j<ncols; j++) {
3003:         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3004:         MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);
3005:       }
3006:     }
3007:   }
3008:   if (!V) { PetscFree(values); }
3009:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3010:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3011:   MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3012:   return(0);
3013: }

3015: /*MC
3016:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3017:    block sparse compressed row format.

3019:    Options Database Keys:
3020: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()

3022:   Level: beginner

3024: .seealso: MatCreateSeqBAIJ()
3025: M*/

3027: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);

3031: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3032: {
3034:   PetscMPIInt    size;
3035:   Mat_SeqBAIJ    *b;

3038:   MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
3039:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

3041:   PetscNewLog(B,&b);
3042:   B->data = (void*)b;
3043:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));

3045:   b->row          = 0;
3046:   b->col          = 0;
3047:   b->icol         = 0;
3048:   b->reallocs     = 0;
3049:   b->saved_values = 0;

3051:   b->roworiented        = PETSC_TRUE;
3052:   b->nonew              = 0;
3053:   b->diag               = 0;
3054:   B->spptr              = 0;
3055:   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3056:   b->keepnonzeropattern = PETSC_FALSE;

3058:   PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);
3059:   PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);
3060:   PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);
3061:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);
3062:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);
3063:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);
3064:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);
3065:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3066:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);
3067:   PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);
3068:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3069:   return(0);
3070: }

3074: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3075: {
3076:   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3078:   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;

3081:   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");

3083:   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3084:     c->imax           = a->imax;
3085:     c->ilen           = a->ilen;
3086:     c->free_imax_ilen = PETSC_FALSE;
3087:   } else {
3088:     PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);
3089:     PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));
3090:     for (i=0; i<mbs; i++) {
3091:       c->imax[i] = a->imax[i];
3092:       c->ilen[i] = a->ilen[i];
3093:     }
3094:     c->free_imax_ilen = PETSC_TRUE;
3095:   }

3097:   /* allocate the matrix space */
3098:   if (mallocmatspace) {
3099:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3100:       PetscCalloc1(bs2*nz,&c->a);
3101:       PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));

3103:       c->i            = a->i;
3104:       c->j            = a->j;
3105:       c->singlemalloc = PETSC_FALSE;
3106:       c->free_a       = PETSC_TRUE;
3107:       c->free_ij      = PETSC_FALSE;
3108:       c->parent       = A;
3109:       C->preallocated = PETSC_TRUE;
3110:       C->assembled    = PETSC_TRUE;

3112:       PetscObjectReference((PetscObject)A);
3113:       MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3114:       MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3115:     } else {
3116:       PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);
3117:       PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));

3119:       c->singlemalloc = PETSC_TRUE;
3120:       c->free_a       = PETSC_TRUE;
3121:       c->free_ij      = PETSC_TRUE;

3123:       PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
3124:       if (mbs > 0) {
3125:         PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
3126:         if (cpvalues == MAT_COPY_VALUES) {
3127:           PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
3128:         } else {
3129:           PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
3130:         }
3131:       }
3132:       C->preallocated = PETSC_TRUE;
3133:       C->assembled    = PETSC_TRUE;
3134:     }
3135:   }

3137:   c->roworiented = a->roworiented;
3138:   c->nonew       = a->nonew;

3140:   PetscLayoutReference(A->rmap,&C->rmap);
3141:   PetscLayoutReference(A->cmap,&C->cmap);

3143:   c->bs2         = a->bs2;
3144:   c->mbs         = a->mbs;
3145:   c->nbs         = a->nbs;

3147:   if (a->diag) {
3148:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3149:       c->diag      = a->diag;
3150:       c->free_diag = PETSC_FALSE;
3151:     } else {
3152:       PetscMalloc1(mbs+1,&c->diag);
3153:       PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));
3154:       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3155:       c->free_diag = PETSC_TRUE;
3156:     }
3157:   } else c->diag = 0;

3159:   c->nz         = a->nz;
3160:   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3161:   c->solve_work = NULL;
3162:   c->mult_work  = NULL;
3163:   c->sor_workt  = NULL;
3164:   c->sor_work   = NULL;

3166:   c->compressedrow.use   = a->compressedrow.use;
3167:   c->compressedrow.nrows = a->compressedrow.nrows;
3168:   if (a->compressedrow.use) {
3169:     i    = a->compressedrow.nrows;
3170:     PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);
3171:     PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));
3172:     PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
3173:     PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
3174:   } else {
3175:     c->compressedrow.use    = PETSC_FALSE;
3176:     c->compressedrow.i      = NULL;
3177:     c->compressedrow.rindex = NULL;
3178:   }
3179:   C->nonzerostate = A->nonzerostate;

3181:   PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3182:   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
3183:   return(0);
3184: }

3188: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3189: {

3193:   MatCreate(PetscObjectComm((PetscObject)A),B);
3194:   MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3195:   MatSetType(*B,MATSEQBAIJ);
3196:   MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3197:   return(0);
3198: }

3202: PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3203: {
3204:   Mat_SeqBAIJ    *a;
3206:   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3207:   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3208:   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3209:   PetscInt       *masked,nmask,tmp,bs2,ishift;
3210:   PetscMPIInt    size;
3211:   int            fd;
3212:   PetscScalar    *aa;
3213:   MPI_Comm       comm;

3216:   /* force binary viewer to load .info file if it has not yet done so */
3217:   PetscViewerSetUp(viewer);
3218:   PetscObjectGetComm((PetscObject)viewer,&comm);
3219:   PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");
3220:   PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
3221:   PetscOptionsEnd();
3222:   if (bs < 0) bs = 1;
3223:   bs2  = bs*bs;

3225:   MPI_Comm_size(comm,&size);
3226:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3227:   PetscViewerBinaryGetDescriptor(viewer,&fd);
3228:   PetscBinaryRead(fd,header,4,PETSC_INT);
3229:   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3230:   M = header[1]; N = header[2]; nz = header[3];

3232:   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3233:   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");

3235:   /*
3236:      This code adds extra rows to make sure the number of rows is
3237:     divisible by the blocksize
3238:   */
3239:   mbs        = M/bs;
3240:   extra_rows = bs - M + bs*(mbs);
3241:   if (extra_rows == bs) extra_rows = 0;
3242:   else mbs++;
3243:   if (extra_rows) {
3244:     PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3245:   }

3247:   /* Set global sizes if not already set */
3248:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3249:     MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3250:   } else { /* Check if the matrix global sizes are correct */
3251:     MatGetSize(newmat,&rows,&cols);
3252:     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3253:       MatGetLocalSize(newmat,&rows,&cols);
3254:     }
3255:     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3256:   }

3258:   /* read in row lengths */
3259:   PetscMalloc1(M+extra_rows,&rowlengths);
3260:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
3261:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

3263:   /* read in column indices */
3264:   PetscMalloc1(nz+extra_rows,&jj);
3265:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
3266:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

3268:   /* loop over row lengths determining block row lengths */
3269:   PetscCalloc1(mbs,&browlengths);
3270:   PetscMalloc2(mbs,&mask,mbs,&masked);
3271:   PetscMemzero(mask,mbs*sizeof(PetscInt));
3272:   rowcount = 0;
3273:   nzcount  = 0;
3274:   for (i=0; i<mbs; i++) {
3275:     nmask = 0;
3276:     for (j=0; j<bs; j++) {
3277:       kmax = rowlengths[rowcount];
3278:       for (k=0; k<kmax; k++) {
3279:         tmp = jj[nzcount++]/bs;
3280:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3281:       }
3282:       rowcount++;
3283:     }
3284:     browlengths[i] += nmask;
3285:     /* zero out the mask elements we set */
3286:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3287:   }

3289:   /* Do preallocation  */
3290:   MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);
3291:   a    = (Mat_SeqBAIJ*)newmat->data;

3293:   /* set matrix "i" values */
3294:   a->i[0] = 0;
3295:   for (i=1; i<= mbs; i++) {
3296:     a->i[i]      = a->i[i-1] + browlengths[i-1];
3297:     a->ilen[i-1] = browlengths[i-1];
3298:   }
3299:   a->nz = 0;
3300:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

3302:   /* read in nonzero values */
3303:   PetscMalloc1(nz+extra_rows,&aa);
3304:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3305:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

3307:   /* set "a" and "j" values into matrix */
3308:   nzcount = 0; jcount = 0;
3309:   for (i=0; i<mbs; i++) {
3310:     nzcountb = nzcount;
3311:     nmask    = 0;
3312:     for (j=0; j<bs; j++) {
3313:       kmax = rowlengths[i*bs+j];
3314:       for (k=0; k<kmax; k++) {
3315:         tmp = jj[nzcount++]/bs;
3316:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3317:       }
3318:     }
3319:     /* sort the masked values */
3320:     PetscSortInt(nmask,masked);

3322:     /* set "j" values into matrix */
3323:     maskcount = 1;
3324:     for (j=0; j<nmask; j++) {
3325:       a->j[jcount++]  = masked[j];
3326:       mask[masked[j]] = maskcount++;
3327:     }
3328:     /* set "a" values into matrix */
3329:     ishift = bs2*a->i[i];
3330:     for (j=0; j<bs; j++) {
3331:       kmax = rowlengths[i*bs+j];
3332:       for (k=0; k<kmax; k++) {
3333:         tmp       = jj[nzcountb]/bs;
3334:         block     = mask[tmp] - 1;
3335:         point     = jj[nzcountb] - bs*tmp;
3336:         idx       = ishift + bs2*block + j + bs*point;
3337:         a->a[idx] = (MatScalar)aa[nzcountb++];
3338:       }
3339:     }
3340:     /* zero out the mask elements we set */
3341:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3342:   }
3343:   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

3345:   PetscFree(rowlengths);
3346:   PetscFree(browlengths);
3347:   PetscFree(aa);
3348:   PetscFree(jj);
3349:   PetscFree2(mask,masked);

3351:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
3352:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
3353:   return(0);
3354: }

3358: /*@C
3359:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3360:    compressed row) format.  For good matrix assembly performance the
3361:    user should preallocate the matrix storage by setting the parameter nz
3362:    (or the array nnz).  By setting these parameters accurately, performance
3363:    during matrix assembly can be increased by more than a factor of 50.

3365:    Collective on MPI_Comm

3367:    Input Parameters:
3368: +  comm - MPI communicator, set to PETSC_COMM_SELF
3369: .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3370:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3371: .  m - number of rows
3372: .  n - number of columns
3373: .  nz - number of nonzero blocks  per block row (same for all rows)
3374: -  nnz - array containing the number of nonzero blocks in the various block rows
3375:          (possibly different for each block row) or NULL

3377:    Output Parameter:
3378: .  A - the matrix

3380:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3381:    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3382:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

3384:    Options Database Keys:
3385: .   -mat_no_unroll - uses code that does not unroll the loops in the
3386:                      block calculations (much slower)
3387: .    -mat_block_size - size of the blocks to use

3389:    Level: intermediate

3391:    Notes:
3392:    The number of rows and columns must be divisible by blocksize.

3394:    If the nnz parameter is given then the nz parameter is ignored

3396:    A nonzero block is any block that as 1 or more nonzeros in it

3398:    The block AIJ format is fully compatible with standard Fortran 77
3399:    storage.  That is, the stored row and column indices can begin at
3400:    either one (as in Fortran) or zero.  See the users' manual for details.

3402:    Specify the preallocated storage with either nz or nnz (not both).
3403:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3404:    allocation.  See Users-Manual: ch_mat for details.
3405:    matrices.

3407: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3408: @*/
3409: PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3410: {

3414:   MatCreate(comm,A);
3415:   MatSetSizes(*A,m,n,m,n);
3416:   MatSetType(*A,MATSEQBAIJ);
3417:   MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
3418:   return(0);
3419: }

3423: /*@C
3424:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3425:    per row in the matrix. For good matrix assembly performance the
3426:    user should preallocate the matrix storage by setting the parameter nz
3427:    (or the array nnz).  By setting these parameters accurately, performance
3428:    during matrix assembly can be increased by more than a factor of 50.

3430:    Collective on MPI_Comm

3432:    Input Parameters:
3433: +  B - the matrix
3434: .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3435:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3436: .  nz - number of block nonzeros per block row (same for all rows)
3437: -  nnz - array containing the number of block nonzeros in the various block rows
3438:          (possibly different for each block row) or NULL

3440:    Options Database Keys:
3441: .   -mat_no_unroll - uses code that does not unroll the loops in the
3442:                      block calculations (much slower)
3443: .   -mat_block_size - size of the blocks to use

3445:    Level: intermediate

3447:    Notes:
3448:    If the nnz parameter is given then the nz parameter is ignored

3450:    You can call MatGetInfo() to get information on how effective the preallocation was;
3451:    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3452:    You can also run with the option -info and look for messages with the string
3453:    malloc in them to see if additional memory allocation was needed.

3455:    The block AIJ format is fully compatible with standard Fortran 77
3456:    storage.  That is, the stored row and column indices can begin at
3457:    either one (as in Fortran) or zero.  See the users' manual for details.

3459:    Specify the preallocated storage with either nz or nnz (not both).
3460:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3461:    allocation.  See Users-Manual: ch_mat for details.

3463: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3464: @*/
3465: PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3466: {

3473:   PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3474:   return(0);
3475: }

3479: /*@C
3480:    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3481:    (the default sequential PETSc format).

3483:    Collective on MPI_Comm

3485:    Input Parameters:
3486: +  B - the matrix
3487: .  i - the indices into j for the start of each local row (starts with zero)
3488: .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3489: -  v - optional values in the matrix

3491:    Level: developer

3493:    Notes:
3494:    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3495:    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3496:    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3497:    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3498:    block column and the second index is over columns within a block.

3500: .keywords: matrix, aij, compressed row, sparse

3502: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3503: @*/
3504: PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3505: {

3512:   PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3513:   return(0);
3514: }


3519: /*@
3520:      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.

3522:      Collective on MPI_Comm

3524:    Input Parameters:
3525: +  comm - must be an MPI communicator of size 1
3526: .  bs - size of block
3527: .  m - number of rows
3528: .  n - number of columns
3529: .  i - row indices
3530: .  j - column indices
3531: -  a - matrix values

3533:    Output Parameter:
3534: .  mat - the matrix

3536:    Level: advanced

3538:    Notes:
3539:        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3540:     once the matrix is destroyed

3542:        You cannot set new nonzero locations into this matrix, that will generate an error.

3544:        The i and j indices are 0 based

3546:        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).

3548:       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3549:       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3550:       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3551:       with column-major ordering within blocks.

3553: .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()

3555: @*/
3556: PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3557: {
3559:   PetscInt       ii;
3560:   Mat_SeqBAIJ    *baij;

3563:   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3564:   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");

3566:   MatCreate(comm,mat);
3567:   MatSetSizes(*mat,m,n,m,n);
3568:   MatSetType(*mat,MATSEQBAIJ);
3569:   MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
3570:   baij = (Mat_SeqBAIJ*)(*mat)->data;
3571:   PetscMalloc2(m,&baij->imax,m,&baij->ilen);
3572:   PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));

3574:   baij->i = i;
3575:   baij->j = j;
3576:   baij->a = a;

3578:   baij->singlemalloc = PETSC_FALSE;
3579:   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3580:   baij->free_a       = PETSC_FALSE;
3581:   baij->free_ij      = PETSC_FALSE;

3583:   for (ii=0; ii<m; ii++) {
3584:     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3585: #if defined(PETSC_USE_DEBUG)
3586:     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3587: #endif
3588:   }
3589: #if defined(PETSC_USE_DEBUG)
3590:   for (ii=0; ii<baij->i[m]; ii++) {
3591:     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3592:     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3593:   }
3594: #endif

3596:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3597:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3598:   return(0);
3599: }

3603: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3604: {

3608:   MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);
3609:   return(0);
3610: }