Actual source code: baij.c

petsc-3.12.5 2020-03-29
Report Typos and Errors

  2: /*
  3:     Defines the basic matrix operations for the BAIJ (compressed row)
  4:   matrix storage format.
  5: */
  6:  #include <../src/mat/impls/baij/seq/baij.h>
  7:  #include <petscblaslapack.h>
  8:  #include <petsc/private/kernels/blockinvert.h>
  9:  #include <petsc/private/kernels/blockmatmult.h>

 11: #if defined(PETSC_HAVE_HYPRE)
 12: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
 13: #endif

 15: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 16: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
 17: #endif
 18: PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
 19: PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);

 21: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
 22: {
 23:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 25:   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
 26:   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
 27:   PetscReal      shift = 0.0;
 28:   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;

 31:   allowzeropivot = PetscNot(A->erroriffailure);

 33:   if (a->idiagvalid) {
 34:     if (values) *values = a->idiag;
 35:     return(0);
 36:   }
 37:   MatMarkDiagonal_SeqBAIJ(A);
 38:   diag_offset = a->diag;
 39:   if (!a->idiag) {
 40:     PetscMalloc1(bs2*mbs,&a->idiag);
 41:     PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));
 42:   }
 43:   diag  = a->idiag;
 44:   if (values) *values = a->idiag;
 45:   /* factor and invert each block */
 46:   switch (bs) {
 47:   case 1:
 48:     for (i=0; i<mbs; i++) {
 49:       odiag    = v + 1*diag_offset[i];
 50:       diag[0]  = odiag[0];

 52:       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
 53:         if (allowzeropivot) {
 54:           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 55:           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
 56:           A->factorerror_zeropivot_row   = i;
 57:           PetscInfo1(A,"Zero pivot, row %D\n",i);
 58:         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
 59:       }

 61:       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
 62:       diag    += 1;
 63:     }
 64:     break;
 65:   case 2:
 66:     for (i=0; i<mbs; i++) {
 67:       odiag    = v + 4*diag_offset[i];
 68:       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 69:       PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);
 70:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 71:       diag    += 4;
 72:     }
 73:     break;
 74:   case 3:
 75:     for (i=0; i<mbs; i++) {
 76:       odiag    = v + 9*diag_offset[i];
 77:       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 78:       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 79:       diag[8]  = odiag[8];
 80:       PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);
 81:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 82:       diag    += 9;
 83:     }
 84:     break;
 85:   case 4:
 86:     for (i=0; i<mbs; i++) {
 87:       odiag  = v + 16*diag_offset[i];
 88:       PetscArraycpy(diag,odiag,16);
 89:       PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);
 90:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 91:       diag  += 16;
 92:     }
 93:     break;
 94:   case 5:
 95:     for (i=0; i<mbs; i++) {
 96:       odiag  = v + 25*diag_offset[i];
 97:       PetscArraycpy(diag,odiag,25);
 98:       PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);
 99:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
100:       diag  += 25;
101:     }
102:     break;
103:   case 6:
104:     for (i=0; i<mbs; i++) {
105:       odiag  = v + 36*diag_offset[i];
106:       PetscArraycpy(diag,odiag,36);
107:       PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);
108:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
109:       diag  += 36;
110:     }
111:     break;
112:   case 7:
113:     for (i=0; i<mbs; i++) {
114:       odiag  = v + 49*diag_offset[i];
115:       PetscArraycpy(diag,odiag,49);
116:       PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);
117:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
118:       diag  += 49;
119:     }
120:     break;
121:   default:
122:     PetscMalloc2(bs,&v_work,bs,&v_pivots);
123:     for (i=0; i<mbs; i++) {
124:       odiag  = v + bs2*diag_offset[i];
125:       PetscArraycpy(diag,odiag,bs2);
126:       PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);
127:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128:       diag  += bs2;
129:     }
130:     PetscFree2(v_work,v_pivots);
131:   }
132:   a->idiagvalid = PETSC_TRUE;
133:   return(0);
134: }

136: PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
137: {
138:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
139:   PetscScalar       *x,*work,*w,*workt,*t;
140:   const MatScalar   *v,*aa = a->a, *idiag;
141:   const PetscScalar *b,*xb;
142:   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
143:   PetscErrorCode    ierr;
144:   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
145:   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;

148:   its = its*lits;
149:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
150:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
151:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
152:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
153:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");

155:   if (!a->idiagvalid) {MatInvertBlockDiagonal(A,NULL);}

157:   if (!m) return(0);
158:   diag  = a->diag;
159:   idiag = a->idiag;
160:   k    = PetscMax(A->rmap->n,A->cmap->n);
161:   if (!a->mult_work) {
162:     PetscMalloc1(k+1,&a->mult_work);
163:   }
164:   if (!a->sor_workt) {
165:     PetscMalloc1(k,&a->sor_workt);
166:   }
167:   if (!a->sor_work) {
168:     PetscMalloc1(bs,&a->sor_work);
169:   }
170:   work = a->mult_work;
171:   t    = a->sor_workt;
172:   w    = a->sor_work;

174:   VecGetArray(xx,&x);
175:   VecGetArrayRead(bb,&b);

177:   if (flag & SOR_ZERO_INITIAL_GUESS) {
178:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
179:       switch (bs) {
180:       case 1:
181:         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
182:         t[0] = b[0];
183:         i2     = 1;
184:         idiag += 1;
185:         for (i=1; i<m; i++) {
186:           v  = aa + ai[i];
187:           vi = aj + ai[i];
188:           nz = diag[i] - ai[i];
189:           s[0] = b[i2];
190:           for (j=0; j<nz; j++) {
191:             xw[0] = x[vi[j]];
192:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
193:           }
194:           t[i2] = s[0];
195:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
196:           x[i2]  = xw[0];
197:           idiag += 1;
198:           i2    += 1;
199:         }
200:         break;
201:       case 2:
202:         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
203:         t[0] = b[0]; t[1] = b[1];
204:         i2     = 2;
205:         idiag += 4;
206:         for (i=1; i<m; i++) {
207:           v  = aa + 4*ai[i];
208:           vi = aj + ai[i];
209:           nz = diag[i] - ai[i];
210:           s[0] = b[i2]; s[1] = b[i2+1];
211:           for (j=0; j<nz; j++) {
212:             idx = 2*vi[j];
213:             it  = 4*j;
214:             xw[0] = x[idx]; xw[1] = x[1+idx];
215:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
216:           }
217:           t[i2] = s[0]; t[i2+1] = s[1];
218:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
219:           x[i2]   = xw[0]; x[i2+1] = xw[1];
220:           idiag  += 4;
221:           i2     += 2;
222:         }
223:         break;
224:       case 3:
225:         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
226:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
227:         i2     = 3;
228:         idiag += 9;
229:         for (i=1; i<m; i++) {
230:           v  = aa + 9*ai[i];
231:           vi = aj + ai[i];
232:           nz = diag[i] - ai[i];
233:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
234:           while (nz--) {
235:             idx = 3*(*vi++);
236:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
237:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
238:             v  += 9;
239:           }
240:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
241:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
242:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
243:           idiag  += 9;
244:           i2     += 3;
245:         }
246:         break;
247:       case 4:
248:         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
249:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
250:         i2     = 4;
251:         idiag += 16;
252:         for (i=1; i<m; i++) {
253:           v  = aa + 16*ai[i];
254:           vi = aj + ai[i];
255:           nz = diag[i] - ai[i];
256:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
257:           while (nz--) {
258:             idx = 4*(*vi++);
259:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
260:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
261:             v  += 16;
262:           }
263:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
264:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
265:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
266:           idiag  += 16;
267:           i2     += 4;
268:         }
269:         break;
270:       case 5:
271:         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
272:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
273:         i2     = 5;
274:         idiag += 25;
275:         for (i=1; i<m; i++) {
276:           v  = aa + 25*ai[i];
277:           vi = aj + ai[i];
278:           nz = diag[i] - ai[i];
279:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
280:           while (nz--) {
281:             idx = 5*(*vi++);
282:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
283:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
284:             v  += 25;
285:           }
286:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
287:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
288:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
289:           idiag  += 25;
290:           i2     += 5;
291:         }
292:         break;
293:       case 6:
294:         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
295:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
296:         i2     = 6;
297:         idiag += 36;
298:         for (i=1; i<m; i++) {
299:           v  = aa + 36*ai[i];
300:           vi = aj + ai[i];
301:           nz = diag[i] - ai[i];
302:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
303:           while (nz--) {
304:             idx = 6*(*vi++);
305:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
306:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
307:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
308:             v  += 36;
309:           }
310:           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
311:           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
312:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
313:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
314:           idiag  += 36;
315:           i2     += 6;
316:         }
317:         break;
318:       case 7:
319:         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
320:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
321:         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
322:         i2     = 7;
323:         idiag += 49;
324:         for (i=1; i<m; i++) {
325:           v  = aa + 49*ai[i];
326:           vi = aj + ai[i];
327:           nz = diag[i] - ai[i];
328:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
329:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
330:           while (nz--) {
331:             idx = 7*(*vi++);
332:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
333:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
334:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
335:             v  += 49;
336:           }
337:           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
338:           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
339:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
340:           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
341:           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
342:           idiag  += 49;
343:           i2     += 7;
344:         }
345:         break;
346:       default:
347:         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
348:         PetscArraycpy(t,b,bs);
349:         i2     = bs;
350:         idiag += bs2;
351:         for (i=1; i<m; i++) {
352:           v  = aa + bs2*ai[i];
353:           vi = aj + ai[i];
354:           nz = diag[i] - ai[i];

356:           PetscArraycpy(w,b+i2,bs);
357:           /* copy all rows of x that are needed into contiguous space */
358:           workt = work;
359:           for (j=0; j<nz; j++) {
360:             PetscArraycpy(workt,x + bs*(*vi++),bs);
361:             workt += bs;
362:           }
363:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
364:           PetscArraycpy(t+i2,w,bs);
365:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);

367:           idiag += bs2;
368:           i2    += bs;
369:         }
370:         break;
371:       }
372:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373:       PetscLogFlops(1.0*bs2*a->nz);
374:       xb = t;
375:     }
376:     else xb = b;
377:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
378:       idiag = a->idiag+bs2*(a->mbs-1);
379:       i2 = bs * (m-1);
380:       switch (bs) {
381:       case 1:
382:         s[0]  = xb[i2];
383:         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
384:         x[i2] = xw[0];
385:         i2   -= 1;
386:         for (i=m-2; i>=0; i--) {
387:           v  = aa + (diag[i]+1);
388:           vi = aj + diag[i] + 1;
389:           nz = ai[i+1] - diag[i] - 1;
390:           s[0] = xb[i2];
391:           for (j=0; j<nz; j++) {
392:             xw[0] = x[vi[j]];
393:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
394:           }
395:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396:           x[i2]  = xw[0];
397:           idiag -= 1;
398:           i2    -= 1;
399:         }
400:         break;
401:       case 2:
402:         s[0]  = xb[i2]; s[1] = xb[i2+1];
403:         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
404:         x[i2] = xw[0]; x[i2+1] = xw[1];
405:         i2    -= 2;
406:         idiag -= 4;
407:         for (i=m-2; i>=0; i--) {
408:           v  = aa + 4*(diag[i] + 1);
409:           vi = aj + diag[i] + 1;
410:           nz = ai[i+1] - diag[i] - 1;
411:           s[0] = xb[i2]; s[1] = xb[i2+1];
412:           for (j=0; j<nz; j++) {
413:             idx = 2*vi[j];
414:             it  = 4*j;
415:             xw[0] = x[idx]; xw[1] = x[1+idx];
416:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
417:           }
418:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
419:           x[i2]   = xw[0]; x[i2+1] = xw[1];
420:           idiag  -= 4;
421:           i2     -= 2;
422:         }
423:         break;
424:       case 3:
425:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
426:         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
427:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
428:         i2    -= 3;
429:         idiag -= 9;
430:         for (i=m-2; i>=0; i--) {
431:           v  = aa + 9*(diag[i]+1);
432:           vi = aj + diag[i] + 1;
433:           nz = ai[i+1] - diag[i] - 1;
434:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
435:           while (nz--) {
436:             idx = 3*(*vi++);
437:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
438:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
439:             v  += 9;
440:           }
441:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
442:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
443:           idiag  -= 9;
444:           i2     -= 3;
445:         }
446:         break;
447:       case 4:
448:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
449:         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
450:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
451:         i2    -= 4;
452:         idiag -= 16;
453:         for (i=m-2; i>=0; i--) {
454:           v  = aa + 16*(diag[i]+1);
455:           vi = aj + diag[i] + 1;
456:           nz = ai[i+1] - diag[i] - 1;
457:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
458:           while (nz--) {
459:             idx = 4*(*vi++);
460:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
461:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
462:             v  += 16;
463:           }
464:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
465:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
466:           idiag  -= 16;
467:           i2     -= 4;
468:         }
469:         break;
470:       case 5:
471:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
472:         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
473:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
474:         i2    -= 5;
475:         idiag -= 25;
476:         for (i=m-2; i>=0; i--) {
477:           v  = aa + 25*(diag[i]+1);
478:           vi = aj + diag[i] + 1;
479:           nz = ai[i+1] - diag[i] - 1;
480:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
481:           while (nz--) {
482:             idx = 5*(*vi++);
483:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
484:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
485:             v  += 25;
486:           }
487:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
488:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
489:           idiag  -= 25;
490:           i2     -= 5;
491:         }
492:         break;
493:       case 6:
494:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
495:         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
496:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
497:         i2    -= 6;
498:         idiag -= 36;
499:         for (i=m-2; i>=0; i--) {
500:           v  = aa + 36*(diag[i]+1);
501:           vi = aj + diag[i] + 1;
502:           nz = ai[i+1] - diag[i] - 1;
503:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
504:           while (nz--) {
505:             idx = 6*(*vi++);
506:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
507:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
508:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
509:             v  += 36;
510:           }
511:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
512:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
513:           idiag  -= 36;
514:           i2     -= 6;
515:         }
516:         break;
517:       case 7:
518:         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
519:         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
520:         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
521:         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
522:         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
523:         i2    -= 7;
524:         idiag -= 49;
525:         for (i=m-2; i>=0; i--) {
526:           v  = aa + 49*(diag[i]+1);
527:           vi = aj + diag[i] + 1;
528:           nz = ai[i+1] - diag[i] - 1;
529:           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
530:           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
531:           while (nz--) {
532:             idx = 7*(*vi++);
533:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
534:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
535:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
536:             v  += 49;
537:           }
538:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
539:           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
540:           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
541:           idiag  -= 49;
542:           i2     -= 7;
543:         }
544:         break;
545:       default:
546:         PetscArraycpy(w,xb+i2,bs);
547:         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
548:         i2    -= bs;
549:         idiag -= bs2;
550:         for (i=m-2; i>=0; i--) {
551:           v  = aa + bs2*(diag[i]+1);
552:           vi = aj + diag[i] + 1;
553:           nz = ai[i+1] - diag[i] - 1;

555:           PetscArraycpy(w,xb+i2,bs);
556:           /* copy all rows of x that are needed into contiguous space */
557:           workt = work;
558:           for (j=0; j<nz; j++) {
559:             PetscArraycpy(workt,x + bs*(*vi++),bs);
560:             workt += bs;
561:           }
562:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
563:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);

565:           idiag -= bs2;
566:           i2    -= bs;
567:         }
568:         break;
569:       }
570:       PetscLogFlops(1.0*bs2*(a->nz));
571:     }
572:     its--;
573:   }
574:   while (its--) {
575:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
576:       idiag = a->idiag;
577:       i2 = 0;
578:       switch (bs) {
579:       case 1:
580:         for (i=0; i<m; i++) {
581:           v  = aa + ai[i];
582:           vi = aj + ai[i];
583:           nz = ai[i+1] - ai[i];
584:           s[0] = b[i2];
585:           for (j=0; j<nz; j++) {
586:             xw[0] = x[vi[j]];
587:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
588:           }
589:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
590:           x[i2] += xw[0];
591:           idiag += 1;
592:           i2    += 1;
593:         }
594:         break;
595:       case 2:
596:         for (i=0; i<m; i++) {
597:           v  = aa + 4*ai[i];
598:           vi = aj + ai[i];
599:           nz = ai[i+1] - ai[i];
600:           s[0] = b[i2]; s[1] = b[i2+1];
601:           for (j=0; j<nz; j++) {
602:             idx = 2*vi[j];
603:             it  = 4*j;
604:             xw[0] = x[idx]; xw[1] = x[1+idx];
605:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
606:           }
607:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
608:           x[i2]  += xw[0]; x[i2+1] += xw[1];
609:           idiag  += 4;
610:           i2     += 2;
611:         }
612:         break;
613:       case 3:
614:         for (i=0; i<m; i++) {
615:           v  = aa + 9*ai[i];
616:           vi = aj + ai[i];
617:           nz = ai[i+1] - ai[i];
618:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
619:           while (nz--) {
620:             idx = 3*(*vi++);
621:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
622:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
623:             v  += 9;
624:           }
625:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
626:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
627:           idiag  += 9;
628:           i2     += 3;
629:         }
630:         break;
631:       case 4:
632:         for (i=0; i<m; i++) {
633:           v  = aa + 16*ai[i];
634:           vi = aj + ai[i];
635:           nz = ai[i+1] - ai[i];
636:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
637:           while (nz--) {
638:             idx = 4*(*vi++);
639:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
640:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
641:             v  += 16;
642:           }
643:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
644:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
645:           idiag  += 16;
646:           i2     += 4;
647:         }
648:         break;
649:       case 5:
650:         for (i=0; i<m; i++) {
651:           v  = aa + 25*ai[i];
652:           vi = aj + ai[i];
653:           nz = ai[i+1] - ai[i];
654:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
655:           while (nz--) {
656:             idx = 5*(*vi++);
657:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
658:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
659:             v  += 25;
660:           }
661:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
662:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
663:           idiag  += 25;
664:           i2     += 5;
665:         }
666:         break;
667:       case 6:
668:         for (i=0; i<m; i++) {
669:           v  = aa + 36*ai[i];
670:           vi = aj + ai[i];
671:           nz = ai[i+1] - ai[i];
672:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
673:           while (nz--) {
674:             idx = 6*(*vi++);
675:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
676:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
677:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
678:             v  += 36;
679:           }
680:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
681:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
682:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
683:           idiag  += 36;
684:           i2     += 6;
685:         }
686:         break;
687:       case 7:
688:         for (i=0; i<m; i++) {
689:           v  = aa + 49*ai[i];
690:           vi = aj + ai[i];
691:           nz = ai[i+1] - ai[i];
692:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
693:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
694:           while (nz--) {
695:             idx = 7*(*vi++);
696:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
697:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
698:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
699:             v  += 49;
700:           }
701:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
702:           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
703:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
704:           idiag  += 49;
705:           i2     += 7;
706:         }
707:         break;
708:       default:
709:         for (i=0; i<m; i++) {
710:           v  = aa + bs2*ai[i];
711:           vi = aj + ai[i];
712:           nz = ai[i+1] - ai[i];

714:           PetscArraycpy(w,b+i2,bs);
715:           /* copy all rows of x that are needed into contiguous space */
716:           workt = work;
717:           for (j=0; j<nz; j++) {
718:             PetscArraycpy(workt,x + bs*(*vi++),bs);
719:             workt += bs;
720:           }
721:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
722:           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);

724:           idiag += bs2;
725:           i2    += bs;
726:         }
727:         break;
728:       }
729:       PetscLogFlops(2.0*bs2*a->nz);
730:     }
731:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
732:       idiag = a->idiag+bs2*(a->mbs-1);
733:       i2 = bs * (m-1);
734:       switch (bs) {
735:       case 1:
736:         for (i=m-1; i>=0; i--) {
737:           v  = aa + ai[i];
738:           vi = aj + ai[i];
739:           nz = ai[i+1] - ai[i];
740:           s[0] = b[i2];
741:           for (j=0; j<nz; j++) {
742:             xw[0] = x[vi[j]];
743:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
744:           }
745:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
746:           x[i2] += xw[0];
747:           idiag -= 1;
748:           i2    -= 1;
749:         }
750:         break;
751:       case 2:
752:         for (i=m-1; i>=0; i--) {
753:           v  = aa + 4*ai[i];
754:           vi = aj + ai[i];
755:           nz = ai[i+1] - ai[i];
756:           s[0] = b[i2]; s[1] = b[i2+1];
757:           for (j=0; j<nz; j++) {
758:             idx = 2*vi[j];
759:             it  = 4*j;
760:             xw[0] = x[idx]; xw[1] = x[1+idx];
761:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
762:           }
763:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
764:           x[i2]  += xw[0]; x[i2+1] += xw[1];
765:           idiag  -= 4;
766:           i2     -= 2;
767:         }
768:         break;
769:       case 3:
770:         for (i=m-1; i>=0; i--) {
771:           v  = aa + 9*ai[i];
772:           vi = aj + ai[i];
773:           nz = ai[i+1] - ai[i];
774:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
775:           while (nz--) {
776:             idx = 3*(*vi++);
777:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
778:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
779:             v  += 9;
780:           }
781:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
782:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
783:           idiag  -= 9;
784:           i2     -= 3;
785:         }
786:         break;
787:       case 4:
788:         for (i=m-1; i>=0; i--) {
789:           v  = aa + 16*ai[i];
790:           vi = aj + ai[i];
791:           nz = ai[i+1] - ai[i];
792:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
793:           while (nz--) {
794:             idx = 4*(*vi++);
795:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
796:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
797:             v  += 16;
798:           }
799:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
800:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
801:           idiag  -= 16;
802:           i2     -= 4;
803:         }
804:         break;
805:       case 5:
806:         for (i=m-1; i>=0; i--) {
807:           v  = aa + 25*ai[i];
808:           vi = aj + ai[i];
809:           nz = ai[i+1] - ai[i];
810:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
811:           while (nz--) {
812:             idx = 5*(*vi++);
813:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
814:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
815:             v  += 25;
816:           }
817:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
818:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
819:           idiag  -= 25;
820:           i2     -= 5;
821:         }
822:         break;
823:       case 6:
824:         for (i=m-1; i>=0; i--) {
825:           v  = aa + 36*ai[i];
826:           vi = aj + ai[i];
827:           nz = ai[i+1] - ai[i];
828:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
829:           while (nz--) {
830:             idx = 6*(*vi++);
831:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
832:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
833:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
834:             v  += 36;
835:           }
836:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
837:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
838:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
839:           idiag  -= 36;
840:           i2     -= 6;
841:         }
842:         break;
843:       case 7:
844:         for (i=m-1; i>=0; i--) {
845:           v  = aa + 49*ai[i];
846:           vi = aj + ai[i];
847:           nz = ai[i+1] - ai[i];
848:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
849:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
850:           while (nz--) {
851:             idx = 7*(*vi++);
852:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
853:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
854:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
855:             v  += 49;
856:           }
857:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
858:           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
859:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
860:           idiag  -= 49;
861:           i2     -= 7;
862:         }
863:         break;
864:       default:
865:         for (i=m-1; i>=0; i--) {
866:           v  = aa + bs2*ai[i];
867:           vi = aj + ai[i];
868:           nz = ai[i+1] - ai[i];

870:           PetscArraycpy(w,b+i2,bs);
871:           /* copy all rows of x that are needed into contiguous space */
872:           workt = work;
873:           for (j=0; j<nz; j++) {
874:             PetscArraycpy(workt,x + bs*(*vi++),bs);
875:             workt += bs;
876:           }
877:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
878:           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);

880:           idiag -= bs2;
881:           i2    -= bs;
882:         }
883:         break;
884:       }
885:       PetscLogFlops(2.0*bs2*(a->nz));
886:     }
887:   }
888:   VecRestoreArray(xx,&x);
889:   VecRestoreArrayRead(bb,&b);
890:   return(0);
891: }


894: /*
895:     Special version for direct calls from Fortran (Used in PETSc-fun3d)
896: */
897: #if defined(PETSC_HAVE_FORTRAN_CAPS)
898: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
899: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
900: #define matsetvaluesblocked4_ matsetvaluesblocked4
901: #endif

903: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
904: {
905:   Mat               A  = *AA;
906:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
907:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
908:   PetscInt          *ai    =a->i,*ailen=a->ilen;
909:   PetscInt          *aj    =a->j,stepval,lastcol = -1;
910:   const PetscScalar *value = v;
911:   MatScalar         *ap,*aa = a->a,*bap;
912:   PetscErrorCode    ierr;

915:   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
916:   stepval = (n-1)*4;
917:   for (k=0; k<m; k++) { /* loop over added rows */
918:     row  = im[k];
919:     rp   = aj + ai[row];
920:     ap   = aa + 16*ai[row];
921:     nrow = ailen[row];
922:     low  = 0;
923:     high = nrow;
924:     for (l=0; l<n; l++) { /* loop over added columns */
925:       col = in[l];
926:       if (col <= lastcol)  low = 0;
927:       else                high = nrow;
928:       lastcol = col;
929:       value   = v + k*(stepval+4 + l)*4;
930:       while (high-low > 7) {
931:         t = (low+high)/2;
932:         if (rp[t] > col) high = t;
933:         else             low  = t;
934:       }
935:       for (i=low; i<high; i++) {
936:         if (rp[i] > col) break;
937:         if (rp[i] == col) {
938:           bap = ap +  16*i;
939:           for (ii=0; ii<4; ii++,value+=stepval) {
940:             for (jj=ii; jj<16; jj+=4) {
941:               bap[jj] += *value++;
942:             }
943:           }
944:           goto noinsert2;
945:         }
946:       }
947:       N = nrow++ - 1;
948:       high++; /* added new column index thus must search to one higher than before */
949:       /* shift up all the later entries in this row */
950:       for (ii=N; ii>=i; ii--) {
951:         rp[ii+1] = rp[ii];
952:         PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
953:       }
954:       if (N >= i) {
955:         PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
956:       }
957:       rp[i] = col;
958:       bap   = ap +  16*i;
959:       for (ii=0; ii<4; ii++,value+=stepval) {
960:         for (jj=ii; jj<16; jj+=4) {
961:           bap[jj] = *value++;
962:         }
963:       }
964:       noinsert2:;
965:       low = i;
966:     }
967:     ailen[row] = nrow;
968:   }
969:   PetscFunctionReturnVoid();
970: }

972: #if defined(PETSC_HAVE_FORTRAN_CAPS)
973: #define matsetvalues4_ MATSETVALUES4
974: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
975: #define matsetvalues4_ matsetvalues4
976: #endif

978: PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
979: {
980:   Mat         A  = *AA;
981:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
982:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
983:   PetscInt    *ai=a->i,*ailen=a->ilen;
984:   PetscInt    *aj=a->j,brow,bcol;
985:   PetscInt    ridx,cidx,lastcol = -1;
986:   MatScalar   *ap,value,*aa=a->a,*bap;

990:   for (k=0; k<m; k++) { /* loop over added rows */
991:     row  = im[k]; brow = row/4;
992:     rp   = aj + ai[brow];
993:     ap   = aa + 16*ai[brow];
994:     nrow = ailen[brow];
995:     low  = 0;
996:     high = nrow;
997:     for (l=0; l<n; l++) { /* loop over added columns */
998:       col   = in[l]; bcol = col/4;
999:       ridx  = row % 4; cidx = col % 4;
1000:       value = v[l + k*n];
1001:       if (col <= lastcol)  low = 0;
1002:       else                high = nrow;
1003:       lastcol = col;
1004:       while (high-low > 7) {
1005:         t = (low+high)/2;
1006:         if (rp[t] > bcol) high = t;
1007:         else              low  = t;
1008:       }
1009:       for (i=low; i<high; i++) {
1010:         if (rp[i] > bcol) break;
1011:         if (rp[i] == bcol) {
1012:           bap   = ap +  16*i + 4*cidx + ridx;
1013:           *bap += value;
1014:           goto noinsert1;
1015:         }
1016:       }
1017:       N = nrow++ - 1;
1018:       high++; /* added new column thus must search to one higher than before */
1019:       /* shift up all the later entries in this row */
1020:       PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1021:       PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1022:       PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1023:       rp[i]                    = bcol;
1024:       ap[16*i + 4*cidx + ridx] = value;
1025: noinsert1:;
1026:       low = i;
1027:     }
1028:     ailen[brow] = nrow;
1029:   }
1030:   PetscFunctionReturnVoid();
1031: }

1033: /*
1034:      Checks for missing diagonals
1035: */
1036: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1037: {
1038:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1040:   PetscInt       *diag,*ii = a->i,i;

1043:   MatMarkDiagonal_SeqBAIJ(A);
1044:   *missing = PETSC_FALSE;
1045:   if (A->rmap->n > 0 && !ii) {
1046:     *missing = PETSC_TRUE;
1047:     if (d) *d = 0;
1048:     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1049:   } else {
1050:     PetscInt n;
1051:     n = PetscMin(a->mbs, a->nbs);
1052:     diag = a->diag;
1053:     for (i=0; i<n; i++) {
1054:       if (diag[i] >= ii[i+1]) {
1055:         *missing = PETSC_TRUE;
1056:         if (d) *d = i;
1057:         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1058:         break;
1059:       }
1060:     }
1061:   }
1062:   return(0);
1063: }

1065: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1066: {
1067:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1069:   PetscInt       i,j,m = a->mbs;

1072:   if (!a->diag) {
1073:     PetscMalloc1(m,&a->diag);
1074:     PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));
1075:     a->free_diag = PETSC_TRUE;
1076:   }
1077:   for (i=0; i<m; i++) {
1078:     a->diag[i] = a->i[i+1];
1079:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1080:       if (a->j[j] == i) {
1081:         a->diag[i] = j;
1082:         break;
1083:       }
1084:     }
1085:   }
1086:   return(0);
1087: }


1090: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1091: {
1092:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1094:   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1095:   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;

1098:   *nn = n;
1099:   if (!ia) return(0);
1100:   if (symmetric) {
1101:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);
1102:     nz   = tia[n];
1103:   } else {
1104:     tia = a->i; tja = a->j;
1105:   }

1107:   if (!blockcompressed && bs > 1) {
1108:     (*nn) *= bs;
1109:     /* malloc & create the natural set of indices */
1110:     PetscMalloc1((n+1)*bs,ia);
1111:     if (n) {
1112:       (*ia)[0] = oshift;
1113:       for (j=1; j<bs; j++) {
1114:         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1115:       }
1116:     }

1118:     for (i=1; i<n; i++) {
1119:       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1120:       for (j=1; j<bs; j++) {
1121:         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1122:       }
1123:     }
1124:     if (n) {
1125:       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1126:     }

1128:     if (inja) {
1129:       PetscMalloc1(nz*bs*bs,ja);
1130:       cnt = 0;
1131:       for (i=0; i<n; i++) {
1132:         for (j=0; j<bs; j++) {
1133:           for (k=tia[i]; k<tia[i+1]; k++) {
1134:             for (l=0; l<bs; l++) {
1135:               (*ja)[cnt++] = bs*tja[k] + l;
1136:             }
1137:           }
1138:         }
1139:       }
1140:     }

1142:     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1143:       PetscFree(tia);
1144:       PetscFree(tja);
1145:     }
1146:   } else if (oshift == 1) {
1147:     if (symmetric) {
1148:       nz = tia[A->rmap->n/bs];
1149:       /*  add 1 to i and j indices */
1150:       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1151:       *ia = tia;
1152:       if (ja) {
1153:         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1154:         *ja = tja;
1155:       }
1156:     } else {
1157:       nz = a->i[A->rmap->n/bs];
1158:       /* malloc space and  add 1 to i and j indices */
1159:       PetscMalloc1(A->rmap->n/bs+1,ia);
1160:       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1161:       if (ja) {
1162:         PetscMalloc1(nz,ja);
1163:         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1164:       }
1165:     }
1166:   } else {
1167:     *ia = tia;
1168:     if (ja) *ja = tja;
1169:   }
1170:   return(0);
1171: }

1173: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1174: {

1178:   if (!ia) return(0);
1179:   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1180:     PetscFree(*ia);
1181:     if (ja) {PetscFree(*ja);}
1182:   }
1183:   return(0);
1184: }

1186: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1187: {
1188:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1192: #if defined(PETSC_USE_LOG)
1193:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1194: #endif
1195:   MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1196:   ISDestroy(&a->row);
1197:   ISDestroy(&a->col);
1198:   if (a->free_diag) {PetscFree(a->diag);}
1199:   PetscFree(a->idiag);
1200:   if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1201:   PetscFree(a->solve_work);
1202:   PetscFree(a->mult_work);
1203:   PetscFree(a->sor_workt);
1204:   PetscFree(a->sor_work);
1205:   ISDestroy(&a->icol);
1206:   PetscFree(a->saved_values);
1207:   PetscFree2(a->compressedrow.i,a->compressedrow.rindex);

1209:   MatDestroy(&a->sbaijMat);
1210:   MatDestroy(&a->parent);
1211:   PetscFree(A->data);

1213:   PetscObjectChangeTypeName((PetscObject)A,0);
1214:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);
1215:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);
1216:   PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);
1217:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
1218:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
1219:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);
1220:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);
1221:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);
1222:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);
1223:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);
1224:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);
1225:   PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);
1226: #if defined(PETSC_HAVE_HYPRE)
1227:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);
1228: #endif
1229:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);
1230:   PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);
1231:   return(0);
1232: }

1234: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1235: {
1236:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1240:   switch (op) {
1241:   case MAT_ROW_ORIENTED:
1242:     a->roworiented = flg;
1243:     break;
1244:   case MAT_KEEP_NONZERO_PATTERN:
1245:     a->keepnonzeropattern = flg;
1246:     break;
1247:   case MAT_NEW_NONZERO_LOCATIONS:
1248:     a->nonew = (flg ? 0 : 1);
1249:     break;
1250:   case MAT_NEW_NONZERO_LOCATION_ERR:
1251:     a->nonew = (flg ? -1 : 0);
1252:     break;
1253:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1254:     a->nonew = (flg ? -2 : 0);
1255:     break;
1256:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1257:     a->nounused = (flg ? -1 : 0);
1258:     break;
1259:   case MAT_NEW_DIAGONALS:
1260:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1261:   case MAT_USE_HASH_TABLE:
1262:   case MAT_SORTED_FULL:
1263:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1264:     break;
1265:   case MAT_SPD:
1266:   case MAT_SYMMETRIC:
1267:   case MAT_STRUCTURALLY_SYMMETRIC:
1268:   case MAT_HERMITIAN:
1269:   case MAT_SYMMETRY_ETERNAL:
1270:   case MAT_SUBMAT_SINGLEIS:
1271:   case MAT_STRUCTURE_ONLY:
1272:     /* These options are handled directly by MatSetOption() */
1273:     break;
1274:   default:
1275:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1276:   }
1277:   return(0);
1278: }

1280: /* used for both SeqBAIJ and SeqSBAIJ matrices */
1281: PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1282: {
1284:   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1285:   MatScalar      *aa_i;
1286:   PetscScalar    *v_i;

1289:   bs  = A->rmap->bs;
1290:   bs2 = bs*bs;
1291:   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);

1293:   bn  = row/bs;   /* Block number */
1294:   bp  = row % bs; /* Block Position */
1295:   M   = ai[bn+1] - ai[bn];
1296:   *nz = bs*M;

1298:   if (v) {
1299:     *v = 0;
1300:     if (*nz) {
1301:       PetscMalloc1(*nz,v);
1302:       for (i=0; i<M; i++) { /* for each block in the block row */
1303:         v_i  = *v + i*bs;
1304:         aa_i = aa + bs2*(ai[bn] + i);
1305:         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1306:       }
1307:     }
1308:   }

1310:   if (idx) {
1311:     *idx = 0;
1312:     if (*nz) {
1313:       PetscMalloc1(*nz,idx);
1314:       for (i=0; i<M; i++) { /* for each block in the block row */
1315:         idx_i = *idx + i*bs;
1316:         itmp  = bs*aj[ai[bn] + i];
1317:         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1318:       }
1319:     }
1320:   }
1321:   return(0);
1322: }

1324: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1325: {
1326:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1330:   MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);
1331:   return(0);
1332: }

1334: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1335: {

1339:   if (idx) {PetscFree(*idx);}
1340:   if (v)   {PetscFree(*v);}
1341:   return(0);
1342: }

1344: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1345: {
1346:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1347:   Mat            C;
1349:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1350:   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1351:   MatScalar      *ata,*aa=a->a;

1354:   PetscCalloc1(1+nbs,&atfill);
1355:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1356:     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */

1358:     MatCreate(PetscObjectComm((PetscObject)A),&C);
1359:     MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1360:     MatSetType(C,((PetscObject)A)->type_name);
1361:     MatSeqBAIJSetPreallocation(C,bs,0,atfill);

1363:     at  = (Mat_SeqBAIJ*)C->data;
1364:     ati = at->i;
1365:     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1366:   } else {
1367:     C = *B;
1368:     at = (Mat_SeqBAIJ*)C->data;
1369:     ati = at->i;
1370:   }

1372:   atj = at->j;
1373:   ata = at->a;

1375:   /* Copy ati into atfill so we have locations of the next free space in atj */
1376:   PetscArraycpy(atfill,ati,nbs);

1378:   /* Walk through A row-wise and mark nonzero entries of A^T. */
1379:   for (i=0; i<mbs; i++) {
1380:     anzj = ai[i+1] - ai[i];
1381:     for (j=0; j<anzj; j++) {
1382:       atj[atfill[*aj]] = i;
1383:       for (kr=0; kr<bs; kr++) {
1384:         for (k=0; k<bs; k++) {
1385:           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1386:         }
1387:       }
1388:       atfill[*aj++] += 1;
1389:     }
1390:   }
1391:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1392:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

1394:   /* Clean up temporary space and complete requests. */
1395:   PetscFree(atfill);

1397:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1398:     MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));
1399:     *B = C;
1400:   } else {
1401:     MatHeaderMerge(A,&C);
1402:   }
1403:   return(0);
1404: }

1406: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1407: {
1409:   Mat            Btrans;

1412:   *f   = PETSC_FALSE;
1413:   MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1414:   MatEqual_SeqBAIJ(B,Btrans,f);
1415:   MatDestroy(&Btrans);
1416:   return(0);
1417: }

1419: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1420: {
1421:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1423:   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1424:   int            fd;
1425:   PetscScalar    *aa;
1426:   FILE           *file;

1429:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1430:   PetscMalloc1(4+A->rmap->N,&col_lens);
1431:   col_lens[0] = MAT_FILE_CLASSID;

1433:   col_lens[1] = A->rmap->N;
1434:   col_lens[2] = A->cmap->n;
1435:   col_lens[3] = a->nz*bs2;

1437:   /* store lengths of each row and write (including header) to file */
1438:   count = 0;
1439:   for (i=0; i<a->mbs; i++) {
1440:     for (j=0; j<bs; j++) {
1441:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1442:     }
1443:   }
1444:   PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1445:   PetscFree(col_lens);

1447:   /* store column indices (zero start index) */
1448:   PetscMalloc1((a->nz+1)*bs2,&jj);
1449:   count = 0;
1450:   for (i=0; i<a->mbs; i++) {
1451:     for (j=0; j<bs; j++) {
1452:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1453:         for (l=0; l<bs; l++) {
1454:           jj[count++] = bs*a->j[k] + l;
1455:         }
1456:       }
1457:     }
1458:   }
1459:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1460:   PetscFree(jj);

1462:   /* store nonzero values */
1463:   PetscMalloc1((a->nz+1)*bs2,&aa);
1464:   count = 0;
1465:   for (i=0; i<a->mbs; i++) {
1466:     for (j=0; j<bs; j++) {
1467:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1468:         for (l=0; l<bs; l++) {
1469:           aa[count++] = a->a[bs2*k + l*bs + j];
1470:         }
1471:       }
1472:     }
1473:   }
1474:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1475:   PetscFree(aa);

1477:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1478:   if (file) {
1479:     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1480:   }
1481:   return(0);
1482: }

1484: static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1485: {
1487:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1488:   PetscInt       i,bs = A->rmap->bs,k;

1491:   PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1492:   for (i=0; i<a->mbs; i++) {
1493:     PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);
1494:     for (k=a->i[i]; k<a->i[i+1]; k++) {
1495:       PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);
1496:     }
1497:     PetscViewerASCIIPrintf(viewer,"\n");
1498:   }
1499:   PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1500:   return(0);
1501: }

1503: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1504: {
1505:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1506:   PetscErrorCode    ierr;
1507:   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1508:   PetscViewerFormat format;

1511:   if (A->structure_only) {
1512:     MatView_SeqBAIJ_ASCII_structonly(A,viewer);
1513:     return(0);
1514:   }

1516:   PetscViewerGetFormat(viewer,&format);
1517:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1518:     PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
1519:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1520:     const char *matname;
1521:     Mat        aij;
1522:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1523:     PetscObjectGetName((PetscObject)A,&matname);
1524:     PetscObjectSetName((PetscObject)aij,matname);
1525:     MatView(aij,viewer);
1526:     MatDestroy(&aij);
1527:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1528:       return(0);
1529:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1530:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1531:     for (i=0; i<a->mbs; i++) {
1532:       for (j=0; j<bs; j++) {
1533:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1534:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1535:           for (l=0; l<bs; l++) {
1536: #if defined(PETSC_USE_COMPLEX)
1537:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1538:               PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1539:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1540:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1541:               PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1542:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1543:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1544:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1545:             }
1546: #else
1547:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1548:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1549:             }
1550: #endif
1551:           }
1552:         }
1553:         PetscViewerASCIIPrintf(viewer,"\n");
1554:       }
1555:     }
1556:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1557:   } else {
1558:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1559:     for (i=0; i<a->mbs; i++) {
1560:       for (j=0; j<bs; j++) {
1561:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1562:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1563:           for (l=0; l<bs; l++) {
1564: #if defined(PETSC_USE_COMPLEX)
1565:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1566:               PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1567:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1568:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1569:               PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1570:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1571:             } else {
1572:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1573:             }
1574: #else
1575:             PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1576: #endif
1577:           }
1578:         }
1579:         PetscViewerASCIIPrintf(viewer,"\n");
1580:       }
1581:     }
1582:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1583:   }
1584:   PetscViewerFlush(viewer);
1585:   return(0);
1586: }

1588:  #include <petscdraw.h>
1589: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1590: {
1591:   Mat               A = (Mat) Aa;
1592:   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1593:   PetscErrorCode    ierr;
1594:   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1595:   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1596:   MatScalar         *aa;
1597:   PetscViewer       viewer;
1598:   PetscViewerFormat format;

1601:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1602:   PetscViewerGetFormat(viewer,&format);
1603:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1605:   /* loop over matrix elements drawing boxes */

1607:   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1608:     PetscDrawCollectiveBegin(draw);
1609:     /* Blue for negative, Cyan for zero and  Red for positive */
1610:     color = PETSC_DRAW_BLUE;
1611:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1612:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1613:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1614:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1615:         aa  = a->a + j*bs2;
1616:         for (k=0; k<bs; k++) {
1617:           for (l=0; l<bs; l++) {
1618:             if (PetscRealPart(*aa++) >=  0.) continue;
1619:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1620:           }
1621:         }
1622:       }
1623:     }
1624:     color = PETSC_DRAW_CYAN;
1625:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1626:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1627:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1628:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1629:         aa  = a->a + j*bs2;
1630:         for (k=0; k<bs; k++) {
1631:           for (l=0; l<bs; l++) {
1632:             if (PetscRealPart(*aa++) != 0.) continue;
1633:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1634:           }
1635:         }
1636:       }
1637:     }
1638:     color = PETSC_DRAW_RED;
1639:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1640:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1641:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1642:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1643:         aa  = a->a + j*bs2;
1644:         for (k=0; k<bs; k++) {
1645:           for (l=0; l<bs; l++) {
1646:             if (PetscRealPart(*aa++) <= 0.) continue;
1647:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1648:           }
1649:         }
1650:       }
1651:     }
1652:     PetscDrawCollectiveEnd(draw);
1653:   } else {
1654:     /* use contour shading to indicate magnitude of values */
1655:     /* first determine max of all nonzero values */
1656:     PetscReal minv = 0.0, maxv = 0.0;
1657:     PetscDraw popup;

1659:     for (i=0; i<a->nz*a->bs2; i++) {
1660:       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1661:     }
1662:     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1663:     PetscDrawGetPopup(draw,&popup);
1664:     PetscDrawScalePopup(popup,0.0,maxv);

1666:     PetscDrawCollectiveBegin(draw);
1667:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1668:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1669:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1670:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1671:         aa  = a->a + j*bs2;
1672:         for (k=0; k<bs; k++) {
1673:           for (l=0; l<bs; l++) {
1674:             MatScalar v = *aa++;
1675:             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1676:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1677:           }
1678:         }
1679:       }
1680:     }
1681:     PetscDrawCollectiveEnd(draw);
1682:   }
1683:   return(0);
1684: }

1686: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1687: {
1689:   PetscReal      xl,yl,xr,yr,w,h;
1690:   PetscDraw      draw;
1691:   PetscBool      isnull;

1694:   PetscViewerDrawGetDraw(viewer,0,&draw);
1695:   PetscDrawIsNull(draw,&isnull);
1696:   if (isnull) return(0);

1698:   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1699:   xr  += w;          yr += h;        xl = -w;     yl = -h;
1700:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1701:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1702:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1703:   PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1704:   PetscDrawSave(draw);
1705:   return(0);
1706: }

1708: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1709: {
1711:   PetscBool      iascii,isbinary,isdraw;

1714:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1715:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1716:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1717:   if (iascii) {
1718:     MatView_SeqBAIJ_ASCII(A,viewer);
1719:   } else if (isbinary) {
1720:     MatView_SeqBAIJ_Binary(A,viewer);
1721:   } else if (isdraw) {
1722:     MatView_SeqBAIJ_Draw(A,viewer);
1723:   } else {
1724:     Mat B;
1725:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1726:     MatView(B,viewer);
1727:     MatDestroy(&B);
1728:   }
1729:   return(0);
1730: }


1733: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1734: {
1735:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1736:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1737:   PetscInt    *ai = a->i,*ailen = a->ilen;
1738:   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1739:   MatScalar   *ap,*aa = a->a;

1742:   for (k=0; k<m; k++) { /* loop over rows */
1743:     row = im[k]; brow = row/bs;
1744:     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1745:     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1746:     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1747:     nrow = ailen[brow];
1748:     for (l=0; l<n; l++) { /* loop over columns */
1749:       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1750:       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1751:       col  = in[l];
1752:       bcol = col/bs;
1753:       cidx = col%bs;
1754:       ridx = row%bs;
1755:       high = nrow;
1756:       low  = 0; /* assume unsorted */
1757:       while (high-low > 5) {
1758:         t = (low+high)/2;
1759:         if (rp[t] > bcol) high = t;
1760:         else             low  = t;
1761:       }
1762:       for (i=low; i<high; i++) {
1763:         if (rp[i] > bcol) break;
1764:         if (rp[i] == bcol) {
1765:           *v++ = ap[bs2*i+bs*cidx+ridx];
1766:           goto finished;
1767:         }
1768:       }
1769:       *v++ = 0.0;
1770: finished:;
1771:     }
1772:   }
1773:   return(0);
1774: }

1776: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1777: {
1778:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1779:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1780:   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1781:   PetscErrorCode    ierr;
1782:   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1783:   PetscBool         roworiented=a->roworiented;
1784:   const PetscScalar *value     = v;
1785:   MatScalar         *ap=NULL,*aa = a->a,*bap;

1788:   if (roworiented) {
1789:     stepval = (n-1)*bs;
1790:   } else {
1791:     stepval = (m-1)*bs;
1792:   }
1793:   for (k=0; k<m; k++) { /* loop over added rows */
1794:     row = im[k];
1795:     if (row < 0) continue;
1796: #if defined(PETSC_USE_DEBUG)
1797:     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1798: #endif
1799:     rp   = aj + ai[row];
1800:     if (!A->structure_only) ap = aa + bs2*ai[row];
1801:     rmax = imax[row];
1802:     nrow = ailen[row];
1803:     low  = 0;
1804:     high = nrow;
1805:     for (l=0; l<n; l++) { /* loop over added columns */
1806:       if (in[l] < 0) continue;
1807: #if defined(PETSC_USE_DEBUG)
1808:       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1809: #endif
1810:       col = in[l];
1811:       if (!A->structure_only) {
1812:         if (roworiented) {
1813:           value = v + (k*(stepval+bs) + l)*bs;
1814:         } else {
1815:           value = v + (l*(stepval+bs) + k)*bs;
1816:         }
1817:       }
1818:       if (col <= lastcol) low = 0;
1819:       else high = nrow;
1820:       lastcol = col;
1821:       while (high-low > 7) {
1822:         t = (low+high)/2;
1823:         if (rp[t] > col) high = t;
1824:         else             low  = t;
1825:       }
1826:       for (i=low; i<high; i++) {
1827:         if (rp[i] > col) break;
1828:         if (rp[i] == col) {
1829:           if (A->structure_only) goto noinsert2;
1830:           bap = ap +  bs2*i;
1831:           if (roworiented) {
1832:             if (is == ADD_VALUES) {
1833:               for (ii=0; ii<bs; ii++,value+=stepval) {
1834:                 for (jj=ii; jj<bs2; jj+=bs) {
1835:                   bap[jj] += *value++;
1836:                 }
1837:               }
1838:             } else {
1839:               for (ii=0; ii<bs; ii++,value+=stepval) {
1840:                 for (jj=ii; jj<bs2; jj+=bs) {
1841:                   bap[jj] = *value++;
1842:                 }
1843:               }
1844:             }
1845:           } else {
1846:             if (is == ADD_VALUES) {
1847:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1848:                 for (jj=0; jj<bs; jj++) {
1849:                   bap[jj] += value[jj];
1850:                 }
1851:                 bap += bs;
1852:               }
1853:             } else {
1854:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1855:                 for (jj=0; jj<bs; jj++) {
1856:                   bap[jj]  = value[jj];
1857:                 }
1858:                 bap += bs;
1859:               }
1860:             }
1861:           }
1862:           goto noinsert2;
1863:         }
1864:       }
1865:       if (nonew == 1) goto noinsert2;
1866:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1867:       if (A->structure_only) {
1868:         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1869:       } else {
1870:         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1871:       }
1872:       N = nrow++ - 1; high++;
1873:       /* shift up all the later entries in this row */
1874:       PetscArraymove(rp+i+1,rp+i,N-i+1);
1875:       rp[i] = col;
1876:       if (!A->structure_only) {
1877:         PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
1878:         bap   = ap +  bs2*i;
1879:         if (roworiented) {
1880:           for (ii=0; ii<bs; ii++,value+=stepval) {
1881:             for (jj=ii; jj<bs2; jj+=bs) {
1882:               bap[jj] = *value++;
1883:             }
1884:           }
1885:         } else {
1886:           for (ii=0; ii<bs; ii++,value+=stepval) {
1887:             for (jj=0; jj<bs; jj++) {
1888:               *bap++ = *value++;
1889:             }
1890:           }
1891:         }
1892:       }
1893: noinsert2:;
1894:       low = i;
1895:     }
1896:     ailen[row] = nrow;
1897:   }
1898:   return(0);
1899: }

1901: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1902: {
1903:   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1904:   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1905:   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1907:   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1908:   MatScalar      *aa  = a->a,*ap;
1909:   PetscReal      ratio=0.6;

1912:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

1914:   if (m) rmax = ailen[0];
1915:   for (i=1; i<mbs; i++) {
1916:     /* move each row back by the amount of empty slots (fshift) before it*/
1917:     fshift += imax[i-1] - ailen[i-1];
1918:     rmax    = PetscMax(rmax,ailen[i]);
1919:     if (fshift) {
1920:       ip = aj + ai[i];
1921:       ap = aa + bs2*ai[i];
1922:       N  = ailen[i];
1923:       PetscArraymove(ip-fshift,ip,N);
1924:       if (!A->structure_only) {
1925:         PetscArraymove(ap-bs2*fshift,ap,bs2*N);
1926:       }
1927:     }
1928:     ai[i] = ai[i-1] + ailen[i-1];
1929:   }
1930:   if (mbs) {
1931:     fshift += imax[mbs-1] - ailen[mbs-1];
1932:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1933:   }

1935:   /* reset ilen and imax for each row */
1936:   a->nonzerorowcnt = 0;
1937:   if (A->structure_only) {
1938:     PetscFree2(a->imax,a->ilen);
1939:   } else { /* !A->structure_only */
1940:     for (i=0; i<mbs; i++) {
1941:       ailen[i] = imax[i] = ai[i+1] - ai[i];
1942:       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1943:     }
1944:   }
1945:   a->nz = ai[mbs];

1947:   /* diagonals may have moved, so kill the diagonal pointers */
1948:   a->idiagvalid = PETSC_FALSE;
1949:   if (fshift && a->diag) {
1950:     PetscFree(a->diag);
1951:     PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));
1952:     a->diag = 0;
1953:   }
1954:   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1955:   PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1956:   PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1957:   PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);

1959:   A->info.mallocs    += a->reallocs;
1960:   a->reallocs         = 0;
1961:   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1962:   a->rmax             = rmax;

1964:   if (!A->structure_only) {
1965:     MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);
1966:   }
1967:   return(0);
1968: }

1970: /*
1971:    This function returns an array of flags which indicate the locations of contiguous
1972:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1973:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1974:    Assume: sizes should be long enough to hold all the values.
1975: */
1976: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1977: {
1978:   PetscInt  i,j,k,row;
1979:   PetscBool flg;

1982:   for (i=0,j=0; i<n; j++) {
1983:     row = idx[i];
1984:     if (row%bs!=0) { /* Not the begining of a block */
1985:       sizes[j] = 1;
1986:       i++;
1987:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1988:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1989:       i++;
1990:     } else { /* Begining of the block, so check if the complete block exists */
1991:       flg = PETSC_TRUE;
1992:       for (k=1; k<bs; k++) {
1993:         if (row+k != idx[i+k]) { /* break in the block */
1994:           flg = PETSC_FALSE;
1995:           break;
1996:         }
1997:       }
1998:       if (flg) { /* No break in the bs */
1999:         sizes[j] = bs;
2000:         i       += bs;
2001:       } else {
2002:         sizes[j] = 1;
2003:         i++;
2004:       }
2005:     }
2006:   }
2007:   *bs_max = j;
2008:   return(0);
2009: }

2011: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2012: {
2013:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2014:   PetscErrorCode    ierr;
2015:   PetscInt          i,j,k,count,*rows;
2016:   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2017:   PetscScalar       zero = 0.0;
2018:   MatScalar         *aa;
2019:   const PetscScalar *xx;
2020:   PetscScalar       *bb;

2023:   /* fix right hand side if needed */
2024:   if (x && b) {
2025:     VecGetArrayRead(x,&xx);
2026:     VecGetArray(b,&bb);
2027:     for (i=0; i<is_n; i++) {
2028:       bb[is_idx[i]] = diag*xx[is_idx[i]];
2029:     }
2030:     VecRestoreArrayRead(x,&xx);
2031:     VecRestoreArray(b,&bb);
2032:   }

2034:   /* Make a copy of the IS and  sort it */
2035:   /* allocate memory for rows,sizes */
2036:   PetscMalloc2(is_n,&rows,2*is_n,&sizes);

2038:   /* copy IS values to rows, and sort them */
2039:   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2040:   PetscSortInt(is_n,rows);

2042:   if (baij->keepnonzeropattern) {
2043:     for (i=0; i<is_n; i++) sizes[i] = 1;
2044:     bs_max          = is_n;
2045:   } else {
2046:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2047:     A->nonzerostate++;
2048:   }

2050:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2051:     row = rows[j];
2052:     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2053:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2054:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2055:     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2056:       if (diag != (PetscScalar)0.0) {
2057:         if (baij->ilen[row/bs] > 0) {
2058:           baij->ilen[row/bs]       = 1;
2059:           baij->j[baij->i[row/bs]] = row/bs;

2061:           PetscArrayzero(aa,count*bs);
2062:         }
2063:         /* Now insert all the diagonal values for this bs */
2064:         for (k=0; k<bs; k++) {
2065:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2066:         }
2067:       } else { /* (diag == 0.0) */
2068:         baij->ilen[row/bs] = 0;
2069:       } /* end (diag == 0.0) */
2070:     } else { /* (sizes[i] != bs) */
2071: #if defined(PETSC_USE_DEBUG)
2072:       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2073: #endif
2074:       for (k=0; k<count; k++) {
2075:         aa[0] =  zero;
2076:         aa   += bs;
2077:       }
2078:       if (diag != (PetscScalar)0.0) {
2079:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2080:       }
2081:     }
2082:   }

2084:   PetscFree2(rows,sizes);
2085:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2086:   return(0);
2087: }

2089: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2090: {
2091:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2092:   PetscErrorCode    ierr;
2093:   PetscInt          i,j,k,count;
2094:   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2095:   PetscScalar       zero = 0.0;
2096:   MatScalar         *aa;
2097:   const PetscScalar *xx;
2098:   PetscScalar       *bb;
2099:   PetscBool         *zeroed,vecs = PETSC_FALSE;

2102:   /* fix right hand side if needed */
2103:   if (x && b) {
2104:     VecGetArrayRead(x,&xx);
2105:     VecGetArray(b,&bb);
2106:     vecs = PETSC_TRUE;
2107:   }

2109:   /* zero the columns */
2110:   PetscCalloc1(A->rmap->n,&zeroed);
2111:   for (i=0; i<is_n; i++) {
2112:     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2113:     zeroed[is_idx[i]] = PETSC_TRUE;
2114:   }
2115:   for (i=0; i<A->rmap->N; i++) {
2116:     if (!zeroed[i]) {
2117:       row = i/bs;
2118:       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2119:         for (k=0; k<bs; k++) {
2120:           col = bs*baij->j[j] + k;
2121:           if (zeroed[col]) {
2122:             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2123:             if (vecs) bb[i] -= aa[0]*xx[col];
2124:             aa[0] = 0.0;
2125:           }
2126:         }
2127:       }
2128:     } else if (vecs) bb[i] = diag*xx[i];
2129:   }
2130:   PetscFree(zeroed);
2131:   if (vecs) {
2132:     VecRestoreArrayRead(x,&xx);
2133:     VecRestoreArray(b,&bb);
2134:   }

2136:   /* zero the rows */
2137:   for (i=0; i<is_n; i++) {
2138:     row   = is_idx[i];
2139:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2140:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2141:     for (k=0; k<count; k++) {
2142:       aa[0] =  zero;
2143:       aa   += bs;
2144:     }
2145:     if (diag != (PetscScalar)0.0) {
2146:       (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2147:     }
2148:   }
2149:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2150:   return(0);
2151: }

2153: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2154: {
2155:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2156:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2157:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2158:   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2160:   PetscInt       ridx,cidx,bs2=a->bs2;
2161:   PetscBool      roworiented=a->roworiented;
2162:   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;

2165:   for (k=0; k<m; k++) { /* loop over added rows */
2166:     row  = im[k];
2167:     brow = row/bs;
2168:     if (row < 0) continue;
2169: #if defined(PETSC_USE_DEBUG)
2170:     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2171: #endif
2172:     rp   = aj + ai[brow];
2173:     if (!A->structure_only) ap = aa + bs2*ai[brow];
2174:     rmax = imax[brow];
2175:     nrow = ailen[brow];
2176:     low  = 0;
2177:     high = nrow;
2178:     for (l=0; l<n; l++) { /* loop over added columns */
2179:       if (in[l] < 0) continue;
2180: #if defined(PETSC_USE_DEBUG)
2181:       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2182: #endif
2183:       col  = in[l]; bcol = col/bs;
2184:       ridx = row % bs; cidx = col % bs;
2185:       if (!A->structure_only) {
2186:         if (roworiented) {
2187:           value = v[l + k*n];
2188:         } else {
2189:           value = v[k + l*m];
2190:         }
2191:       }
2192:       if (col <= lastcol) low = 0; else high = nrow;
2193:       lastcol = col;
2194:       while (high-low > 7) {
2195:         t = (low+high)/2;
2196:         if (rp[t] > bcol) high = t;
2197:         else              low  = t;
2198:       }
2199:       for (i=low; i<high; i++) {
2200:         if (rp[i] > bcol) break;
2201:         if (rp[i] == bcol) {
2202:           bap = ap +  bs2*i + bs*cidx + ridx;
2203:           if (!A->structure_only) {
2204:             if (is == ADD_VALUES) *bap += value;
2205:             else                  *bap  = value;
2206:           }
2207:           goto noinsert1;
2208:         }
2209:       }
2210:       if (nonew == 1) goto noinsert1;
2211:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2212:       if (A->structure_only) {
2213:         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2214:       } else {
2215:         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2216:       }
2217:       N = nrow++ - 1; high++;
2218:       /* shift up all the later entries in this row */
2219:       PetscArraymove(rp+i+1,rp+i,N-i+1);
2220:       rp[i] = bcol;
2221:       if (!A->structure_only) {
2222:         PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
2223:         PetscArrayzero(ap+bs2*i,bs2);
2224:         ap[bs2*i + bs*cidx + ridx] = value;
2225:       }
2226:       a->nz++;
2227:       A->nonzerostate++;
2228: noinsert1:;
2229:       low = i;
2230:     }
2231:     ailen[brow] = nrow;
2232:   }
2233:   return(0);
2234: }

2236: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2237: {
2238:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2239:   Mat            outA;
2241:   PetscBool      row_identity,col_identity;

2244:   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2245:   ISIdentity(row,&row_identity);
2246:   ISIdentity(col,&col_identity);
2247:   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");

2249:   outA            = inA;
2250:   inA->factortype = MAT_FACTOR_LU;
2251:   PetscFree(inA->solvertype);
2252:   PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);

2254:   MatMarkDiagonal_SeqBAIJ(inA);

2256:   PetscObjectReference((PetscObject)row);
2257:   ISDestroy(&a->row);
2258:   a->row = row;
2259:   PetscObjectReference((PetscObject)col);
2260:   ISDestroy(&a->col);
2261:   a->col = col;

2263:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2264:   ISDestroy(&a->icol);
2265:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2266:   PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);

2268:   MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2269:   if (!a->solve_work) {
2270:     PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);
2271:     PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2272:   }
2273:   MatLUFactorNumeric(outA,inA,info);
2274:   return(0);
2275: }

2277: PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2278: {
2279:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2280:   PetscInt    i,nz,mbs;

2283:   nz  = baij->maxnz;
2284:   mbs = baij->mbs;
2285:   for (i=0; i<nz; i++) {
2286:     baij->j[i] = indices[i];
2287:   }
2288:   baij->nz = nz;
2289:   for (i=0; i<mbs; i++) {
2290:     baij->ilen[i] = baij->imax[i];
2291:   }
2292:   return(0);
2293: }

2295: /*@
2296:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2297:        in the matrix.

2299:   Input Parameters:
2300: +  mat - the SeqBAIJ matrix
2301: -  indices - the column indices

2303:   Level: advanced

2305:   Notes:
2306:     This can be called if you have precomputed the nonzero structure of the
2307:   matrix and want to provide it to the matrix object to improve the performance
2308:   of the MatSetValues() operation.

2310:     You MUST have set the correct numbers of nonzeros per row in the call to
2311:   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.

2313:     MUST be called before any calls to MatSetValues();

2315: @*/
2316: PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2317: {

2323:   PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2324:   return(0);
2325: }

2327: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2328: {
2329:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2331:   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2332:   PetscReal      atmp;
2333:   PetscScalar    *x,zero = 0.0;
2334:   MatScalar      *aa;
2335:   PetscInt       ncols,brow,krow,kcol;

2338:   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2339:   bs  = A->rmap->bs;
2340:   aa  = a->a;
2341:   ai  = a->i;
2342:   aj  = a->j;
2343:   mbs = a->mbs;

2345:   VecSet(v,zero);
2346:   VecGetArray(v,&x);
2347:   VecGetLocalSize(v,&n);
2348:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2349:   for (i=0; i<mbs; i++) {
2350:     ncols = ai[1] - ai[0]; ai++;
2351:     brow  = bs*i;
2352:     for (j=0; j<ncols; j++) {
2353:       for (kcol=0; kcol<bs; kcol++) {
2354:         for (krow=0; krow<bs; krow++) {
2355:           atmp = PetscAbsScalar(*aa);aa++;
2356:           row  = brow + krow;   /* row index */
2357:           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2358:         }
2359:       }
2360:       aj++;
2361:     }
2362:   }
2363:   VecRestoreArray(v,&x);
2364:   return(0);
2365: }

2367: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2368: {

2372:   /* If the two matrices have the same copy implementation, use fast copy. */
2373:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2374:     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2375:     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2376:     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;

2378:     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2379:     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2380:     PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);
2381:     PetscObjectStateIncrease((PetscObject)B);
2382:   } else {
2383:     MatCopy_Basic(A,B,str);
2384:   }
2385:   return(0);
2386: }

2388: PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2389: {

2393:   MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);
2394:   return(0);
2395: }

2397: static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2398: {
2399:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;

2402:   *array = a->a;
2403:   return(0);
2404: }

2406: static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2407: {
2409:   *array = NULL;
2410:   return(0);
2411: }

2413: PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2414: {
2415:   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2416:   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2417:   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;

2421:   /* Set the number of nonzeros in the new matrix */
2422:   MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);
2423:   return(0);
2424: }

2426: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2427: {
2428:   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2430:   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2431:   PetscBLASInt   one=1;

2434:   if (str == SAME_NONZERO_PATTERN) {
2435:     PetscScalar  alpha = a;
2436:     PetscBLASInt bnz;
2437:     PetscBLASIntCast(x->nz*bs2,&bnz);
2438:     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2439:     PetscObjectStateIncrease((PetscObject)Y);
2440:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2441:     MatAXPY_Basic(Y,a,X,str);
2442:   } else {
2443:     Mat      B;
2444:     PetscInt *nnz;
2445:     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2446:     PetscMalloc1(Y->rmap->N,&nnz);
2447:     MatCreate(PetscObjectComm((PetscObject)Y),&B);
2448:     PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2449:     MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2450:     MatSetBlockSizesFromMats(B,Y,Y);
2451:     MatSetType(B,(MatType) ((PetscObject)Y)->type_name);
2452:     MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);
2453:     MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2454:     MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2455:     MatHeaderReplace(Y,&B);
2456:     PetscFree(nnz);
2457:   }
2458:   return(0);
2459: }

2461: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2462: {
2463:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2464:   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2465:   MatScalar   *aa = a->a;

2468:   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2469:   return(0);
2470: }

2472: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2473: {
2474:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2475:   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2476:   MatScalar   *aa = a->a;

2479:   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2480:   return(0);
2481: }

2483: /*
2484:     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2485: */
2486: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2487: {
2488:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2490:   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2491:   PetscInt       nz = a->i[m],row,*jj,mr,col;

2494:   *nn = n;
2495:   if (!ia) return(0);
2496:   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2497:   else {
2498:     PetscCalloc1(n,&collengths);
2499:     PetscMalloc1(n+1,&cia);
2500:     PetscMalloc1(nz,&cja);
2501:     jj   = a->j;
2502:     for (i=0; i<nz; i++) {
2503:       collengths[jj[i]]++;
2504:     }
2505:     cia[0] = oshift;
2506:     for (i=0; i<n; i++) {
2507:       cia[i+1] = cia[i] + collengths[i];
2508:     }
2509:     PetscArrayzero(collengths,n);
2510:     jj   = a->j;
2511:     for (row=0; row<m; row++) {
2512:       mr = a->i[row+1] - a->i[row];
2513:       for (i=0; i<mr; i++) {
2514:         col = *jj++;

2516:         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2517:       }
2518:     }
2519:     PetscFree(collengths);
2520:     *ia  = cia; *ja = cja;
2521:   }
2522:   return(0);
2523: }

2525: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2526: {

2530:   if (!ia) return(0);
2531:   PetscFree(*ia);
2532:   PetscFree(*ja);
2533:   return(0);
2534: }

2536: /*
2537:  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2538:  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2539:  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2540:  */
2541: PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2542: {
2543:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2545:   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2546:   PetscInt       nz = a->i[m],row,*jj,mr,col;
2547:   PetscInt       *cspidx;

2550:   *nn = n;
2551:   if (!ia) return(0);

2553:   PetscCalloc1(n,&collengths);
2554:   PetscMalloc1(n+1,&cia);
2555:   PetscMalloc1(nz,&cja);
2556:   PetscMalloc1(nz,&cspidx);
2557:   jj   = a->j;
2558:   for (i=0; i<nz; i++) {
2559:     collengths[jj[i]]++;
2560:   }
2561:   cia[0] = oshift;
2562:   for (i=0; i<n; i++) {
2563:     cia[i+1] = cia[i] + collengths[i];
2564:   }
2565:   PetscArrayzero(collengths,n);
2566:   jj   = a->j;
2567:   for (row=0; row<m; row++) {
2568:     mr = a->i[row+1] - a->i[row];
2569:     for (i=0; i<mr; i++) {
2570:       col = *jj++;
2571:       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2572:       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2573:     }
2574:   }
2575:   PetscFree(collengths);
2576:   *ia    = cia;
2577:   *ja    = cja;
2578:   *spidx = cspidx;
2579:   return(0);
2580: }

2582: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2583: {

2587:   MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);
2588:   PetscFree(*spidx);
2589:   return(0);
2590: }

2592: PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2593: {
2595:   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;

2598:   if (!Y->preallocated || !aij->nz) {
2599:     MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);
2600:   }
2601:   MatShift_Basic(Y,a);
2602:   return(0);
2603: }

2605: /* -------------------------------------------------------------------*/
2606: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2607:                                        MatGetRow_SeqBAIJ,
2608:                                        MatRestoreRow_SeqBAIJ,
2609:                                        MatMult_SeqBAIJ_N,
2610:                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2611:                                        MatMultTranspose_SeqBAIJ,
2612:                                        MatMultTransposeAdd_SeqBAIJ,
2613:                                        0,
2614:                                        0,
2615:                                        0,
2616:                                /* 10*/ 0,
2617:                                        MatLUFactor_SeqBAIJ,
2618:                                        0,
2619:                                        0,
2620:                                        MatTranspose_SeqBAIJ,
2621:                                /* 15*/ MatGetInfo_SeqBAIJ,
2622:                                        MatEqual_SeqBAIJ,
2623:                                        MatGetDiagonal_SeqBAIJ,
2624:                                        MatDiagonalScale_SeqBAIJ,
2625:                                        MatNorm_SeqBAIJ,
2626:                                /* 20*/ 0,
2627:                                        MatAssemblyEnd_SeqBAIJ,
2628:                                        MatSetOption_SeqBAIJ,
2629:                                        MatZeroEntries_SeqBAIJ,
2630:                                /* 24*/ MatZeroRows_SeqBAIJ,
2631:                                        0,
2632:                                        0,
2633:                                        0,
2634:                                        0,
2635:                                /* 29*/ MatSetUp_SeqBAIJ,
2636:                                        0,
2637:                                        0,
2638:                                        0,
2639:                                        0,
2640:                                /* 34*/ MatDuplicate_SeqBAIJ,
2641:                                        0,
2642:                                        0,
2643:                                        MatILUFactor_SeqBAIJ,
2644:                                        0,
2645:                                /* 39*/ MatAXPY_SeqBAIJ,
2646:                                        MatCreateSubMatrices_SeqBAIJ,
2647:                                        MatIncreaseOverlap_SeqBAIJ,
2648:                                        MatGetValues_SeqBAIJ,
2649:                                        MatCopy_SeqBAIJ,
2650:                                /* 44*/ 0,
2651:                                        MatScale_SeqBAIJ,
2652:                                        MatShift_SeqBAIJ,
2653:                                        0,
2654:                                        MatZeroRowsColumns_SeqBAIJ,
2655:                                /* 49*/ 0,
2656:                                        MatGetRowIJ_SeqBAIJ,
2657:                                        MatRestoreRowIJ_SeqBAIJ,
2658:                                        MatGetColumnIJ_SeqBAIJ,
2659:                                        MatRestoreColumnIJ_SeqBAIJ,
2660:                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2661:                                        0,
2662:                                        0,
2663:                                        0,
2664:                                        MatSetValuesBlocked_SeqBAIJ,
2665:                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2666:                                        MatDestroy_SeqBAIJ,
2667:                                        MatView_SeqBAIJ,
2668:                                        0,
2669:                                        0,
2670:                                /* 64*/ 0,
2671:                                        0,
2672:                                        0,
2673:                                        0,
2674:                                        0,
2675:                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2676:                                        0,
2677:                                        MatConvert_Basic,
2678:                                        0,
2679:                                        0,
2680:                                /* 74*/ 0,
2681:                                        MatFDColoringApply_BAIJ,
2682:                                        0,
2683:                                        0,
2684:                                        0,
2685:                                /* 79*/ 0,
2686:                                        0,
2687:                                        0,
2688:                                        0,
2689:                                        MatLoad_SeqBAIJ,
2690:                                /* 84*/ 0,
2691:                                        0,
2692:                                        0,
2693:                                        0,
2694:                                        0,
2695:                                /* 89*/ 0,
2696:                                        0,
2697:                                        0,
2698:                                        0,
2699:                                        0,
2700:                                /* 94*/ 0,
2701:                                        0,
2702:                                        0,
2703:                                        0,
2704:                                        0,
2705:                                /* 99*/ 0,
2706:                                        0,
2707:                                        0,
2708:                                        0,
2709:                                        0,
2710:                                /*104*/ 0,
2711:                                        MatRealPart_SeqBAIJ,
2712:                                        MatImaginaryPart_SeqBAIJ,
2713:                                        0,
2714:                                        0,
2715:                                /*109*/ 0,
2716:                                        0,
2717:                                        0,
2718:                                        0,
2719:                                        MatMissingDiagonal_SeqBAIJ,
2720:                                /*114*/ 0,
2721:                                        0,
2722:                                        0,
2723:                                        0,
2724:                                        0,
2725:                                /*119*/ 0,
2726:                                        0,
2727:                                        MatMultHermitianTranspose_SeqBAIJ,
2728:                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2729:                                        0,
2730:                                /*124*/ 0,
2731:                                        0,
2732:                                        MatInvertBlockDiagonal_SeqBAIJ,
2733:                                        0,
2734:                                        0,
2735:                                /*129*/ 0,
2736:                                        0,
2737:                                        0,
2738:                                        0,
2739:                                        0,
2740:                                /*134*/ 0,
2741:                                        0,
2742:                                        0,
2743:                                        0,
2744:                                        0,
2745:                                /*139*/ MatSetBlockSizes_Default,
2746:                                        0,
2747:                                        0,
2748:                                        MatFDColoringSetUp_SeqXAIJ,
2749:                                        0,
2750:                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2751:                                        MatDestroySubMatrices_SeqBAIJ
2752: };

2754: PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2755: {
2756:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2757:   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;

2761:   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");

2763:   /* allocate space for values if not already there */
2764:   if (!aij->saved_values) {
2765:     PetscMalloc1(nz+1,&aij->saved_values);
2766:     PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));
2767:   }

2769:   /* copy values over */
2770:   PetscArraycpy(aij->saved_values,aij->a,nz);
2771:   return(0);
2772: }

2774: PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2775: {
2776:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2778:   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;

2781:   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2782:   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");

2784:   /* copy values over */
2785:   PetscArraycpy(aij->a,aij->saved_values,nz);
2786:   return(0);
2787: }

2789: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2790: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);

2792: PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2793: {
2794:   Mat_SeqBAIJ    *b;
2796:   PetscInt       i,mbs,nbs,bs2;
2797:   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;

2800:   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2801:   if (nz == MAT_SKIP_ALLOCATION) {
2802:     skipallocation = PETSC_TRUE;
2803:     nz             = 0;
2804:   }

2806:   MatSetBlockSize(B,PetscAbs(bs));
2807:   PetscLayoutSetUp(B->rmap);
2808:   PetscLayoutSetUp(B->cmap);
2809:   PetscLayoutGetBlockSize(B->rmap,&bs);

2811:   B->preallocated = PETSC_TRUE;

2813:   mbs = B->rmap->n/bs;
2814:   nbs = B->cmap->n/bs;
2815:   bs2 = bs*bs;

2817:   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);

2819:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2820:   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2821:   if (nnz) {
2822:     for (i=0; i<mbs; i++) {
2823:       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2824:       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2825:     }
2826:   }

2828:   b    = (Mat_SeqBAIJ*)B->data;
2829:   PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2830:   PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);
2831:   PetscOptionsEnd();

2833:   if (!flg) {
2834:     switch (bs) {
2835:     case 1:
2836:       B->ops->mult    = MatMult_SeqBAIJ_1;
2837:       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2838:       break;
2839:     case 2:
2840:       B->ops->mult    = MatMult_SeqBAIJ_2;
2841:       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2842:       break;
2843:     case 3:
2844:       B->ops->mult    = MatMult_SeqBAIJ_3;
2845:       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2846:       break;
2847:     case 4:
2848:       B->ops->mult    = MatMult_SeqBAIJ_4;
2849:       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2850:       break;
2851:     case 5:
2852:       B->ops->mult    = MatMult_SeqBAIJ_5;
2853:       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2854:       break;
2855:     case 6:
2856:       B->ops->mult    = MatMult_SeqBAIJ_6;
2857:       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2858:       break;
2859:     case 7:
2860:       B->ops->mult    = MatMult_SeqBAIJ_7;
2861:       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2862:       break;
2863:     case 9:
2864: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2865:       B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2866:       B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2867: #else
2868:       B->ops->mult    = MatMult_SeqBAIJ_N;
2869:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2870: #endif
2871:       break;
2872:     case 11:
2873:       B->ops->mult    = MatMult_SeqBAIJ_11;
2874:       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2875:       break;
2876:     case 15:
2877:       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2878:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2879:       break;
2880:     default:
2881:       B->ops->mult    = MatMult_SeqBAIJ_N;
2882:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2883:       break;
2884:     }
2885:   }
2886:   B->ops->sor = MatSOR_SeqBAIJ;
2887:   b->mbs = mbs;
2888:   b->nbs = nbs;
2889:   if (!skipallocation) {
2890:     if (!b->imax) {
2891:       PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);
2892:       PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));

2894:       b->free_imax_ilen = PETSC_TRUE;
2895:     }
2896:     /* b->ilen will count nonzeros in each block row so far. */
2897:     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2898:     if (!nnz) {
2899:       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2900:       else if (nz < 0) nz = 1;
2901:       nz = PetscMin(nz,nbs);
2902:       for (i=0; i<mbs; i++) b->imax[i] = nz;
2903:       nz = nz*mbs;
2904:     } else {
2905:       PetscInt64 nz64 = 0;
2906:       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
2907:       PetscIntCast(nz64,&nz);
2908:     }

2910:     /* allocate the matrix space */
2911:     MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2912:     if (B->structure_only) {
2913:       PetscMalloc1(nz,&b->j);
2914:       PetscMalloc1(B->rmap->N+1,&b->i);
2915:       PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));
2916:     } else {
2917:       PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);
2918:       PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2919:       PetscArrayzero(b->a,nz*bs2);
2920:     }
2921:     PetscArrayzero(b->j,nz);

2923:     if (B->structure_only) {
2924:       b->singlemalloc = PETSC_FALSE;
2925:       b->free_a       = PETSC_FALSE;
2926:     } else {
2927:       b->singlemalloc = PETSC_TRUE;
2928:       b->free_a       = PETSC_TRUE;
2929:     }
2930:     b->free_ij = PETSC_TRUE;

2932:     b->i[0] = 0;
2933:     for (i=1; i<mbs+1; i++) {
2934:       b->i[i] = b->i[i-1] + b->imax[i-1];
2935:     }

2937:   } else {
2938:     b->free_a  = PETSC_FALSE;
2939:     b->free_ij = PETSC_FALSE;
2940:   }

2942:   b->bs2              = bs2;
2943:   b->mbs              = mbs;
2944:   b->nz               = 0;
2945:   b->maxnz            = nz;
2946:   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2947:   B->was_assembled    = PETSC_FALSE;
2948:   B->assembled        = PETSC_FALSE;
2949:   if (realalloc) {MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);}
2950:   return(0);
2951: }

2953: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2954: {
2955:   PetscInt       i,m,nz,nz_max=0,*nnz;
2956:   PetscScalar    *values=0;
2957:   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;

2961:   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2962:   PetscLayoutSetBlockSize(B->rmap,bs);
2963:   PetscLayoutSetBlockSize(B->cmap,bs);
2964:   PetscLayoutSetUp(B->rmap);
2965:   PetscLayoutSetUp(B->cmap);
2966:   PetscLayoutGetBlockSize(B->rmap,&bs);
2967:   m    = B->rmap->n/bs;

2969:   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2970:   PetscMalloc1(m+1, &nnz);
2971:   for (i=0; i<m; i++) {
2972:     nz = ii[i+1]- ii[i];
2973:     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2974:     nz_max = PetscMax(nz_max, nz);
2975:     nnz[i] = nz;
2976:   }
2977:   MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2978:   PetscFree(nnz);

2980:   values = (PetscScalar*)V;
2981:   if (!values) {
2982:     PetscCalloc1(bs*bs*(nz_max+1),&values);
2983:   }
2984:   for (i=0; i<m; i++) {
2985:     PetscInt          ncols  = ii[i+1] - ii[i];
2986:     const PetscInt    *icols = jj + ii[i];
2987:     if (bs == 1 || !roworiented) {
2988:       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2989:       MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
2990:     } else {
2991:       PetscInt j;
2992:       for (j=0; j<ncols; j++) {
2993:         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2994:         MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);
2995:       }
2996:     }
2997:   }
2998:   if (!V) { PetscFree(values); }
2999:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3000:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3001:   MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3002:   return(0);
3003: }

3005: /*@C
3006:    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored

3008:    Not Collective

3010:    Input Parameter:
3011: .  mat - a MATSEQBAIJ matrix

3013:    Output Parameter:
3014: .   array - pointer to the data

3016:    Level: intermediate

3018: .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3019: @*/
3020: PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3021: {

3025:   PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));
3026:   return(0);
3027: }

3029: /*@C
3030:    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()

3032:    Not Collective

3034:    Input Parameters:
3035: +  mat - a MATSEQBAIJ matrix
3036: -  array - pointer to the data

3038:    Level: intermediate

3040: .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3041: @*/
3042: PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3043: {

3047:   PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));
3048:   return(0);
3049: }

3051: /*MC
3052:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3053:    block sparse compressed row format.

3055:    Options Database Keys:
3056: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()

3058:    Level: beginner

3060:    Notes:
3061:     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3062:     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

3064: .seealso: MatCreateSeqBAIJ()
3065: M*/

3067: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);

3069: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3070: {
3072:   PetscMPIInt    size;
3073:   Mat_SeqBAIJ    *b;

3076:   MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
3077:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

3079:   PetscNewLog(B,&b);
3080:   B->data = (void*)b;
3081:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));

3083:   b->row          = 0;
3084:   b->col          = 0;
3085:   b->icol         = 0;
3086:   b->reallocs     = 0;
3087:   b->saved_values = 0;

3089:   b->roworiented        = PETSC_TRUE;
3090:   b->nonew              = 0;
3091:   b->diag               = 0;
3092:   B->spptr              = 0;
3093:   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3094:   b->keepnonzeropattern = PETSC_FALSE;

3096:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);
3097:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);
3098:   PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);
3099:   PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);
3100:   PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);
3101:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);
3102:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);
3103:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);
3104:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);
3105:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3106:   PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);
3107: #if defined(PETSC_HAVE_HYPRE)
3108:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);
3109: #endif
3110:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);
3111:   PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);
3112:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3113:   return(0);
3114: }

3116: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3117: {
3118:   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3120:   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;

3123:   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");

3125:   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3126:     c->imax           = a->imax;
3127:     c->ilen           = a->ilen;
3128:     c->free_imax_ilen = PETSC_FALSE;
3129:   } else {
3130:     PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);
3131:     PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));
3132:     for (i=0; i<mbs; i++) {
3133:       c->imax[i] = a->imax[i];
3134:       c->ilen[i] = a->ilen[i];
3135:     }
3136:     c->free_imax_ilen = PETSC_TRUE;
3137:   }

3139:   /* allocate the matrix space */
3140:   if (mallocmatspace) {
3141:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3142:       PetscCalloc1(bs2*nz,&c->a);
3143:       PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));

3145:       c->i            = a->i;
3146:       c->j            = a->j;
3147:       c->singlemalloc = PETSC_FALSE;
3148:       c->free_a       = PETSC_TRUE;
3149:       c->free_ij      = PETSC_FALSE;
3150:       c->parent       = A;
3151:       C->preallocated = PETSC_TRUE;
3152:       C->assembled    = PETSC_TRUE;

3154:       PetscObjectReference((PetscObject)A);
3155:       MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3156:       MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3157:     } else {
3158:       PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);
3159:       PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));

3161:       c->singlemalloc = PETSC_TRUE;
3162:       c->free_a       = PETSC_TRUE;
3163:       c->free_ij      = PETSC_TRUE;

3165:       PetscArraycpy(c->i,a->i,mbs+1);
3166:       if (mbs > 0) {
3167:         PetscArraycpy(c->j,a->j,nz);
3168:         if (cpvalues == MAT_COPY_VALUES) {
3169:           PetscArraycpy(c->a,a->a,bs2*nz);
3170:         } else {
3171:           PetscArrayzero(c->a,bs2*nz);
3172:         }
3173:       }
3174:       C->preallocated = PETSC_TRUE;
3175:       C->assembled    = PETSC_TRUE;
3176:     }
3177:   }

3179:   c->roworiented = a->roworiented;
3180:   c->nonew       = a->nonew;

3182:   PetscLayoutReference(A->rmap,&C->rmap);
3183:   PetscLayoutReference(A->cmap,&C->cmap);

3185:   c->bs2         = a->bs2;
3186:   c->mbs         = a->mbs;
3187:   c->nbs         = a->nbs;

3189:   if (a->diag) {
3190:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3191:       c->diag      = a->diag;
3192:       c->free_diag = PETSC_FALSE;
3193:     } else {
3194:       PetscMalloc1(mbs+1,&c->diag);
3195:       PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));
3196:       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3197:       c->free_diag = PETSC_TRUE;
3198:     }
3199:   } else c->diag = 0;

3201:   c->nz         = a->nz;
3202:   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3203:   c->solve_work = NULL;
3204:   c->mult_work  = NULL;
3205:   c->sor_workt  = NULL;
3206:   c->sor_work   = NULL;

3208:   c->compressedrow.use   = a->compressedrow.use;
3209:   c->compressedrow.nrows = a->compressedrow.nrows;
3210:   if (a->compressedrow.use) {
3211:     i    = a->compressedrow.nrows;
3212:     PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);
3213:     PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));
3214:     PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);
3215:     PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);
3216:   } else {
3217:     c->compressedrow.use    = PETSC_FALSE;
3218:     c->compressedrow.i      = NULL;
3219:     c->compressedrow.rindex = NULL;
3220:   }
3221:   C->nonzerostate = A->nonzerostate;

3223:   PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3224:   return(0);
3225: }

3227: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3228: {

3232:   MatCreate(PetscObjectComm((PetscObject)A),B);
3233:   MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3234:   MatSetType(*B,MATSEQBAIJ);
3235:   MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3236:   return(0);
3237: }

3239: PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3240: {
3241:   Mat_SeqBAIJ    *a;
3243:   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3244:   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3245:   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3246:   PetscInt       *masked,nmask,tmp,bs2,ishift;
3247:   PetscMPIInt    size;
3248:   int            fd;
3249:   PetscScalar    *aa;
3250:   MPI_Comm       comm;
3251:   PetscBool      isbinary;

3254:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
3255:   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);

3257:   /* force binary viewer to load .info file if it has not yet done so */
3258:   PetscViewerSetUp(viewer);
3259:   PetscObjectGetComm((PetscObject)viewer,&comm);
3260:   PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");
3261:   PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
3262:   PetscOptionsEnd();
3263:   if (bs < 0) bs = 1;
3264:   bs2  = bs*bs;

3266:   MPI_Comm_size(comm,&size);
3267:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3268:   PetscViewerBinaryGetDescriptor(viewer,&fd);
3269:   PetscBinaryRead(fd,header,4,NULL,PETSC_INT);
3270:   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3271:   M = header[1]; N = header[2]; nz = header[3];

3273:   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3274:   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");

3276:   /*
3277:      This code adds extra rows to make sure the number of rows is
3278:     divisible by the blocksize
3279:   */
3280:   mbs        = M/bs;
3281:   extra_rows = bs - M + bs*(mbs);
3282:   if (extra_rows == bs) extra_rows = 0;
3283:   else mbs++;
3284:   if (extra_rows) {
3285:     PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3286:   }

3288:   /* Set global sizes if not already set */
3289:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3290:     MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3291:   } else { /* Check if the matrix global sizes are correct */
3292:     MatGetSize(newmat,&rows,&cols);
3293:     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3294:       MatGetLocalSize(newmat,&rows,&cols);
3295:     }
3296:     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3297:   }

3299:   /* read in row lengths */
3300:   PetscMalloc1(M+extra_rows,&rowlengths);
3301:   PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);
3302:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

3304:   /* read in column indices */
3305:   PetscMalloc1(nz+extra_rows,&jj);
3306:   PetscBinaryRead(fd,jj,nz,NULL,PETSC_INT);
3307:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

3309:   /* loop over row lengths determining block row lengths */
3310:   PetscCalloc1(mbs,&browlengths);
3311:   PetscMalloc2(mbs,&mask,mbs,&masked);
3312:   PetscArrayzero(mask,mbs);
3313:   rowcount = 0;
3314:   nzcount  = 0;
3315:   for (i=0; i<mbs; i++) {
3316:     nmask = 0;
3317:     for (j=0; j<bs; j++) {
3318:       kmax = rowlengths[rowcount];
3319:       for (k=0; k<kmax; k++) {
3320:         tmp = jj[nzcount++]/bs;
3321:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3322:       }
3323:       rowcount++;
3324:     }
3325:     browlengths[i] += nmask;
3326:     /* zero out the mask elements we set */
3327:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3328:   }

3330:   /* Do preallocation  */
3331:   MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);
3332:   a    = (Mat_SeqBAIJ*)newmat->data;

3334:   /* set matrix "i" values */
3335:   a->i[0] = 0;
3336:   for (i=1; i<= mbs; i++) {
3337:     a->i[i]      = a->i[i-1] + browlengths[i-1];
3338:     a->ilen[i-1] = browlengths[i-1];
3339:   }
3340:   a->nz = 0;
3341:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

3343:   /* read in nonzero values */
3344:   PetscMalloc1(nz+extra_rows,&aa);
3345:   PetscBinaryRead(fd,aa,nz,NULL,PETSC_SCALAR);
3346:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

3348:   /* set "a" and "j" values into matrix */
3349:   nzcount = 0; jcount = 0;
3350:   for (i=0; i<mbs; i++) {
3351:     nzcountb = nzcount;
3352:     nmask    = 0;
3353:     for (j=0; j<bs; j++) {
3354:       kmax = rowlengths[i*bs+j];
3355:       for (k=0; k<kmax; k++) {
3356:         tmp = jj[nzcount++]/bs;
3357:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3358:       }
3359:     }
3360:     /* sort the masked values */
3361:     PetscSortInt(nmask,masked);

3363:     /* set "j" values into matrix */
3364:     maskcount = 1;
3365:     for (j=0; j<nmask; j++) {
3366:       a->j[jcount++]  = masked[j];
3367:       mask[masked[j]] = maskcount++;
3368:     }
3369:     /* set "a" values into matrix */
3370:     ishift = bs2*a->i[i];
3371:     for (j=0; j<bs; j++) {
3372:       kmax = rowlengths[i*bs+j];
3373:       for (k=0; k<kmax; k++) {
3374:         tmp       = jj[nzcountb]/bs;
3375:         block     = mask[tmp] - 1;
3376:         point     = jj[nzcountb] - bs*tmp;
3377:         idx       = ishift + bs2*block + j + bs*point;
3378:         a->a[idx] = (MatScalar)aa[nzcountb++];
3379:       }
3380:     }
3381:     /* zero out the mask elements we set */
3382:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3383:   }
3384:   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

3386:   PetscFree(rowlengths);
3387:   PetscFree(browlengths);
3388:   PetscFree(aa);
3389:   PetscFree(jj);
3390:   PetscFree2(mask,masked);

3392:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
3393:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
3394:   return(0);
3395: }

3397: /*@C
3398:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3399:    compressed row) format.  For good matrix assembly performance the
3400:    user should preallocate the matrix storage by setting the parameter nz
3401:    (or the array nnz).  By setting these parameters accurately, performance
3402:    during matrix assembly can be increased by more than a factor of 50.

3404:    Collective

3406:    Input Parameters:
3407: +  comm - MPI communicator, set to PETSC_COMM_SELF
3408: .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3409:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3410: .  m - number of rows
3411: .  n - number of columns
3412: .  nz - number of nonzero blocks  per block row (same for all rows)
3413: -  nnz - array containing the number of nonzero blocks in the various block rows
3414:          (possibly different for each block row) or NULL

3416:    Output Parameter:
3417: .  A - the matrix

3419:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3420:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3421:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

3423:    Options Database Keys:
3424: +   -mat_no_unroll - uses code that does not unroll the loops in the
3425:                      block calculations (much slower)
3426: -    -mat_block_size - size of the blocks to use

3428:    Level: intermediate

3430:    Notes:
3431:    The number of rows and columns must be divisible by blocksize.

3433:    If the nnz parameter is given then the nz parameter is ignored

3435:    A nonzero block is any block that as 1 or more nonzeros in it

3437:    The block AIJ format is fully compatible with standard Fortran 77
3438:    storage.  That is, the stored row and column indices can begin at
3439:    either one (as in Fortran) or zero.  See the users' manual for details.

3441:    Specify the preallocated storage with either nz or nnz (not both).
3442:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3443:    allocation.  See Users-Manual: ch_mat for details.
3444:    matrices.

3446: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3447: @*/
3448: PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3449: {

3453:   MatCreate(comm,A);
3454:   MatSetSizes(*A,m,n,m,n);
3455:   MatSetType(*A,MATSEQBAIJ);
3456:   MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);
3457:   return(0);
3458: }

3460: /*@C
3461:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3462:    per row in the matrix. For good matrix assembly performance the
3463:    user should preallocate the matrix storage by setting the parameter nz
3464:    (or the array nnz).  By setting these parameters accurately, performance
3465:    during matrix assembly can be increased by more than a factor of 50.

3467:    Collective

3469:    Input Parameters:
3470: +  B - the matrix
3471: .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3472:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3473: .  nz - number of block nonzeros per block row (same for all rows)
3474: -  nnz - array containing the number of block nonzeros in the various block rows
3475:          (possibly different for each block row) or NULL

3477:    Options Database Keys:
3478: +   -mat_no_unroll - uses code that does not unroll the loops in the
3479:                      block calculations (much slower)
3480: -   -mat_block_size - size of the blocks to use

3482:    Level: intermediate

3484:    Notes:
3485:    If the nnz parameter is given then the nz parameter is ignored

3487:    You can call MatGetInfo() to get information on how effective the preallocation was;
3488:    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3489:    You can also run with the option -info and look for messages with the string
3490:    malloc in them to see if additional memory allocation was needed.

3492:    The block AIJ format is fully compatible with standard Fortran 77
3493:    storage.  That is, the stored row and column indices can begin at
3494:    either one (as in Fortran) or zero.  See the users' manual for details.

3496:    Specify the preallocated storage with either nz or nnz (not both).
3497:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3498:    allocation.  See Users-Manual: ch_mat for details.

3500: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3501: @*/
3502: PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3503: {

3510:   PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3511:   return(0);
3512: }

3514: /*@C
3515:    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values

3517:    Collective

3519:    Input Parameters:
3520: +  B - the matrix
3521: .  i - the indices into j for the start of each local row (starts with zero)
3522: .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3523: -  v - optional values in the matrix

3525:    Level: advanced

3527:    Notes:
3528:    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3529:    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3530:    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3531:    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3532:    block column and the second index is over columns within a block.

3534:    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well

3536: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3537: @*/
3538: PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3539: {

3546:   PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3547:   return(0);
3548: }


3551: /*@
3552:      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.

3554:      Collective

3556:    Input Parameters:
3557: +  comm - must be an MPI communicator of size 1
3558: .  bs - size of block
3559: .  m - number of rows
3560: .  n - number of columns
3561: .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3562: .  j - column indices
3563: -  a - matrix values

3565:    Output Parameter:
3566: .  mat - the matrix

3568:    Level: advanced

3570:    Notes:
3571:        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3572:     once the matrix is destroyed

3574:        You cannot set new nonzero locations into this matrix, that will generate an error.

3576:        The i and j indices are 0 based

3578:        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).

3580:       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3581:       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3582:       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3583:       with column-major ordering within blocks.

3585: .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()

3587: @*/
3588: PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3589: {
3591:   PetscInt       ii;
3592:   Mat_SeqBAIJ    *baij;

3595:   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3596:   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");

3598:   MatCreate(comm,mat);
3599:   MatSetSizes(*mat,m,n,m,n);
3600:   MatSetType(*mat,MATSEQBAIJ);
3601:   MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);
3602:   baij = (Mat_SeqBAIJ*)(*mat)->data;
3603:   PetscMalloc2(m,&baij->imax,m,&baij->ilen);
3604:   PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));

3606:   baij->i = i;
3607:   baij->j = j;
3608:   baij->a = a;

3610:   baij->singlemalloc = PETSC_FALSE;
3611:   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3612:   baij->free_a       = PETSC_FALSE;
3613:   baij->free_ij      = PETSC_FALSE;

3615:   for (ii=0; ii<m; ii++) {
3616:     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3617: #if defined(PETSC_USE_DEBUG)
3618:     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3619: #endif
3620:   }
3621: #if defined(PETSC_USE_DEBUG)
3622:   for (ii=0; ii<baij->i[m]; ii++) {
3623:     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3624:     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3625:   }
3626: #endif

3628:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3629:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3630:   return(0);
3631: }

3633: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3634: {
3636:   PetscMPIInt    size;

3639:   MPI_Comm_size(comm,&size);
3640:   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3641:     MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);
3642:   } else {
3643:     MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);
3644:   }
3645:   return(0);
3646: }