Actual source code: baij.c


  2: /*
  3:     Defines the basic matrix operations for the BAIJ (compressed row)
  4:   matrix storage format.
  5: */
  6: #include <../src/mat/impls/baij/seq/baij.h>
  7: #include <petscblaslapack.h>
  8: #include <petsc/private/kernels/blockinvert.h>
  9: #include <petsc/private/kernels/blockmatmult.h>

 11: #if defined(PETSC_HAVE_HYPRE)
 12: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
 13: #endif

 15: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 16: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
 17: #endif
 18: PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);

 20: PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal *reductions)
 21: {
 22:   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) A->data;
 23:   PetscInt       m,n,i;
 24:   PetscInt       ib,jb,bs = A->rmap->bs;
 25:   MatScalar      *a_val = a_aij->a;

 27:   MatGetSize(A,&m,&n);
 28:   for (i=0; i<n; i++) reductions[i] = 0.0;
 29:   if (type == NORM_2) {
 30:     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
 31:       for (jb=0; jb<bs; jb++) {
 32:         for (ib=0; ib<bs; ib++) {
 33:           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
 34:           a_val++;
 35:         }
 36:       }
 37:     }
 38:   } else if (type == NORM_1) {
 39:     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
 40:       for (jb=0; jb<bs; jb++) {
 41:         for (ib=0; ib<bs; ib++) {
 42:           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
 43:           a_val++;
 44:         }
 45:       }
 46:     }
 47:   } else if (type == NORM_INFINITY) {
 48:     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
 49:       for (jb=0; jb<bs; jb++) {
 50:         for (ib=0; ib<bs; ib++) {
 51:           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
 52:           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
 53:           a_val++;
 54:         }
 55:       }
 56:     }
 57:   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
 58:     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
 59:       for (jb=0; jb<bs; jb++) {
 60:         for (ib=0; ib<bs; ib++) {
 61:           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
 62:           a_val++;
 63:         }
 64:       }
 65:     }
 66:   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
 67:     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
 68:       for (jb=0; jb<bs; jb++) {
 69:         for (ib=0; ib<bs; ib++) {
 70:           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
 71:           a_val++;
 72:         }
 73:       }
 74:     }
 75:   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
 76:   if (type == NORM_2) {
 77:     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
 78:   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
 79:     for (i=0; i<n; i++) reductions[i] /= m;
 80:   }
 81:   return 0;
 82: }

 84: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
 85: {
 86:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 87:   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
 88:   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
 89:   PetscReal      shift = 0.0;
 90:   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;

 92:   allowzeropivot = PetscNot(A->erroriffailure);

 94:   if (a->idiagvalid) {
 95:     if (values) *values = a->idiag;
 96:     return 0;
 97:   }
 98:   MatMarkDiagonal_SeqBAIJ(A);
 99:   diag_offset = a->diag;
100:   if (!a->idiag) {
101:     PetscMalloc1(bs2*mbs,&a->idiag);
102:     PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));
103:   }
104:   diag  = a->idiag;
105:   if (values) *values = a->idiag;
106:   /* factor and invert each block */
107:   switch (bs) {
108:   case 1:
109:     for (i=0; i<mbs; i++) {
110:       odiag    = v + 1*diag_offset[i];
111:       diag[0]  = odiag[0];

113:       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
114:         if (allowzeropivot) {
115:           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
116:           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
117:           A->factorerror_zeropivot_row   = i;
118:           PetscInfo(A,"Zero pivot, row %" PetscInt_FMT "\n",i);
119:         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
120:       }

122:       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
123:       diag    += 1;
124:     }
125:     break;
126:   case 2:
127:     for (i=0; i<mbs; i++) {
128:       odiag    = v + 4*diag_offset[i];
129:       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
130:       PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);
131:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
132:       diag    += 4;
133:     }
134:     break;
135:   case 3:
136:     for (i=0; i<mbs; i++) {
137:       odiag    = v + 9*diag_offset[i];
138:       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
139:       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
140:       diag[8]  = odiag[8];
141:       PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);
142:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
143:       diag    += 9;
144:     }
145:     break;
146:   case 4:
147:     for (i=0; i<mbs; i++) {
148:       odiag  = v + 16*diag_offset[i];
149:       PetscArraycpy(diag,odiag,16);
150:       PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);
151:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
152:       diag  += 16;
153:     }
154:     break;
155:   case 5:
156:     for (i=0; i<mbs; i++) {
157:       odiag  = v + 25*diag_offset[i];
158:       PetscArraycpy(diag,odiag,25);
159:       PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);
160:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
161:       diag  += 25;
162:     }
163:     break;
164:   case 6:
165:     for (i=0; i<mbs; i++) {
166:       odiag  = v + 36*diag_offset[i];
167:       PetscArraycpy(diag,odiag,36);
168:       PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);
169:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
170:       diag  += 36;
171:     }
172:     break;
173:   case 7:
174:     for (i=0; i<mbs; i++) {
175:       odiag  = v + 49*diag_offset[i];
176:       PetscArraycpy(diag,odiag,49);
177:       PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);
178:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
179:       diag  += 49;
180:     }
181:     break;
182:   default:
183:     PetscMalloc2(bs,&v_work,bs,&v_pivots);
184:     for (i=0; i<mbs; i++) {
185:       odiag  = v + bs2*diag_offset[i];
186:       PetscArraycpy(diag,odiag,bs2);
187:       PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);
188:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
189:       diag  += bs2;
190:     }
191:     PetscFree2(v_work,v_pivots);
192:   }
193:   a->idiagvalid = PETSC_TRUE;
194:   return 0;
195: }

197: PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
198: {
199:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
200:   PetscScalar       *x,*work,*w,*workt,*t;
201:   const MatScalar   *v,*aa = a->a, *idiag;
202:   const PetscScalar *b,*xb;
203:   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
204:   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
205:   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;

207:   its = its*lits;

214:   if (!a->idiagvalid) MatInvertBlockDiagonal(A,NULL);

216:   if (!m) return 0;
217:   diag  = a->diag;
218:   idiag = a->idiag;
219:   k    = PetscMax(A->rmap->n,A->cmap->n);
220:   if (!a->mult_work) {
221:     PetscMalloc1(k+1,&a->mult_work);
222:   }
223:   if (!a->sor_workt) {
224:     PetscMalloc1(k,&a->sor_workt);
225:   }
226:   if (!a->sor_work) {
227:     PetscMalloc1(bs,&a->sor_work);
228:   }
229:   work = a->mult_work;
230:   t    = a->sor_workt;
231:   w    = a->sor_work;

233:   VecGetArray(xx,&x);
234:   VecGetArrayRead(bb,&b);

236:   if (flag & SOR_ZERO_INITIAL_GUESS) {
237:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
238:       switch (bs) {
239:       case 1:
240:         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
241:         t[0] = b[0];
242:         i2     = 1;
243:         idiag += 1;
244:         for (i=1; i<m; i++) {
245:           v  = aa + ai[i];
246:           vi = aj + ai[i];
247:           nz = diag[i] - ai[i];
248:           s[0] = b[i2];
249:           for (j=0; j<nz; j++) {
250:             xw[0] = x[vi[j]];
251:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
252:           }
253:           t[i2] = s[0];
254:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
255:           x[i2]  = xw[0];
256:           idiag += 1;
257:           i2    += 1;
258:         }
259:         break;
260:       case 2:
261:         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
262:         t[0] = b[0]; t[1] = b[1];
263:         i2     = 2;
264:         idiag += 4;
265:         for (i=1; i<m; i++) {
266:           v  = aa + 4*ai[i];
267:           vi = aj + ai[i];
268:           nz = diag[i] - ai[i];
269:           s[0] = b[i2]; s[1] = b[i2+1];
270:           for (j=0; j<nz; j++) {
271:             idx = 2*vi[j];
272:             it  = 4*j;
273:             xw[0] = x[idx]; xw[1] = x[1+idx];
274:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
275:           }
276:           t[i2] = s[0]; t[i2+1] = s[1];
277:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
278:           x[i2]   = xw[0]; x[i2+1] = xw[1];
279:           idiag  += 4;
280:           i2     += 2;
281:         }
282:         break;
283:       case 3:
284:         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
285:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
286:         i2     = 3;
287:         idiag += 9;
288:         for (i=1; i<m; i++) {
289:           v  = aa + 9*ai[i];
290:           vi = aj + ai[i];
291:           nz = diag[i] - ai[i];
292:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
293:           while (nz--) {
294:             idx = 3*(*vi++);
295:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
296:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
297:             v  += 9;
298:           }
299:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
300:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
301:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
302:           idiag  += 9;
303:           i2     += 3;
304:         }
305:         break;
306:       case 4:
307:         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
308:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
309:         i2     = 4;
310:         idiag += 16;
311:         for (i=1; i<m; i++) {
312:           v  = aa + 16*ai[i];
313:           vi = aj + ai[i];
314:           nz = diag[i] - ai[i];
315:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
316:           while (nz--) {
317:             idx = 4*(*vi++);
318:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
319:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
320:             v  += 16;
321:           }
322:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
323:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
324:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
325:           idiag  += 16;
326:           i2     += 4;
327:         }
328:         break;
329:       case 5:
330:         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
331:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
332:         i2     = 5;
333:         idiag += 25;
334:         for (i=1; i<m; i++) {
335:           v  = aa + 25*ai[i];
336:           vi = aj + ai[i];
337:           nz = diag[i] - ai[i];
338:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
339:           while (nz--) {
340:             idx = 5*(*vi++);
341:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
342:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
343:             v  += 25;
344:           }
345:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
346:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
347:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
348:           idiag  += 25;
349:           i2     += 5;
350:         }
351:         break;
352:       case 6:
353:         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
354:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
355:         i2     = 6;
356:         idiag += 36;
357:         for (i=1; i<m; i++) {
358:           v  = aa + 36*ai[i];
359:           vi = aj + ai[i];
360:           nz = diag[i] - ai[i];
361:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
362:           while (nz--) {
363:             idx = 6*(*vi++);
364:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
365:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
366:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
367:             v  += 36;
368:           }
369:           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
370:           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
371:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
372:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
373:           idiag  += 36;
374:           i2     += 6;
375:         }
376:         break;
377:       case 7:
378:         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
379:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
380:         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
381:         i2     = 7;
382:         idiag += 49;
383:         for (i=1; i<m; i++) {
384:           v  = aa + 49*ai[i];
385:           vi = aj + ai[i];
386:           nz = diag[i] - ai[i];
387:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
388:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
389:           while (nz--) {
390:             idx = 7*(*vi++);
391:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
392:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
393:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
394:             v  += 49;
395:           }
396:           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
397:           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
398:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
399:           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
400:           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
401:           idiag  += 49;
402:           i2     += 7;
403:         }
404:         break;
405:       default:
406:         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
407:         PetscArraycpy(t,b,bs);
408:         i2     = bs;
409:         idiag += bs2;
410:         for (i=1; i<m; i++) {
411:           v  = aa + bs2*ai[i];
412:           vi = aj + ai[i];
413:           nz = diag[i] - ai[i];

415:           PetscArraycpy(w,b+i2,bs);
416:           /* copy all rows of x that are needed into contiguous space */
417:           workt = work;
418:           for (j=0; j<nz; j++) {
419:             PetscArraycpy(workt,x + bs*(*vi++),bs);
420:             workt += bs;
421:           }
422:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
423:           PetscArraycpy(t+i2,w,bs);
424:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);

426:           idiag += bs2;
427:           i2    += bs;
428:         }
429:         break;
430:       }
431:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
432:       PetscLogFlops(1.0*bs2*a->nz);
433:       xb = t;
434:     }
435:     else xb = b;
436:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
437:       idiag = a->idiag+bs2*(a->mbs-1);
438:       i2 = bs * (m-1);
439:       switch (bs) {
440:       case 1:
441:         s[0]  = xb[i2];
442:         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
443:         x[i2] = xw[0];
444:         i2   -= 1;
445:         for (i=m-2; i>=0; i--) {
446:           v  = aa + (diag[i]+1);
447:           vi = aj + diag[i] + 1;
448:           nz = ai[i+1] - diag[i] - 1;
449:           s[0] = xb[i2];
450:           for (j=0; j<nz; j++) {
451:             xw[0] = x[vi[j]];
452:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
453:           }
454:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
455:           x[i2]  = xw[0];
456:           idiag -= 1;
457:           i2    -= 1;
458:         }
459:         break;
460:       case 2:
461:         s[0]  = xb[i2]; s[1] = xb[i2+1];
462:         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
463:         x[i2] = xw[0]; x[i2+1] = xw[1];
464:         i2    -= 2;
465:         idiag -= 4;
466:         for (i=m-2; i>=0; i--) {
467:           v  = aa + 4*(diag[i] + 1);
468:           vi = aj + diag[i] + 1;
469:           nz = ai[i+1] - diag[i] - 1;
470:           s[0] = xb[i2]; s[1] = xb[i2+1];
471:           for (j=0; j<nz; j++) {
472:             idx = 2*vi[j];
473:             it  = 4*j;
474:             xw[0] = x[idx]; xw[1] = x[1+idx];
475:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
476:           }
477:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
478:           x[i2]   = xw[0]; x[i2+1] = xw[1];
479:           idiag  -= 4;
480:           i2     -= 2;
481:         }
482:         break;
483:       case 3:
484:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
485:         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
486:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
487:         i2    -= 3;
488:         idiag -= 9;
489:         for (i=m-2; i>=0; i--) {
490:           v  = aa + 9*(diag[i]+1);
491:           vi = aj + diag[i] + 1;
492:           nz = ai[i+1] - diag[i] - 1;
493:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
494:           while (nz--) {
495:             idx = 3*(*vi++);
496:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
497:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
498:             v  += 9;
499:           }
500:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
501:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
502:           idiag  -= 9;
503:           i2     -= 3;
504:         }
505:         break;
506:       case 4:
507:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
508:         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
509:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
510:         i2    -= 4;
511:         idiag -= 16;
512:         for (i=m-2; i>=0; i--) {
513:           v  = aa + 16*(diag[i]+1);
514:           vi = aj + diag[i] + 1;
515:           nz = ai[i+1] - diag[i] - 1;
516:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
517:           while (nz--) {
518:             idx = 4*(*vi++);
519:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
520:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
521:             v  += 16;
522:           }
523:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
524:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
525:           idiag  -= 16;
526:           i2     -= 4;
527:         }
528:         break;
529:       case 5:
530:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
531:         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
532:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
533:         i2    -= 5;
534:         idiag -= 25;
535:         for (i=m-2; i>=0; i--) {
536:           v  = aa + 25*(diag[i]+1);
537:           vi = aj + diag[i] + 1;
538:           nz = ai[i+1] - diag[i] - 1;
539:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
540:           while (nz--) {
541:             idx = 5*(*vi++);
542:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
543:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
544:             v  += 25;
545:           }
546:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
547:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
548:           idiag  -= 25;
549:           i2     -= 5;
550:         }
551:         break;
552:       case 6:
553:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
554:         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
555:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
556:         i2    -= 6;
557:         idiag -= 36;
558:         for (i=m-2; i>=0; i--) {
559:           v  = aa + 36*(diag[i]+1);
560:           vi = aj + diag[i] + 1;
561:           nz = ai[i+1] - diag[i] - 1;
562:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
563:           while (nz--) {
564:             idx = 6*(*vi++);
565:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
566:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
567:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
568:             v  += 36;
569:           }
570:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
571:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
572:           idiag  -= 36;
573:           i2     -= 6;
574:         }
575:         break;
576:       case 7:
577:         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
578:         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
579:         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
580:         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
581:         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
582:         i2    -= 7;
583:         idiag -= 49;
584:         for (i=m-2; i>=0; i--) {
585:           v  = aa + 49*(diag[i]+1);
586:           vi = aj + diag[i] + 1;
587:           nz = ai[i+1] - diag[i] - 1;
588:           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
589:           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
590:           while (nz--) {
591:             idx = 7*(*vi++);
592:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
593:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
594:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
595:             v  += 49;
596:           }
597:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
598:           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
599:           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
600:           idiag  -= 49;
601:           i2     -= 7;
602:         }
603:         break;
604:       default:
605:         PetscArraycpy(w,xb+i2,bs);
606:         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
607:         i2    -= bs;
608:         idiag -= bs2;
609:         for (i=m-2; i>=0; i--) {
610:           v  = aa + bs2*(diag[i]+1);
611:           vi = aj + diag[i] + 1;
612:           nz = ai[i+1] - diag[i] - 1;

614:           PetscArraycpy(w,xb+i2,bs);
615:           /* copy all rows of x that are needed into contiguous space */
616:           workt = work;
617:           for (j=0; j<nz; j++) {
618:             PetscArraycpy(workt,x + bs*(*vi++),bs);
619:             workt += bs;
620:           }
621:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
622:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);

624:           idiag -= bs2;
625:           i2    -= bs;
626:         }
627:         break;
628:       }
629:       PetscLogFlops(1.0*bs2*(a->nz));
630:     }
631:     its--;
632:   }
633:   while (its--) {
634:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
635:       idiag = a->idiag;
636:       i2 = 0;
637:       switch (bs) {
638:       case 1:
639:         for (i=0; i<m; i++) {
640:           v  = aa + ai[i];
641:           vi = aj + ai[i];
642:           nz = ai[i+1] - ai[i];
643:           s[0] = b[i2];
644:           for (j=0; j<nz; j++) {
645:             xw[0] = x[vi[j]];
646:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
647:           }
648:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
649:           x[i2] += xw[0];
650:           idiag += 1;
651:           i2    += 1;
652:         }
653:         break;
654:       case 2:
655:         for (i=0; i<m; i++) {
656:           v  = aa + 4*ai[i];
657:           vi = aj + ai[i];
658:           nz = ai[i+1] - ai[i];
659:           s[0] = b[i2]; s[1] = b[i2+1];
660:           for (j=0; j<nz; j++) {
661:             idx = 2*vi[j];
662:             it  = 4*j;
663:             xw[0] = x[idx]; xw[1] = x[1+idx];
664:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
665:           }
666:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
667:           x[i2]  += xw[0]; x[i2+1] += xw[1];
668:           idiag  += 4;
669:           i2     += 2;
670:         }
671:         break;
672:       case 3:
673:         for (i=0; i<m; i++) {
674:           v  = aa + 9*ai[i];
675:           vi = aj + ai[i];
676:           nz = ai[i+1] - ai[i];
677:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
678:           while (nz--) {
679:             idx = 3*(*vi++);
680:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
681:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
682:             v  += 9;
683:           }
684:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
685:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
686:           idiag  += 9;
687:           i2     += 3;
688:         }
689:         break;
690:       case 4:
691:         for (i=0; i<m; i++) {
692:           v  = aa + 16*ai[i];
693:           vi = aj + ai[i];
694:           nz = ai[i+1] - ai[i];
695:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
696:           while (nz--) {
697:             idx = 4*(*vi++);
698:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
699:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
700:             v  += 16;
701:           }
702:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
703:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
704:           idiag  += 16;
705:           i2     += 4;
706:         }
707:         break;
708:       case 5:
709:         for (i=0; i<m; i++) {
710:           v  = aa + 25*ai[i];
711:           vi = aj + ai[i];
712:           nz = ai[i+1] - ai[i];
713:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
714:           while (nz--) {
715:             idx = 5*(*vi++);
716:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
717:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
718:             v  += 25;
719:           }
720:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
721:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
722:           idiag  += 25;
723:           i2     += 5;
724:         }
725:         break;
726:       case 6:
727:         for (i=0; i<m; i++) {
728:           v  = aa + 36*ai[i];
729:           vi = aj + ai[i];
730:           nz = ai[i+1] - ai[i];
731:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
732:           while (nz--) {
733:             idx = 6*(*vi++);
734:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
735:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
736:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
737:             v  += 36;
738:           }
739:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
740:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
741:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
742:           idiag  += 36;
743:           i2     += 6;
744:         }
745:         break;
746:       case 7:
747:         for (i=0; i<m; i++) {
748:           v  = aa + 49*ai[i];
749:           vi = aj + ai[i];
750:           nz = ai[i+1] - ai[i];
751:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
752:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
753:           while (nz--) {
754:             idx = 7*(*vi++);
755:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
756:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
757:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
758:             v  += 49;
759:           }
760:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
761:           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
762:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
763:           idiag  += 49;
764:           i2     += 7;
765:         }
766:         break;
767:       default:
768:         for (i=0; i<m; i++) {
769:           v  = aa + bs2*ai[i];
770:           vi = aj + ai[i];
771:           nz = ai[i+1] - ai[i];

773:           PetscArraycpy(w,b+i2,bs);
774:           /* copy all rows of x that are needed into contiguous space */
775:           workt = work;
776:           for (j=0; j<nz; j++) {
777:             PetscArraycpy(workt,x + bs*(*vi++),bs);
778:             workt += bs;
779:           }
780:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
781:           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);

783:           idiag += bs2;
784:           i2    += bs;
785:         }
786:         break;
787:       }
788:       PetscLogFlops(2.0*bs2*a->nz);
789:     }
790:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
791:       idiag = a->idiag+bs2*(a->mbs-1);
792:       i2 = bs * (m-1);
793:       switch (bs) {
794:       case 1:
795:         for (i=m-1; i>=0; i--) {
796:           v  = aa + ai[i];
797:           vi = aj + ai[i];
798:           nz = ai[i+1] - ai[i];
799:           s[0] = b[i2];
800:           for (j=0; j<nz; j++) {
801:             xw[0] = x[vi[j]];
802:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
803:           }
804:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
805:           x[i2] += xw[0];
806:           idiag -= 1;
807:           i2    -= 1;
808:         }
809:         break;
810:       case 2:
811:         for (i=m-1; i>=0; i--) {
812:           v  = aa + 4*ai[i];
813:           vi = aj + ai[i];
814:           nz = ai[i+1] - ai[i];
815:           s[0] = b[i2]; s[1] = b[i2+1];
816:           for (j=0; j<nz; j++) {
817:             idx = 2*vi[j];
818:             it  = 4*j;
819:             xw[0] = x[idx]; xw[1] = x[1+idx];
820:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
821:           }
822:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
823:           x[i2]  += xw[0]; x[i2+1] += xw[1];
824:           idiag  -= 4;
825:           i2     -= 2;
826:         }
827:         break;
828:       case 3:
829:         for (i=m-1; i>=0; i--) {
830:           v  = aa + 9*ai[i];
831:           vi = aj + ai[i];
832:           nz = ai[i+1] - ai[i];
833:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
834:           while (nz--) {
835:             idx = 3*(*vi++);
836:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
837:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
838:             v  += 9;
839:           }
840:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
841:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
842:           idiag  -= 9;
843:           i2     -= 3;
844:         }
845:         break;
846:       case 4:
847:         for (i=m-1; i>=0; i--) {
848:           v  = aa + 16*ai[i];
849:           vi = aj + ai[i];
850:           nz = ai[i+1] - ai[i];
851:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
852:           while (nz--) {
853:             idx = 4*(*vi++);
854:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
855:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
856:             v  += 16;
857:           }
858:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
859:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
860:           idiag  -= 16;
861:           i2     -= 4;
862:         }
863:         break;
864:       case 5:
865:         for (i=m-1; i>=0; i--) {
866:           v  = aa + 25*ai[i];
867:           vi = aj + ai[i];
868:           nz = ai[i+1] - ai[i];
869:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
870:           while (nz--) {
871:             idx = 5*(*vi++);
872:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
873:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
874:             v  += 25;
875:           }
876:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
877:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
878:           idiag  -= 25;
879:           i2     -= 5;
880:         }
881:         break;
882:       case 6:
883:         for (i=m-1; i>=0; i--) {
884:           v  = aa + 36*ai[i];
885:           vi = aj + ai[i];
886:           nz = ai[i+1] - ai[i];
887:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
888:           while (nz--) {
889:             idx = 6*(*vi++);
890:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
891:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
892:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
893:             v  += 36;
894:           }
895:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
896:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
897:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
898:           idiag  -= 36;
899:           i2     -= 6;
900:         }
901:         break;
902:       case 7:
903:         for (i=m-1; i>=0; i--) {
904:           v  = aa + 49*ai[i];
905:           vi = aj + ai[i];
906:           nz = ai[i+1] - ai[i];
907:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
908:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
909:           while (nz--) {
910:             idx = 7*(*vi++);
911:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
912:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
913:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
914:             v  += 49;
915:           }
916:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
917:           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
918:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
919:           idiag  -= 49;
920:           i2     -= 7;
921:         }
922:         break;
923:       default:
924:         for (i=m-1; i>=0; i--) {
925:           v  = aa + bs2*ai[i];
926:           vi = aj + ai[i];
927:           nz = ai[i+1] - ai[i];

929:           PetscArraycpy(w,b+i2,bs);
930:           /* copy all rows of x that are needed into contiguous space */
931:           workt = work;
932:           for (j=0; j<nz; j++) {
933:             PetscArraycpy(workt,x + bs*(*vi++),bs);
934:             workt += bs;
935:           }
936:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
937:           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);

939:           idiag -= bs2;
940:           i2    -= bs;
941:         }
942:         break;
943:       }
944:       PetscLogFlops(2.0*bs2*(a->nz));
945:     }
946:   }
947:   VecRestoreArray(xx,&x);
948:   VecRestoreArrayRead(bb,&b);
949:   return 0;
950: }

952: /*
953:     Special version for direct calls from Fortran (Used in PETSc-fun3d)
954: */
955: #if defined(PETSC_HAVE_FORTRAN_CAPS)
956: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
957: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
958: #define matsetvaluesblocked4_ matsetvaluesblocked4
959: #endif

961: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
962: {
963:   Mat               A  = *AA;
964:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
965:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
966:   PetscInt          *ai    =a->i,*ailen=a->ilen;
967:   PetscInt          *aj    =a->j,stepval,lastcol = -1;
968:   const PetscScalar *value = v;
969:   MatScalar         *ap,*aa = a->a,*bap;

971:   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
972:   stepval = (n-1)*4;
973:   for (k=0; k<m; k++) { /* loop over added rows */
974:     row  = im[k];
975:     rp   = aj + ai[row];
976:     ap   = aa + 16*ai[row];
977:     nrow = ailen[row];
978:     low  = 0;
979:     high = nrow;
980:     for (l=0; l<n; l++) { /* loop over added columns */
981:       col = in[l];
982:       if (col <= lastcol)  low = 0;
983:       else                high = nrow;
984:       lastcol = col;
985:       value   = v + k*(stepval+4 + l)*4;
986:       while (high-low > 7) {
987:         t = (low+high)/2;
988:         if (rp[t] > col) high = t;
989:         else             low  = t;
990:       }
991:       for (i=low; i<high; i++) {
992:         if (rp[i] > col) break;
993:         if (rp[i] == col) {
994:           bap = ap +  16*i;
995:           for (ii=0; ii<4; ii++,value+=stepval) {
996:             for (jj=ii; jj<16; jj+=4) {
997:               bap[jj] += *value++;
998:             }
999:           }
1000:           goto noinsert2;
1001:         }
1002:       }
1003:       N = nrow++ - 1;
1004:       high++; /* added new column index thus must search to one higher than before */
1005:       /* shift up all the later entries in this row */
1006:       for (ii=N; ii>=i; ii--) {
1007:         rp[ii+1] = rp[ii];
1008:         PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);
1009:       }
1010:       if (N >= i) {
1011:         PetscArrayzero(ap+16*i,16);
1012:       }
1013:       rp[i] = col;
1014:       bap   = ap +  16*i;
1015:       for (ii=0; ii<4; ii++,value+=stepval) {
1016:         for (jj=ii; jj<16; jj+=4) {
1017:           bap[jj] = *value++;
1018:         }
1019:       }
1020:       noinsert2:;
1021:       low = i;
1022:     }
1023:     ailen[row] = nrow;
1024:   }
1025:   return;
1026: }

1028: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1029: #define matsetvalues4_ MATSETVALUES4
1030: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1031: #define matsetvalues4_ matsetvalues4
1032: #endif

1034: PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1035: {
1036:   Mat         A  = *AA;
1037:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1038:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
1039:   PetscInt    *ai=a->i,*ailen=a->ilen;
1040:   PetscInt    *aj=a->j,brow,bcol;
1041:   PetscInt    ridx,cidx,lastcol = -1;
1042:   MatScalar   *ap,value,*aa=a->a,*bap;

1044:   for (k=0; k<m; k++) { /* loop over added rows */
1045:     row  = im[k]; brow = row/4;
1046:     rp   = aj + ai[brow];
1047:     ap   = aa + 16*ai[brow];
1048:     nrow = ailen[brow];
1049:     low  = 0;
1050:     high = nrow;
1051:     for (l=0; l<n; l++) { /* loop over added columns */
1052:       col   = in[l]; bcol = col/4;
1053:       ridx  = row % 4; cidx = col % 4;
1054:       value = v[l + k*n];
1055:       if (col <= lastcol)  low = 0;
1056:       else                high = nrow;
1057:       lastcol = col;
1058:       while (high-low > 7) {
1059:         t = (low+high)/2;
1060:         if (rp[t] > bcol) high = t;
1061:         else              low  = t;
1062:       }
1063:       for (i=low; i<high; i++) {
1064:         if (rp[i] > bcol) break;
1065:         if (rp[i] == bcol) {
1066:           bap   = ap +  16*i + 4*cidx + ridx;
1067:           *bap += value;
1068:           goto noinsert1;
1069:         }
1070:       }
1071:       N = nrow++ - 1;
1072:       high++; /* added new column thus must search to one higher than before */
1073:       /* shift up all the later entries in this row */
1074:       PetscArraymove(rp+i+1,rp+i,N-i+1);
1075:       PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));
1076:       PetscArrayzero(ap+16*i,16);
1077:       rp[i]                    = bcol;
1078:       ap[16*i + 4*cidx + ridx] = value;
1079: noinsert1:;
1080:       low = i;
1081:     }
1082:     ailen[brow] = nrow;
1083:   }
1084:   return;
1085: }

1087: /*
1088:      Checks for missing diagonals
1089: */
1090: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1091: {
1092:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1093:   PetscInt       *diag,*ii = a->i,i;

1095:   MatMarkDiagonal_SeqBAIJ(A);
1096:   *missing = PETSC_FALSE;
1097:   if (A->rmap->n > 0 && !ii) {
1098:     *missing = PETSC_TRUE;
1099:     if (d) *d = 0;
1100:     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1101:   } else {
1102:     PetscInt n;
1103:     n = PetscMin(a->mbs, a->nbs);
1104:     diag = a->diag;
1105:     for (i=0; i<n; i++) {
1106:       if (diag[i] >= ii[i+1]) {
1107:         *missing = PETSC_TRUE;
1108:         if (d) *d = i;
1109:         PetscInfo(A,"Matrix is missing block diagonal number %" PetscInt_FMT "\n",i);
1110:         break;
1111:       }
1112:     }
1113:   }
1114:   return 0;
1115: }

1117: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1118: {
1119:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1120:   PetscInt       i,j,m = a->mbs;

1122:   if (!a->diag) {
1123:     PetscMalloc1(m,&a->diag);
1124:     PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));
1125:     a->free_diag = PETSC_TRUE;
1126:   }
1127:   for (i=0; i<m; i++) {
1128:     a->diag[i] = a->i[i+1];
1129:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1130:       if (a->j[j] == i) {
1131:         a->diag[i] = j;
1132:         break;
1133:       }
1134:     }
1135:   }
1136:   return 0;
1137: }

1139: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1140: {
1141:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1142:   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1143:   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;

1145:   *nn = n;
1146:   if (!ia) return 0;
1147:   if (symmetric) {
1148:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);
1149:     nz   = tia[n];
1150:   } else {
1151:     tia = a->i; tja = a->j;
1152:   }

1154:   if (!blockcompressed && bs > 1) {
1155:     (*nn) *= bs;
1156:     /* malloc & create the natural set of indices */
1157:     PetscMalloc1((n+1)*bs,ia);
1158:     if (n) {
1159:       (*ia)[0] = oshift;
1160:       for (j=1; j<bs; j++) {
1161:         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1162:       }
1163:     }

1165:     for (i=1; i<n; i++) {
1166:       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1167:       for (j=1; j<bs; j++) {
1168:         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1169:       }
1170:     }
1171:     if (n) {
1172:       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1173:     }

1175:     if (inja) {
1176:       PetscMalloc1(nz*bs*bs,ja);
1177:       cnt = 0;
1178:       for (i=0; i<n; i++) {
1179:         for (j=0; j<bs; j++) {
1180:           for (k=tia[i]; k<tia[i+1]; k++) {
1181:             for (l=0; l<bs; l++) {
1182:               (*ja)[cnt++] = bs*tja[k] + l;
1183:             }
1184:           }
1185:         }
1186:       }
1187:     }

1189:     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1190:       PetscFree(tia);
1191:       PetscFree(tja);
1192:     }
1193:   } else if (oshift == 1) {
1194:     if (symmetric) {
1195:       nz = tia[A->rmap->n/bs];
1196:       /*  add 1 to i and j indices */
1197:       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1198:       *ia = tia;
1199:       if (ja) {
1200:         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1201:         *ja = tja;
1202:       }
1203:     } else {
1204:       nz = a->i[A->rmap->n/bs];
1205:       /* malloc space and  add 1 to i and j indices */
1206:       PetscMalloc1(A->rmap->n/bs+1,ia);
1207:       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1208:       if (ja) {
1209:         PetscMalloc1(nz,ja);
1210:         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1211:       }
1212:     }
1213:   } else {
1214:     *ia = tia;
1215:     if (ja) *ja = tja;
1216:   }
1217:   return 0;
1218: }

1220: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1221: {
1222:   if (!ia) return 0;
1223:   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1224:     PetscFree(*ia);
1225:     if (ja) PetscFree(*ja);
1226:   }
1227:   return 0;
1228: }

1230: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1231: {
1232:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1234: #if defined(PETSC_USE_LOG)
1235:   PetscLogObjectState((PetscObject)A,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT,A->rmap->N,A->cmap->n,a->nz);
1236: #endif
1237:   MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1238:   ISDestroy(&a->row);
1239:   ISDestroy(&a->col);
1240:   if (a->free_diag) PetscFree(a->diag);
1241:   PetscFree(a->idiag);
1242:   if (a->free_imax_ilen) PetscFree2(a->imax,a->ilen);
1243:   PetscFree(a->solve_work);
1244:   PetscFree(a->mult_work);
1245:   PetscFree(a->sor_workt);
1246:   PetscFree(a->sor_work);
1247:   ISDestroy(&a->icol);
1248:   PetscFree(a->saved_values);
1249:   PetscFree2(a->compressedrow.i,a->compressedrow.rindex);

1251:   MatDestroy(&a->sbaijMat);
1252:   MatDestroy(&a->parent);
1253:   PetscFree(A->data);

1255:   PetscObjectChangeTypeName((PetscObject)A,NULL);
1256:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);
1257:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);
1258:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
1259:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
1260:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);
1261:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);
1262:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);
1263:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);
1264:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);
1265:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);
1266:   PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);
1267: #if defined(PETSC_HAVE_HYPRE)
1268:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);
1269: #endif
1270:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);
1271:   return 0;
1272: }

1274: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1275: {
1276:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1278:   switch (op) {
1279:   case MAT_ROW_ORIENTED:
1280:     a->roworiented = flg;
1281:     break;
1282:   case MAT_KEEP_NONZERO_PATTERN:
1283:     a->keepnonzeropattern = flg;
1284:     break;
1285:   case MAT_NEW_NONZERO_LOCATIONS:
1286:     a->nonew = (flg ? 0 : 1);
1287:     break;
1288:   case MAT_NEW_NONZERO_LOCATION_ERR:
1289:     a->nonew = (flg ? -1 : 0);
1290:     break;
1291:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1292:     a->nonew = (flg ? -2 : 0);
1293:     break;
1294:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1295:     a->nounused = (flg ? -1 : 0);
1296:     break;
1297:   case MAT_FORCE_DIAGONAL_ENTRIES:
1298:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1299:   case MAT_USE_HASH_TABLE:
1300:   case MAT_SORTED_FULL:
1301:     PetscInfo(A,"Option %s ignored\n",MatOptions[op]);
1302:     break;
1303:   case MAT_SPD:
1304:   case MAT_SYMMETRIC:
1305:   case MAT_STRUCTURALLY_SYMMETRIC:
1306:   case MAT_HERMITIAN:
1307:   case MAT_SYMMETRY_ETERNAL:
1308:   case MAT_SUBMAT_SINGLEIS:
1309:   case MAT_STRUCTURE_ONLY:
1310:     /* These options are handled directly by MatSetOption() */
1311:     break;
1312:   default:
1313:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1314:   }
1315:   return 0;
1316: }

1318: /* used for both SeqBAIJ and SeqSBAIJ matrices */
1319: PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1320: {
1321:   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1322:   MatScalar      *aa_i;
1323:   PetscScalar    *v_i;

1325:   bs  = A->rmap->bs;
1326:   bs2 = bs*bs;

1329:   bn  = row/bs;   /* Block number */
1330:   bp  = row % bs; /* Block Position */
1331:   M   = ai[bn+1] - ai[bn];
1332:   *nz = bs*M;

1334:   if (v) {
1335:     *v = NULL;
1336:     if (*nz) {
1337:       PetscMalloc1(*nz,v);
1338:       for (i=0; i<M; i++) { /* for each block in the block row */
1339:         v_i  = *v + i*bs;
1340:         aa_i = aa + bs2*(ai[bn] + i);
1341:         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1342:       }
1343:     }
1344:   }

1346:   if (idx) {
1347:     *idx = NULL;
1348:     if (*nz) {
1349:       PetscMalloc1(*nz,idx);
1350:       for (i=0; i<M; i++) { /* for each block in the block row */
1351:         idx_i = *idx + i*bs;
1352:         itmp  = bs*aj[ai[bn] + i];
1353:         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1354:       }
1355:     }
1356:   }
1357:   return 0;
1358: }

1360: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1361: {
1362:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1364:   MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);
1365:   return 0;
1366: }

1368: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1369: {
1370:   if (nz)  *nz = 0;
1371:   if (idx) PetscFree(*idx);
1372:   if (v)   PetscFree(*v);
1373:   return 0;
1374: }

1376: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1377: {
1378:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1379:   Mat            C;
1380:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1381:   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1382:   MatScalar      *ata,*aa=a->a;

1384:   PetscCalloc1(1+nbs,&atfill);
1385:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1386:     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */

1388:     MatCreate(PetscObjectComm((PetscObject)A),&C);
1389:     MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1390:     MatSetType(C,((PetscObject)A)->type_name);
1391:     MatSeqBAIJSetPreallocation(C,bs,0,atfill);

1393:     at  = (Mat_SeqBAIJ*)C->data;
1394:     ati = at->i;
1395:     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1396:   } else {
1397:     C = *B;
1398:     at = (Mat_SeqBAIJ*)C->data;
1399:     ati = at->i;
1400:   }

1402:   atj = at->j;
1403:   ata = at->a;

1405:   /* Copy ati into atfill so we have locations of the next free space in atj */
1406:   PetscArraycpy(atfill,ati,nbs);

1408:   /* Walk through A row-wise and mark nonzero entries of A^T. */
1409:   for (i=0; i<mbs; i++) {
1410:     anzj = ai[i+1] - ai[i];
1411:     for (j=0; j<anzj; j++) {
1412:       atj[atfill[*aj]] = i;
1413:       for (kr=0; kr<bs; kr++) {
1414:         for (k=0; k<bs; k++) {
1415:           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1416:         }
1417:       }
1418:       atfill[*aj++] += 1;
1419:     }
1420:   }
1421:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1422:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

1424:   /* Clean up temporary space and complete requests. */
1425:   PetscFree(atfill);

1427:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1428:     MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));
1429:     *B = C;
1430:   } else {
1431:     MatHeaderMerge(A,&C);
1432:   }
1433:   return 0;
1434: }

1436: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1437: {
1438:   Mat            Btrans;

1440:   *f   = PETSC_FALSE;
1441:   MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1442:   MatEqual_SeqBAIJ(B,Btrans,f);
1443:   MatDestroy(&Btrans);
1444:   return 0;
1445: }

1447: /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1448: PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1449: {
1450:   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1451:   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1452:   PetscInt       *rowlens,*colidxs;
1453:   PetscScalar    *matvals;

1455:   PetscViewerSetUp(viewer);

1457:   M  = mat->rmap->N;
1458:   N  = mat->cmap->N;
1459:   m  = mat->rmap->n;
1460:   bs = mat->rmap->bs;
1461:   nz = bs*bs*A->nz;

1463:   /* write matrix header */
1464:   header[0] = MAT_FILE_CLASSID;
1465:   header[1] = M; header[2] = N; header[3] = nz;
1466:   PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);

1468:   /* store row lengths */
1469:   PetscMalloc1(m,&rowlens);
1470:   for (cnt=0, i=0; i<A->mbs; i++)
1471:     for (j=0; j<bs; j++)
1472:       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1473:   PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);
1474:   PetscFree(rowlens);

1476:   /* store column indices  */
1477:   PetscMalloc1(nz,&colidxs);
1478:   for (cnt=0, i=0; i<A->mbs; i++)
1479:     for (k=0; k<bs; k++)
1480:       for (j=A->i[i]; j<A->i[i+1]; j++)
1481:         for (l=0; l<bs; l++)
1482:           colidxs[cnt++] = bs*A->j[j] + l;
1484:   PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);
1485:   PetscFree(colidxs);

1487:   /* store nonzero values */
1488:   PetscMalloc1(nz,&matvals);
1489:   for (cnt=0, i=0; i<A->mbs; i++)
1490:     for (k=0; k<bs; k++)
1491:       for (j=A->i[i]; j<A->i[i+1]; j++)
1492:         for (l=0; l<bs; l++)
1493:           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1495:   PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);
1496:   PetscFree(matvals);

1498:   /* write block size option to the viewer's .info file */
1499:   MatView_Binary_BlockSizes(mat,viewer);
1500:   return 0;
1501: }

1503: static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1504: {
1505:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1506:   PetscInt       i,bs = A->rmap->bs,k;

1508:   PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1509:   for (i=0; i<a->mbs; i++) {
1510:     PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT "-%" PetscInt_FMT ":",i*bs,i*bs+bs-1);
1511:     for (k=a->i[i]; k<a->i[i+1]; k++) {
1512:       PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT "-%" PetscInt_FMT ") ",bs*a->j[k],bs*a->j[k]+bs-1);
1513:     }
1514:     PetscViewerASCIIPrintf(viewer,"\n");
1515:   }
1516:   PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1517:   return 0;
1518: }

1520: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1521: {
1522:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1523:   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1524:   PetscViewerFormat format;

1526:   if (A->structure_only) {
1527:     MatView_SeqBAIJ_ASCII_structonly(A,viewer);
1528:     return 0;
1529:   }

1531:   PetscViewerGetFormat(viewer,&format);
1532:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1533:     PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs);
1534:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1535:     const char *matname;
1536:     Mat        aij;
1537:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1538:     PetscObjectGetName((PetscObject)A,&matname);
1539:     PetscObjectSetName((PetscObject)aij,matname);
1540:     MatView(aij,viewer);
1541:     MatDestroy(&aij);
1542:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1543:       return 0;
1544:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1545:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1546:     for (i=0; i<a->mbs; i++) {
1547:       for (j=0; j<bs; j++) {
1548:         PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j);
1549:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1550:           for (l=0; l<bs; l++) {
1551: #if defined(PETSC_USE_COMPLEX)
1552:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1553:               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %gi) ",bs*a->j[k]+l,
1554:                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1555:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1556:               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %gi) ",bs*a->j[k]+l,
1557:                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1558:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1559:               PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1560:             }
1561: #else
1562:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1563:               PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1564:             }
1565: #endif
1566:           }
1567:         }
1568:         PetscViewerASCIIPrintf(viewer,"\n");
1569:       }
1570:     }
1571:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1572:   } else {
1573:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1574:     for (i=0; i<a->mbs; i++) {
1575:       for (j=0; j<bs; j++) {
1576:         PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j);
1577:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1578:           for (l=0; l<bs; l++) {
1579: #if defined(PETSC_USE_COMPLEX)
1580:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1581:               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i) ",bs*a->j[k]+l,
1582:                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1583:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1584:               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i) ",bs*a->j[k]+l,
1585:                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1586:             } else {
1587:               PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1588:             }
1589: #else
1590:             PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1591: #endif
1592:           }
1593:         }
1594:         PetscViewerASCIIPrintf(viewer,"\n");
1595:       }
1596:     }
1597:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1598:   }
1599:   PetscViewerFlush(viewer);
1600:   return 0;
1601: }

1603: #include <petscdraw.h>
1604: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1605: {
1606:   Mat               A = (Mat) Aa;
1607:   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1608:   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1609:   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1610:   MatScalar         *aa;
1611:   PetscViewer       viewer;
1612:   PetscViewerFormat format;

1614:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1615:   PetscViewerGetFormat(viewer,&format);
1616:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1618:   /* loop over matrix elements drawing boxes */

1620:   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1622:     PetscDrawCollectiveBegin(draw);
1623:     /* Blue for negative, Cyan for zero and  Red for positive */
1624:     color = PETSC_DRAW_BLUE;
1625:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1626:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1627:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1628:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1629:         aa  = a->a + j*bs2;
1630:         for (k=0; k<bs; k++) {
1631:           for (l=0; l<bs; l++) {
1632:             if (PetscRealPart(*aa++) >=  0.) continue;
1633:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1634:           }
1635:         }
1636:       }
1637:     }
1638:     color = PETSC_DRAW_CYAN;
1639:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1640:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1641:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1642:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1643:         aa  = a->a + j*bs2;
1644:         for (k=0; k<bs; k++) {
1645:           for (l=0; l<bs; l++) {
1646:             if (PetscRealPart(*aa++) != 0.) continue;
1647:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1648:           }
1649:         }
1650:       }
1651:     }
1652:     color = PETSC_DRAW_RED;
1653:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1654:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1655:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1656:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1657:         aa  = a->a + j*bs2;
1658:         for (k=0; k<bs; k++) {
1659:           for (l=0; l<bs; l++) {
1660:             if (PetscRealPart(*aa++) <= 0.) continue;
1661:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1662:           }
1663:         }
1664:       }
1665:     }
1666:     PetscDrawCollectiveEnd(draw);
1667:   } else {
1668:     /* use contour shading to indicate magnitude of values */
1669:     /* first determine max of all nonzero values */
1670:     PetscReal      minv = 0.0, maxv = 0.0;
1671:     PetscDraw      popup;

1674:     for (i=0; i<a->nz*a->bs2; i++) {
1675:       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1676:     }
1677:     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1678:     PetscDrawGetPopup(draw,&popup);
1679:     PetscDrawScalePopup(popup,0.0,maxv);

1681:     PetscDrawCollectiveBegin(draw);
1682:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1683:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1684:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1685:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1686:         aa  = a->a + j*bs2;
1687:         for (k=0; k<bs; k++) {
1688:           for (l=0; l<bs; l++) {
1689:             MatScalar v = *aa++;
1690:             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1691:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1692:           }
1693:         }
1694:       }
1695:     }
1696:     PetscDrawCollectiveEnd(draw);
1697:   }
1698:   return 0;
1699: }

1701: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1702: {
1703:   PetscReal      xl,yl,xr,yr,w,h;
1704:   PetscDraw      draw;
1705:   PetscBool      isnull;

1707:   PetscViewerDrawGetDraw(viewer,0,&draw);
1708:   PetscDrawIsNull(draw,&isnull);
1709:   if (isnull) return 0;

1711:   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1712:   xr  += w;          yr += h;        xl = -w;     yl = -h;
1713:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1714:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1715:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1716:   PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1717:   PetscDrawSave(draw);
1718:   return 0;
1719: }

1721: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1722: {
1723:   PetscBool      iascii,isbinary,isdraw;

1725:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1726:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1727:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1728:   if (iascii) {
1729:     MatView_SeqBAIJ_ASCII(A,viewer);
1730:   } else if (isbinary) {
1731:     MatView_SeqBAIJ_Binary(A,viewer);
1732:   } else if (isdraw) {
1733:     MatView_SeqBAIJ_Draw(A,viewer);
1734:   } else {
1735:     Mat B;
1736:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1737:     MatView(B,viewer);
1738:     MatDestroy(&B);
1739:   }
1740:   return 0;
1741: }

1743: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1744: {
1745:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1746:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1747:   PetscInt    *ai = a->i,*ailen = a->ilen;
1748:   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1749:   MatScalar   *ap,*aa = a->a;

1751:   for (k=0; k<m; k++) { /* loop over rows */
1752:     row = im[k]; brow = row/bs;
1753:     if (row < 0) {v += n; continue;} /* negative row */
1755:     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1756:     nrow = ailen[brow];
1757:     for (l=0; l<n; l++) { /* loop over columns */
1758:       if (in[l] < 0) {v++; continue;} /* negative column */
1760:       col  = in[l];
1761:       bcol = col/bs;
1762:       cidx = col%bs;
1763:       ridx = row%bs;
1764:       high = nrow;
1765:       low  = 0; /* assume unsorted */
1766:       while (high-low > 5) {
1767:         t = (low+high)/2;
1768:         if (rp[t] > bcol) high = t;
1769:         else             low  = t;
1770:       }
1771:       for (i=low; i<high; i++) {
1772:         if (rp[i] > bcol) break;
1773:         if (rp[i] == bcol) {
1774:           *v++ = ap[bs2*i+bs*cidx+ridx];
1775:           goto finished;
1776:         }
1777:       }
1778:       *v++ = 0.0;
1779: finished:;
1780:     }
1781:   }
1782:   return 0;
1783: }

1785: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1786: {
1787:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1788:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1789:   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1790:   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1791:   PetscBool         roworiented=a->roworiented;
1792:   const PetscScalar *value     = v;
1793:   MatScalar         *ap=NULL,*aa = a->a,*bap;

1795:   if (roworiented) {
1796:     stepval = (n-1)*bs;
1797:   } else {
1798:     stepval = (m-1)*bs;
1799:   }
1800:   for (k=0; k<m; k++) { /* loop over added rows */
1801:     row = im[k];
1802:     if (row < 0) continue;
1804:     rp   = aj + ai[row];
1805:     if (!A->structure_only) ap = aa + bs2*ai[row];
1806:     rmax = imax[row];
1807:     nrow = ailen[row];
1808:     low  = 0;
1809:     high = nrow;
1810:     for (l=0; l<n; l++) { /* loop over added columns */
1811:       if (in[l] < 0) continue;
1813:       col = in[l];
1814:       if (!A->structure_only) {
1815:         if (roworiented) {
1816:           value = v + (k*(stepval+bs) + l)*bs;
1817:         } else {
1818:           value = v + (l*(stepval+bs) + k)*bs;
1819:         }
1820:       }
1821:       if (col <= lastcol) low = 0;
1822:       else high = nrow;
1823:       lastcol = col;
1824:       while (high-low > 7) {
1825:         t = (low+high)/2;
1826:         if (rp[t] > col) high = t;
1827:         else             low  = t;
1828:       }
1829:       for (i=low; i<high; i++) {
1830:         if (rp[i] > col) break;
1831:         if (rp[i] == col) {
1832:           if (A->structure_only) goto noinsert2;
1833:           bap = ap +  bs2*i;
1834:           if (roworiented) {
1835:             if (is == ADD_VALUES) {
1836:               for (ii=0; ii<bs; ii++,value+=stepval) {
1837:                 for (jj=ii; jj<bs2; jj+=bs) {
1838:                   bap[jj] += *value++;
1839:                 }
1840:               }
1841:             } else {
1842:               for (ii=0; ii<bs; ii++,value+=stepval) {
1843:                 for (jj=ii; jj<bs2; jj+=bs) {
1844:                   bap[jj] = *value++;
1845:                 }
1846:               }
1847:             }
1848:           } else {
1849:             if (is == ADD_VALUES) {
1850:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1851:                 for (jj=0; jj<bs; jj++) {
1852:                   bap[jj] += value[jj];
1853:                 }
1854:                 bap += bs;
1855:               }
1856:             } else {
1857:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1858:                 for (jj=0; jj<bs; jj++) {
1859:                   bap[jj]  = value[jj];
1860:                 }
1861:                 bap += bs;
1862:               }
1863:             }
1864:           }
1865:           goto noinsert2;
1866:         }
1867:       }
1868:       if (nonew == 1) goto noinsert2;
1870:       if (A->structure_only) {
1871:         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1872:       } else {
1873:         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1874:       }
1875:       N = nrow++ - 1; high++;
1876:       /* shift up all the later entries in this row */
1877:       PetscArraymove(rp+i+1,rp+i,N-i+1);
1878:       rp[i] = col;
1879:       if (!A->structure_only) {
1880:         PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
1881:         bap   = ap +  bs2*i;
1882:         if (roworiented) {
1883:           for (ii=0; ii<bs; ii++,value+=stepval) {
1884:             for (jj=ii; jj<bs2; jj+=bs) {
1885:               bap[jj] = *value++;
1886:             }
1887:           }
1888:         } else {
1889:           for (ii=0; ii<bs; ii++,value+=stepval) {
1890:             for (jj=0; jj<bs; jj++) {
1891:               *bap++ = *value++;
1892:             }
1893:           }
1894:         }
1895:       }
1896: noinsert2:;
1897:       low = i;
1898:     }
1899:     ailen[row] = nrow;
1900:   }
1901:   return 0;
1902: }

1904: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1905: {
1906:   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1907:   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1908:   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1909:   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1910:   MatScalar      *aa  = a->a,*ap;
1911:   PetscReal      ratio=0.6;

1913:   if (mode == MAT_FLUSH_ASSEMBLY) return 0;

1915:   if (m) rmax = ailen[0];
1916:   for (i=1; i<mbs; i++) {
1917:     /* move each row back by the amount of empty slots (fshift) before it*/
1918:     fshift += imax[i-1] - ailen[i-1];
1919:     rmax    = PetscMax(rmax,ailen[i]);
1920:     if (fshift) {
1921:       ip = aj + ai[i];
1922:       ap = aa + bs2*ai[i];
1923:       N  = ailen[i];
1924:       PetscArraymove(ip-fshift,ip,N);
1925:       if (!A->structure_only) {
1926:         PetscArraymove(ap-bs2*fshift,ap,bs2*N);
1927:       }
1928:     }
1929:     ai[i] = ai[i-1] + ailen[i-1];
1930:   }
1931:   if (mbs) {
1932:     fshift += imax[mbs-1] - ailen[mbs-1];
1933:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1934:   }

1936:   /* reset ilen and imax for each row */
1937:   a->nonzerorowcnt = 0;
1938:   if (A->structure_only) {
1939:     PetscFree2(a->imax,a->ilen);
1940:   } else { /* !A->structure_only */
1941:     for (i=0; i<mbs; i++) {
1942:       ailen[i] = imax[i] = ai[i+1] - ai[i];
1943:       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1944:     }
1945:   }
1946:   a->nz = ai[mbs];

1948:   /* diagonals may have moved, so kill the diagonal pointers */
1949:   a->idiagvalid = PETSC_FALSE;
1950:   if (fshift && a->diag) {
1951:     PetscFree(a->diag);
1952:     PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));
1953:     a->diag = NULL;
1954:   }
1956:   PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1957:   PetscInfo(A,"Number of mallocs during MatSetValues is %" PetscInt_FMT "\n",a->reallocs);
1958:   PetscInfo(A,"Most nonzeros blocks in any row is %" PetscInt_FMT "\n",rmax);

1960:   A->info.mallocs    += a->reallocs;
1961:   a->reallocs         = 0;
1962:   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1963:   a->rmax             = rmax;

1965:   if (!A->structure_only) {
1966:     MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);
1967:   }
1968:   return 0;
1969: }

1971: /*
1972:    This function returns an array of flags which indicate the locations of contiguous
1973:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1974:    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1975:    Assume: sizes should be long enough to hold all the values.
1976: */
1977: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1978: {
1979:   PetscInt  i,j,k,row;
1980:   PetscBool flg;

1982:   for (i=0,j=0; i<n; j++) {
1983:     row = idx[i];
1984:     if (row%bs!=0) { /* Not the beginning of a block */
1985:       sizes[j] = 1;
1986:       i++;
1987:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1988:       sizes[j] = 1;         /* Also makes sure at least 'bs' values exist for next else */
1989:       i++;
1990:     } else { /* Beginning of the block, so check if the complete block exists */
1991:       flg = PETSC_TRUE;
1992:       for (k=1; k<bs; k++) {
1993:         if (row+k != idx[i+k]) { /* break in the block */
1994:           flg = PETSC_FALSE;
1995:           break;
1996:         }
1997:       }
1998:       if (flg) { /* No break in the bs */
1999:         sizes[j] = bs;
2000:         i       += bs;
2001:       } else {
2002:         sizes[j] = 1;
2003:         i++;
2004:       }
2005:     }
2006:   }
2007:   *bs_max = j;
2008:   return 0;
2009: }

2011: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2012: {
2013:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2014:   PetscInt          i,j,k,count,*rows;
2015:   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2016:   PetscScalar       zero = 0.0;
2017:   MatScalar         *aa;
2018:   const PetscScalar *xx;
2019:   PetscScalar       *bb;

2021:   /* fix right hand side if needed */
2022:   if (x && b) {
2023:     VecGetArrayRead(x,&xx);
2024:     VecGetArray(b,&bb);
2025:     for (i=0; i<is_n; i++) {
2026:       bb[is_idx[i]] = diag*xx[is_idx[i]];
2027:     }
2028:     VecRestoreArrayRead(x,&xx);
2029:     VecRestoreArray(b,&bb);
2030:   }

2032:   /* Make a copy of the IS and  sort it */
2033:   /* allocate memory for rows,sizes */
2034:   PetscMalloc2(is_n,&rows,2*is_n,&sizes);

2036:   /* copy IS values to rows, and sort them */
2037:   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2038:   PetscSortInt(is_n,rows);

2040:   if (baij->keepnonzeropattern) {
2041:     for (i=0; i<is_n; i++) sizes[i] = 1;
2042:     bs_max          = is_n;
2043:   } else {
2044:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2045:     A->nonzerostate++;
2046:   }

2048:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2049:     row = rows[j];
2051:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2052:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2053:     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2054:       if (diag != (PetscScalar)0.0) {
2055:         if (baij->ilen[row/bs] > 0) {
2056:           baij->ilen[row/bs]       = 1;
2057:           baij->j[baij->i[row/bs]] = row/bs;

2059:           PetscArrayzero(aa,count*bs);
2060:         }
2061:         /* Now insert all the diagonal values for this bs */
2062:         for (k=0; k<bs; k++) {
2063:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2064:         }
2065:       } else { /* (diag == 0.0) */
2066:         baij->ilen[row/bs] = 0;
2067:       } /* end (diag == 0.0) */
2068:     } else { /* (sizes[i] != bs) */
2069:       PetscAssert(sizes[i] == 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2070:       for (k=0; k<count; k++) {
2071:         aa[0] =  zero;
2072:         aa   += bs;
2073:       }
2074:       if (diag != (PetscScalar)0.0) {
2075:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2076:       }
2077:     }
2078:   }

2080:   PetscFree2(rows,sizes);
2081:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2082:   return 0;
2083: }

2085: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2086: {
2087:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2088:   PetscInt          i,j,k,count;
2089:   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2090:   PetscScalar       zero = 0.0;
2091:   MatScalar         *aa;
2092:   const PetscScalar *xx;
2093:   PetscScalar       *bb;
2094:   PetscBool         *zeroed,vecs = PETSC_FALSE;

2096:   /* fix right hand side if needed */
2097:   if (x && b) {
2098:     VecGetArrayRead(x,&xx);
2099:     VecGetArray(b,&bb);
2100:     vecs = PETSC_TRUE;
2101:   }

2103:   /* zero the columns */
2104:   PetscCalloc1(A->rmap->n,&zeroed);
2105:   for (i=0; i<is_n; i++) {
2107:     zeroed[is_idx[i]] = PETSC_TRUE;
2108:   }
2109:   for (i=0; i<A->rmap->N; i++) {
2110:     if (!zeroed[i]) {
2111:       row = i/bs;
2112:       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2113:         for (k=0; k<bs; k++) {
2114:           col = bs*baij->j[j] + k;
2115:           if (zeroed[col]) {
2116:             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2117:             if (vecs) bb[i] -= aa[0]*xx[col];
2118:             aa[0] = 0.0;
2119:           }
2120:         }
2121:       }
2122:     } else if (vecs) bb[i] = diag*xx[i];
2123:   }
2124:   PetscFree(zeroed);
2125:   if (vecs) {
2126:     VecRestoreArrayRead(x,&xx);
2127:     VecRestoreArray(b,&bb);
2128:   }

2130:   /* zero the rows */
2131:   for (i=0; i<is_n; i++) {
2132:     row   = is_idx[i];
2133:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2134:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2135:     for (k=0; k<count; k++) {
2136:       aa[0] =  zero;
2137:       aa   += bs;
2138:     }
2139:     if (diag != (PetscScalar)0.0) {
2140:       (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2141:     }
2142:   }
2143:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2144:   return 0;
2145: }

2147: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2148: {
2149:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2150:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2151:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2152:   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2153:   PetscInt       ridx,cidx,bs2=a->bs2;
2154:   PetscBool      roworiented=a->roworiented;
2155:   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;

2157:   for (k=0; k<m; k++) { /* loop over added rows */
2158:     row  = im[k];
2159:     brow = row/bs;
2160:     if (row < 0) continue;
2162:     rp   = aj + ai[brow];
2163:     if (!A->structure_only) ap = aa + bs2*ai[brow];
2164:     rmax = imax[brow];
2165:     nrow = ailen[brow];
2166:     low  = 0;
2167:     high = nrow;
2168:     for (l=0; l<n; l++) { /* loop over added columns */
2169:       if (in[l] < 0) continue;
2171:       col  = in[l]; bcol = col/bs;
2172:       ridx = row % bs; cidx = col % bs;
2173:       if (!A->structure_only) {
2174:         if (roworiented) {
2175:           value = v[l + k*n];
2176:         } else {
2177:           value = v[k + l*m];
2178:         }
2179:       }
2180:       if (col <= lastcol) low = 0; else high = nrow;
2181:       lastcol = col;
2182:       while (high-low > 7) {
2183:         t = (low+high)/2;
2184:         if (rp[t] > bcol) high = t;
2185:         else              low  = t;
2186:       }
2187:       for (i=low; i<high; i++) {
2188:         if (rp[i] > bcol) break;
2189:         if (rp[i] == bcol) {
2190:           bap = ap +  bs2*i + bs*cidx + ridx;
2191:           if (!A->structure_only) {
2192:             if (is == ADD_VALUES) *bap += value;
2193:             else                  *bap  = value;
2194:           }
2195:           goto noinsert1;
2196:         }
2197:       }
2198:       if (nonew == 1) goto noinsert1;
2200:       if (A->structure_only) {
2201:         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2202:       } else {
2203:         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2204:       }
2205:       N = nrow++ - 1; high++;
2206:       /* shift up all the later entries in this row */
2207:       PetscArraymove(rp+i+1,rp+i,N-i+1);
2208:       rp[i] = bcol;
2209:       if (!A->structure_only) {
2210:         PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
2211:         PetscArrayzero(ap+bs2*i,bs2);
2212:         ap[bs2*i + bs*cidx + ridx] = value;
2213:       }
2214:       a->nz++;
2215:       A->nonzerostate++;
2216: noinsert1:;
2217:       low = i;
2218:     }
2219:     ailen[brow] = nrow;
2220:   }
2221:   return 0;
2222: }

2224: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2225: {
2226:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2227:   Mat            outA;
2228:   PetscBool      row_identity,col_identity;

2231:   ISIdentity(row,&row_identity);
2232:   ISIdentity(col,&col_identity);

2235:   outA            = inA;
2236:   inA->factortype = MAT_FACTOR_LU;
2237:   PetscFree(inA->solvertype);
2238:   PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);

2240:   MatMarkDiagonal_SeqBAIJ(inA);

2242:   PetscObjectReference((PetscObject)row);
2243:   ISDestroy(&a->row);
2244:   a->row = row;
2245:   PetscObjectReference((PetscObject)col);
2246:   ISDestroy(&a->col);
2247:   a->col = col;

2249:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2250:   ISDestroy(&a->icol);
2251:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2252:   PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);

2254:   MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2255:   if (!a->solve_work) {
2256:     PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);
2257:     PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2258:   }
2259:   MatLUFactorNumeric(outA,inA,info);
2260:   return 0;
2261: }

2263: PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2264: {
2265:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2266:   PetscInt    i,nz,mbs;

2268:   nz  = baij->maxnz;
2269:   mbs = baij->mbs;
2270:   for (i=0; i<nz; i++) {
2271:     baij->j[i] = indices[i];
2272:   }
2273:   baij->nz = nz;
2274:   for (i=0; i<mbs; i++) {
2275:     baij->ilen[i] = baij->imax[i];
2276:   }
2277:   return 0;
2278: }

2280: /*@
2281:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2282:        in the matrix.

2284:   Input Parameters:
2285: +  mat - the SeqBAIJ matrix
2286: -  indices - the column indices

2288:   Level: advanced

2290:   Notes:
2291:     This can be called if you have precomputed the nonzero structure of the
2292:   matrix and want to provide it to the matrix object to improve the performance
2293:   of the MatSetValues() operation.

2295:     You MUST have set the correct numbers of nonzeros per row in the call to
2296:   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.

2298:     MUST be called before any calls to MatSetValues();

2300: @*/
2301: PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2302: {
2305:   PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2306:   return 0;
2307: }

2309: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2310: {
2311:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2312:   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2313:   PetscReal      atmp;
2314:   PetscScalar    *x,zero = 0.0;
2315:   MatScalar      *aa;
2316:   PetscInt       ncols,brow,krow,kcol;

2318:   /* why is this not a macro???????????????????????????????????????????????????????????????? */
2320:   bs  = A->rmap->bs;
2321:   aa  = a->a;
2322:   ai  = a->i;
2323:   aj  = a->j;
2324:   mbs = a->mbs;

2326:   VecSet(v,zero);
2327:   VecGetArray(v,&x);
2328:   VecGetLocalSize(v,&n);
2330:   for (i=0; i<mbs; i++) {
2331:     ncols = ai[1] - ai[0]; ai++;
2332:     brow  = bs*i;
2333:     for (j=0; j<ncols; j++) {
2334:       for (kcol=0; kcol<bs; kcol++) {
2335:         for (krow=0; krow<bs; krow++) {
2336:           atmp = PetscAbsScalar(*aa);aa++;
2337:           row  = brow + krow;   /* row index */
2338:           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2339:         }
2340:       }
2341:       aj++;
2342:     }
2343:   }
2344:   VecRestoreArray(v,&x);
2345:   return 0;
2346: }

2348: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2349: {
2350:   /* If the two matrices have the same copy implementation, use fast copy. */
2351:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2352:     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2353:     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2354:     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;

2358:     PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);
2359:     PetscObjectStateIncrease((PetscObject)B);
2360:   } else {
2361:     MatCopy_Basic(A,B,str);
2362:   }
2363:   return 0;
2364: }

2366: PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2367: {
2368:   MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);
2369:   return 0;
2370: }

2372: static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2373: {
2374:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;

2376:   *array = a->a;
2377:   return 0;
2378: }

2380: static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2381: {
2382:   *array = NULL;
2383:   return 0;
2384: }

2386: PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2387: {
2388:   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2389:   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2390:   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;

2392:   /* Set the number of nonzeros in the new matrix */
2393:   MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);
2394:   return 0;
2395: }

2397: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2398: {
2399:   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2400:   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2401:   PetscBLASInt   one=1;

2403:   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2404:     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2405:     if (e) {
2406:       PetscArraycmp(x->i,y->i,x->mbs+1,&e);
2407:       if (e) {
2408:         PetscArraycmp(x->j,y->j,x->i[x->mbs],&e);
2409:         if (e) str = SAME_NONZERO_PATTERN;
2410:       }
2411:     }
2413:   }
2414:   if (str == SAME_NONZERO_PATTERN) {
2415:     PetscScalar  alpha = a;
2416:     PetscBLASInt bnz;
2417:     PetscBLASIntCast(x->nz*bs2,&bnz);
2418:     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2419:     PetscObjectStateIncrease((PetscObject)Y);
2420:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2421:     MatAXPY_Basic(Y,a,X,str);
2422:   } else {
2423:     Mat      B;
2424:     PetscInt *nnz;
2426:     PetscMalloc1(Y->rmap->N,&nnz);
2427:     MatCreate(PetscObjectComm((PetscObject)Y),&B);
2428:     PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2429:     MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2430:     MatSetBlockSizesFromMats(B,Y,Y);
2431:     MatSetType(B,(MatType) ((PetscObject)Y)->type_name);
2432:     MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);
2433:     MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2434:     MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2435:     MatHeaderMerge(Y,&B);
2436:     PetscFree(nnz);
2437:   }
2438:   return 0;
2439: }

2441: PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2442: {
2443: #if defined(PETSC_USE_COMPLEX)
2444:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2445:   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2446:   MatScalar   *aa = a->a;

2448:   for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
2449: #else
2450: #endif
2451:   return 0;
2452: }

2454: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2455: {
2456:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2457:   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2458:   MatScalar   *aa = a->a;

2460:   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2461:   return 0;
2462: }

2464: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2465: {
2466:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2467:   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2468:   MatScalar   *aa = a->a;

2470:   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2471:   return 0;
2472: }

2474: /*
2475:     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2476: */
2477: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2478: {
2479:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2480:   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2481:   PetscInt       nz = a->i[m],row,*jj,mr,col;

2483:   *nn = n;
2484:   if (!ia) return 0;
2486:   else {
2487:     PetscCalloc1(n,&collengths);
2488:     PetscMalloc1(n+1,&cia);
2489:     PetscMalloc1(nz,&cja);
2490:     jj   = a->j;
2491:     for (i=0; i<nz; i++) {
2492:       collengths[jj[i]]++;
2493:     }
2494:     cia[0] = oshift;
2495:     for (i=0; i<n; i++) {
2496:       cia[i+1] = cia[i] + collengths[i];
2497:     }
2498:     PetscArrayzero(collengths,n);
2499:     jj   = a->j;
2500:     for (row=0; row<m; row++) {
2501:       mr = a->i[row+1] - a->i[row];
2502:       for (i=0; i<mr; i++) {
2503:         col = *jj++;

2505:         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2506:       }
2507:     }
2508:     PetscFree(collengths);
2509:     *ia  = cia; *ja = cja;
2510:   }
2511:   return 0;
2512: }

2514: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2515: {
2516:   if (!ia) return 0;
2517:   PetscFree(*ia);
2518:   PetscFree(*ja);
2519:   return 0;
2520: }

2522: /*
2523:  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2524:  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2525:  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2526:  */
2527: PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2528: {
2529:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2530:   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2531:   PetscInt       nz = a->i[m],row,*jj,mr,col;
2532:   PetscInt       *cspidx;

2534:   *nn = n;
2535:   if (!ia) return 0;

2537:   PetscCalloc1(n,&collengths);
2538:   PetscMalloc1(n+1,&cia);
2539:   PetscMalloc1(nz,&cja);
2540:   PetscMalloc1(nz,&cspidx);
2541:   jj   = a->j;
2542:   for (i=0; i<nz; i++) {
2543:     collengths[jj[i]]++;
2544:   }
2545:   cia[0] = oshift;
2546:   for (i=0; i<n; i++) {
2547:     cia[i+1] = cia[i] + collengths[i];
2548:   }
2549:   PetscArrayzero(collengths,n);
2550:   jj   = a->j;
2551:   for (row=0; row<m; row++) {
2552:     mr = a->i[row+1] - a->i[row];
2553:     for (i=0; i<mr; i++) {
2554:       col = *jj++;
2555:       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2556:       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2557:     }
2558:   }
2559:   PetscFree(collengths);
2560:   *ia    = cia;
2561:   *ja    = cja;
2562:   *spidx = cspidx;
2563:   return 0;
2564: }

2566: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2567: {
2568:   MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);
2569:   PetscFree(*spidx);
2570:   return 0;
2571: }

2573: PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2574: {
2575:   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;

2577:   if (!Y->preallocated || !aij->nz) {
2578:     MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);
2579:   }
2580:   MatShift_Basic(Y,a);
2581:   return 0;
2582: }

2584: /* -------------------------------------------------------------------*/
2585: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2586:                                        MatGetRow_SeqBAIJ,
2587:                                        MatRestoreRow_SeqBAIJ,
2588:                                        MatMult_SeqBAIJ_N,
2589:                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2590:                                        MatMultTranspose_SeqBAIJ,
2591:                                        MatMultTransposeAdd_SeqBAIJ,
2592:                                        NULL,
2593:                                        NULL,
2594:                                        NULL,
2595:                                /* 10*/ NULL,
2596:                                        MatLUFactor_SeqBAIJ,
2597:                                        NULL,
2598:                                        NULL,
2599:                                        MatTranspose_SeqBAIJ,
2600:                                /* 15*/ MatGetInfo_SeqBAIJ,
2601:                                        MatEqual_SeqBAIJ,
2602:                                        MatGetDiagonal_SeqBAIJ,
2603:                                        MatDiagonalScale_SeqBAIJ,
2604:                                        MatNorm_SeqBAIJ,
2605:                                /* 20*/ NULL,
2606:                                        MatAssemblyEnd_SeqBAIJ,
2607:                                        MatSetOption_SeqBAIJ,
2608:                                        MatZeroEntries_SeqBAIJ,
2609:                                /* 24*/ MatZeroRows_SeqBAIJ,
2610:                                        NULL,
2611:                                        NULL,
2612:                                        NULL,
2613:                                        NULL,
2614:                                /* 29*/ MatSetUp_SeqBAIJ,
2615:                                        NULL,
2616:                                        NULL,
2617:                                        NULL,
2618:                                        NULL,
2619:                                /* 34*/ MatDuplicate_SeqBAIJ,
2620:                                        NULL,
2621:                                        NULL,
2622:                                        MatILUFactor_SeqBAIJ,
2623:                                        NULL,
2624:                                /* 39*/ MatAXPY_SeqBAIJ,
2625:                                        MatCreateSubMatrices_SeqBAIJ,
2626:                                        MatIncreaseOverlap_SeqBAIJ,
2627:                                        MatGetValues_SeqBAIJ,
2628:                                        MatCopy_SeqBAIJ,
2629:                                /* 44*/ NULL,
2630:                                        MatScale_SeqBAIJ,
2631:                                        MatShift_SeqBAIJ,
2632:                                        NULL,
2633:                                        MatZeroRowsColumns_SeqBAIJ,
2634:                                /* 49*/ NULL,
2635:                                        MatGetRowIJ_SeqBAIJ,
2636:                                        MatRestoreRowIJ_SeqBAIJ,
2637:                                        MatGetColumnIJ_SeqBAIJ,
2638:                                        MatRestoreColumnIJ_SeqBAIJ,
2639:                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2640:                                        NULL,
2641:                                        NULL,
2642:                                        NULL,
2643:                                        MatSetValuesBlocked_SeqBAIJ,
2644:                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2645:                                        MatDestroy_SeqBAIJ,
2646:                                        MatView_SeqBAIJ,
2647:                                        NULL,
2648:                                        NULL,
2649:                                /* 64*/ NULL,
2650:                                        NULL,
2651:                                        NULL,
2652:                                        NULL,
2653:                                        NULL,
2654:                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2655:                                        NULL,
2656:                                        MatConvert_Basic,
2657:                                        NULL,
2658:                                        NULL,
2659:                                /* 74*/ NULL,
2660:                                        MatFDColoringApply_BAIJ,
2661:                                        NULL,
2662:                                        NULL,
2663:                                        NULL,
2664:                                /* 79*/ NULL,
2665:                                        NULL,
2666:                                        NULL,
2667:                                        NULL,
2668:                                        MatLoad_SeqBAIJ,
2669:                                /* 84*/ NULL,
2670:                                        NULL,
2671:                                        NULL,
2672:                                        NULL,
2673:                                        NULL,
2674:                                /* 89*/ NULL,
2675:                                        NULL,
2676:                                        NULL,
2677:                                        NULL,
2678:                                        NULL,
2679:                                /* 94*/ NULL,
2680:                                        NULL,
2681:                                        NULL,
2682:                                        NULL,
2683:                                        NULL,
2684:                                /* 99*/ NULL,
2685:                                        NULL,
2686:                                        NULL,
2687:                                        MatConjugate_SeqBAIJ,
2688:                                        NULL,
2689:                                /*104*/ NULL,
2690:                                        MatRealPart_SeqBAIJ,
2691:                                        MatImaginaryPart_SeqBAIJ,
2692:                                        NULL,
2693:                                        NULL,
2694:                                /*109*/ NULL,
2695:                                        NULL,
2696:                                        NULL,
2697:                                        NULL,
2698:                                        MatMissingDiagonal_SeqBAIJ,
2699:                                /*114*/ NULL,
2700:                                        NULL,
2701:                                        NULL,
2702:                                        NULL,
2703:                                        NULL,
2704:                                /*119*/ NULL,
2705:                                        NULL,
2706:                                        MatMultHermitianTranspose_SeqBAIJ,
2707:                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2708:                                        NULL,
2709:                                /*124*/ NULL,
2710:                                        MatGetColumnReductions_SeqBAIJ,
2711:                                        MatInvertBlockDiagonal_SeqBAIJ,
2712:                                        NULL,
2713:                                        NULL,
2714:                                /*129*/ NULL,
2715:                                        NULL,
2716:                                        NULL,
2717:                                        NULL,
2718:                                        NULL,
2719:                                /*134*/ NULL,
2720:                                        NULL,
2721:                                        NULL,
2722:                                        NULL,
2723:                                        NULL,
2724:                                /*139*/ MatSetBlockSizes_Default,
2725:                                        NULL,
2726:                                        NULL,
2727:                                        MatFDColoringSetUp_SeqXAIJ,
2728:                                        NULL,
2729:                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2730:                                        MatDestroySubMatrices_SeqBAIJ,
2731:                                        NULL,
2732:                                        NULL
2733: };

2735: PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2736: {
2737:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2738:   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;


2742:   /* allocate space for values if not already there */
2743:   if (!aij->saved_values) {
2744:     PetscMalloc1(nz+1,&aij->saved_values);
2745:     PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));
2746:   }

2748:   /* copy values over */
2749:   PetscArraycpy(aij->saved_values,aij->a,nz);
2750:   return 0;
2751: }

2753: PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2754: {
2755:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2756:   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;


2761:   /* copy values over */
2762:   PetscArraycpy(aij->a,aij->saved_values,nz);
2763:   return 0;
2764: }

2766: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2767: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);

2769: PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2770: {
2771:   Mat_SeqBAIJ    *b;
2773:   PetscInt       i,mbs,nbs,bs2;
2774:   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;

2776:   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2777:   if (nz == MAT_SKIP_ALLOCATION) {
2778:     skipallocation = PETSC_TRUE;
2779:     nz             = 0;
2780:   }

2782:   MatSetBlockSize(B,PetscAbs(bs));
2783:   PetscLayoutSetUp(B->rmap);
2784:   PetscLayoutSetUp(B->cmap);
2785:   PetscLayoutGetBlockSize(B->rmap,&bs);

2787:   B->preallocated = PETSC_TRUE;

2789:   mbs = B->rmap->n/bs;
2790:   nbs = B->cmap->n/bs;
2791:   bs2 = bs*bs;


2795:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2797:   if (nnz) {
2798:     for (i=0; i<mbs; i++) {
2801:     }
2802:   }

2804:   b    = (Mat_SeqBAIJ*)B->data;
2805:   PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2806:   PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);
2807:   PetscOptionsEnd();

2809:   if (!flg) {
2810:     switch (bs) {
2811:     case 1:
2812:       B->ops->mult    = MatMult_SeqBAIJ_1;
2813:       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2814:       break;
2815:     case 2:
2816:       B->ops->mult    = MatMult_SeqBAIJ_2;
2817:       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2818:       break;
2819:     case 3:
2820:       B->ops->mult    = MatMult_SeqBAIJ_3;
2821:       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2822:       break;
2823:     case 4:
2824:       B->ops->mult    = MatMult_SeqBAIJ_4;
2825:       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2826:       break;
2827:     case 5:
2828:       B->ops->mult    = MatMult_SeqBAIJ_5;
2829:       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2830:       break;
2831:     case 6:
2832:       B->ops->mult    = MatMult_SeqBAIJ_6;
2833:       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2834:       break;
2835:     case 7:
2836:       B->ops->mult    = MatMult_SeqBAIJ_7;
2837:       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2838:       break;
2839:     case 9:
2840:     {
2841:       PetscInt version = 1;
2842:       PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);
2843:       switch (version) {
2844: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2845:       case 1:
2846:         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2847:         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2848:         PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);
2849:         break;
2850: #endif
2851:       default:
2852:         B->ops->mult    = MatMult_SeqBAIJ_N;
2853:         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2854:         PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);
2855:         break;
2856:       }
2857:       break;
2858:     }
2859:     case 11:
2860:       B->ops->mult    = MatMult_SeqBAIJ_11;
2861:       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2862:       break;
2863:     case 12:
2864:     {
2865:       PetscInt version = 1;
2866:       PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);
2867:       switch (version) {
2868:       case 1:
2869:         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
2870:         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2871:         PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);
2872:         break;
2873:       case 2:
2874:         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
2875:         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
2876:         PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);
2877:         break;
2878: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2879:       case 3:
2880:         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
2881:         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2882:         PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);
2883:         break;
2884: #endif
2885:       default:
2886:         B->ops->mult    = MatMult_SeqBAIJ_N;
2887:         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2888:         PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);
2889:         break;
2890:       }
2891:       break;
2892:     }
2893:     case 15:
2894:     {
2895:       PetscInt version = 1;
2896:       PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);
2897:       switch (version) {
2898:       case 1:
2899:         B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2900:         PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);
2901:         break;
2902:       case 2:
2903:         B->ops->mult    = MatMult_SeqBAIJ_15_ver2;
2904:         PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);
2905:         break;
2906:       case 3:
2907:         B->ops->mult    = MatMult_SeqBAIJ_15_ver3;
2908:         PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);
2909:         break;
2910:       case 4:
2911:         B->ops->mult    = MatMult_SeqBAIJ_15_ver4;
2912:         PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);
2913:         break;
2914:       default:
2915:         B->ops->mult    = MatMult_SeqBAIJ_N;
2916:         PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);
2917:         break;
2918:       }
2919:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2920:       break;
2921:     }
2922:     default:
2923:       B->ops->mult    = MatMult_SeqBAIJ_N;
2924:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2925:       PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);
2926:       break;
2927:     }
2928:   }
2929:   B->ops->sor = MatSOR_SeqBAIJ;
2930:   b->mbs = mbs;
2931:   b->nbs = nbs;
2932:   if (!skipallocation) {
2933:     if (!b->imax) {
2934:       PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);
2935:       PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));

2937:       b->free_imax_ilen = PETSC_TRUE;
2938:     }
2939:     /* b->ilen will count nonzeros in each block row so far. */
2940:     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2941:     if (!nnz) {
2942:       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2943:       else if (nz < 0) nz = 1;
2944:       nz = PetscMin(nz,nbs);
2945:       for (i=0; i<mbs; i++) b->imax[i] = nz;
2946:       PetscIntMultError(nz,mbs,&nz);
2947:     } else {
2948:       PetscInt64 nz64 = 0;
2949:       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
2950:       PetscIntCast(nz64,&nz);
2951:     }

2953:     /* allocate the matrix space */
2954:     MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2955:     if (B->structure_only) {
2956:       PetscMalloc1(nz,&b->j);
2957:       PetscMalloc1(B->rmap->N+1,&b->i);
2958:       PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));
2959:     } else {
2960:       PetscInt nzbs2 = 0;
2961:       PetscIntMultError(nz,bs2,&nzbs2);
2962:       PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);
2963:       PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2964:       PetscArrayzero(b->a,nz*bs2);
2965:     }
2966:     PetscArrayzero(b->j,nz);

2968:     if (B->structure_only) {
2969:       b->singlemalloc = PETSC_FALSE;
2970:       b->free_a       = PETSC_FALSE;
2971:     } else {
2972:       b->singlemalloc = PETSC_TRUE;
2973:       b->free_a       = PETSC_TRUE;
2974:     }
2975:     b->free_ij = PETSC_TRUE;

2977:     b->i[0] = 0;
2978:     for (i=1; i<mbs+1; i++) {
2979:       b->i[i] = b->i[i-1] + b->imax[i-1];
2980:     }

2982:   } else {
2983:     b->free_a  = PETSC_FALSE;
2984:     b->free_ij = PETSC_FALSE;
2985:   }

2987:   b->bs2              = bs2;
2988:   b->mbs              = mbs;
2989:   b->nz               = 0;
2990:   b->maxnz            = nz;
2991:   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2992:   B->was_assembled    = PETSC_FALSE;
2993:   B->assembled        = PETSC_FALSE;
2994:   if (realalloc) MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2995:   return 0;
2996: }

2998: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2999: {
3000:   PetscInt       i,m,nz,nz_max=0,*nnz;
3001:   PetscScalar    *values=NULL;
3002:   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;

3005:   PetscLayoutSetBlockSize(B->rmap,bs);
3006:   PetscLayoutSetBlockSize(B->cmap,bs);
3007:   PetscLayoutSetUp(B->rmap);
3008:   PetscLayoutSetUp(B->cmap);
3009:   PetscLayoutGetBlockSize(B->rmap,&bs);
3010:   m    = B->rmap->n/bs;

3013:   PetscMalloc1(m+1, &nnz);
3014:   for (i=0; i<m; i++) {
3015:     nz = ii[i+1]- ii[i];
3017:     nz_max = PetscMax(nz_max, nz);
3018:     nnz[i] = nz;
3019:   }
3020:   MatSeqBAIJSetPreallocation(B,bs,0,nnz);
3021:   PetscFree(nnz);

3023:   values = (PetscScalar*)V;
3024:   if (!values) {
3025:     PetscCalloc1(bs*bs*(nz_max+1),&values);
3026:   }
3027:   for (i=0; i<m; i++) {
3028:     PetscInt          ncols  = ii[i+1] - ii[i];
3029:     const PetscInt    *icols = jj + ii[i];
3030:     if (bs == 1 || !roworiented) {
3031:       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3032:       MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
3033:     } else {
3034:       PetscInt j;
3035:       for (j=0; j<ncols; j++) {
3036:         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3037:         MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);
3038:       }
3039:     }
3040:   }
3041:   if (!V) PetscFree(values);
3042:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3043:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3044:   MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3045:   return 0;
3046: }

3048: /*@C
3049:    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored

3051:    Not Collective

3053:    Input Parameter:
3054: .  mat - a MATSEQBAIJ matrix

3056:    Output Parameter:
3057: .   array - pointer to the data

3059:    Level: intermediate

3061: .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3062: @*/
3063: PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3064: {
3065:   PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));
3066:   return 0;
3067: }

3069: /*@C
3070:    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()

3072:    Not Collective

3074:    Input Parameters:
3075: +  mat - a MATSEQBAIJ matrix
3076: -  array - pointer to the data

3078:    Level: intermediate

3080: .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3081: @*/
3082: PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3083: {
3084:   PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));
3085:   return 0;
3086: }

3088: /*MC
3089:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3090:    block sparse compressed row format.

3092:    Options Database Keys:
3093: + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3094: - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)

3096:    Level: beginner

3098:    Notes:
3099:     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3100:     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

3102:    Run with -info to see what version of the matrix-vector product is being used

3104: .seealso: MatCreateSeqBAIJ()
3105: M*/

3107: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);

3109: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3110: {
3111:   PetscMPIInt    size;
3112:   Mat_SeqBAIJ    *b;

3114:   MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);

3117:   PetscNewLog(B,&b);
3118:   B->data = (void*)b;
3119:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));

3121:   b->row          = NULL;
3122:   b->col          = NULL;
3123:   b->icol         = NULL;
3124:   b->reallocs     = 0;
3125:   b->saved_values = NULL;

3127:   b->roworiented        = PETSC_TRUE;
3128:   b->nonew              = 0;
3129:   b->diag               = NULL;
3130:   B->spptr              = NULL;
3131:   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3132:   b->keepnonzeropattern = PETSC_FALSE;

3134:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);
3135:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);
3136:   PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);
3137:   PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);
3138:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);
3139:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);
3140:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);
3141:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);
3142:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3143:   PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);
3144: #if defined(PETSC_HAVE_HYPRE)
3145:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);
3146: #endif
3147:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);
3148:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3149:   return 0;
3150: }

3152: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3153: {
3154:   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3155:   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;


3159:   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3160:     c->imax           = a->imax;
3161:     c->ilen           = a->ilen;
3162:     c->free_imax_ilen = PETSC_FALSE;
3163:   } else {
3164:     PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);
3165:     PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));
3166:     for (i=0; i<mbs; i++) {
3167:       c->imax[i] = a->imax[i];
3168:       c->ilen[i] = a->ilen[i];
3169:     }
3170:     c->free_imax_ilen = PETSC_TRUE;
3171:   }

3173:   /* allocate the matrix space */
3174:   if (mallocmatspace) {
3175:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3176:       PetscCalloc1(bs2*nz,&c->a);
3177:       PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));

3179:       c->i            = a->i;
3180:       c->j            = a->j;
3181:       c->singlemalloc = PETSC_FALSE;
3182:       c->free_a       = PETSC_TRUE;
3183:       c->free_ij      = PETSC_FALSE;
3184:       c->parent       = A;
3185:       C->preallocated = PETSC_TRUE;
3186:       C->assembled    = PETSC_TRUE;

3188:       PetscObjectReference((PetscObject)A);
3189:       MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3190:       MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3191:     } else {
3192:       PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);
3193:       PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));

3195:       c->singlemalloc = PETSC_TRUE;
3196:       c->free_a       = PETSC_TRUE;
3197:       c->free_ij      = PETSC_TRUE;

3199:       PetscArraycpy(c->i,a->i,mbs+1);
3200:       if (mbs > 0) {
3201:         PetscArraycpy(c->j,a->j,nz);
3202:         if (cpvalues == MAT_COPY_VALUES) {
3203:           PetscArraycpy(c->a,a->a,bs2*nz);
3204:         } else {
3205:           PetscArrayzero(c->a,bs2*nz);
3206:         }
3207:       }
3208:       C->preallocated = PETSC_TRUE;
3209:       C->assembled    = PETSC_TRUE;
3210:     }
3211:   }

3213:   c->roworiented = a->roworiented;
3214:   c->nonew       = a->nonew;

3216:   PetscLayoutReference(A->rmap,&C->rmap);
3217:   PetscLayoutReference(A->cmap,&C->cmap);

3219:   c->bs2         = a->bs2;
3220:   c->mbs         = a->mbs;
3221:   c->nbs         = a->nbs;

3223:   if (a->diag) {
3224:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3225:       c->diag      = a->diag;
3226:       c->free_diag = PETSC_FALSE;
3227:     } else {
3228:       PetscMalloc1(mbs+1,&c->diag);
3229:       PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));
3230:       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3231:       c->free_diag = PETSC_TRUE;
3232:     }
3233:   } else c->diag = NULL;

3235:   c->nz         = a->nz;
3236:   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3237:   c->solve_work = NULL;
3238:   c->mult_work  = NULL;
3239:   c->sor_workt  = NULL;
3240:   c->sor_work   = NULL;

3242:   c->compressedrow.use   = a->compressedrow.use;
3243:   c->compressedrow.nrows = a->compressedrow.nrows;
3244:   if (a->compressedrow.use) {
3245:     i    = a->compressedrow.nrows;
3246:     PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);
3247:     PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));
3248:     PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);
3249:     PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);
3250:   } else {
3251:     c->compressedrow.use    = PETSC_FALSE;
3252:     c->compressedrow.i      = NULL;
3253:     c->compressedrow.rindex = NULL;
3254:   }
3255:   C->nonzerostate = A->nonzerostate;

3257:   PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3258:   return 0;
3259: }

3261: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3262: {
3263:   MatCreate(PetscObjectComm((PetscObject)A),B);
3264:   MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3265:   MatSetType(*B,MATSEQBAIJ);
3266:   MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3267:   return 0;
3268: }

3270: /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3271: PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3272: {
3273:   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3274:   PetscInt       *rowidxs,*colidxs;
3275:   PetscScalar    *matvals;

3277:   PetscViewerSetUp(viewer);

3279:   /* read matrix header */
3280:   PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);
3282:   M = header[1]; N = header[2]; nz = header[3];

3287:   /* set block sizes from the viewer's .info file */
3288:   MatLoad_Binary_BlockSizes(mat,viewer);
3289:   /* set local and global sizes if not set already */
3290:   if (mat->rmap->n < 0) mat->rmap->n = M;
3291:   if (mat->cmap->n < 0) mat->cmap->n = N;
3292:   if (mat->rmap->N < 0) mat->rmap->N = M;
3293:   if (mat->cmap->N < 0) mat->cmap->N = N;
3294:   PetscLayoutSetUp(mat->rmap);
3295:   PetscLayoutSetUp(mat->cmap);

3297:   /* check if the matrix sizes are correct */
3298:   MatGetSize(mat,&rows,&cols);
3300:   MatGetBlockSize(mat,&bs);
3301:   MatGetLocalSize(mat,&m,&n);
3302:   mbs = m/bs; nbs = n/bs;

3304:   /* read in row lengths, column indices and nonzero values */
3305:   PetscMalloc1(m+1,&rowidxs);
3306:   PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);
3307:   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3308:   sum = rowidxs[m];

3311:   /* read in column indices and nonzero values */
3312:   PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);
3313:   PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);
3314:   PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);

3316:   { /* preallocate matrix storage */
3317:     PetscBT   bt; /* helper bit set to count nonzeros */
3318:     PetscInt  *nnz;
3319:     PetscBool sbaij;

3321:     PetscBTCreate(nbs,&bt);
3322:     PetscCalloc1(mbs,&nnz);
3323:     PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);
3324:     for (i=0; i<mbs; i++) {
3325:       PetscBTMemzero(nbs,bt);
3326:       for (k=0; k<bs; k++) {
3327:         PetscInt row = bs*i + k;
3328:         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3329:           PetscInt col = colidxs[j];
3330:           if (!sbaij || col >= row)
3331:             if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3332:         }
3333:       }
3334:     }
3335:     PetscBTDestroy(&bt);
3336:     MatSeqBAIJSetPreallocation(mat,bs,0,nnz);
3337:     MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);
3338:     PetscFree(nnz);
3339:   }

3341:   /* store matrix values */
3342:   for (i=0; i<m; i++) {
3343:     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3344:     (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);
3345:   }

3347:   PetscFree(rowidxs);
3348:   PetscFree2(colidxs,matvals);
3349:   MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
3350:   MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
3351:   return 0;
3352: }

3354: PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3355: {
3356:   PetscBool isbinary;

3358:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
3360:   MatLoad_SeqBAIJ_Binary(mat,viewer);
3361:   return 0;
3362: }

3364: /*@C
3365:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3366:    compressed row) format.  For good matrix assembly performance the
3367:    user should preallocate the matrix storage by setting the parameter nz
3368:    (or the array nnz).  By setting these parameters accurately, performance
3369:    during matrix assembly can be increased by more than a factor of 50.

3371:    Collective

3373:    Input Parameters:
3374: +  comm - MPI communicator, set to PETSC_COMM_SELF
3375: .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3376:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3377: .  m - number of rows
3378: .  n - number of columns
3379: .  nz - number of nonzero blocks  per block row (same for all rows)
3380: -  nnz - array containing the number of nonzero blocks in the various block rows
3381:          (possibly different for each block row) or NULL

3383:    Output Parameter:
3384: .  A - the matrix

3386:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3387:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3388:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

3390:    Options Database Keys:
3391: +   -mat_no_unroll - uses code that does not unroll the loops in the
3392:                      block calculations (much slower)
3393: -    -mat_block_size - size of the blocks to use

3395:    Level: intermediate

3397:    Notes:
3398:    The number of rows and columns must be divisible by blocksize.

3400:    If the nnz parameter is given then the nz parameter is ignored

3402:    A nonzero block is any block that as 1 or more nonzeros in it

3404:    The block AIJ format is fully compatible with standard Fortran 77
3405:    storage.  That is, the stored row and column indices can begin at
3406:    either one (as in Fortran) or zero.  See the users' manual for details.

3408:    Specify the preallocated storage with either nz or nnz (not both).
3409:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3410:    allocation.  See Users-Manual: ch_mat for details.
3411:    matrices.

3413: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3414: @*/
3415: PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3416: {
3417:   MatCreate(comm,A);
3418:   MatSetSizes(*A,m,n,m,n);
3419:   MatSetType(*A,MATSEQBAIJ);
3420:   MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);
3421:   return 0;
3422: }

3424: /*@C
3425:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3426:    per row in the matrix. For good matrix assembly performance the
3427:    user should preallocate the matrix storage by setting the parameter nz
3428:    (or the array nnz).  By setting these parameters accurately, performance
3429:    during matrix assembly can be increased by more than a factor of 50.

3431:    Collective

3433:    Input Parameters:
3434: +  B - the matrix
3435: .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3436:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3437: .  nz - number of block nonzeros per block row (same for all rows)
3438: -  nnz - array containing the number of block nonzeros in the various block rows
3439:          (possibly different for each block row) or NULL

3441:    Options Database Keys:
3442: +   -mat_no_unroll - uses code that does not unroll the loops in the
3443:                      block calculations (much slower)
3444: -   -mat_block_size - size of the blocks to use

3446:    Level: intermediate

3448:    Notes:
3449:    If the nnz parameter is given then the nz parameter is ignored

3451:    You can call MatGetInfo() to get information on how effective the preallocation was;
3452:    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3453:    You can also run with the option -info and look for messages with the string
3454:    malloc in them to see if additional memory allocation was needed.

3456:    The block AIJ format is fully compatible with standard Fortran 77
3457:    storage.  That is, the stored row and column indices can begin at
3458:    either one (as in Fortran) or zero.  See the users' manual for details.

3460:    Specify the preallocated storage with either nz or nnz (not both).
3461:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3462:    allocation.  See Users-Manual: ch_mat for details.

3464: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3465: @*/
3466: PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3467: {
3471:   PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3472:   return 0;
3473: }

3475: /*@C
3476:    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values

3478:    Collective

3480:    Input Parameters:
3481: +  B - the matrix
3482: .  i - the indices into j for the start of each local row (starts with zero)
3483: .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3484: -  v - optional values in the matrix

3486:    Level: advanced

3488:    Notes:
3489:    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3490:    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3491:    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3492:    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3493:    block column and the second index is over columns within a block.

3495:    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well

3497: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3498: @*/
3499: PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3500: {
3504:   PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3505:   return 0;
3506: }

3508: /*@
3509:      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.

3511:      Collective

3513:    Input Parameters:
3514: +  comm - must be an MPI communicator of size 1
3515: .  bs - size of block
3516: .  m - number of rows
3517: .  n - number of columns
3518: .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3519: .  j - column indices
3520: -  a - matrix values

3522:    Output Parameter:
3523: .  mat - the matrix

3525:    Level: advanced

3527:    Notes:
3528:        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3529:     once the matrix is destroyed

3531:        You cannot set new nonzero locations into this matrix, that will generate an error.

3533:        The i and j indices are 0 based

3535:        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).

3537:       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3538:       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3539:       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3540:       with column-major ordering within blocks.

3542: .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()

3544: @*/
3545: PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3546: {
3547:   PetscInt       ii;
3548:   Mat_SeqBAIJ    *baij;


3553:   MatCreate(comm,mat);
3554:   MatSetSizes(*mat,m,n,m,n);
3555:   MatSetType(*mat,MATSEQBAIJ);
3556:   MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);
3557:   baij = (Mat_SeqBAIJ*)(*mat)->data;
3558:   PetscMalloc2(m,&baij->imax,m,&baij->ilen);
3559:   PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));

3561:   baij->i = i;
3562:   baij->j = j;
3563:   baij->a = a;

3565:   baij->singlemalloc = PETSC_FALSE;
3566:   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3567:   baij->free_a       = PETSC_FALSE;
3568:   baij->free_ij      = PETSC_FALSE;

3570:   for (ii=0; ii<m; ii++) {
3571:     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3573:   }
3574:   if (PetscDefined(USE_DEBUG)) {
3575:     for (ii=0; ii<baij->i[m]; ii++) {
3578:     }
3579:   }

3581:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3582:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3583:   return 0;
3584: }

3586: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3587: {
3588:   PetscMPIInt    size;

3590:   MPI_Comm_size(comm,&size);
3591:   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3592:     MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);
3593:   } else {
3594:     MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);
3595:   }
3596:   return 0;
3597: }