Actual source code: baij.c


  2: /*
  3:     Defines the basic matrix operations for the BAIJ (compressed row)
  4:   matrix storage format.
  5: */
  6: #include <../src/mat/impls/baij/seq/baij.h>
  7: #include <petscblaslapack.h>
  8: #include <petsc/private/kernels/blockinvert.h>
  9: #include <petsc/private/kernels/blockmatmult.h>

 11: #if defined(PETSC_HAVE_HYPRE)
 12: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
 13: #endif

 15: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 16: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
 17: #endif
 18: PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);

 20: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
 21: {
 22:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 24:   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
 25:   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
 26:   PetscReal      shift = 0.0;
 27:   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;

 30:   allowzeropivot = PetscNot(A->erroriffailure);

 32:   if (a->idiagvalid) {
 33:     if (values) *values = a->idiag;
 34:     return(0);
 35:   }
 36:   MatMarkDiagonal_SeqBAIJ(A);
 37:   diag_offset = a->diag;
 38:   if (!a->idiag) {
 39:     PetscMalloc1(bs2*mbs,&a->idiag);
 40:     PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));
 41:   }
 42:   diag  = a->idiag;
 43:   if (values) *values = a->idiag;
 44:   /* factor and invert each block */
 45:   switch (bs) {
 46:   case 1:
 47:     for (i=0; i<mbs; i++) {
 48:       odiag    = v + 1*diag_offset[i];
 49:       diag[0]  = odiag[0];

 51:       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
 52:         if (allowzeropivot) {
 53:           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 54:           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
 55:           A->factorerror_zeropivot_row   = i;
 56:           PetscInfo1(A,"Zero pivot, row %D\n",i);
 57:         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
 58:       }

 60:       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
 61:       diag    += 1;
 62:     }
 63:     break;
 64:   case 2:
 65:     for (i=0; i<mbs; i++) {
 66:       odiag    = v + 4*diag_offset[i];
 67:       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 68:       PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);
 69:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 70:       diag    += 4;
 71:     }
 72:     break;
 73:   case 3:
 74:     for (i=0; i<mbs; i++) {
 75:       odiag    = v + 9*diag_offset[i];
 76:       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 77:       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 78:       diag[8]  = odiag[8];
 79:       PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);
 80:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 81:       diag    += 9;
 82:     }
 83:     break;
 84:   case 4:
 85:     for (i=0; i<mbs; i++) {
 86:       odiag  = v + 16*diag_offset[i];
 87:       PetscArraycpy(diag,odiag,16);
 88:       PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);
 89:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 90:       diag  += 16;
 91:     }
 92:     break;
 93:   case 5:
 94:     for (i=0; i<mbs; i++) {
 95:       odiag  = v + 25*diag_offset[i];
 96:       PetscArraycpy(diag,odiag,25);
 97:       PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);
 98:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 99:       diag  += 25;
100:     }
101:     break;
102:   case 6:
103:     for (i=0; i<mbs; i++) {
104:       odiag  = v + 36*diag_offset[i];
105:       PetscArraycpy(diag,odiag,36);
106:       PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);
107:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
108:       diag  += 36;
109:     }
110:     break;
111:   case 7:
112:     for (i=0; i<mbs; i++) {
113:       odiag  = v + 49*diag_offset[i];
114:       PetscArraycpy(diag,odiag,49);
115:       PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);
116:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
117:       diag  += 49;
118:     }
119:     break;
120:   default:
121:     PetscMalloc2(bs,&v_work,bs,&v_pivots);
122:     for (i=0; i<mbs; i++) {
123:       odiag  = v + bs2*diag_offset[i];
124:       PetscArraycpy(diag,odiag,bs2);
125:       PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);
126:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
127:       diag  += bs2;
128:     }
129:     PetscFree2(v_work,v_pivots);
130:   }
131:   a->idiagvalid = PETSC_TRUE;
132:   return(0);
133: }

135: PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
136: {
137:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
138:   PetscScalar       *x,*work,*w,*workt,*t;
139:   const MatScalar   *v,*aa = a->a, *idiag;
140:   const PetscScalar *b,*xb;
141:   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
142:   PetscErrorCode    ierr;
143:   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
144:   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;

147:   its = its*lits;
148:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
149:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
150:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
151:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor");
152:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");

154:   if (!a->idiagvalid) {MatInvertBlockDiagonal(A,NULL);}

156:   if (!m) return(0);
157:   diag  = a->diag;
158:   idiag = a->idiag;
159:   k    = PetscMax(A->rmap->n,A->cmap->n);
160:   if (!a->mult_work) {
161:     PetscMalloc1(k+1,&a->mult_work);
162:   }
163:   if (!a->sor_workt) {
164:     PetscMalloc1(k,&a->sor_workt);
165:   }
166:   if (!a->sor_work) {
167:     PetscMalloc1(bs,&a->sor_work);
168:   }
169:   work = a->mult_work;
170:   t    = a->sor_workt;
171:   w    = a->sor_work;

173:   VecGetArray(xx,&x);
174:   VecGetArrayRead(bb,&b);

176:   if (flag & SOR_ZERO_INITIAL_GUESS) {
177:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
178:       switch (bs) {
179:       case 1:
180:         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
181:         t[0] = b[0];
182:         i2     = 1;
183:         idiag += 1;
184:         for (i=1; i<m; i++) {
185:           v  = aa + ai[i];
186:           vi = aj + ai[i];
187:           nz = diag[i] - ai[i];
188:           s[0] = b[i2];
189:           for (j=0; j<nz; j++) {
190:             xw[0] = x[vi[j]];
191:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
192:           }
193:           t[i2] = s[0];
194:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
195:           x[i2]  = xw[0];
196:           idiag += 1;
197:           i2    += 1;
198:         }
199:         break;
200:       case 2:
201:         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
202:         t[0] = b[0]; t[1] = b[1];
203:         i2     = 2;
204:         idiag += 4;
205:         for (i=1; i<m; i++) {
206:           v  = aa + 4*ai[i];
207:           vi = aj + ai[i];
208:           nz = diag[i] - ai[i];
209:           s[0] = b[i2]; s[1] = b[i2+1];
210:           for (j=0; j<nz; j++) {
211:             idx = 2*vi[j];
212:             it  = 4*j;
213:             xw[0] = x[idx]; xw[1] = x[1+idx];
214:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
215:           }
216:           t[i2] = s[0]; t[i2+1] = s[1];
217:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
218:           x[i2]   = xw[0]; x[i2+1] = xw[1];
219:           idiag  += 4;
220:           i2     += 2;
221:         }
222:         break;
223:       case 3:
224:         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
225:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
226:         i2     = 3;
227:         idiag += 9;
228:         for (i=1; i<m; i++) {
229:           v  = aa + 9*ai[i];
230:           vi = aj + ai[i];
231:           nz = diag[i] - ai[i];
232:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
233:           while (nz--) {
234:             idx = 3*(*vi++);
235:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
236:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
237:             v  += 9;
238:           }
239:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
240:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
241:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
242:           idiag  += 9;
243:           i2     += 3;
244:         }
245:         break;
246:       case 4:
247:         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
248:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
249:         i2     = 4;
250:         idiag += 16;
251:         for (i=1; i<m; i++) {
252:           v  = aa + 16*ai[i];
253:           vi = aj + ai[i];
254:           nz = diag[i] - ai[i];
255:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
256:           while (nz--) {
257:             idx = 4*(*vi++);
258:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
259:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
260:             v  += 16;
261:           }
262:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
263:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
264:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
265:           idiag  += 16;
266:           i2     += 4;
267:         }
268:         break;
269:       case 5:
270:         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
271:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
272:         i2     = 5;
273:         idiag += 25;
274:         for (i=1; i<m; i++) {
275:           v  = aa + 25*ai[i];
276:           vi = aj + ai[i];
277:           nz = diag[i] - ai[i];
278:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
279:           while (nz--) {
280:             idx = 5*(*vi++);
281:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
282:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
283:             v  += 25;
284:           }
285:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
286:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
287:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
288:           idiag  += 25;
289:           i2     += 5;
290:         }
291:         break;
292:       case 6:
293:         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
294:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
295:         i2     = 6;
296:         idiag += 36;
297:         for (i=1; i<m; i++) {
298:           v  = aa + 36*ai[i];
299:           vi = aj + ai[i];
300:           nz = diag[i] - ai[i];
301:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
302:           while (nz--) {
303:             idx = 6*(*vi++);
304:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
305:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
306:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
307:             v  += 36;
308:           }
309:           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
310:           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
311:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
312:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
313:           idiag  += 36;
314:           i2     += 6;
315:         }
316:         break;
317:       case 7:
318:         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
319:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
320:         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
321:         i2     = 7;
322:         idiag += 49;
323:         for (i=1; i<m; i++) {
324:           v  = aa + 49*ai[i];
325:           vi = aj + ai[i];
326:           nz = diag[i] - ai[i];
327:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
328:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
329:           while (nz--) {
330:             idx = 7*(*vi++);
331:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
332:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
333:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
334:             v  += 49;
335:           }
336:           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
337:           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
338:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
339:           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
340:           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
341:           idiag  += 49;
342:           i2     += 7;
343:         }
344:         break;
345:       default:
346:         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
347:         PetscArraycpy(t,b,bs);
348:         i2     = bs;
349:         idiag += bs2;
350:         for (i=1; i<m; i++) {
351:           v  = aa + bs2*ai[i];
352:           vi = aj + ai[i];
353:           nz = diag[i] - ai[i];

355:           PetscArraycpy(w,b+i2,bs);
356:           /* copy all rows of x that are needed into contiguous space */
357:           workt = work;
358:           for (j=0; j<nz; j++) {
359:             PetscArraycpy(workt,x + bs*(*vi++),bs);
360:             workt += bs;
361:           }
362:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
363:           PetscArraycpy(t+i2,w,bs);
364:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);

366:           idiag += bs2;
367:           i2    += bs;
368:         }
369:         break;
370:       }
371:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
372:       PetscLogFlops(1.0*bs2*a->nz);
373:       xb = t;
374:     }
375:     else xb = b;
376:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
377:       idiag = a->idiag+bs2*(a->mbs-1);
378:       i2 = bs * (m-1);
379:       switch (bs) {
380:       case 1:
381:         s[0]  = xb[i2];
382:         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
383:         x[i2] = xw[0];
384:         i2   -= 1;
385:         for (i=m-2; i>=0; i--) {
386:           v  = aa + (diag[i]+1);
387:           vi = aj + diag[i] + 1;
388:           nz = ai[i+1] - diag[i] - 1;
389:           s[0] = xb[i2];
390:           for (j=0; j<nz; j++) {
391:             xw[0] = x[vi[j]];
392:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
393:           }
394:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
395:           x[i2]  = xw[0];
396:           idiag -= 1;
397:           i2    -= 1;
398:         }
399:         break;
400:       case 2:
401:         s[0]  = xb[i2]; s[1] = xb[i2+1];
402:         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
403:         x[i2] = xw[0]; x[i2+1] = xw[1];
404:         i2    -= 2;
405:         idiag -= 4;
406:         for (i=m-2; i>=0; i--) {
407:           v  = aa + 4*(diag[i] + 1);
408:           vi = aj + diag[i] + 1;
409:           nz = ai[i+1] - diag[i] - 1;
410:           s[0] = xb[i2]; s[1] = xb[i2+1];
411:           for (j=0; j<nz; j++) {
412:             idx = 2*vi[j];
413:             it  = 4*j;
414:             xw[0] = x[idx]; xw[1] = x[1+idx];
415:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
416:           }
417:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
418:           x[i2]   = xw[0]; x[i2+1] = xw[1];
419:           idiag  -= 4;
420:           i2     -= 2;
421:         }
422:         break;
423:       case 3:
424:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425:         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
426:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
427:         i2    -= 3;
428:         idiag -= 9;
429:         for (i=m-2; i>=0; i--) {
430:           v  = aa + 9*(diag[i]+1);
431:           vi = aj + diag[i] + 1;
432:           nz = ai[i+1] - diag[i] - 1;
433:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
434:           while (nz--) {
435:             idx = 3*(*vi++);
436:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
437:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
438:             v  += 9;
439:           }
440:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
441:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
442:           idiag  -= 9;
443:           i2     -= 3;
444:         }
445:         break;
446:       case 4:
447:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448:         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
449:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
450:         i2    -= 4;
451:         idiag -= 16;
452:         for (i=m-2; i>=0; i--) {
453:           v  = aa + 16*(diag[i]+1);
454:           vi = aj + diag[i] + 1;
455:           nz = ai[i+1] - diag[i] - 1;
456:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
457:           while (nz--) {
458:             idx = 4*(*vi++);
459:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
460:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
461:             v  += 16;
462:           }
463:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
464:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
465:           idiag  -= 16;
466:           i2     -= 4;
467:         }
468:         break;
469:       case 5:
470:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471:         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
472:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
473:         i2    -= 5;
474:         idiag -= 25;
475:         for (i=m-2; i>=0; i--) {
476:           v  = aa + 25*(diag[i]+1);
477:           vi = aj + diag[i] + 1;
478:           nz = ai[i+1] - diag[i] - 1;
479:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
480:           while (nz--) {
481:             idx = 5*(*vi++);
482:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
483:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
484:             v  += 25;
485:           }
486:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
487:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
488:           idiag  -= 25;
489:           i2     -= 5;
490:         }
491:         break;
492:       case 6:
493:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494:         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
495:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
496:         i2    -= 6;
497:         idiag -= 36;
498:         for (i=m-2; i>=0; i--) {
499:           v  = aa + 36*(diag[i]+1);
500:           vi = aj + diag[i] + 1;
501:           nz = ai[i+1] - diag[i] - 1;
502:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
503:           while (nz--) {
504:             idx = 6*(*vi++);
505:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
506:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
507:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
508:             v  += 36;
509:           }
510:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
511:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
512:           idiag  -= 36;
513:           i2     -= 6;
514:         }
515:         break;
516:       case 7:
517:         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
518:         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
519:         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
520:         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
521:         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
522:         i2    -= 7;
523:         idiag -= 49;
524:         for (i=m-2; i>=0; i--) {
525:           v  = aa + 49*(diag[i]+1);
526:           vi = aj + diag[i] + 1;
527:           nz = ai[i+1] - diag[i] - 1;
528:           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
529:           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
530:           while (nz--) {
531:             idx = 7*(*vi++);
532:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
533:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
534:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
535:             v  += 49;
536:           }
537:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
538:           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
539:           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
540:           idiag  -= 49;
541:           i2     -= 7;
542:         }
543:         break;
544:       default:
545:         PetscArraycpy(w,xb+i2,bs);
546:         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
547:         i2    -= bs;
548:         idiag -= bs2;
549:         for (i=m-2; i>=0; i--) {
550:           v  = aa + bs2*(diag[i]+1);
551:           vi = aj + diag[i] + 1;
552:           nz = ai[i+1] - diag[i] - 1;

554:           PetscArraycpy(w,xb+i2,bs);
555:           /* copy all rows of x that are needed into contiguous space */
556:           workt = work;
557:           for (j=0; j<nz; j++) {
558:             PetscArraycpy(workt,x + bs*(*vi++),bs);
559:             workt += bs;
560:           }
561:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
562:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);

564:           idiag -= bs2;
565:           i2    -= bs;
566:         }
567:         break;
568:       }
569:       PetscLogFlops(1.0*bs2*(a->nz));
570:     }
571:     its--;
572:   }
573:   while (its--) {
574:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
575:       idiag = a->idiag;
576:       i2 = 0;
577:       switch (bs) {
578:       case 1:
579:         for (i=0; i<m; i++) {
580:           v  = aa + ai[i];
581:           vi = aj + ai[i];
582:           nz = ai[i+1] - ai[i];
583:           s[0] = b[i2];
584:           for (j=0; j<nz; j++) {
585:             xw[0] = x[vi[j]];
586:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
587:           }
588:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
589:           x[i2] += xw[0];
590:           idiag += 1;
591:           i2    += 1;
592:         }
593:         break;
594:       case 2:
595:         for (i=0; i<m; i++) {
596:           v  = aa + 4*ai[i];
597:           vi = aj + ai[i];
598:           nz = ai[i+1] - ai[i];
599:           s[0] = b[i2]; s[1] = b[i2+1];
600:           for (j=0; j<nz; j++) {
601:             idx = 2*vi[j];
602:             it  = 4*j;
603:             xw[0] = x[idx]; xw[1] = x[1+idx];
604:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
605:           }
606:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
607:           x[i2]  += xw[0]; x[i2+1] += xw[1];
608:           idiag  += 4;
609:           i2     += 2;
610:         }
611:         break;
612:       case 3:
613:         for (i=0; i<m; i++) {
614:           v  = aa + 9*ai[i];
615:           vi = aj + ai[i];
616:           nz = ai[i+1] - ai[i];
617:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
618:           while (nz--) {
619:             idx = 3*(*vi++);
620:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
621:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
622:             v  += 9;
623:           }
624:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
625:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
626:           idiag  += 9;
627:           i2     += 3;
628:         }
629:         break;
630:       case 4:
631:         for (i=0; i<m; i++) {
632:           v  = aa + 16*ai[i];
633:           vi = aj + ai[i];
634:           nz = ai[i+1] - ai[i];
635:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
636:           while (nz--) {
637:             idx = 4*(*vi++);
638:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
639:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
640:             v  += 16;
641:           }
642:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
643:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
644:           idiag  += 16;
645:           i2     += 4;
646:         }
647:         break;
648:       case 5:
649:         for (i=0; i<m; i++) {
650:           v  = aa + 25*ai[i];
651:           vi = aj + ai[i];
652:           nz = ai[i+1] - ai[i];
653:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
654:           while (nz--) {
655:             idx = 5*(*vi++);
656:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
657:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
658:             v  += 25;
659:           }
660:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
661:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
662:           idiag  += 25;
663:           i2     += 5;
664:         }
665:         break;
666:       case 6:
667:         for (i=0; i<m; i++) {
668:           v  = aa + 36*ai[i];
669:           vi = aj + ai[i];
670:           nz = ai[i+1] - ai[i];
671:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
672:           while (nz--) {
673:             idx = 6*(*vi++);
674:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
675:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
676:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
677:             v  += 36;
678:           }
679:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
680:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
681:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
682:           idiag  += 36;
683:           i2     += 6;
684:         }
685:         break;
686:       case 7:
687:         for (i=0; i<m; i++) {
688:           v  = aa + 49*ai[i];
689:           vi = aj + ai[i];
690:           nz = ai[i+1] - ai[i];
691:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
692:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
693:           while (nz--) {
694:             idx = 7*(*vi++);
695:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
696:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
697:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
698:             v  += 49;
699:           }
700:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
701:           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
702:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
703:           idiag  += 49;
704:           i2     += 7;
705:         }
706:         break;
707:       default:
708:         for (i=0; i<m; i++) {
709:           v  = aa + bs2*ai[i];
710:           vi = aj + ai[i];
711:           nz = ai[i+1] - ai[i];

713:           PetscArraycpy(w,b+i2,bs);
714:           /* copy all rows of x that are needed into contiguous space */
715:           workt = work;
716:           for (j=0; j<nz; j++) {
717:             PetscArraycpy(workt,x + bs*(*vi++),bs);
718:             workt += bs;
719:           }
720:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
721:           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);

723:           idiag += bs2;
724:           i2    += bs;
725:         }
726:         break;
727:       }
728:       PetscLogFlops(2.0*bs2*a->nz);
729:     }
730:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
731:       idiag = a->idiag+bs2*(a->mbs-1);
732:       i2 = bs * (m-1);
733:       switch (bs) {
734:       case 1:
735:         for (i=m-1; i>=0; i--) {
736:           v  = aa + ai[i];
737:           vi = aj + ai[i];
738:           nz = ai[i+1] - ai[i];
739:           s[0] = b[i2];
740:           for (j=0; j<nz; j++) {
741:             xw[0] = x[vi[j]];
742:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
743:           }
744:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
745:           x[i2] += xw[0];
746:           idiag -= 1;
747:           i2    -= 1;
748:         }
749:         break;
750:       case 2:
751:         for (i=m-1; i>=0; i--) {
752:           v  = aa + 4*ai[i];
753:           vi = aj + ai[i];
754:           nz = ai[i+1] - ai[i];
755:           s[0] = b[i2]; s[1] = b[i2+1];
756:           for (j=0; j<nz; j++) {
757:             idx = 2*vi[j];
758:             it  = 4*j;
759:             xw[0] = x[idx]; xw[1] = x[1+idx];
760:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
761:           }
762:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
763:           x[i2]  += xw[0]; x[i2+1] += xw[1];
764:           idiag  -= 4;
765:           i2     -= 2;
766:         }
767:         break;
768:       case 3:
769:         for (i=m-1; i>=0; i--) {
770:           v  = aa + 9*ai[i];
771:           vi = aj + ai[i];
772:           nz = ai[i+1] - ai[i];
773:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
774:           while (nz--) {
775:             idx = 3*(*vi++);
776:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
777:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
778:             v  += 9;
779:           }
780:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
781:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
782:           idiag  -= 9;
783:           i2     -= 3;
784:         }
785:         break;
786:       case 4:
787:         for (i=m-1; i>=0; i--) {
788:           v  = aa + 16*ai[i];
789:           vi = aj + ai[i];
790:           nz = ai[i+1] - ai[i];
791:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
792:           while (nz--) {
793:             idx = 4*(*vi++);
794:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
795:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
796:             v  += 16;
797:           }
798:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
799:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
800:           idiag  -= 16;
801:           i2     -= 4;
802:         }
803:         break;
804:       case 5:
805:         for (i=m-1; i>=0; i--) {
806:           v  = aa + 25*ai[i];
807:           vi = aj + ai[i];
808:           nz = ai[i+1] - ai[i];
809:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
810:           while (nz--) {
811:             idx = 5*(*vi++);
812:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
813:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
814:             v  += 25;
815:           }
816:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
817:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
818:           idiag  -= 25;
819:           i2     -= 5;
820:         }
821:         break;
822:       case 6:
823:         for (i=m-1; i>=0; i--) {
824:           v  = aa + 36*ai[i];
825:           vi = aj + ai[i];
826:           nz = ai[i+1] - ai[i];
827:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
828:           while (nz--) {
829:             idx = 6*(*vi++);
830:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
831:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
832:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
833:             v  += 36;
834:           }
835:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
836:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
837:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
838:           idiag  -= 36;
839:           i2     -= 6;
840:         }
841:         break;
842:       case 7:
843:         for (i=m-1; i>=0; i--) {
844:           v  = aa + 49*ai[i];
845:           vi = aj + ai[i];
846:           nz = ai[i+1] - ai[i];
847:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
848:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
849:           while (nz--) {
850:             idx = 7*(*vi++);
851:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
852:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
853:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
854:             v  += 49;
855:           }
856:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
857:           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
858:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
859:           idiag  -= 49;
860:           i2     -= 7;
861:         }
862:         break;
863:       default:
864:         for (i=m-1; i>=0; i--) {
865:           v  = aa + bs2*ai[i];
866:           vi = aj + ai[i];
867:           nz = ai[i+1] - ai[i];

869:           PetscArraycpy(w,b+i2,bs);
870:           /* copy all rows of x that are needed into contiguous space */
871:           workt = work;
872:           for (j=0; j<nz; j++) {
873:             PetscArraycpy(workt,x + bs*(*vi++),bs);
874:             workt += bs;
875:           }
876:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
877:           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);

879:           idiag -= bs2;
880:           i2    -= bs;
881:         }
882:         break;
883:       }
884:       PetscLogFlops(2.0*bs2*(a->nz));
885:     }
886:   }
887:   VecRestoreArray(xx,&x);
888:   VecRestoreArrayRead(bb,&b);
889:   return(0);
890: }


893: /*
894:     Special version for direct calls from Fortran (Used in PETSc-fun3d)
895: */
896: #if defined(PETSC_HAVE_FORTRAN_CAPS)
897: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
898: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
899: #define matsetvaluesblocked4_ matsetvaluesblocked4
900: #endif

902: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
903: {
904:   Mat               A  = *AA;
905:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
906:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
907:   PetscInt          *ai    =a->i,*ailen=a->ilen;
908:   PetscInt          *aj    =a->j,stepval,lastcol = -1;
909:   const PetscScalar *value = v;
910:   MatScalar         *ap,*aa = a->a,*bap;
911:   PetscErrorCode    ierr;

914:   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
915:   stepval = (n-1)*4;
916:   for (k=0; k<m; k++) { /* loop over added rows */
917:     row  = im[k];
918:     rp   = aj + ai[row];
919:     ap   = aa + 16*ai[row];
920:     nrow = ailen[row];
921:     low  = 0;
922:     high = nrow;
923:     for (l=0; l<n; l++) { /* loop over added columns */
924:       col = in[l];
925:       if (col <= lastcol)  low = 0;
926:       else                high = nrow;
927:       lastcol = col;
928:       value   = v + k*(stepval+4 + l)*4;
929:       while (high-low > 7) {
930:         t = (low+high)/2;
931:         if (rp[t] > col) high = t;
932:         else             low  = t;
933:       }
934:       for (i=low; i<high; i++) {
935:         if (rp[i] > col) break;
936:         if (rp[i] == col) {
937:           bap = ap +  16*i;
938:           for (ii=0; ii<4; ii++,value+=stepval) {
939:             for (jj=ii; jj<16; jj+=4) {
940:               bap[jj] += *value++;
941:             }
942:           }
943:           goto noinsert2;
944:         }
945:       }
946:       N = nrow++ - 1;
947:       high++; /* added new column index thus must search to one higher than before */
948:       /* shift up all the later entries in this row */
949:       for (ii=N; ii>=i; ii--) {
950:         rp[ii+1] = rp[ii];
951:         PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
952:       }
953:       if (N >= i) {
954:         PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
955:       }
956:       rp[i] = col;
957:       bap   = ap +  16*i;
958:       for (ii=0; ii<4; ii++,value+=stepval) {
959:         for (jj=ii; jj<16; jj+=4) {
960:           bap[jj] = *value++;
961:         }
962:       }
963:       noinsert2:;
964:       low = i;
965:     }
966:     ailen[row] = nrow;
967:   }
968:   PetscFunctionReturnVoid();
969: }

971: #if defined(PETSC_HAVE_FORTRAN_CAPS)
972: #define matsetvalues4_ MATSETVALUES4
973: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
974: #define matsetvalues4_ matsetvalues4
975: #endif

977: PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
978: {
979:   Mat         A  = *AA;
980:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
981:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
982:   PetscInt    *ai=a->i,*ailen=a->ilen;
983:   PetscInt    *aj=a->j,brow,bcol;
984:   PetscInt    ridx,cidx,lastcol = -1;
985:   MatScalar   *ap,value,*aa=a->a,*bap;

989:   for (k=0; k<m; k++) { /* loop over added rows */
990:     row  = im[k]; brow = row/4;
991:     rp   = aj + ai[brow];
992:     ap   = aa + 16*ai[brow];
993:     nrow = ailen[brow];
994:     low  = 0;
995:     high = nrow;
996:     for (l=0; l<n; l++) { /* loop over added columns */
997:       col   = in[l]; bcol = col/4;
998:       ridx  = row % 4; cidx = col % 4;
999:       value = v[l + k*n];
1000:       if (col <= lastcol)  low = 0;
1001:       else                high = nrow;
1002:       lastcol = col;
1003:       while (high-low > 7) {
1004:         t = (low+high)/2;
1005:         if (rp[t] > bcol) high = t;
1006:         else              low  = t;
1007:       }
1008:       for (i=low; i<high; i++) {
1009:         if (rp[i] > bcol) break;
1010:         if (rp[i] == bcol) {
1011:           bap   = ap +  16*i + 4*cidx + ridx;
1012:           *bap += value;
1013:           goto noinsert1;
1014:         }
1015:       }
1016:       N = nrow++ - 1;
1017:       high++; /* added new column thus must search to one higher than before */
1018:       /* shift up all the later entries in this row */
1019:       PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1020:       PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1021:       PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1022:       rp[i]                    = bcol;
1023:       ap[16*i + 4*cidx + ridx] = value;
1024: noinsert1:;
1025:       low = i;
1026:     }
1027:     ailen[brow] = nrow;
1028:   }
1029:   PetscFunctionReturnVoid();
1030: }

1032: /*
1033:      Checks for missing diagonals
1034: */
1035: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1036: {
1037:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1039:   PetscInt       *diag,*ii = a->i,i;

1042:   MatMarkDiagonal_SeqBAIJ(A);
1043:   *missing = PETSC_FALSE;
1044:   if (A->rmap->n > 0 && !ii) {
1045:     *missing = PETSC_TRUE;
1046:     if (d) *d = 0;
1047:     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1048:   } else {
1049:     PetscInt n;
1050:     n = PetscMin(a->mbs, a->nbs);
1051:     diag = a->diag;
1052:     for (i=0; i<n; i++) {
1053:       if (diag[i] >= ii[i+1]) {
1054:         *missing = PETSC_TRUE;
1055:         if (d) *d = i;
1056:         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1057:         break;
1058:       }
1059:     }
1060:   }
1061:   return(0);
1062: }

1064: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1065: {
1066:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1068:   PetscInt       i,j,m = a->mbs;

1071:   if (!a->diag) {
1072:     PetscMalloc1(m,&a->diag);
1073:     PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));
1074:     a->free_diag = PETSC_TRUE;
1075:   }
1076:   for (i=0; i<m; i++) {
1077:     a->diag[i] = a->i[i+1];
1078:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1079:       if (a->j[j] == i) {
1080:         a->diag[i] = j;
1081:         break;
1082:       }
1083:     }
1084:   }
1085:   return(0);
1086: }


1089: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1090: {
1091:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1093:   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1094:   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;

1097:   *nn = n;
1098:   if (!ia) return(0);
1099:   if (symmetric) {
1100:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);
1101:     nz   = tia[n];
1102:   } else {
1103:     tia = a->i; tja = a->j;
1104:   }

1106:   if (!blockcompressed && bs > 1) {
1107:     (*nn) *= bs;
1108:     /* malloc & create the natural set of indices */
1109:     PetscMalloc1((n+1)*bs,ia);
1110:     if (n) {
1111:       (*ia)[0] = oshift;
1112:       for (j=1; j<bs; j++) {
1113:         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1114:       }
1115:     }

1117:     for (i=1; i<n; i++) {
1118:       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1119:       for (j=1; j<bs; j++) {
1120:         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1121:       }
1122:     }
1123:     if (n) {
1124:       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1125:     }

1127:     if (inja) {
1128:       PetscMalloc1(nz*bs*bs,ja);
1129:       cnt = 0;
1130:       for (i=0; i<n; i++) {
1131:         for (j=0; j<bs; j++) {
1132:           for (k=tia[i]; k<tia[i+1]; k++) {
1133:             for (l=0; l<bs; l++) {
1134:               (*ja)[cnt++] = bs*tja[k] + l;
1135:             }
1136:           }
1137:         }
1138:       }
1139:     }

1141:     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1142:       PetscFree(tia);
1143:       PetscFree(tja);
1144:     }
1145:   } else if (oshift == 1) {
1146:     if (symmetric) {
1147:       nz = tia[A->rmap->n/bs];
1148:       /*  add 1 to i and j indices */
1149:       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1150:       *ia = tia;
1151:       if (ja) {
1152:         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1153:         *ja = tja;
1154:       }
1155:     } else {
1156:       nz = a->i[A->rmap->n/bs];
1157:       /* malloc space and  add 1 to i and j indices */
1158:       PetscMalloc1(A->rmap->n/bs+1,ia);
1159:       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1160:       if (ja) {
1161:         PetscMalloc1(nz,ja);
1162:         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1163:       }
1164:     }
1165:   } else {
1166:     *ia = tia;
1167:     if (ja) *ja = tja;
1168:   }
1169:   return(0);
1170: }

1172: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1173: {

1177:   if (!ia) return(0);
1178:   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1179:     PetscFree(*ia);
1180:     if (ja) {PetscFree(*ja);}
1181:   }
1182:   return(0);
1183: }

1185: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1186: {
1187:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1191: #if defined(PETSC_USE_LOG)
1192:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1193: #endif
1194:   MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1195:   ISDestroy(&a->row);
1196:   ISDestroy(&a->col);
1197:   if (a->free_diag) {PetscFree(a->diag);}
1198:   PetscFree(a->idiag);
1199:   if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1200:   PetscFree(a->solve_work);
1201:   PetscFree(a->mult_work);
1202:   PetscFree(a->sor_workt);
1203:   PetscFree(a->sor_work);
1204:   ISDestroy(&a->icol);
1205:   PetscFree(a->saved_values);
1206:   PetscFree2(a->compressedrow.i,a->compressedrow.rindex);

1208:   MatDestroy(&a->sbaijMat);
1209:   MatDestroy(&a->parent);
1210:   PetscFree(A->data);

1212:   PetscObjectChangeTypeName((PetscObject)A,NULL);
1213:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);
1214:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);
1215:   PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);
1216:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
1217:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
1218:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);
1219:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);
1220:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);
1221:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);
1222:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);
1223:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);
1224:   PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);
1225: #if defined(PETSC_HAVE_HYPRE)
1226:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);
1227: #endif
1228:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);
1229:   return(0);
1230: }

1232: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1233: {
1234:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1238:   switch (op) {
1239:   case MAT_ROW_ORIENTED:
1240:     a->roworiented = flg;
1241:     break;
1242:   case MAT_KEEP_NONZERO_PATTERN:
1243:     a->keepnonzeropattern = flg;
1244:     break;
1245:   case MAT_NEW_NONZERO_LOCATIONS:
1246:     a->nonew = (flg ? 0 : 1);
1247:     break;
1248:   case MAT_NEW_NONZERO_LOCATION_ERR:
1249:     a->nonew = (flg ? -1 : 0);
1250:     break;
1251:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1252:     a->nonew = (flg ? -2 : 0);
1253:     break;
1254:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1255:     a->nounused = (flg ? -1 : 0);
1256:     break;
1257:   case MAT_FORCE_DIAGONAL_ENTRIES:
1258:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1259:   case MAT_USE_HASH_TABLE:
1260:   case MAT_SORTED_FULL:
1261:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1262:     break;
1263:   case MAT_SPD:
1264:   case MAT_SYMMETRIC:
1265:   case MAT_STRUCTURALLY_SYMMETRIC:
1266:   case MAT_HERMITIAN:
1267:   case MAT_SYMMETRY_ETERNAL:
1268:   case MAT_SUBMAT_SINGLEIS:
1269:   case MAT_STRUCTURE_ONLY:
1270:     /* These options are handled directly by MatSetOption() */
1271:     break;
1272:   default:
1273:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1274:   }
1275:   return(0);
1276: }

1278: /* used for both SeqBAIJ and SeqSBAIJ matrices */
1279: PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1280: {
1282:   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1283:   MatScalar      *aa_i;
1284:   PetscScalar    *v_i;

1287:   bs  = A->rmap->bs;
1288:   bs2 = bs*bs;
1289:   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);

1291:   bn  = row/bs;   /* Block number */
1292:   bp  = row % bs; /* Block Position */
1293:   M   = ai[bn+1] - ai[bn];
1294:   *nz = bs*M;

1296:   if (v) {
1297:     *v = NULL;
1298:     if (*nz) {
1299:       PetscMalloc1(*nz,v);
1300:       for (i=0; i<M; i++) { /* for each block in the block row */
1301:         v_i  = *v + i*bs;
1302:         aa_i = aa + bs2*(ai[bn] + i);
1303:         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1304:       }
1305:     }
1306:   }

1308:   if (idx) {
1309:     *idx = NULL;
1310:     if (*nz) {
1311:       PetscMalloc1(*nz,idx);
1312:       for (i=0; i<M; i++) { /* for each block in the block row */
1313:         idx_i = *idx + i*bs;
1314:         itmp  = bs*aj[ai[bn] + i];
1315:         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1316:       }
1317:     }
1318:   }
1319:   return(0);
1320: }

1322: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1323: {
1324:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1328:   MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);
1329:   return(0);
1330: }

1332: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1333: {

1337:   if (nz)  *nz = 0;
1338:   if (idx) {PetscFree(*idx);}
1339:   if (v)   {PetscFree(*v);}
1340:   return(0);
1341: }

1343: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1344: {
1345:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1346:   Mat            C;
1348:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1349:   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1350:   MatScalar      *ata,*aa=a->a;

1353:   PetscCalloc1(1+nbs,&atfill);
1354:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1355:     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */

1357:     MatCreate(PetscObjectComm((PetscObject)A),&C);
1358:     MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1359:     MatSetType(C,((PetscObject)A)->type_name);
1360:     MatSeqBAIJSetPreallocation(C,bs,0,atfill);

1362:     at  = (Mat_SeqBAIJ*)C->data;
1363:     ati = at->i;
1364:     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1365:   } else {
1366:     C = *B;
1367:     at = (Mat_SeqBAIJ*)C->data;
1368:     ati = at->i;
1369:   }

1371:   atj = at->j;
1372:   ata = at->a;

1374:   /* Copy ati into atfill so we have locations of the next free space in atj */
1375:   PetscArraycpy(atfill,ati,nbs);

1377:   /* Walk through A row-wise and mark nonzero entries of A^T. */
1378:   for (i=0; i<mbs; i++) {
1379:     anzj = ai[i+1] - ai[i];
1380:     for (j=0; j<anzj; j++) {
1381:       atj[atfill[*aj]] = i;
1382:       for (kr=0; kr<bs; kr++) {
1383:         for (k=0; k<bs; k++) {
1384:           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1385:         }
1386:       }
1387:       atfill[*aj++] += 1;
1388:     }
1389:   }
1390:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1391:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

1393:   /* Clean up temporary space and complete requests. */
1394:   PetscFree(atfill);

1396:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1397:     MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));
1398:     *B = C;
1399:   } else {
1400:     MatHeaderMerge(A,&C);
1401:   }
1402:   return(0);
1403: }

1405: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1406: {
1408:   Mat            Btrans;

1411:   *f   = PETSC_FALSE;
1412:   MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1413:   MatEqual_SeqBAIJ(B,Btrans,f);
1414:   MatDestroy(&Btrans);
1415:   return(0);
1416: }

1418: /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1419: PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1420: {
1421:   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1422:   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1423:   PetscInt       *rowlens,*colidxs;
1424:   PetscScalar    *matvals;

1428:   PetscViewerSetUp(viewer);

1430:   M  = mat->rmap->N;
1431:   N  = mat->cmap->N;
1432:   m  = mat->rmap->n;
1433:   bs = mat->rmap->bs;
1434:   nz = bs*bs*A->nz;

1436:   /* write matrix header */
1437:   header[0] = MAT_FILE_CLASSID;
1438:   header[1] = M; header[2] = N; header[3] = nz;
1439:   PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);

1441:   /* store row lengths */
1442:   PetscMalloc1(m,&rowlens);
1443:   for (cnt=0, i=0; i<A->mbs; i++)
1444:     for (j=0; j<bs; j++)
1445:       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1446:   PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);
1447:   PetscFree(rowlens);

1449:   /* store column indices  */
1450:   PetscMalloc1(nz,&colidxs);
1451:   for (cnt=0, i=0; i<A->mbs; i++)
1452:     for (k=0; k<bs; k++)
1453:       for (j=A->i[i]; j<A->i[i+1]; j++)
1454:         for (l=0; l<bs; l++)
1455:           colidxs[cnt++] = bs*A->j[j] + l;
1456:   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1457:   PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);
1458:   PetscFree(colidxs);

1460:   /* store nonzero values */
1461:   PetscMalloc1(nz,&matvals);
1462:   for (cnt=0, i=0; i<A->mbs; i++)
1463:     for (k=0; k<bs; k++)
1464:       for (j=A->i[i]; j<A->i[i+1]; j++)
1465:         for (l=0; l<bs; l++)
1466:           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1467:   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1468:   PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);
1469:   PetscFree(matvals);

1471:   /* write block size option to the viewer's .info file */
1472:   MatView_Binary_BlockSizes(mat,viewer);
1473:   return(0);
1474: }

1476: static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1477: {
1479:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1480:   PetscInt       i,bs = A->rmap->bs,k;

1483:   PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1484:   for (i=0; i<a->mbs; i++) {
1485:     PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);
1486:     for (k=a->i[i]; k<a->i[i+1]; k++) {
1487:       PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);
1488:     }
1489:     PetscViewerASCIIPrintf(viewer,"\n");
1490:   }
1491:   PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1492:   return(0);
1493: }

1495: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1496: {
1497:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1498:   PetscErrorCode    ierr;
1499:   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1500:   PetscViewerFormat format;

1503:   if (A->structure_only) {
1504:     MatView_SeqBAIJ_ASCII_structonly(A,viewer);
1505:     return(0);
1506:   }

1508:   PetscViewerGetFormat(viewer,&format);
1509:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1510:     PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
1511:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1512:     const char *matname;
1513:     Mat        aij;
1514:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1515:     PetscObjectGetName((PetscObject)A,&matname);
1516:     PetscObjectSetName((PetscObject)aij,matname);
1517:     MatView(aij,viewer);
1518:     MatDestroy(&aij);
1519:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1520:       return(0);
1521:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1522:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1523:     for (i=0; i<a->mbs; i++) {
1524:       for (j=0; j<bs; j++) {
1525:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1526:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1527:           for (l=0; l<bs; l++) {
1528: #if defined(PETSC_USE_COMPLEX)
1529:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1530:               PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1531:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1532:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1533:               PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1534:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1535:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1536:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1537:             }
1538: #else
1539:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1540:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1541:             }
1542: #endif
1543:           }
1544:         }
1545:         PetscViewerASCIIPrintf(viewer,"\n");
1546:       }
1547:     }
1548:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1549:   } else {
1550:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1551:     for (i=0; i<a->mbs; i++) {
1552:       for (j=0; j<bs; j++) {
1553:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1554:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1555:           for (l=0; l<bs; l++) {
1556: #if defined(PETSC_USE_COMPLEX)
1557:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1558:               PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1559:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1560:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1561:               PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1562:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1563:             } else {
1564:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1565:             }
1566: #else
1567:             PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1568: #endif
1569:           }
1570:         }
1571:         PetscViewerASCIIPrintf(viewer,"\n");
1572:       }
1573:     }
1574:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1575:   }
1576:   PetscViewerFlush(viewer);
1577:   return(0);
1578: }

1580: #include <petscdraw.h>
1581: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1582: {
1583:   Mat               A = (Mat) Aa;
1584:   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1585:   PetscErrorCode    ierr;
1586:   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1587:   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1588:   MatScalar         *aa;
1589:   PetscViewer       viewer;
1590:   PetscViewerFormat format;

1593:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1594:   PetscViewerGetFormat(viewer,&format);
1595:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1597:   /* loop over matrix elements drawing boxes */

1599:   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1600:     PetscDrawCollectiveBegin(draw);
1601:     /* Blue for negative, Cyan for zero and  Red for positive */
1602:     color = PETSC_DRAW_BLUE;
1603:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1604:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1605:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1606:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1607:         aa  = a->a + j*bs2;
1608:         for (k=0; k<bs; k++) {
1609:           for (l=0; l<bs; l++) {
1610:             if (PetscRealPart(*aa++) >=  0.) continue;
1611:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1612:           }
1613:         }
1614:       }
1615:     }
1616:     color = PETSC_DRAW_CYAN;
1617:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1618:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1619:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1620:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1621:         aa  = a->a + j*bs2;
1622:         for (k=0; k<bs; k++) {
1623:           for (l=0; l<bs; l++) {
1624:             if (PetscRealPart(*aa++) != 0.) continue;
1625:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1626:           }
1627:         }
1628:       }
1629:     }
1630:     color = PETSC_DRAW_RED;
1631:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1632:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1633:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1634:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1635:         aa  = a->a + j*bs2;
1636:         for (k=0; k<bs; k++) {
1637:           for (l=0; l<bs; l++) {
1638:             if (PetscRealPart(*aa++) <= 0.) continue;
1639:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1640:           }
1641:         }
1642:       }
1643:     }
1644:     PetscDrawCollectiveEnd(draw);
1645:   } else {
1646:     /* use contour shading to indicate magnitude of values */
1647:     /* first determine max of all nonzero values */
1648:     PetscReal minv = 0.0, maxv = 0.0;
1649:     PetscDraw popup;

1651:     for (i=0; i<a->nz*a->bs2; i++) {
1652:       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1653:     }
1654:     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1655:     PetscDrawGetPopup(draw,&popup);
1656:     PetscDrawScalePopup(popup,0.0,maxv);

1658:     PetscDrawCollectiveBegin(draw);
1659:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1660:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1661:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1662:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1663:         aa  = a->a + j*bs2;
1664:         for (k=0; k<bs; k++) {
1665:           for (l=0; l<bs; l++) {
1666:             MatScalar v = *aa++;
1667:             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1668:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1669:           }
1670:         }
1671:       }
1672:     }
1673:     PetscDrawCollectiveEnd(draw);
1674:   }
1675:   return(0);
1676: }

1678: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1679: {
1681:   PetscReal      xl,yl,xr,yr,w,h;
1682:   PetscDraw      draw;
1683:   PetscBool      isnull;

1686:   PetscViewerDrawGetDraw(viewer,0,&draw);
1687:   PetscDrawIsNull(draw,&isnull);
1688:   if (isnull) return(0);

1690:   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1691:   xr  += w;          yr += h;        xl = -w;     yl = -h;
1692:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1693:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1694:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1695:   PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1696:   PetscDrawSave(draw);
1697:   return(0);
1698: }

1700: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1701: {
1703:   PetscBool      iascii,isbinary,isdraw;

1706:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1707:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1708:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1709:   if (iascii) {
1710:     MatView_SeqBAIJ_ASCII(A,viewer);
1711:   } else if (isbinary) {
1712:     MatView_SeqBAIJ_Binary(A,viewer);
1713:   } else if (isdraw) {
1714:     MatView_SeqBAIJ_Draw(A,viewer);
1715:   } else {
1716:     Mat B;
1717:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1718:     MatView(B,viewer);
1719:     MatDestroy(&B);
1720:   }
1721:   return(0);
1722: }


1725: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1726: {
1727:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1728:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1729:   PetscInt    *ai = a->i,*ailen = a->ilen;
1730:   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1731:   MatScalar   *ap,*aa = a->a;

1734:   for (k=0; k<m; k++) { /* loop over rows */
1735:     row = im[k]; brow = row/bs;
1736:     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1737:     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1738:     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1739:     nrow = ailen[brow];
1740:     for (l=0; l<n; l++) { /* loop over columns */
1741:       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1742:       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1743:       col  = in[l];
1744:       bcol = col/bs;
1745:       cidx = col%bs;
1746:       ridx = row%bs;
1747:       high = nrow;
1748:       low  = 0; /* assume unsorted */
1749:       while (high-low > 5) {
1750:         t = (low+high)/2;
1751:         if (rp[t] > bcol) high = t;
1752:         else             low  = t;
1753:       }
1754:       for (i=low; i<high; i++) {
1755:         if (rp[i] > bcol) break;
1756:         if (rp[i] == bcol) {
1757:           *v++ = ap[bs2*i+bs*cidx+ridx];
1758:           goto finished;
1759:         }
1760:       }
1761:       *v++ = 0.0;
1762: finished:;
1763:     }
1764:   }
1765:   return(0);
1766: }

1768: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1769: {
1770:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1771:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1772:   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1773:   PetscErrorCode    ierr;
1774:   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1775:   PetscBool         roworiented=a->roworiented;
1776:   const PetscScalar *value     = v;
1777:   MatScalar         *ap=NULL,*aa = a->a,*bap;

1780:   if (roworiented) {
1781:     stepval = (n-1)*bs;
1782:   } else {
1783:     stepval = (m-1)*bs;
1784:   }
1785:   for (k=0; k<m; k++) { /* loop over added rows */
1786:     row = im[k];
1787:     if (row < 0) continue;
1788:     if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1789:     rp   = aj + ai[row];
1790:     if (!A->structure_only) ap = aa + bs2*ai[row];
1791:     rmax = imax[row];
1792:     nrow = ailen[row];
1793:     low  = 0;
1794:     high = nrow;
1795:     for (l=0; l<n; l++) { /* loop over added columns */
1796:       if (in[l] < 0) continue;
1797:       if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1798:       col = in[l];
1799:       if (!A->structure_only) {
1800:         if (roworiented) {
1801:           value = v + (k*(stepval+bs) + l)*bs;
1802:         } else {
1803:           value = v + (l*(stepval+bs) + k)*bs;
1804:         }
1805:       }
1806:       if (col <= lastcol) low = 0;
1807:       else high = nrow;
1808:       lastcol = col;
1809:       while (high-low > 7) {
1810:         t = (low+high)/2;
1811:         if (rp[t] > col) high = t;
1812:         else             low  = t;
1813:       }
1814:       for (i=low; i<high; i++) {
1815:         if (rp[i] > col) break;
1816:         if (rp[i] == col) {
1817:           if (A->structure_only) goto noinsert2;
1818:           bap = ap +  bs2*i;
1819:           if (roworiented) {
1820:             if (is == ADD_VALUES) {
1821:               for (ii=0; ii<bs; ii++,value+=stepval) {
1822:                 for (jj=ii; jj<bs2; jj+=bs) {
1823:                   bap[jj] += *value++;
1824:                 }
1825:               }
1826:             } else {
1827:               for (ii=0; ii<bs; ii++,value+=stepval) {
1828:                 for (jj=ii; jj<bs2; jj+=bs) {
1829:                   bap[jj] = *value++;
1830:                 }
1831:               }
1832:             }
1833:           } else {
1834:             if (is == ADD_VALUES) {
1835:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1836:                 for (jj=0; jj<bs; jj++) {
1837:                   bap[jj] += value[jj];
1838:                 }
1839:                 bap += bs;
1840:               }
1841:             } else {
1842:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1843:                 for (jj=0; jj<bs; jj++) {
1844:                   bap[jj]  = value[jj];
1845:                 }
1846:                 bap += bs;
1847:               }
1848:             }
1849:           }
1850:           goto noinsert2;
1851:         }
1852:       }
1853:       if (nonew == 1) goto noinsert2;
1854:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1855:       if (A->structure_only) {
1856:         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1857:       } else {
1858:         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1859:       }
1860:       N = nrow++ - 1; high++;
1861:       /* shift up all the later entries in this row */
1862:       PetscArraymove(rp+i+1,rp+i,N-i+1);
1863:       rp[i] = col;
1864:       if (!A->structure_only) {
1865:         PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
1866:         bap   = ap +  bs2*i;
1867:         if (roworiented) {
1868:           for (ii=0; ii<bs; ii++,value+=stepval) {
1869:             for (jj=ii; jj<bs2; jj+=bs) {
1870:               bap[jj] = *value++;
1871:             }
1872:           }
1873:         } else {
1874:           for (ii=0; ii<bs; ii++,value+=stepval) {
1875:             for (jj=0; jj<bs; jj++) {
1876:               *bap++ = *value++;
1877:             }
1878:           }
1879:         }
1880:       }
1881: noinsert2:;
1882:       low = i;
1883:     }
1884:     ailen[row] = nrow;
1885:   }
1886:   return(0);
1887: }

1889: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1890: {
1891:   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1892:   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1893:   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1895:   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1896:   MatScalar      *aa  = a->a,*ap;
1897:   PetscReal      ratio=0.6;

1900:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

1902:   if (m) rmax = ailen[0];
1903:   for (i=1; i<mbs; i++) {
1904:     /* move each row back by the amount of empty slots (fshift) before it*/
1905:     fshift += imax[i-1] - ailen[i-1];
1906:     rmax    = PetscMax(rmax,ailen[i]);
1907:     if (fshift) {
1908:       ip = aj + ai[i];
1909:       ap = aa + bs2*ai[i];
1910:       N  = ailen[i];
1911:       PetscArraymove(ip-fshift,ip,N);
1912:       if (!A->structure_only) {
1913:         PetscArraymove(ap-bs2*fshift,ap,bs2*N);
1914:       }
1915:     }
1916:     ai[i] = ai[i-1] + ailen[i-1];
1917:   }
1918:   if (mbs) {
1919:     fshift += imax[mbs-1] - ailen[mbs-1];
1920:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1921:   }

1923:   /* reset ilen and imax for each row */
1924:   a->nonzerorowcnt = 0;
1925:   if (A->structure_only) {
1926:     PetscFree2(a->imax,a->ilen);
1927:   } else { /* !A->structure_only */
1928:     for (i=0; i<mbs; i++) {
1929:       ailen[i] = imax[i] = ai[i+1] - ai[i];
1930:       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1931:     }
1932:   }
1933:   a->nz = ai[mbs];

1935:   /* diagonals may have moved, so kill the diagonal pointers */
1936:   a->idiagvalid = PETSC_FALSE;
1937:   if (fshift && a->diag) {
1938:     PetscFree(a->diag);
1939:     PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));
1940:     a->diag = NULL;
1941:   }
1942:   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1943:   PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1944:   PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1945:   PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);

1947:   A->info.mallocs    += a->reallocs;
1948:   a->reallocs         = 0;
1949:   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1950:   a->rmax             = rmax;

1952:   if (!A->structure_only) {
1953:     MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);
1954:   }
1955:   return(0);
1956: }

1958: /*
1959:    This function returns an array of flags which indicate the locations of contiguous
1960:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1961:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1962:    Assume: sizes should be long enough to hold all the values.
1963: */
1964: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1965: {
1966:   PetscInt  i,j,k,row;
1967:   PetscBool flg;

1970:   for (i=0,j=0; i<n; j++) {
1971:     row = idx[i];
1972:     if (row%bs!=0) { /* Not the begining of a block */
1973:       sizes[j] = 1;
1974:       i++;
1975:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1976:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1977:       i++;
1978:     } else { /* Begining of the block, so check if the complete block exists */
1979:       flg = PETSC_TRUE;
1980:       for (k=1; k<bs; k++) {
1981:         if (row+k != idx[i+k]) { /* break in the block */
1982:           flg = PETSC_FALSE;
1983:           break;
1984:         }
1985:       }
1986:       if (flg) { /* No break in the bs */
1987:         sizes[j] = bs;
1988:         i       += bs;
1989:       } else {
1990:         sizes[j] = 1;
1991:         i++;
1992:       }
1993:     }
1994:   }
1995:   *bs_max = j;
1996:   return(0);
1997: }

1999: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2000: {
2001:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2002:   PetscErrorCode    ierr;
2003:   PetscInt          i,j,k,count,*rows;
2004:   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2005:   PetscScalar       zero = 0.0;
2006:   MatScalar         *aa;
2007:   const PetscScalar *xx;
2008:   PetscScalar       *bb;

2011:   /* fix right hand side if needed */
2012:   if (x && b) {
2013:     VecGetArrayRead(x,&xx);
2014:     VecGetArray(b,&bb);
2015:     for (i=0; i<is_n; i++) {
2016:       bb[is_idx[i]] = diag*xx[is_idx[i]];
2017:     }
2018:     VecRestoreArrayRead(x,&xx);
2019:     VecRestoreArray(b,&bb);
2020:   }

2022:   /* Make a copy of the IS and  sort it */
2023:   /* allocate memory for rows,sizes */
2024:   PetscMalloc2(is_n,&rows,2*is_n,&sizes);

2026:   /* copy IS values to rows, and sort them */
2027:   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2028:   PetscSortInt(is_n,rows);

2030:   if (baij->keepnonzeropattern) {
2031:     for (i=0; i<is_n; i++) sizes[i] = 1;
2032:     bs_max          = is_n;
2033:   } else {
2034:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2035:     A->nonzerostate++;
2036:   }

2038:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2039:     row = rows[j];
2040:     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2041:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2042:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2043:     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2044:       if (diag != (PetscScalar)0.0) {
2045:         if (baij->ilen[row/bs] > 0) {
2046:           baij->ilen[row/bs]       = 1;
2047:           baij->j[baij->i[row/bs]] = row/bs;

2049:           PetscArrayzero(aa,count*bs);
2050:         }
2051:         /* Now insert all the diagonal values for this bs */
2052:         for (k=0; k<bs; k++) {
2053:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2054:         }
2055:       } else { /* (diag == 0.0) */
2056:         baij->ilen[row/bs] = 0;
2057:       } /* end (diag == 0.0) */
2058:     } else { /* (sizes[i] != bs) */
2059:       if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2060:       for (k=0; k<count; k++) {
2061:         aa[0] =  zero;
2062:         aa   += bs;
2063:       }
2064:       if (diag != (PetscScalar)0.0) {
2065:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2066:       }
2067:     }
2068:   }

2070:   PetscFree2(rows,sizes);
2071:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2072:   return(0);
2073: }

2075: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2076: {
2077:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2078:   PetscErrorCode    ierr;
2079:   PetscInt          i,j,k,count;
2080:   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2081:   PetscScalar       zero = 0.0;
2082:   MatScalar         *aa;
2083:   const PetscScalar *xx;
2084:   PetscScalar       *bb;
2085:   PetscBool         *zeroed,vecs = PETSC_FALSE;

2088:   /* fix right hand side if needed */
2089:   if (x && b) {
2090:     VecGetArrayRead(x,&xx);
2091:     VecGetArray(b,&bb);
2092:     vecs = PETSC_TRUE;
2093:   }

2095:   /* zero the columns */
2096:   PetscCalloc1(A->rmap->n,&zeroed);
2097:   for (i=0; i<is_n; i++) {
2098:     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2099:     zeroed[is_idx[i]] = PETSC_TRUE;
2100:   }
2101:   for (i=0; i<A->rmap->N; i++) {
2102:     if (!zeroed[i]) {
2103:       row = i/bs;
2104:       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2105:         for (k=0; k<bs; k++) {
2106:           col = bs*baij->j[j] + k;
2107:           if (zeroed[col]) {
2108:             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2109:             if (vecs) bb[i] -= aa[0]*xx[col];
2110:             aa[0] = 0.0;
2111:           }
2112:         }
2113:       }
2114:     } else if (vecs) bb[i] = diag*xx[i];
2115:   }
2116:   PetscFree(zeroed);
2117:   if (vecs) {
2118:     VecRestoreArrayRead(x,&xx);
2119:     VecRestoreArray(b,&bb);
2120:   }

2122:   /* zero the rows */
2123:   for (i=0; i<is_n; i++) {
2124:     row   = is_idx[i];
2125:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2126:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2127:     for (k=0; k<count; k++) {
2128:       aa[0] =  zero;
2129:       aa   += bs;
2130:     }
2131:     if (diag != (PetscScalar)0.0) {
2132:       (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2133:     }
2134:   }
2135:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2136:   return(0);
2137: }

2139: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2140: {
2141:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2142:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2143:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2144:   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2146:   PetscInt       ridx,cidx,bs2=a->bs2;
2147:   PetscBool      roworiented=a->roworiented;
2148:   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;

2151:   for (k=0; k<m; k++) { /* loop over added rows */
2152:     row  = im[k];
2153:     brow = row/bs;
2154:     if (row < 0) continue;
2155:     if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2156:     rp   = aj + ai[brow];
2157:     if (!A->structure_only) ap = aa + bs2*ai[brow];
2158:     rmax = imax[brow];
2159:     nrow = ailen[brow];
2160:     low  = 0;
2161:     high = nrow;
2162:     for (l=0; l<n; l++) { /* loop over added columns */
2163:       if (in[l] < 0) continue;
2164:       if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2165:       col  = in[l]; bcol = col/bs;
2166:       ridx = row % bs; cidx = col % bs;
2167:       if (!A->structure_only) {
2168:         if (roworiented) {
2169:           value = v[l + k*n];
2170:         } else {
2171:           value = v[k + l*m];
2172:         }
2173:       }
2174:       if (col <= lastcol) low = 0; else high = nrow;
2175:       lastcol = col;
2176:       while (high-low > 7) {
2177:         t = (low+high)/2;
2178:         if (rp[t] > bcol) high = t;
2179:         else              low  = t;
2180:       }
2181:       for (i=low; i<high; i++) {
2182:         if (rp[i] > bcol) break;
2183:         if (rp[i] == bcol) {
2184:           bap = ap +  bs2*i + bs*cidx + ridx;
2185:           if (!A->structure_only) {
2186:             if (is == ADD_VALUES) *bap += value;
2187:             else                  *bap  = value;
2188:           }
2189:           goto noinsert1;
2190:         }
2191:       }
2192:       if (nonew == 1) goto noinsert1;
2193:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2194:       if (A->structure_only) {
2195:         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2196:       } else {
2197:         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2198:       }
2199:       N = nrow++ - 1; high++;
2200:       /* shift up all the later entries in this row */
2201:       PetscArraymove(rp+i+1,rp+i,N-i+1);
2202:       rp[i] = bcol;
2203:       if (!A->structure_only) {
2204:         PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));
2205:         PetscArrayzero(ap+bs2*i,bs2);
2206:         ap[bs2*i + bs*cidx + ridx] = value;
2207:       }
2208:       a->nz++;
2209:       A->nonzerostate++;
2210: noinsert1:;
2211:       low = i;
2212:     }
2213:     ailen[brow] = nrow;
2214:   }
2215:   return(0);
2216: }

2218: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2219: {
2220:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2221:   Mat            outA;
2223:   PetscBool      row_identity,col_identity;

2226:   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2227:   ISIdentity(row,&row_identity);
2228:   ISIdentity(col,&col_identity);
2229:   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");

2231:   outA            = inA;
2232:   inA->factortype = MAT_FACTOR_LU;
2233:   PetscFree(inA->solvertype);
2234:   PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);

2236:   MatMarkDiagonal_SeqBAIJ(inA);

2238:   PetscObjectReference((PetscObject)row);
2239:   ISDestroy(&a->row);
2240:   a->row = row;
2241:   PetscObjectReference((PetscObject)col);
2242:   ISDestroy(&a->col);
2243:   a->col = col;

2245:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2246:   ISDestroy(&a->icol);
2247:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2248:   PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);

2250:   MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2251:   if (!a->solve_work) {
2252:     PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);
2253:     PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2254:   }
2255:   MatLUFactorNumeric(outA,inA,info);
2256:   return(0);
2257: }

2259: PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2260: {
2261:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2262:   PetscInt    i,nz,mbs;

2265:   nz  = baij->maxnz;
2266:   mbs = baij->mbs;
2267:   for (i=0; i<nz; i++) {
2268:     baij->j[i] = indices[i];
2269:   }
2270:   baij->nz = nz;
2271:   for (i=0; i<mbs; i++) {
2272:     baij->ilen[i] = baij->imax[i];
2273:   }
2274:   return(0);
2275: }

2277: /*@
2278:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2279:        in the matrix.

2281:   Input Parameters:
2282: +  mat - the SeqBAIJ matrix
2283: -  indices - the column indices

2285:   Level: advanced

2287:   Notes:
2288:     This can be called if you have precomputed the nonzero structure of the
2289:   matrix and want to provide it to the matrix object to improve the performance
2290:   of the MatSetValues() operation.

2292:     You MUST have set the correct numbers of nonzeros per row in the call to
2293:   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.

2295:     MUST be called before any calls to MatSetValues();

2297: @*/
2298: PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2299: {

2305:   PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2306:   return(0);
2307: }

2309: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2310: {
2311:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2313:   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2314:   PetscReal      atmp;
2315:   PetscScalar    *x,zero = 0.0;
2316:   MatScalar      *aa;
2317:   PetscInt       ncols,brow,krow,kcol;

2320:   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2321:   bs  = A->rmap->bs;
2322:   aa  = a->a;
2323:   ai  = a->i;
2324:   aj  = a->j;
2325:   mbs = a->mbs;

2327:   VecSet(v,zero);
2328:   VecGetArray(v,&x);
2329:   VecGetLocalSize(v,&n);
2330:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2331:   for (i=0; i<mbs; i++) {
2332:     ncols = ai[1] - ai[0]; ai++;
2333:     brow  = bs*i;
2334:     for (j=0; j<ncols; j++) {
2335:       for (kcol=0; kcol<bs; kcol++) {
2336:         for (krow=0; krow<bs; krow++) {
2337:           atmp = PetscAbsScalar(*aa);aa++;
2338:           row  = brow + krow;   /* row index */
2339:           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2340:         }
2341:       }
2342:       aj++;
2343:     }
2344:   }
2345:   VecRestoreArray(v,&x);
2346:   return(0);
2347: }

2349: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2350: {

2354:   /* If the two matrices have the same copy implementation, use fast copy. */
2355:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2356:     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2357:     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2358:     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;

2360:     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2361:     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2362:     PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);
2363:     PetscObjectStateIncrease((PetscObject)B);
2364:   } else {
2365:     MatCopy_Basic(A,B,str);
2366:   }
2367:   return(0);
2368: }

2370: PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2371: {

2375:   MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);
2376:   return(0);
2377: }

2379: static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2380: {
2381:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;

2384:   *array = a->a;
2385:   return(0);
2386: }

2388: static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2389: {
2391:   *array = NULL;
2392:   return(0);
2393: }

2395: PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2396: {
2397:   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2398:   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2399:   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;

2403:   /* Set the number of nonzeros in the new matrix */
2404:   MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);
2405:   return(0);
2406: }

2408: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2409: {
2410:   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2412:   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2413:   PetscBLASInt   one=1;

2416:   if (str == SAME_NONZERO_PATTERN) {
2417:     PetscScalar  alpha = a;
2418:     PetscBLASInt bnz;
2419:     PetscBLASIntCast(x->nz*bs2,&bnz);
2420:     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2421:     PetscObjectStateIncrease((PetscObject)Y);
2422:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2423:     MatAXPY_Basic(Y,a,X,str);
2424:   } else {
2425:     Mat      B;
2426:     PetscInt *nnz;
2427:     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2428:     PetscMalloc1(Y->rmap->N,&nnz);
2429:     MatCreate(PetscObjectComm((PetscObject)Y),&B);
2430:     PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2431:     MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2432:     MatSetBlockSizesFromMats(B,Y,Y);
2433:     MatSetType(B,(MatType) ((PetscObject)Y)->type_name);
2434:     MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);
2435:     MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2436:     MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2437:     MatHeaderReplace(Y,&B);
2438:     PetscFree(nnz);
2439:   }
2440:   return(0);
2441: }

2443: PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2444: {
2445: #if defined(PETSC_USE_COMPLEX)
2446:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2447:   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2448:   MatScalar   *aa = a->a;

2451:   for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
2452: #else
2454: #endif
2455:   return(0);
2456: }

2458: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2459: {
2460:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2461:   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2462:   MatScalar   *aa = a->a;

2465:   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2466:   return(0);
2467: }

2469: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2470: {
2471:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2472:   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2473:   MatScalar   *aa = a->a;

2476:   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2477:   return(0);
2478: }

2480: /*
2481:     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2482: */
2483: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2484: {
2485:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2487:   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2488:   PetscInt       nz = a->i[m],row,*jj,mr,col;

2491:   *nn = n;
2492:   if (!ia) return(0);
2493:   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2494:   else {
2495:     PetscCalloc1(n,&collengths);
2496:     PetscMalloc1(n+1,&cia);
2497:     PetscMalloc1(nz,&cja);
2498:     jj   = a->j;
2499:     for (i=0; i<nz; i++) {
2500:       collengths[jj[i]]++;
2501:     }
2502:     cia[0] = oshift;
2503:     for (i=0; i<n; i++) {
2504:       cia[i+1] = cia[i] + collengths[i];
2505:     }
2506:     PetscArrayzero(collengths,n);
2507:     jj   = a->j;
2508:     for (row=0; row<m; row++) {
2509:       mr = a->i[row+1] - a->i[row];
2510:       for (i=0; i<mr; i++) {
2511:         col = *jj++;

2513:         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2514:       }
2515:     }
2516:     PetscFree(collengths);
2517:     *ia  = cia; *ja = cja;
2518:   }
2519:   return(0);
2520: }

2522: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2523: {

2527:   if (!ia) return(0);
2528:   PetscFree(*ia);
2529:   PetscFree(*ja);
2530:   return(0);
2531: }

2533: /*
2534:  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2535:  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2536:  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2537:  */
2538: PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2539: {
2540:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2542:   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2543:   PetscInt       nz = a->i[m],row,*jj,mr,col;
2544:   PetscInt       *cspidx;

2547:   *nn = n;
2548:   if (!ia) return(0);

2550:   PetscCalloc1(n,&collengths);
2551:   PetscMalloc1(n+1,&cia);
2552:   PetscMalloc1(nz,&cja);
2553:   PetscMalloc1(nz,&cspidx);
2554:   jj   = a->j;
2555:   for (i=0; i<nz; i++) {
2556:     collengths[jj[i]]++;
2557:   }
2558:   cia[0] = oshift;
2559:   for (i=0; i<n; i++) {
2560:     cia[i+1] = cia[i] + collengths[i];
2561:   }
2562:   PetscArrayzero(collengths,n);
2563:   jj   = a->j;
2564:   for (row=0; row<m; row++) {
2565:     mr = a->i[row+1] - a->i[row];
2566:     for (i=0; i<mr; i++) {
2567:       col = *jj++;
2568:       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2569:       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2570:     }
2571:   }
2572:   PetscFree(collengths);
2573:   *ia    = cia;
2574:   *ja    = cja;
2575:   *spidx = cspidx;
2576:   return(0);
2577: }

2579: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2580: {

2584:   MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);
2585:   PetscFree(*spidx);
2586:   return(0);
2587: }

2589: PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2590: {
2592:   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;

2595:   if (!Y->preallocated || !aij->nz) {
2596:     MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);
2597:   }
2598:   MatShift_Basic(Y,a);
2599:   return(0);
2600: }

2602: /* -------------------------------------------------------------------*/
2603: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2604:                                        MatGetRow_SeqBAIJ,
2605:                                        MatRestoreRow_SeqBAIJ,
2606:                                        MatMult_SeqBAIJ_N,
2607:                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2608:                                        MatMultTranspose_SeqBAIJ,
2609:                                        MatMultTransposeAdd_SeqBAIJ,
2610:                                        NULL,
2611:                                        NULL,
2612:                                        NULL,
2613:                                /* 10*/ NULL,
2614:                                        MatLUFactor_SeqBAIJ,
2615:                                        NULL,
2616:                                        NULL,
2617:                                        MatTranspose_SeqBAIJ,
2618:                                /* 15*/ MatGetInfo_SeqBAIJ,
2619:                                        MatEqual_SeqBAIJ,
2620:                                        MatGetDiagonal_SeqBAIJ,
2621:                                        MatDiagonalScale_SeqBAIJ,
2622:                                        MatNorm_SeqBAIJ,
2623:                                /* 20*/ NULL,
2624:                                        MatAssemblyEnd_SeqBAIJ,
2625:                                        MatSetOption_SeqBAIJ,
2626:                                        MatZeroEntries_SeqBAIJ,
2627:                                /* 24*/ MatZeroRows_SeqBAIJ,
2628:                                        NULL,
2629:                                        NULL,
2630:                                        NULL,
2631:                                        NULL,
2632:                                /* 29*/ MatSetUp_SeqBAIJ,
2633:                                        NULL,
2634:                                        NULL,
2635:                                        NULL,
2636:                                        NULL,
2637:                                /* 34*/ MatDuplicate_SeqBAIJ,
2638:                                        NULL,
2639:                                        NULL,
2640:                                        MatILUFactor_SeqBAIJ,
2641:                                        NULL,
2642:                                /* 39*/ MatAXPY_SeqBAIJ,
2643:                                        MatCreateSubMatrices_SeqBAIJ,
2644:                                        MatIncreaseOverlap_SeqBAIJ,
2645:                                        MatGetValues_SeqBAIJ,
2646:                                        MatCopy_SeqBAIJ,
2647:                                /* 44*/ NULL,
2648:                                        MatScale_SeqBAIJ,
2649:                                        MatShift_SeqBAIJ,
2650:                                        NULL,
2651:                                        MatZeroRowsColumns_SeqBAIJ,
2652:                                /* 49*/ NULL,
2653:                                        MatGetRowIJ_SeqBAIJ,
2654:                                        MatRestoreRowIJ_SeqBAIJ,
2655:                                        MatGetColumnIJ_SeqBAIJ,
2656:                                        MatRestoreColumnIJ_SeqBAIJ,
2657:                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2658:                                        NULL,
2659:                                        NULL,
2660:                                        NULL,
2661:                                        MatSetValuesBlocked_SeqBAIJ,
2662:                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2663:                                        MatDestroy_SeqBAIJ,
2664:                                        MatView_SeqBAIJ,
2665:                                        NULL,
2666:                                        NULL,
2667:                                /* 64*/ NULL,
2668:                                        NULL,
2669:                                        NULL,
2670:                                        NULL,
2671:                                        NULL,
2672:                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2673:                                        NULL,
2674:                                        MatConvert_Basic,
2675:                                        NULL,
2676:                                        NULL,
2677:                                /* 74*/ NULL,
2678:                                        MatFDColoringApply_BAIJ,
2679:                                        NULL,
2680:                                        NULL,
2681:                                        NULL,
2682:                                /* 79*/ NULL,
2683:                                        NULL,
2684:                                        NULL,
2685:                                        NULL,
2686:                                        MatLoad_SeqBAIJ,
2687:                                /* 84*/ NULL,
2688:                                        NULL,
2689:                                        NULL,
2690:                                        NULL,
2691:                                        NULL,
2692:                                /* 89*/ NULL,
2693:                                        NULL,
2694:                                        NULL,
2695:                                        NULL,
2696:                                        NULL,
2697:                                /* 94*/ NULL,
2698:                                        NULL,
2699:                                        NULL,
2700:                                        NULL,
2701:                                        NULL,
2702:                                /* 99*/ NULL,
2703:                                        NULL,
2704:                                        NULL,
2705:                                        MatConjugate_SeqBAIJ,
2706:                                        NULL,
2707:                                /*104*/ NULL,
2708:                                        MatRealPart_SeqBAIJ,
2709:                                        MatImaginaryPart_SeqBAIJ,
2710:                                        NULL,
2711:                                        NULL,
2712:                                /*109*/ NULL,
2713:                                        NULL,
2714:                                        NULL,
2715:                                        NULL,
2716:                                        MatMissingDiagonal_SeqBAIJ,
2717:                                /*114*/ NULL,
2718:                                        NULL,
2719:                                        NULL,
2720:                                        NULL,
2721:                                        NULL,
2722:                                /*119*/ NULL,
2723:                                        NULL,
2724:                                        MatMultHermitianTranspose_SeqBAIJ,
2725:                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2726:                                        NULL,
2727:                                /*124*/ NULL,
2728:                                        NULL,
2729:                                        MatInvertBlockDiagonal_SeqBAIJ,
2730:                                        NULL,
2731:                                        NULL,
2732:                                /*129*/ NULL,
2733:                                        NULL,
2734:                                        NULL,
2735:                                        NULL,
2736:                                        NULL,
2737:                                /*134*/ NULL,
2738:                                        NULL,
2739:                                        NULL,
2740:                                        NULL,
2741:                                        NULL,
2742:                                /*139*/ MatSetBlockSizes_Default,
2743:                                        NULL,
2744:                                        NULL,
2745:                                        MatFDColoringSetUp_SeqXAIJ,
2746:                                        NULL,
2747:                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2748:                                        MatDestroySubMatrices_SeqBAIJ
2749: };

2751: PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2752: {
2753:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2754:   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;

2758:   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");

2760:   /* allocate space for values if not already there */
2761:   if (!aij->saved_values) {
2762:     PetscMalloc1(nz+1,&aij->saved_values);
2763:     PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));
2764:   }

2766:   /* copy values over */
2767:   PetscArraycpy(aij->saved_values,aij->a,nz);
2768:   return(0);
2769: }

2771: PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2772: {
2773:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2775:   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;

2778:   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2779:   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");

2781:   /* copy values over */
2782:   PetscArraycpy(aij->a,aij->saved_values,nz);
2783:   return(0);
2784: }

2786: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2787: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);

2789: PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2790: {
2791:   Mat_SeqBAIJ    *b;
2793:   PetscInt       i,mbs,nbs,bs2;
2794:   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;

2797:   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2798:   if (nz == MAT_SKIP_ALLOCATION) {
2799:     skipallocation = PETSC_TRUE;
2800:     nz             = 0;
2801:   }

2803:   MatSetBlockSize(B,PetscAbs(bs));
2804:   PetscLayoutSetUp(B->rmap);
2805:   PetscLayoutSetUp(B->cmap);
2806:   PetscLayoutGetBlockSize(B->rmap,&bs);

2808:   B->preallocated = PETSC_TRUE;

2810:   mbs = B->rmap->n/bs;
2811:   nbs = B->cmap->n/bs;
2812:   bs2 = bs*bs;

2814:   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);

2816:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2817:   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2818:   if (nnz) {
2819:     for (i=0; i<mbs; i++) {
2820:       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2821:       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2822:     }
2823:   }

2825:   b    = (Mat_SeqBAIJ*)B->data;
2826:   PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2827:   PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);
2828:   PetscOptionsEnd();

2830:   if (!flg) {
2831:     switch (bs) {
2832:     case 1:
2833:       B->ops->mult    = MatMult_SeqBAIJ_1;
2834:       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2835:       break;
2836:     case 2:
2837:       B->ops->mult    = MatMult_SeqBAIJ_2;
2838:       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2839:       break;
2840:     case 3:
2841:       B->ops->mult    = MatMult_SeqBAIJ_3;
2842:       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2843:       break;
2844:     case 4:
2845:       B->ops->mult    = MatMult_SeqBAIJ_4;
2846:       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2847:       break;
2848:     case 5:
2849:       B->ops->mult    = MatMult_SeqBAIJ_5;
2850:       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2851:       break;
2852:     case 6:
2853:       B->ops->mult    = MatMult_SeqBAIJ_6;
2854:       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2855:       break;
2856:     case 7:
2857:       B->ops->mult    = MatMult_SeqBAIJ_7;
2858:       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2859:       break;
2860:     case 9:
2861:     {
2862:       PetscInt version = 1;
2863:       PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);
2864:       switch (version) {
2865: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2866:       case 1:
2867:         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2868:         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2869:         PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);
2870:         break;
2871: #endif
2872:       default:
2873:         B->ops->mult    = MatMult_SeqBAIJ_N;
2874:         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2875:         PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);
2876:         break;
2877:       }
2878:       break;
2879:     }
2880:     case 11:
2881:       B->ops->mult    = MatMult_SeqBAIJ_11;
2882:       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2883:       break;
2884:     case 12:
2885:     {
2886:       PetscInt version = 1;
2887:       PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);
2888:       switch (version) {
2889:       case 1:
2890:         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
2891:         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2892:         PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2893:         break;
2894:       case 2:
2895:         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
2896:         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
2897:         PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2898:         break;
2899: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2900:       case 3:
2901:         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
2902:         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2903:         PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);
2904:         break;
2905: #endif
2906:       default:
2907:         B->ops->mult    = MatMult_SeqBAIJ_N;
2908:         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2909:         PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);
2910:         break;
2911:       }
2912:       break;
2913:     }
2914:     case 15:
2915:     {
2916:       PetscInt version = 1;
2917:       PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);
2918:       switch (version) {
2919:       case 1:
2920:         B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2921:         PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2922:         break;
2923:       case 2:
2924:         B->ops->mult    = MatMult_SeqBAIJ_15_ver2;
2925:         PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2926:         break;
2927:       case 3:
2928:         B->ops->mult    = MatMult_SeqBAIJ_15_ver3;
2929:         PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2930:         break;
2931:       case 4:
2932:         B->ops->mult    = MatMult_SeqBAIJ_15_ver4;
2933:         PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2934:         break;
2935:       default:
2936:         B->ops->mult    = MatMult_SeqBAIJ_N;
2937:         PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);
2938:         break;
2939:       }
2940:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2941:       break;
2942:     }
2943:     default:
2944:       B->ops->mult    = MatMult_SeqBAIJ_N;
2945:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2946:       PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);
2947:       break;
2948:     }
2949:   }
2950:   B->ops->sor = MatSOR_SeqBAIJ;
2951:   b->mbs = mbs;
2952:   b->nbs = nbs;
2953:   if (!skipallocation) {
2954:     if (!b->imax) {
2955:       PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);
2956:       PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));

2958:       b->free_imax_ilen = PETSC_TRUE;
2959:     }
2960:     /* b->ilen will count nonzeros in each block row so far. */
2961:     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2962:     if (!nnz) {
2963:       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2964:       else if (nz < 0) nz = 1;
2965:       nz = PetscMin(nz,nbs);
2966:       for (i=0; i<mbs; i++) b->imax[i] = nz;
2967:       PetscIntMultError(nz,mbs,&nz);
2968:     } else {
2969:       PetscInt64 nz64 = 0;
2970:       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
2971:       PetscIntCast(nz64,&nz);
2972:     }

2974:     /* allocate the matrix space */
2975:     MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2976:     if (B->structure_only) {
2977:       PetscMalloc1(nz,&b->j);
2978:       PetscMalloc1(B->rmap->N+1,&b->i);
2979:       PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));
2980:     } else {
2981:       PetscInt nzbs2 = 0;
2982:       PetscIntMultError(nz,bs2,&nzbs2);
2983:       PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);
2984:       PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2985:       PetscArrayzero(b->a,nz*bs2);
2986:     }
2987:     PetscArrayzero(b->j,nz);

2989:     if (B->structure_only) {
2990:       b->singlemalloc = PETSC_FALSE;
2991:       b->free_a       = PETSC_FALSE;
2992:     } else {
2993:       b->singlemalloc = PETSC_TRUE;
2994:       b->free_a       = PETSC_TRUE;
2995:     }
2996:     b->free_ij = PETSC_TRUE;

2998:     b->i[0] = 0;
2999:     for (i=1; i<mbs+1; i++) {
3000:       b->i[i] = b->i[i-1] + b->imax[i-1];
3001:     }

3003:   } else {
3004:     b->free_a  = PETSC_FALSE;
3005:     b->free_ij = PETSC_FALSE;
3006:   }

3008:   b->bs2              = bs2;
3009:   b->mbs              = mbs;
3010:   b->nz               = 0;
3011:   b->maxnz            = nz;
3012:   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3013:   B->was_assembled    = PETSC_FALSE;
3014:   B->assembled        = PETSC_FALSE;
3015:   if (realalloc) {MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);}
3016:   return(0);
3017: }

3019: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3020: {
3021:   PetscInt       i,m,nz,nz_max=0,*nnz;
3022:   PetscScalar    *values=NULL;
3023:   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;

3027:   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3028:   PetscLayoutSetBlockSize(B->rmap,bs);
3029:   PetscLayoutSetBlockSize(B->cmap,bs);
3030:   PetscLayoutSetUp(B->rmap);
3031:   PetscLayoutSetUp(B->cmap);
3032:   PetscLayoutGetBlockSize(B->rmap,&bs);
3033:   m    = B->rmap->n/bs;

3035:   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3036:   PetscMalloc1(m+1, &nnz);
3037:   for (i=0; i<m; i++) {
3038:     nz = ii[i+1]- ii[i];
3039:     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3040:     nz_max = PetscMax(nz_max, nz);
3041:     nnz[i] = nz;
3042:   }
3043:   MatSeqBAIJSetPreallocation(B,bs,0,nnz);
3044:   PetscFree(nnz);

3046:   values = (PetscScalar*)V;
3047:   if (!values) {
3048:     PetscCalloc1(bs*bs*(nz_max+1),&values);
3049:   }
3050:   for (i=0; i<m; i++) {
3051:     PetscInt          ncols  = ii[i+1] - ii[i];
3052:     const PetscInt    *icols = jj + ii[i];
3053:     if (bs == 1 || !roworiented) {
3054:       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3055:       MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
3056:     } else {
3057:       PetscInt j;
3058:       for (j=0; j<ncols; j++) {
3059:         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3060:         MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);
3061:       }
3062:     }
3063:   }
3064:   if (!V) { PetscFree(values); }
3065:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3066:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3067:   MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3068:   return(0);
3069: }

3071: /*@C
3072:    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored

3074:    Not Collective

3076:    Input Parameter:
3077: .  mat - a MATSEQBAIJ matrix

3079:    Output Parameter:
3080: .   array - pointer to the data

3082:    Level: intermediate

3084: .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3085: @*/
3086: PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3087: {

3091:   PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));
3092:   return(0);
3093: }

3095: /*@C
3096:    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()

3098:    Not Collective

3100:    Input Parameters:
3101: +  mat - a MATSEQBAIJ matrix
3102: -  array - pointer to the data

3104:    Level: intermediate

3106: .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3107: @*/
3108: PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3109: {

3113:   PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));
3114:   return(0);
3115: }

3117: /*MC
3118:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3119:    block sparse compressed row format.

3121:    Options Database Keys:
3122: + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3123: - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)

3125:    Level: beginner

3127:    Notes:
3128:     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3129:     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

3131:    Run with -info to see what version of the matrix-vector product is being used

3133: .seealso: MatCreateSeqBAIJ()
3134: M*/

3136: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);

3138: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3139: {
3141:   PetscMPIInt    size;
3142:   Mat_SeqBAIJ    *b;

3145:   MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
3146:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

3148:   PetscNewLog(B,&b);
3149:   B->data = (void*)b;
3150:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));

3152:   b->row          = NULL;
3153:   b->col          = NULL;
3154:   b->icol         = NULL;
3155:   b->reallocs     = 0;
3156:   b->saved_values = NULL;

3158:   b->roworiented        = PETSC_TRUE;
3159:   b->nonew              = 0;
3160:   b->diag               = NULL;
3161:   B->spptr              = NULL;
3162:   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3163:   b->keepnonzeropattern = PETSC_FALSE;

3165:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);
3166:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);
3167:   PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);
3168:   PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);
3169:   PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);
3170:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);
3171:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);
3172:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);
3173:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);
3174:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3175:   PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);
3176: #if defined(PETSC_HAVE_HYPRE)
3177:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);
3178: #endif
3179:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);
3180:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3181:   return(0);
3182: }

3184: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3185: {
3186:   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3188:   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;

3191:   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");

3193:   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3194:     c->imax           = a->imax;
3195:     c->ilen           = a->ilen;
3196:     c->free_imax_ilen = PETSC_FALSE;
3197:   } else {
3198:     PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);
3199:     PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));
3200:     for (i=0; i<mbs; i++) {
3201:       c->imax[i] = a->imax[i];
3202:       c->ilen[i] = a->ilen[i];
3203:     }
3204:     c->free_imax_ilen = PETSC_TRUE;
3205:   }

3207:   /* allocate the matrix space */
3208:   if (mallocmatspace) {
3209:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3210:       PetscCalloc1(bs2*nz,&c->a);
3211:       PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));

3213:       c->i            = a->i;
3214:       c->j            = a->j;
3215:       c->singlemalloc = PETSC_FALSE;
3216:       c->free_a       = PETSC_TRUE;
3217:       c->free_ij      = PETSC_FALSE;
3218:       c->parent       = A;
3219:       C->preallocated = PETSC_TRUE;
3220:       C->assembled    = PETSC_TRUE;

3222:       PetscObjectReference((PetscObject)A);
3223:       MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3224:       MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3225:     } else {
3226:       PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);
3227:       PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));

3229:       c->singlemalloc = PETSC_TRUE;
3230:       c->free_a       = PETSC_TRUE;
3231:       c->free_ij      = PETSC_TRUE;

3233:       PetscArraycpy(c->i,a->i,mbs+1);
3234:       if (mbs > 0) {
3235:         PetscArraycpy(c->j,a->j,nz);
3236:         if (cpvalues == MAT_COPY_VALUES) {
3237:           PetscArraycpy(c->a,a->a,bs2*nz);
3238:         } else {
3239:           PetscArrayzero(c->a,bs2*nz);
3240:         }
3241:       }
3242:       C->preallocated = PETSC_TRUE;
3243:       C->assembled    = PETSC_TRUE;
3244:     }
3245:   }

3247:   c->roworiented = a->roworiented;
3248:   c->nonew       = a->nonew;

3250:   PetscLayoutReference(A->rmap,&C->rmap);
3251:   PetscLayoutReference(A->cmap,&C->cmap);

3253:   c->bs2         = a->bs2;
3254:   c->mbs         = a->mbs;
3255:   c->nbs         = a->nbs;

3257:   if (a->diag) {
3258:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3259:       c->diag      = a->diag;
3260:       c->free_diag = PETSC_FALSE;
3261:     } else {
3262:       PetscMalloc1(mbs+1,&c->diag);
3263:       PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));
3264:       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3265:       c->free_diag = PETSC_TRUE;
3266:     }
3267:   } else c->diag = NULL;

3269:   c->nz         = a->nz;
3270:   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3271:   c->solve_work = NULL;
3272:   c->mult_work  = NULL;
3273:   c->sor_workt  = NULL;
3274:   c->sor_work   = NULL;

3276:   c->compressedrow.use   = a->compressedrow.use;
3277:   c->compressedrow.nrows = a->compressedrow.nrows;
3278:   if (a->compressedrow.use) {
3279:     i    = a->compressedrow.nrows;
3280:     PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);
3281:     PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));
3282:     PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);
3283:     PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);
3284:   } else {
3285:     c->compressedrow.use    = PETSC_FALSE;
3286:     c->compressedrow.i      = NULL;
3287:     c->compressedrow.rindex = NULL;
3288:   }
3289:   C->nonzerostate = A->nonzerostate;

3291:   PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3292:   return(0);
3293: }

3295: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3296: {

3300:   MatCreate(PetscObjectComm((PetscObject)A),B);
3301:   MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3302:   MatSetType(*B,MATSEQBAIJ);
3303:   MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3304:   return(0);
3305: }

3307: /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3308: PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3309: {
3310:   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3311:   PetscInt       *rowidxs,*colidxs;
3312:   PetscScalar    *matvals;

3316:   PetscViewerSetUp(viewer);

3318:   /* read matrix header */
3319:   PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);
3320:   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3321:   M = header[1]; N = header[2]; nz = header[3];
3322:   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3323:   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3324:   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");

3326:   /* set block sizes from the viewer's .info file */
3327:   MatLoad_Binary_BlockSizes(mat,viewer);
3328:   /* set local and global sizes if not set already */
3329:   if (mat->rmap->n < 0) mat->rmap->n = M;
3330:   if (mat->cmap->n < 0) mat->cmap->n = N;
3331:   if (mat->rmap->N < 0) mat->rmap->N = M;
3332:   if (mat->cmap->N < 0) mat->cmap->N = N;
3333:   PetscLayoutSetUp(mat->rmap);
3334:   PetscLayoutSetUp(mat->cmap);

3336:   /* check if the matrix sizes are correct */
3337:   MatGetSize(mat,&rows,&cols);
3338:   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3339:   MatGetBlockSize(mat,&bs);
3340:   MatGetLocalSize(mat,&m,&n);
3341:   mbs = m/bs; nbs = n/bs;

3343:   /* read in row lengths, column indices and nonzero values */
3344:   PetscMalloc1(m+1,&rowidxs);
3345:   PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);
3346:   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3347:   sum = rowidxs[m];
3348:   if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);

3350:   /* read in column indices and nonzero values */
3351:   PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);
3352:   PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);
3353:   PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);

3355:   { /* preallocate matrix storage */
3356:     PetscBT   bt; /* helper bit set to count nonzeros */
3357:     PetscInt  *nnz;
3358:     PetscBool sbaij;

3360:     PetscBTCreate(nbs,&bt);
3361:     PetscCalloc1(mbs,&nnz);
3362:     PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);
3363:     for (i=0; i<mbs; i++) {
3364:       PetscBTMemzero(nbs,bt);
3365:       for (k=0; k<bs; k++) {
3366:         PetscInt row = bs*i + k;
3367:         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3368:           PetscInt col = colidxs[j];
3369:           if (!sbaij || col >= row)
3370:             if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3371:         }
3372:       }
3373:     }
3374:     PetscBTDestroy(&bt);
3375:     MatSeqBAIJSetPreallocation(mat,bs,0,nnz);
3376:     MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);
3377:     PetscFree(nnz);
3378:   }

3380:   /* store matrix values */
3381:   for (i=0; i<m; i++) {
3382:     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3383:     (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);
3384:   }

3386:   PetscFree(rowidxs);
3387:   PetscFree2(colidxs,matvals);
3388:   MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
3389:   MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
3390:   return(0);
3391: }

3393: PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3394: {
3396:   PetscBool      isbinary;

3399:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
3400:   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3401:   MatLoad_SeqBAIJ_Binary(mat,viewer);
3402:   return(0);
3403: }

3405: /*@C
3406:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3407:    compressed row) format.  For good matrix assembly performance the
3408:    user should preallocate the matrix storage by setting the parameter nz
3409:    (or the array nnz).  By setting these parameters accurately, performance
3410:    during matrix assembly can be increased by more than a factor of 50.

3412:    Collective

3414:    Input Parameters:
3415: +  comm - MPI communicator, set to PETSC_COMM_SELF
3416: .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3417:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3418: .  m - number of rows
3419: .  n - number of columns
3420: .  nz - number of nonzero blocks  per block row (same for all rows)
3421: -  nnz - array containing the number of nonzero blocks in the various block rows
3422:          (possibly different for each block row) or NULL

3424:    Output Parameter:
3425: .  A - the matrix

3427:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3428:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3429:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

3431:    Options Database Keys:
3432: +   -mat_no_unroll - uses code that does not unroll the loops in the
3433:                      block calculations (much slower)
3434: -    -mat_block_size - size of the blocks to use

3436:    Level: intermediate

3438:    Notes:
3439:    The number of rows and columns must be divisible by blocksize.

3441:    If the nnz parameter is given then the nz parameter is ignored

3443:    A nonzero block is any block that as 1 or more nonzeros in it

3445:    The block AIJ format is fully compatible with standard Fortran 77
3446:    storage.  That is, the stored row and column indices can begin at
3447:    either one (as in Fortran) or zero.  See the users' manual for details.

3449:    Specify the preallocated storage with either nz or nnz (not both).
3450:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3451:    allocation.  See Users-Manual: ch_mat for details.
3452:    matrices.

3454: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3455: @*/
3456: PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3457: {

3461:   MatCreate(comm,A);
3462:   MatSetSizes(*A,m,n,m,n);
3463:   MatSetType(*A,MATSEQBAIJ);
3464:   MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);
3465:   return(0);
3466: }

3468: /*@C
3469:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3470:    per row in the matrix. For good matrix assembly performance the
3471:    user should preallocate the matrix storage by setting the parameter nz
3472:    (or the array nnz).  By setting these parameters accurately, performance
3473:    during matrix assembly can be increased by more than a factor of 50.

3475:    Collective

3477:    Input Parameters:
3478: +  B - the matrix
3479: .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3480:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3481: .  nz - number of block nonzeros per block row (same for all rows)
3482: -  nnz - array containing the number of block nonzeros in the various block rows
3483:          (possibly different for each block row) or NULL

3485:    Options Database Keys:
3486: +   -mat_no_unroll - uses code that does not unroll the loops in the
3487:                      block calculations (much slower)
3488: -   -mat_block_size - size of the blocks to use

3490:    Level: intermediate

3492:    Notes:
3493:    If the nnz parameter is given then the nz parameter is ignored

3495:    You can call MatGetInfo() to get information on how effective the preallocation was;
3496:    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3497:    You can also run with the option -info and look for messages with the string
3498:    malloc in them to see if additional memory allocation was needed.

3500:    The block AIJ format is fully compatible with standard Fortran 77
3501:    storage.  That is, the stored row and column indices can begin at
3502:    either one (as in Fortran) or zero.  See the users' manual for details.

3504:    Specify the preallocated storage with either nz or nnz (not both).
3505:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3506:    allocation.  See Users-Manual: ch_mat for details.

3508: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3509: @*/
3510: PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3511: {

3518:   PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3519:   return(0);
3520: }

3522: /*@C
3523:    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values

3525:    Collective

3527:    Input Parameters:
3528: +  B - the matrix
3529: .  i - the indices into j for the start of each local row (starts with zero)
3530: .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3531: -  v - optional values in the matrix

3533:    Level: advanced

3535:    Notes:
3536:    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3537:    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3538:    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3539:    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3540:    block column and the second index is over columns within a block.

3542:    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well

3544: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3545: @*/
3546: PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3547: {

3554:   PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3555:   return(0);
3556: }


3559: /*@
3560:      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.

3562:      Collective

3564:    Input Parameters:
3565: +  comm - must be an MPI communicator of size 1
3566: .  bs - size of block
3567: .  m - number of rows
3568: .  n - number of columns
3569: .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3570: .  j - column indices
3571: -  a - matrix values

3573:    Output Parameter:
3574: .  mat - the matrix

3576:    Level: advanced

3578:    Notes:
3579:        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3580:     once the matrix is destroyed

3582:        You cannot set new nonzero locations into this matrix, that will generate an error.

3584:        The i and j indices are 0 based

3586:        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).

3588:       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3589:       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3590:       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3591:       with column-major ordering within blocks.

3593: .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()

3595: @*/
3596: PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3597: {
3599:   PetscInt       ii;
3600:   Mat_SeqBAIJ    *baij;

3603:   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3604:   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");

3606:   MatCreate(comm,mat);
3607:   MatSetSizes(*mat,m,n,m,n);
3608:   MatSetType(*mat,MATSEQBAIJ);
3609:   MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);
3610:   baij = (Mat_SeqBAIJ*)(*mat)->data;
3611:   PetscMalloc2(m,&baij->imax,m,&baij->ilen);
3612:   PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));

3614:   baij->i = i;
3615:   baij->j = j;
3616:   baij->a = a;

3618:   baij->singlemalloc = PETSC_FALSE;
3619:   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3620:   baij->free_a       = PETSC_FALSE;
3621:   baij->free_ij      = PETSC_FALSE;

3623:   for (ii=0; ii<m; ii++) {
3624:     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3625:     if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3626:   }
3627:   if (PetscDefined(USE_DEBUG)) {
3628:     for (ii=0; ii<baij->i[m]; ii++) {
3629:       if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3630:       if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3631:     }
3632:   }

3634:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3635:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3636:   return(0);
3637: }

3639: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3640: {
3642:   PetscMPIInt    size;

3645:   MPI_Comm_size(comm,&size);
3646:   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3647:     MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);
3648:   } else {
3649:     MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);
3650:   }
3651:   return(0);
3652: }