Actual source code: baij.c

petsc-3.8.4 2018-03-24
Report Typos and Errors

  2: /*
  3:     Defines the basic matrix operations for the BAIJ (compressed row)
  4:   matrix storage format.
  5: */
  6:  #include <../src/mat/impls/baij/seq/baij.h>
  7:  #include <petscblaslapack.h>
  8:  #include <petsc/private/kernels/blockinvert.h>
  9:  #include <petsc/private/kernels/blockmatmult.h>

 11: #if defined(PETSC_HAVE_HYPRE)
 12: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
 13: #endif

 15: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 16: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,const MatType,MatReuse,Mat*);
 17: #endif

 19: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
 20: {
 21:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 23:   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
 24:   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
 25:   PetscReal      shift = 0.0;
 26:   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;

 29:   allowzeropivot = PetscNot(A->erroriffailure);

 31:   if (a->idiagvalid) {
 32:     if (values) *values = a->idiag;
 33:     return(0);
 34:   }
 35:   MatMarkDiagonal_SeqBAIJ(A);
 36:   diag_offset = a->diag;
 37:   if (!a->idiag) {
 38:     PetscMalloc1(2*bs2*mbs,&a->idiag);
 39:     PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));
 40:   }
 41:   diag  = a->idiag;
 42:   mdiag = a->idiag+bs2*mbs;
 43:   if (values) *values = a->idiag;
 44:   /* factor and invert each block */
 45:   switch (bs) {
 46:   case 1:
 47:     for (i=0; i<mbs; i++) {
 48:       odiag    = v + 1*diag_offset[i];
 49:       diag[0]  = odiag[0];
 50:       mdiag[0] = odiag[0];

 52:       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
 53:         if (allowzeropivot) {
 54:           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 55:           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
 56:           A->factorerror_zeropivot_row   = i;
 57:           PetscInfo1(A,"Zero pivot, row %D\n",i);
 58:         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
 59:       }

 61:       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
 62:       diag    += 1;
 63:       mdiag   += 1;
 64:     }
 65:     break;
 66:   case 2:
 67:     for (i=0; i<mbs; i++) {
 68:       odiag    = v + 4*diag_offset[i];
 69:       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 70:       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 71:       PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);
 72:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 73:       diag    += 4;
 74:       mdiag   += 4;
 75:     }
 76:     break;
 77:   case 3:
 78:     for (i=0; i<mbs; i++) {
 79:       odiag    = v + 9*diag_offset[i];
 80:       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 81:       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 82:       diag[8]  = odiag[8];
 83:       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 84:       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
 85:       mdiag[8] = odiag[8];
 86:       PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);
 87:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 88:       diag    += 9;
 89:       mdiag   += 9;
 90:     }
 91:     break;
 92:   case 4:
 93:     for (i=0; i<mbs; i++) {
 94:       odiag  = v + 16*diag_offset[i];
 95:       PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
 96:       PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
 97:       PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);
 98:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 99:       diag  += 16;
100:       mdiag += 16;
101:     }
102:     break;
103:   case 5:
104:     for (i=0; i<mbs; i++) {
105:       odiag  = v + 25*diag_offset[i];
106:       PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
107:       PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
108:       PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);
109:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
110:       diag  += 25;
111:       mdiag += 25;
112:     }
113:     break;
114:   case 6:
115:     for (i=0; i<mbs; i++) {
116:       odiag  = v + 36*diag_offset[i];
117:       PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));
118:       PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));
119:       PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);
120:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
121:       diag  += 36;
122:       mdiag += 36;
123:     }
124:     break;
125:   case 7:
126:     for (i=0; i<mbs; i++) {
127:       odiag  = v + 49*diag_offset[i];
128:       PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));
129:       PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));
130:       PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);
131:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
132:       diag  += 49;
133:       mdiag += 49;
134:     }
135:     break;
136:   default:
137:     PetscMalloc2(bs,&v_work,bs,&v_pivots);
138:     for (i=0; i<mbs; i++) {
139:       odiag  = v + bs2*diag_offset[i];
140:       PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));
141:       PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));
142:       PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);
143:       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
144:       diag  += bs2;
145:       mdiag += bs2;
146:     }
147:     PetscFree2(v_work,v_pivots);
148:   }
149:   a->idiagvalid = PETSC_TRUE;
150:   return(0);
151: }

153: PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
154: {
155:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
156:   PetscScalar       *x,*work,*w,*workt,*t;
157:   const MatScalar   *v,*aa = a->a, *idiag;
158:   const PetscScalar *b,*xb;
159:   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
160:   PetscErrorCode    ierr;
161:   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
162:   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;

165:   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
166:   its = its*lits;
167:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
168:   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
169:   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
170:   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
171:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");

173:   if (!a->idiagvalid) {MatInvertBlockDiagonal(A,NULL);}

175:   if (!m) return(0);
176:   diag  = a->diag;
177:   idiag = a->idiag;
178:   k    = PetscMax(A->rmap->n,A->cmap->n);
179:   if (!a->mult_work) {
180:     PetscMalloc1(k+1,&a->mult_work);
181:   }
182:   if (!a->sor_workt) {
183:     PetscMalloc1(k,&a->sor_workt);
184:   }
185:   if (!a->sor_work) {
186:     PetscMalloc1(bs,&a->sor_work);
187:   }
188:   work = a->mult_work;
189:   t    = a->sor_workt;
190:   w    = a->sor_work;

192:   VecGetArray(xx,&x);
193:   VecGetArrayRead(bb,&b);

195:   if (flag & SOR_ZERO_INITIAL_GUESS) {
196:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
197:       switch (bs) {
198:       case 1:
199:         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
200:         t[0] = b[0];
201:         i2     = 1;
202:         idiag += 1;
203:         for (i=1; i<m; i++) {
204:           v  = aa + ai[i];
205:           vi = aj + ai[i];
206:           nz = diag[i] - ai[i];
207:           s[0] = b[i2];
208:           for (j=0; j<nz; j++) {
209:             xw[0] = x[vi[j]];
210:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
211:           }
212:           t[i2] = s[0];
213:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
214:           x[i2]  = xw[0];
215:           idiag += 1;
216:           i2    += 1;
217:         }
218:         break;
219:       case 2:
220:         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
221:         t[0] = b[0]; t[1] = b[1];
222:         i2     = 2;
223:         idiag += 4;
224:         for (i=1; i<m; i++) {
225:           v  = aa + 4*ai[i];
226:           vi = aj + ai[i];
227:           nz = diag[i] - ai[i];
228:           s[0] = b[i2]; s[1] = b[i2+1];
229:           for (j=0; j<nz; j++) {
230:             idx = 2*vi[j];
231:             it  = 4*j;
232:             xw[0] = x[idx]; xw[1] = x[1+idx];
233:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
234:           }
235:           t[i2] = s[0]; t[i2+1] = s[1];
236:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
237:           x[i2]   = xw[0]; x[i2+1] = xw[1];
238:           idiag  += 4;
239:           i2     += 2;
240:         }
241:         break;
242:       case 3:
243:         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
244:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
245:         i2     = 3;
246:         idiag += 9;
247:         for (i=1; i<m; i++) {
248:           v  = aa + 9*ai[i];
249:           vi = aj + ai[i];
250:           nz = diag[i] - ai[i];
251:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
252:           while (nz--) {
253:             idx = 3*(*vi++);
254:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
255:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
256:             v  += 9;
257:           }
258:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
259:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
260:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
261:           idiag  += 9;
262:           i2     += 3;
263:         }
264:         break;
265:       case 4:
266:         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
267:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
268:         i2     = 4;
269:         idiag += 16;
270:         for (i=1; i<m; i++) {
271:           v  = aa + 16*ai[i];
272:           vi = aj + ai[i];
273:           nz = diag[i] - ai[i];
274:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
275:           while (nz--) {
276:             idx = 4*(*vi++);
277:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
278:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
279:             v  += 16;
280:           }
281:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
282:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
283:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
284:           idiag  += 16;
285:           i2     += 4;
286:         }
287:         break;
288:       case 5:
289:         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
290:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
291:         i2     = 5;
292:         idiag += 25;
293:         for (i=1; i<m; i++) {
294:           v  = aa + 25*ai[i];
295:           vi = aj + ai[i];
296:           nz = diag[i] - ai[i];
297:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
298:           while (nz--) {
299:             idx = 5*(*vi++);
300:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
301:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
302:             v  += 25;
303:           }
304:           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
305:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
306:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
307:           idiag  += 25;
308:           i2     += 5;
309:         }
310:         break;
311:       case 6:
312:         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
313:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
314:         i2     = 6;
315:         idiag += 36;
316:         for (i=1; i<m; i++) {
317:           v  = aa + 36*ai[i];
318:           vi = aj + ai[i];
319:           nz = diag[i] - ai[i];
320:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
321:           while (nz--) {
322:             idx = 6*(*vi++);
323:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
324:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
325:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
326:             v  += 36;
327:           }
328:           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
329:           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
330:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
331:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
332:           idiag  += 36;
333:           i2     += 6;
334:         }
335:         break;
336:       case 7:
337:         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
338:         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
339:         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
340:         i2     = 7;
341:         idiag += 49;
342:         for (i=1; i<m; i++) {
343:           v  = aa + 49*ai[i];
344:           vi = aj + ai[i];
345:           nz = diag[i] - ai[i];
346:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
347:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
348:           while (nz--) {
349:             idx = 7*(*vi++);
350:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
351:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
352:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
353:             v  += 49;
354:           }
355:           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
356:           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
357:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
358:           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
359:           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
360:           idiag  += 49;
361:           i2     += 7;
362:         }
363:         break;
364:       default:
365:         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
366:         PetscMemcpy(t,b,bs*sizeof(PetscScalar));
367:         i2     = bs;
368:         idiag += bs2;
369:         for (i=1; i<m; i++) {
370:           v  = aa + bs2*ai[i];
371:           vi = aj + ai[i];
372:           nz = diag[i] - ai[i];

374:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
375:           /* copy all rows of x that are needed into contiguous space */
376:           workt = work;
377:           for (j=0; j<nz; j++) {
378:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
379:             workt += bs;
380:           }
381:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
382:           PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));
383:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);

385:           idiag += bs2;
386:           i2    += bs;
387:         }
388:         break;
389:       }
390:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
391:       PetscLogFlops(1.0*bs2*a->nz);
392:       xb = t;
393:     }
394:     else xb = b;
395:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
396:       idiag = a->idiag+bs2*(a->mbs-1);
397:       i2 = bs * (m-1);
398:       switch (bs) {
399:       case 1:
400:         s[0]  = xb[i2];
401:         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
402:         x[i2] = xw[0];
403:         i2   -= 1;
404:         for (i=m-2; i>=0; i--) {
405:           v  = aa + (diag[i]+1);
406:           vi = aj + diag[i] + 1;
407:           nz = ai[i+1] - diag[i] - 1;
408:           s[0] = xb[i2];
409:           for (j=0; j<nz; j++) {
410:             xw[0] = x[vi[j]];
411:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
412:           }
413:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
414:           x[i2]  = xw[0];
415:           idiag -= 1;
416:           i2    -= 1;
417:         }
418:         break;
419:       case 2:
420:         s[0]  = xb[i2]; s[1] = xb[i2+1];
421:         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
422:         x[i2] = xw[0]; x[i2+1] = xw[1];
423:         i2    -= 2;
424:         idiag -= 4;
425:         for (i=m-2; i>=0; i--) {
426:           v  = aa + 4*(diag[i] + 1);
427:           vi = aj + diag[i] + 1;
428:           nz = ai[i+1] - diag[i] - 1;
429:           s[0] = xb[i2]; s[1] = xb[i2+1];
430:           for (j=0; j<nz; j++) {
431:             idx = 2*vi[j];
432:             it  = 4*j;
433:             xw[0] = x[idx]; xw[1] = x[1+idx];
434:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
435:           }
436:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
437:           x[i2]   = xw[0]; x[i2+1] = xw[1];
438:           idiag  -= 4;
439:           i2     -= 2;
440:         }
441:         break;
442:       case 3:
443:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
444:         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
445:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
446:         i2    -= 3;
447:         idiag -= 9;
448:         for (i=m-2; i>=0; i--) {
449:           v  = aa + 9*(diag[i]+1);
450:           vi = aj + diag[i] + 1;
451:           nz = ai[i+1] - diag[i] - 1;
452:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
453:           while (nz--) {
454:             idx = 3*(*vi++);
455:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
456:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
457:             v  += 9;
458:           }
459:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
460:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
461:           idiag  -= 9;
462:           i2     -= 3;
463:         }
464:         break;
465:       case 4:
466:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
467:         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
468:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
469:         i2    -= 4;
470:         idiag -= 16;
471:         for (i=m-2; i>=0; i--) {
472:           v  = aa + 16*(diag[i]+1);
473:           vi = aj + diag[i] + 1;
474:           nz = ai[i+1] - diag[i] - 1;
475:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
476:           while (nz--) {
477:             idx = 4*(*vi++);
478:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
479:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
480:             v  += 16;
481:           }
482:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
483:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
484:           idiag  -= 16;
485:           i2     -= 4;
486:         }
487:         break;
488:       case 5:
489:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
490:         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
491:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
492:         i2    -= 5;
493:         idiag -= 25;
494:         for (i=m-2; i>=0; i--) {
495:           v  = aa + 25*(diag[i]+1);
496:           vi = aj + diag[i] + 1;
497:           nz = ai[i+1] - diag[i] - 1;
498:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
499:           while (nz--) {
500:             idx = 5*(*vi++);
501:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
502:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
503:             v  += 25;
504:           }
505:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
506:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
507:           idiag  -= 25;
508:           i2     -= 5;
509:         }
510:         break;
511:       case 6:
512:         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
513:         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
514:         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
515:         i2    -= 6;
516:         idiag -= 36;
517:         for (i=m-2; i>=0; i--) {
518:           v  = aa + 36*(diag[i]+1);
519:           vi = aj + diag[i] + 1;
520:           nz = ai[i+1] - diag[i] - 1;
521:           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
522:           while (nz--) {
523:             idx = 6*(*vi++);
524:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
525:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
526:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
527:             v  += 36;
528:           }
529:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
530:           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
531:           idiag  -= 36;
532:           i2     -= 6;
533:         }
534:         break;
535:       case 7:
536:         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
537:         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
538:         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
539:         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
540:         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
541:         i2    -= 7;
542:         idiag -= 49;
543:         for (i=m-2; i>=0; i--) {
544:           v  = aa + 49*(diag[i]+1);
545:           vi = aj + diag[i] + 1;
546:           nz = ai[i+1] - diag[i] - 1;
547:           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
548:           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
549:           while (nz--) {
550:             idx = 7*(*vi++);
551:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
552:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
553:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
554:             v  += 49;
555:           }
556:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
557:           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
558:           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
559:           idiag  -= 49;
560:           i2     -= 7;
561:         }
562:         break;
563:       default:
564:         PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
565:         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
566:         i2    -= bs;
567:         idiag -= bs2;
568:         for (i=m-2; i>=0; i--) {
569:           v  = aa + bs2*(diag[i]+1);
570:           vi = aj + diag[i] + 1;
571:           nz = ai[i+1] - diag[i] - 1;

573:           PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));
574:           /* copy all rows of x that are needed into contiguous space */
575:           workt = work;
576:           for (j=0; j<nz; j++) {
577:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
578:             workt += bs;
579:           }
580:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
581:           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);

583:           idiag -= bs2;
584:           i2    -= bs;
585:         }
586:         break;
587:       }
588:       PetscLogFlops(1.0*bs2*(a->nz));
589:     }
590:     its--;
591:   }
592:   while (its--) {
593:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
594:       idiag = a->idiag;
595:       i2 = 0;
596:       switch (bs) {
597:       case 1:
598:         for (i=0; i<m; i++) {
599:           v  = aa + ai[i];
600:           vi = aj + ai[i];
601:           nz = ai[i+1] - ai[i];
602:           s[0] = b[i2];
603:           for (j=0; j<nz; j++) {
604:             xw[0] = x[vi[j]];
605:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
606:           }
607:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
608:           x[i2] += xw[0];
609:           idiag += 1;
610:           i2    += 1;
611:         }
612:         break;
613:       case 2:
614:         for (i=0; i<m; i++) {
615:           v  = aa + 4*ai[i];
616:           vi = aj + ai[i];
617:           nz = ai[i+1] - ai[i];
618:           s[0] = b[i2]; s[1] = b[i2+1];
619:           for (j=0; j<nz; j++) {
620:             idx = 2*vi[j];
621:             it  = 4*j;
622:             xw[0] = x[idx]; xw[1] = x[1+idx];
623:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
624:           }
625:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
626:           x[i2]  += xw[0]; x[i2+1] += xw[1];
627:           idiag  += 4;
628:           i2     += 2;
629:         }
630:         break;
631:       case 3:
632:         for (i=0; i<m; i++) {
633:           v  = aa + 9*ai[i];
634:           vi = aj + ai[i];
635:           nz = ai[i+1] - ai[i];
636:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
637:           while (nz--) {
638:             idx = 3*(*vi++);
639:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
640:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
641:             v  += 9;
642:           }
643:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
644:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
645:           idiag  += 9;
646:           i2     += 3;
647:         }
648:         break;
649:       case 4:
650:         for (i=0; i<m; i++) {
651:           v  = aa + 16*ai[i];
652:           vi = aj + ai[i];
653:           nz = ai[i+1] - ai[i];
654:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
655:           while (nz--) {
656:             idx = 4*(*vi++);
657:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
658:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
659:             v  += 16;
660:           }
661:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
662:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
663:           idiag  += 16;
664:           i2     += 4;
665:         }
666:         break;
667:       case 5:
668:         for (i=0; i<m; i++) {
669:           v  = aa + 25*ai[i];
670:           vi = aj + ai[i];
671:           nz = ai[i+1] - ai[i];
672:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
673:           while (nz--) {
674:             idx = 5*(*vi++);
675:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
676:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
677:             v  += 25;
678:           }
679:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
680:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
681:           idiag  += 25;
682:           i2     += 5;
683:         }
684:         break;
685:       case 6:
686:         for (i=0; i<m; i++) {
687:           v  = aa + 36*ai[i];
688:           vi = aj + ai[i];
689:           nz = ai[i+1] - ai[i];
690:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
691:           while (nz--) {
692:             idx = 6*(*vi++);
693:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
694:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
695:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
696:             v  += 36;
697:           }
698:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
699:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
700:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
701:           idiag  += 36;
702:           i2     += 6;
703:         }
704:         break;
705:       case 7:
706:         for (i=0; i<m; i++) {
707:           v  = aa + 49*ai[i];
708:           vi = aj + ai[i];
709:           nz = ai[i+1] - ai[i];
710:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
711:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
712:           while (nz--) {
713:             idx = 7*(*vi++);
714:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
715:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
716:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
717:             v  += 49;
718:           }
719:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
720:           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
721:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
722:           idiag  += 49;
723:           i2     += 7;
724:         }
725:         break;
726:       default:
727:         for (i=0; i<m; i++) {
728:           v  = aa + bs2*ai[i];
729:           vi = aj + ai[i];
730:           nz = ai[i+1] - ai[i];

732:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
733:           /* copy all rows of x that are needed into contiguous space */
734:           workt = work;
735:           for (j=0; j<nz; j++) {
736:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
737:             workt += bs;
738:           }
739:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
740:           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);

742:           idiag += bs2;
743:           i2    += bs;
744:         }
745:         break;
746:       }
747:       PetscLogFlops(2.0*bs2*a->nz);
748:     }
749:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
750:       idiag = a->idiag+bs2*(a->mbs-1);
751:       i2 = bs * (m-1);
752:       switch (bs) {
753:       case 1:
754:         for (i=m-1; i>=0; i--) {
755:           v  = aa + ai[i];
756:           vi = aj + ai[i];
757:           nz = ai[i+1] - ai[i];
758:           s[0] = b[i2];
759:           for (j=0; j<nz; j++) {
760:             xw[0] = x[vi[j]];
761:             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
762:           }
763:           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
764:           x[i2] += xw[0];
765:           idiag -= 1;
766:           i2    -= 1;
767:         }
768:         break;
769:       case 2:
770:         for (i=m-1; i>=0; i--) {
771:           v  = aa + 4*ai[i];
772:           vi = aj + ai[i];
773:           nz = ai[i+1] - ai[i];
774:           s[0] = b[i2]; s[1] = b[i2+1];
775:           for (j=0; j<nz; j++) {
776:             idx = 2*vi[j];
777:             it  = 4*j;
778:             xw[0] = x[idx]; xw[1] = x[1+idx];
779:             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
780:           }
781:           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
782:           x[i2]  += xw[0]; x[i2+1] += xw[1];
783:           idiag  -= 4;
784:           i2     -= 2;
785:         }
786:         break;
787:       case 3:
788:         for (i=m-1; i>=0; i--) {
789:           v  = aa + 9*ai[i];
790:           vi = aj + ai[i];
791:           nz = ai[i+1] - ai[i];
792:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
793:           while (nz--) {
794:             idx = 3*(*vi++);
795:             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
796:             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
797:             v  += 9;
798:           }
799:           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
800:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
801:           idiag  -= 9;
802:           i2     -= 3;
803:         }
804:         break;
805:       case 4:
806:         for (i=m-1; i>=0; i--) {
807:           v  = aa + 16*ai[i];
808:           vi = aj + ai[i];
809:           nz = ai[i+1] - ai[i];
810:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
811:           while (nz--) {
812:             idx = 4*(*vi++);
813:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
814:             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
815:             v  += 16;
816:           }
817:           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
818:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
819:           idiag  -= 16;
820:           i2     -= 4;
821:         }
822:         break;
823:       case 5:
824:         for (i=m-1; i>=0; i--) {
825:           v  = aa + 25*ai[i];
826:           vi = aj + ai[i];
827:           nz = ai[i+1] - ai[i];
828:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
829:           while (nz--) {
830:             idx = 5*(*vi++);
831:             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
832:             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
833:             v  += 25;
834:           }
835:           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
836:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
837:           idiag  -= 25;
838:           i2     -= 5;
839:         }
840:         break;
841:       case 6:
842:         for (i=m-1; i>=0; i--) {
843:           v  = aa + 36*ai[i];
844:           vi = aj + ai[i];
845:           nz = ai[i+1] - ai[i];
846:           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
847:           while (nz--) {
848:             idx = 6*(*vi++);
849:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
850:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
851:             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
852:             v  += 36;
853:           }
854:           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
855:           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
856:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
857:           idiag  -= 36;
858:           i2     -= 6;
859:         }
860:         break;
861:       case 7:
862:         for (i=m-1; i>=0; i--) {
863:           v  = aa + 49*ai[i];
864:           vi = aj + ai[i];
865:           nz = ai[i+1] - ai[i];
866:           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
867:           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
868:           while (nz--) {
869:             idx = 7*(*vi++);
870:             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
871:             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
872:             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
873:             v  += 49;
874:           }
875:           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
876:           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
877:           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
878:           idiag  -= 49;
879:           i2     -= 7;
880:         }
881:         break;
882:       default:
883:         for (i=m-1; i>=0; i--) {
884:           v  = aa + bs2*ai[i];
885:           vi = aj + ai[i];
886:           nz = ai[i+1] - ai[i];

888:           PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));
889:           /* copy all rows of x that are needed into contiguous space */
890:           workt = work;
891:           for (j=0; j<nz; j++) {
892:             PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));
893:             workt += bs;
894:           }
895:           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
896:           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);

898:           idiag -= bs2;
899:           i2    -= bs;
900:         }
901:         break;
902:       }
903:       PetscLogFlops(2.0*bs2*(a->nz));
904:     }
905:   }
906:   VecRestoreArray(xx,&x);
907:   VecRestoreArrayRead(bb,&b);
908:   return(0);
909: }


912: /*
913:     Special version for direct calls from Fortran (Used in PETSc-fun3d)
914: */
915: #if defined(PETSC_HAVE_FORTRAN_CAPS)
916: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
917: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
918: #define matsetvaluesblocked4_ matsetvaluesblocked4
919: #endif

921: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
922: {
923:   Mat               A  = *AA;
924:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
925:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
926:   PetscInt          *ai    =a->i,*ailen=a->ilen;
927:   PetscInt          *aj    =a->j,stepval,lastcol = -1;
928:   const PetscScalar *value = v;
929:   MatScalar         *ap,*aa = a->a,*bap;

932:   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
933:   stepval = (n-1)*4;
934:   for (k=0; k<m; k++) { /* loop over added rows */
935:     row  = im[k];
936:     rp   = aj + ai[row];
937:     ap   = aa + 16*ai[row];
938:     nrow = ailen[row];
939:     low  = 0;
940:     high = nrow;
941:     for (l=0; l<n; l++) { /* loop over added columns */
942:       col = in[l];
943:       if (col <= lastcol)  low = 0;
944:       else                high = nrow;
945:       lastcol = col;
946:       value   = v + k*(stepval+4 + l)*4;
947:       while (high-low > 7) {
948:         t = (low+high)/2;
949:         if (rp[t] > col) high = t;
950:         else             low  = t;
951:       }
952:       for (i=low; i<high; i++) {
953:         if (rp[i] > col) break;
954:         if (rp[i] == col) {
955:           bap = ap +  16*i;
956:           for (ii=0; ii<4; ii++,value+=stepval) {
957:             for (jj=ii; jj<16; jj+=4) {
958:               bap[jj] += *value++;
959:             }
960:           }
961:           goto noinsert2;
962:         }
963:       }
964:       N = nrow++ - 1;
965:       high++; /* added new column index thus must search to one higher than before */
966:       /* shift up all the later entries in this row */
967:       for (ii=N; ii>=i; ii--) {
968:         rp[ii+1] = rp[ii];
969:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
970:       }
971:       if (N >= i) {
972:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
973:       }
974:       rp[i] = col;
975:       bap   = ap +  16*i;
976:       for (ii=0; ii<4; ii++,value+=stepval) {
977:         for (jj=ii; jj<16; jj+=4) {
978:           bap[jj] = *value++;
979:         }
980:       }
981:       noinsert2:;
982:       low = i;
983:     }
984:     ailen[row] = nrow;
985:   }
986:   PetscFunctionReturnVoid();
987: }

989: #if defined(PETSC_HAVE_FORTRAN_CAPS)
990: #define matsetvalues4_ MATSETVALUES4
991: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
992: #define matsetvalues4_ matsetvalues4
993: #endif

995: PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
996: {
997:   Mat         A  = *AA;
998:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
999:   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1000:   PetscInt    *ai=a->i,*ailen=a->ilen;
1001:   PetscInt    *aj=a->j,brow,bcol;
1002:   PetscInt    ridx,cidx,lastcol = -1;
1003:   MatScalar   *ap,value,*aa=a->a,*bap;

1006:   for (k=0; k<m; k++) { /* loop over added rows */
1007:     row  = im[k]; brow = row/4;
1008:     rp   = aj + ai[brow];
1009:     ap   = aa + 16*ai[brow];
1010:     nrow = ailen[brow];
1011:     low  = 0;
1012:     high = nrow;
1013:     for (l=0; l<n; l++) { /* loop over added columns */
1014:       col   = in[l]; bcol = col/4;
1015:       ridx  = row % 4; cidx = col % 4;
1016:       value = v[l + k*n];
1017:       if (col <= lastcol)  low = 0;
1018:       else                high = nrow;
1019:       lastcol = col;
1020:       while (high-low > 7) {
1021:         t = (low+high)/2;
1022:         if (rp[t] > bcol) high = t;
1023:         else              low  = t;
1024:       }
1025:       for (i=low; i<high; i++) {
1026:         if (rp[i] > bcol) break;
1027:         if (rp[i] == bcol) {
1028:           bap   = ap +  16*i + 4*cidx + ridx;
1029:           *bap += value;
1030:           goto noinsert1;
1031:         }
1032:       }
1033:       N = nrow++ - 1;
1034:       high++; /* added new column thus must search to one higher than before */
1035:       /* shift up all the later entries in this row */
1036:       for (ii=N; ii>=i; ii--) {
1037:         rp[ii+1] = rp[ii];
1038:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1039:       }
1040:       if (N>=i) {
1041:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1042:       }
1043:       rp[i]                    = bcol;
1044:       ap[16*i + 4*cidx + ridx] = value;
1045: noinsert1:;
1046:       low = i;
1047:     }
1048:     ailen[brow] = nrow;
1049:   }
1050:   PetscFunctionReturnVoid();
1051: }

1053: /*
1054:      Checks for missing diagonals
1055: */
1056: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1057: {
1058:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1060:   PetscInt       *diag,*ii = a->i,i;

1063:   MatMarkDiagonal_SeqBAIJ(A);
1064:   *missing = PETSC_FALSE;
1065:   if (A->rmap->n > 0 && !ii) {
1066:     *missing = PETSC_TRUE;
1067:     if (d) *d = 0;
1068:     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1069:   } else {
1070:     diag = a->diag;
1071:     for (i=0; i<a->mbs; i++) {
1072:       if (diag[i] >= ii[i+1]) {
1073:         *missing = PETSC_TRUE;
1074:         if (d) *d = i;
1075:         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1076:         break;
1077:       }
1078:     }
1079:   }
1080:   return(0);
1081: }

1083: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1084: {
1085:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1087:   PetscInt       i,j,m = a->mbs;

1090:   if (!a->diag) {
1091:     PetscMalloc1(m,&a->diag);
1092:     PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));
1093:     a->free_diag = PETSC_TRUE;
1094:   }
1095:   for (i=0; i<m; i++) {
1096:     a->diag[i] = a->i[i+1];
1097:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1098:       if (a->j[j] == i) {
1099:         a->diag[i] = j;
1100:         break;
1101:       }
1102:     }
1103:   }
1104:   return(0);
1105: }


1108: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1109: {
1110:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1112:   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1113:   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;

1116:   *nn = n;
1117:   if (!ia) return(0);
1118:   if (symmetric) {
1119:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);
1120:     nz   = tia[n];
1121:   } else {
1122:     tia = a->i; tja = a->j;
1123:   }

1125:   if (!blockcompressed && bs > 1) {
1126:     (*nn) *= bs;
1127:     /* malloc & create the natural set of indices */
1128:     PetscMalloc1((n+1)*bs,ia);
1129:     if (n) {
1130:       (*ia)[0] = oshift;
1131:       for (j=1; j<bs; j++) {
1132:         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1133:       }
1134:     }

1136:     for (i=1; i<n; i++) {
1137:       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1138:       for (j=1; j<bs; j++) {
1139:         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1140:       }
1141:     }
1142:     if (n) {
1143:       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1144:     }

1146:     if (inja) {
1147:       PetscMalloc1(nz*bs*bs,ja);
1148:       cnt = 0;
1149:       for (i=0; i<n; i++) {
1150:         for (j=0; j<bs; j++) {
1151:           for (k=tia[i]; k<tia[i+1]; k++) {
1152:             for (l=0; l<bs; l++) {
1153:               (*ja)[cnt++] = bs*tja[k] + l;
1154:             }
1155:           }
1156:         }
1157:       }
1158:     }

1160:     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1161:       PetscFree(tia);
1162:       PetscFree(tja);
1163:     }
1164:   } else if (oshift == 1) {
1165:     if (symmetric) {
1166:       nz = tia[A->rmap->n/bs];
1167:       /*  add 1 to i and j indices */
1168:       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1169:       *ia = tia;
1170:       if (ja) {
1171:         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1172:         *ja = tja;
1173:       }
1174:     } else {
1175:       nz = a->i[A->rmap->n/bs];
1176:       /* malloc space and  add 1 to i and j indices */
1177:       PetscMalloc1(A->rmap->n/bs+1,ia);
1178:       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1179:       if (ja) {
1180:         PetscMalloc1(nz,ja);
1181:         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1182:       }
1183:     }
1184:   } else {
1185:     *ia = tia;
1186:     if (ja) *ja = tja;
1187:   }
1188:   return(0);
1189: }

1191: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1192: {

1196:   if (!ia) return(0);
1197:   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1198:     PetscFree(*ia);
1199:     if (ja) {PetscFree(*ja);}
1200:   }
1201:   return(0);
1202: }

1204: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1205: {
1206:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1210: #if defined(PETSC_USE_LOG)
1211:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1212: #endif
1213:   MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1214:   ISDestroy(&a->row);
1215:   ISDestroy(&a->col);
1216:   if (a->free_diag) {PetscFree(a->diag);}
1217:   PetscFree(a->idiag);
1218:   if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1219:   PetscFree(a->solve_work);
1220:   PetscFree(a->mult_work);
1221:   PetscFree(a->sor_workt);
1222:   PetscFree(a->sor_work);
1223:   ISDestroy(&a->icol);
1224:   PetscFree(a->saved_values);
1225:   PetscFree2(a->compressedrow.i,a->compressedrow.rindex);

1227:   MatDestroy(&a->sbaijMat);
1228:   MatDestroy(&a->parent);
1229:   PetscFree(A->data);

1231:   PetscObjectChangeTypeName((PetscObject)A,0);
1232:   PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);
1233:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
1234:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
1235:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);
1236:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);
1237:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);
1238:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);
1239:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);
1240:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);
1241:   PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);
1242: #if defined(PETSC_HAVE_HYPRE)
1243:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);
1244: #endif
1245:   return(0);
1246: }

1248: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1249: {
1250:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1254:   switch (op) {
1255:   case MAT_ROW_ORIENTED:
1256:     a->roworiented = flg;
1257:     break;
1258:   case MAT_KEEP_NONZERO_PATTERN:
1259:     a->keepnonzeropattern = flg;
1260:     break;
1261:   case MAT_NEW_NONZERO_LOCATIONS:
1262:     a->nonew = (flg ? 0 : 1);
1263:     break;
1264:   case MAT_NEW_NONZERO_LOCATION_ERR:
1265:     a->nonew = (flg ? -1 : 0);
1266:     break;
1267:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1268:     a->nonew = (flg ? -2 : 0);
1269:     break;
1270:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1271:     a->nounused = (flg ? -1 : 0);
1272:     break;
1273:   case MAT_NEW_DIAGONALS:
1274:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1275:   case MAT_USE_HASH_TABLE:
1276:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1277:     break;
1278:   case MAT_SPD:
1279:   case MAT_SYMMETRIC:
1280:   case MAT_STRUCTURALLY_SYMMETRIC:
1281:   case MAT_HERMITIAN:
1282:   case MAT_SYMMETRY_ETERNAL:
1283:   case MAT_SUBMAT_SINGLEIS:
1284:   case MAT_STRUCTURE_ONLY:
1285:     /* These options are handled directly by MatSetOption() */
1286:     break;
1287:   default:
1288:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1289:   }
1290:   return(0);
1291: }

1293: /* used for both SeqBAIJ and SeqSBAIJ matrices */
1294: PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1295: {
1297:   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1298:   MatScalar      *aa_i;
1299:   PetscScalar    *v_i;

1302:   bs  = A->rmap->bs;
1303:   bs2 = bs*bs;
1304:   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);

1306:   bn  = row/bs;   /* Block number */
1307:   bp  = row % bs; /* Block Position */
1308:   M   = ai[bn+1] - ai[bn];
1309:   *nz = bs*M;

1311:   if (v) {
1312:     *v = 0;
1313:     if (*nz) {
1314:       PetscMalloc1(*nz,v);
1315:       for (i=0; i<M; i++) { /* for each block in the block row */
1316:         v_i  = *v + i*bs;
1317:         aa_i = aa + bs2*(ai[bn] + i);
1318:         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1319:       }
1320:     }
1321:   }

1323:   if (idx) {
1324:     *idx = 0;
1325:     if (*nz) {
1326:       PetscMalloc1(*nz,idx);
1327:       for (i=0; i<M; i++) { /* for each block in the block row */
1328:         idx_i = *idx + i*bs;
1329:         itmp  = bs*aj[ai[bn] + i];
1330:         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1331:       }
1332:     }
1333:   }
1334:   return(0);
1335: }

1337: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1338: {
1339:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1343:   MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);
1344:   return(0);
1345: }

1347: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1348: {

1352:   if (idx) {PetscFree(*idx);}
1353:   if (v)   {PetscFree(*v);}
1354:   return(0);
1355: }

1357: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1358: {
1359:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1360:   Mat            C;
1362:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1363:   PetscInt       *rows,*cols,bs2=a->bs2;
1364:   MatScalar      *array;

1367:   if (reuse == MAT_INPLACE_MATRIX && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1368:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1369:     PetscCalloc1(1+nbs,&col);

1371:     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1372:     MatCreate(PetscObjectComm((PetscObject)A),&C);
1373:     MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1374:     MatSetType(C,((PetscObject)A)->type_name);
1375:     MatSeqBAIJSetPreallocation(C,bs,0,col);
1376:     PetscFree(col);
1377:   } else {
1378:     C = *B;
1379:   }

1381:   array = a->a;
1382:   PetscMalloc2(bs,&rows,bs,&cols);
1383:   for (i=0; i<mbs; i++) {
1384:     cols[0] = i*bs;
1385:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1386:     len = ai[i+1] - ai[i];
1387:     for (j=0; j<len; j++) {
1388:       rows[0] = (*aj++)*bs;
1389:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1390:       MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);
1391:       array += bs2;
1392:     }
1393:   }
1394:   PetscFree2(rows,cols);

1396:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1397:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

1399:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1400:     *B = C;
1401:   } else {
1402:     MatHeaderMerge(A,&C);
1403:   }
1404:   return(0);
1405: }

1407: PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1408: {
1410:   Mat            Btrans;

1413:   *f   = PETSC_FALSE;
1414:   MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);
1415:   MatEqual_SeqBAIJ(B,Btrans,f);
1416:   MatDestroy(&Btrans);
1417:   return(0);
1418: }

1420: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1421: {
1422:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1424:   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1425:   int            fd;
1426:   PetscScalar    *aa;
1427:   FILE           *file;

1430:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1431:   PetscMalloc1(4+A->rmap->N,&col_lens);
1432:   col_lens[0] = MAT_FILE_CLASSID;

1434:   col_lens[1] = A->rmap->N;
1435:   col_lens[2] = A->cmap->n;
1436:   col_lens[3] = a->nz*bs2;

1438:   /* store lengths of each row and write (including header) to file */
1439:   count = 0;
1440:   for (i=0; i<a->mbs; i++) {
1441:     for (j=0; j<bs; j++) {
1442:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1443:     }
1444:   }
1445:   PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1446:   PetscFree(col_lens);

1448:   /* store column indices (zero start index) */
1449:   PetscMalloc1((a->nz+1)*bs2,&jj);
1450:   count = 0;
1451:   for (i=0; i<a->mbs; i++) {
1452:     for (j=0; j<bs; j++) {
1453:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1454:         for (l=0; l<bs; l++) {
1455:           jj[count++] = bs*a->j[k] + l;
1456:         }
1457:       }
1458:     }
1459:   }
1460:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1461:   PetscFree(jj);

1463:   /* store nonzero values */
1464:   PetscMalloc1((a->nz+1)*bs2,&aa);
1465:   count = 0;
1466:   for (i=0; i<a->mbs; i++) {
1467:     for (j=0; j<bs; j++) {
1468:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1469:         for (l=0; l<bs; l++) {
1470:           aa[count++] = a->a[bs2*k + l*bs + j];
1471:         }
1472:       }
1473:     }
1474:   }
1475:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1476:   PetscFree(aa);

1478:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1479:   if (file) {
1480:     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1481:   }
1482:   return(0);
1483: }

1485: static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1486: {
1488:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1489:   PetscInt       i,bs = A->rmap->bs,k;

1492:   PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1493:   for (i=0; i<a->mbs; i++) {
1494:     PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);
1495:     for (k=a->i[i]; k<a->i[i+1]; k++) {
1496:       PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);
1497:     }
1498:     PetscViewerASCIIPrintf(viewer,"\n");
1499:   }
1500:   PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1501:   return(0);
1502: }

1504: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1505: {
1506:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1507:   PetscErrorCode    ierr;
1508:   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1509:   PetscViewerFormat format;

1512:   if (A->structure_only) {
1513:     MatView_SeqBAIJ_ASCII_structonly(A,viewer);
1514:     return(0);
1515:   }

1517:   PetscViewerGetFormat(viewer,&format);
1518:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1519:     PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
1520:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1521:     const char *matname;
1522:     Mat        aij;
1523:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1524:     PetscObjectGetName((PetscObject)A,&matname);
1525:     PetscObjectSetName((PetscObject)aij,matname);
1526:     MatView(aij,viewer);
1527:     MatDestroy(&aij);
1528:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1529:       return(0);
1530:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1531:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1532:     for (i=0; i<a->mbs; i++) {
1533:       for (j=0; j<bs; j++) {
1534:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1535:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1536:           for (l=0; l<bs; l++) {
1537: #if defined(PETSC_USE_COMPLEX)
1538:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1539:               PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1540:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1541:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1542:               PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1543:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1544:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1545:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1546:             }
1547: #else
1548:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1549:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1550:             }
1551: #endif
1552:           }
1553:         }
1554:         PetscViewerASCIIPrintf(viewer,"\n");
1555:       }
1556:     }
1557:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1558:   } else {
1559:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1560:     for (i=0; i<a->mbs; i++) {
1561:       for (j=0; j<bs; j++) {
1562:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1563:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1564:           for (l=0; l<bs; l++) {
1565: #if defined(PETSC_USE_COMPLEX)
1566:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1567:               PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1568:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1569:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1570:               PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1571:                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1572:             } else {
1573:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));
1574:             }
1575: #else
1576:             PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);
1577: #endif
1578:           }
1579:         }
1580:         PetscViewerASCIIPrintf(viewer,"\n");
1581:       }
1582:     }
1583:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1584:   }
1585:   PetscViewerFlush(viewer);
1586:   return(0);
1587: }

1589:  #include <petscdraw.h>
1590: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1591: {
1592:   Mat               A = (Mat) Aa;
1593:   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1594:   PetscErrorCode    ierr;
1595:   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1596:   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1597:   MatScalar         *aa;
1598:   PetscViewer       viewer;
1599:   PetscViewerFormat format;

1602:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1603:   PetscViewerGetFormat(viewer,&format);
1604:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1606:   /* loop over matrix elements drawing boxes */

1608:   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1609:     PetscDrawCollectiveBegin(draw);
1610:     /* Blue for negative, Cyan for zero and  Red for positive */
1611:     color = PETSC_DRAW_BLUE;
1612:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1613:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1614:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1615:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1616:         aa  = a->a + j*bs2;
1617:         for (k=0; k<bs; k++) {
1618:           for (l=0; l<bs; l++) {
1619:             if (PetscRealPart(*aa++) >=  0.) continue;
1620:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1621:           }
1622:         }
1623:       }
1624:     }
1625:     color = PETSC_DRAW_CYAN;
1626:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1627:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1628:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1629:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1630:         aa  = a->a + j*bs2;
1631:         for (k=0; k<bs; k++) {
1632:           for (l=0; l<bs; l++) {
1633:             if (PetscRealPart(*aa++) != 0.) continue;
1634:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1635:           }
1636:         }
1637:       }
1638:     }
1639:     color = PETSC_DRAW_RED;
1640:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1641:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1642:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1643:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1644:         aa  = a->a + j*bs2;
1645:         for (k=0; k<bs; k++) {
1646:           for (l=0; l<bs; l++) {
1647:             if (PetscRealPart(*aa++) <= 0.) continue;
1648:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1649:           }
1650:         }
1651:       }
1652:     }
1653:     PetscDrawCollectiveEnd(draw);
1654:   } else {
1655:     /* use contour shading to indicate magnitude of values */
1656:     /* first determine max of all nonzero values */
1657:     PetscReal minv = 0.0, maxv = 0.0;
1658:     PetscDraw popup;

1660:     for (i=0; i<a->nz*a->bs2; i++) {
1661:       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1662:     }
1663:     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1664:     PetscDrawGetPopup(draw,&popup);
1665:     PetscDrawScalePopup(popup,0.0,maxv);

1667:     PetscDrawCollectiveBegin(draw);
1668:     for (i=0,row=0; i<mbs; i++,row+=bs) {
1669:       for (j=a->i[i]; j<a->i[i+1]; j++) {
1670:         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1671:         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1672:         aa  = a->a + j*bs2;
1673:         for (k=0; k<bs; k++) {
1674:           for (l=0; l<bs; l++) {
1675:             MatScalar v = *aa++;
1676:             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1677:             PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1678:           }
1679:         }
1680:       }
1681:     }
1682:     PetscDrawCollectiveEnd(draw);
1683:   }
1684:   return(0);
1685: }

1687: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1688: {
1690:   PetscReal      xl,yl,xr,yr,w,h;
1691:   PetscDraw      draw;
1692:   PetscBool      isnull;

1695:   PetscViewerDrawGetDraw(viewer,0,&draw);
1696:   PetscDrawIsNull(draw,&isnull);
1697:   if (isnull) return(0);

1699:   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1700:   xr  += w;          yr += h;        xl = -w;     yl = -h;
1701:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1702:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1703:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1704:   PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1705:   PetscDrawSave(draw);
1706:   return(0);
1707: }

1709: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1710: {
1712:   PetscBool      iascii,isbinary,isdraw;

1715:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1716:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1717:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1718:   if (iascii) {
1719:     MatView_SeqBAIJ_ASCII(A,viewer);
1720:   } else if (isbinary) {
1721:     MatView_SeqBAIJ_Binary(A,viewer);
1722:   } else if (isdraw) {
1723:     MatView_SeqBAIJ_Draw(A,viewer);
1724:   } else {
1725:     Mat B;
1726:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1727:     MatView(B,viewer);
1728:     MatDestroy(&B);
1729:   }
1730:   return(0);
1731: }


1734: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1735: {
1736:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1737:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1738:   PetscInt    *ai = a->i,*ailen = a->ilen;
1739:   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1740:   MatScalar   *ap,*aa = a->a;

1743:   for (k=0; k<m; k++) { /* loop over rows */
1744:     row = im[k]; brow = row/bs;
1745:     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1746:     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1747:     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1748:     nrow = ailen[brow];
1749:     for (l=0; l<n; l++) { /* loop over columns */
1750:       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1751:       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1752:       col  = in[l];
1753:       bcol = col/bs;
1754:       cidx = col%bs;
1755:       ridx = row%bs;
1756:       high = nrow;
1757:       low  = 0; /* assume unsorted */
1758:       while (high-low > 5) {
1759:         t = (low+high)/2;
1760:         if (rp[t] > bcol) high = t;
1761:         else             low  = t;
1762:       }
1763:       for (i=low; i<high; i++) {
1764:         if (rp[i] > bcol) break;
1765:         if (rp[i] == bcol) {
1766:           *v++ = ap[bs2*i+bs*cidx+ridx];
1767:           goto finished;
1768:         }
1769:       }
1770:       *v++ = 0.0;
1771: finished:;
1772:     }
1773:   }
1774:   return(0);
1775: }

1777: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1778: {
1779:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1780:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1781:   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1782:   PetscErrorCode    ierr;
1783:   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1784:   PetscBool         roworiented=a->roworiented;
1785:   const PetscScalar *value     = v;
1786:   MatScalar         *ap=NULL,*aa = a->a,*bap;

1789:   if (roworiented) {
1790:     stepval = (n-1)*bs;
1791:   } else {
1792:     stepval = (m-1)*bs;
1793:   }
1794:   for (k=0; k<m; k++) { /* loop over added rows */
1795:     row = im[k];
1796:     if (row < 0) continue;
1797: #if defined(PETSC_USE_DEBUG)
1798:     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1799: #endif
1800:     rp   = aj + ai[row];
1801:     if (!A->structure_only) ap = aa + bs2*ai[row];
1802:     rmax = imax[row];
1803:     nrow = ailen[row];
1804:     low  = 0;
1805:     high = nrow;
1806:     for (l=0; l<n; l++) { /* loop over added columns */
1807:       if (in[l] < 0) continue;
1808: #if defined(PETSC_USE_DEBUG)
1809:       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1810: #endif
1811:       col = in[l];
1812:       if (!A->structure_only) {
1813:         if (roworiented) {
1814:           value = v + (k*(stepval+bs) + l)*bs;
1815:         } else {
1816:           value = v + (l*(stepval+bs) + k)*bs;
1817:         }
1818:       }
1819:       if (col <= lastcol) low = 0;
1820:       else high = nrow;
1821:       lastcol = col;
1822:       while (high-low > 7) {
1823:         t = (low+high)/2;
1824:         if (rp[t] > col) high = t;
1825:         else             low  = t;
1826:       }
1827:       for (i=low; i<high; i++) {
1828:         if (rp[i] > col) break;
1829:         if (rp[i] == col) {
1830:           if (A->structure_only) goto noinsert2;
1831:           bap = ap +  bs2*i;
1832:           if (roworiented) {
1833:             if (is == ADD_VALUES) {
1834:               for (ii=0; ii<bs; ii++,value+=stepval) {
1835:                 for (jj=ii; jj<bs2; jj+=bs) {
1836:                   bap[jj] += *value++;
1837:                 }
1838:               }
1839:             } else {
1840:               for (ii=0; ii<bs; ii++,value+=stepval) {
1841:                 for (jj=ii; jj<bs2; jj+=bs) {
1842:                   bap[jj] = *value++;
1843:                 }
1844:               }
1845:             }
1846:           } else {
1847:             if (is == ADD_VALUES) {
1848:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1849:                 for (jj=0; jj<bs; jj++) {
1850:                   bap[jj] += value[jj];
1851:                 }
1852:                 bap += bs;
1853:               }
1854:             } else {
1855:               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1856:                 for (jj=0; jj<bs; jj++) {
1857:                   bap[jj]  = value[jj];
1858:                 }
1859:                 bap += bs;
1860:               }
1861:             }
1862:           }
1863:           goto noinsert2;
1864:         }
1865:       }
1866:       if (nonew == 1) goto noinsert2;
1867:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1868:       if (A->structure_only) {
1869:         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1870:       } else {
1871:         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1872:       }
1873:       N = nrow++ - 1; high++;
1874:       /* shift up all the later entries in this row */
1875:       for (ii=N; ii>=i; ii--) {
1876:         rp[ii+1] = rp[ii];
1877:         if (!A->structure_only) {
1878:           PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1879:         }
1880:       }
1881:       if (N >= i && !A->structure_only) {
1882:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1883:       }

1885:       rp[i] = col;
1886:       if (!A->structure_only) {
1887:         bap   = ap +  bs2*i;
1888:         if (roworiented) {
1889:           for (ii=0; ii<bs; ii++,value+=stepval) {
1890:             for (jj=ii; jj<bs2; jj+=bs) {
1891:               bap[jj] = *value++;
1892:             }
1893:           }
1894:         } else {
1895:           for (ii=0; ii<bs; ii++,value+=stepval) {
1896:             for (jj=0; jj<bs; jj++) {
1897:               *bap++ = *value++;
1898:             }
1899:           }
1900:         }
1901:       }
1902: noinsert2:;
1903:       low = i;
1904:     }
1905:     ailen[row] = nrow;
1906:   }
1907:   return(0);
1908: }

1910: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1911: {
1912:   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1913:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1914:   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1916:   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1917:   MatScalar      *aa  = a->a,*ap;
1918:   PetscReal      ratio=0.6;

1921:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

1923:   if (m) rmax = ailen[0];
1924:   for (i=1; i<mbs; i++) {
1925:     /* move each row back by the amount of empty slots (fshift) before it*/
1926:     fshift += imax[i-1] - ailen[i-1];
1927:     rmax    = PetscMax(rmax,ailen[i]);
1928:     if (fshift) {
1929:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1930:       N  = ailen[i];
1931:       for (j=0; j<N; j++) {
1932:         ip[j-fshift] = ip[j];
1933:         if (!A->structure_only) {
1934:           PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1935:         }
1936:       }
1937:     }
1938:     ai[i] = ai[i-1] + ailen[i-1];
1939:   }
1940:   if (mbs) {
1941:     fshift += imax[mbs-1] - ailen[mbs-1];
1942:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1943:   }

1945:   /* reset ilen and imax for each row */
1946:   a->nonzerorowcnt = 0;
1947:   if (A->structure_only) {
1948:     PetscFree2(a->imax,a->ilen);
1949:   } else { /* !A->structure_only */
1950:     for (i=0; i<mbs; i++) {
1951:       ailen[i] = imax[i] = ai[i+1] - ai[i];
1952:       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1953:     }
1954:   }
1955:   a->nz = ai[mbs];

1957:   /* diagonals may have moved, so kill the diagonal pointers */
1958:   a->idiagvalid = PETSC_FALSE;
1959:   if (fshift && a->diag) {
1960:     PetscFree(a->diag);
1961:     PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));
1962:     a->diag = 0;
1963:   }
1964:   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1965:   PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1966:   PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1967:   PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);

1969:   A->info.mallocs    += a->reallocs;
1970:   a->reallocs         = 0;
1971:   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1972:   a->rmax             = rmax;

1974:   if (!A->structure_only) {
1975:     MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);
1976:   }
1977:   return(0);
1978: }

1980: /*
1981:    This function returns an array of flags which indicate the locations of contiguous
1982:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1983:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1984:    Assume: sizes should be long enough to hold all the values.
1985: */
1986: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1987: {
1988:   PetscInt  i,j,k,row;
1989:   PetscBool flg;

1992:   for (i=0,j=0; i<n; j++) {
1993:     row = idx[i];
1994:     if (row%bs!=0) { /* Not the begining of a block */
1995:       sizes[j] = 1;
1996:       i++;
1997:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1998:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1999:       i++;
2000:     } else { /* Begining of the block, so check if the complete block exists */
2001:       flg = PETSC_TRUE;
2002:       for (k=1; k<bs; k++) {
2003:         if (row+k != idx[i+k]) { /* break in the block */
2004:           flg = PETSC_FALSE;
2005:           break;
2006:         }
2007:       }
2008:       if (flg) { /* No break in the bs */
2009:         sizes[j] = bs;
2010:         i       += bs;
2011:       } else {
2012:         sizes[j] = 1;
2013:         i++;
2014:       }
2015:     }
2016:   }
2017:   *bs_max = j;
2018:   return(0);
2019: }

2021: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2022: {
2023:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2024:   PetscErrorCode    ierr;
2025:   PetscInt          i,j,k,count,*rows;
2026:   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2027:   PetscScalar       zero = 0.0;
2028:   MatScalar         *aa;
2029:   const PetscScalar *xx;
2030:   PetscScalar       *bb;

2033:   /* fix right hand side if needed */
2034:   if (x && b) {
2035:     VecGetArrayRead(x,&xx);
2036:     VecGetArray(b,&bb);
2037:     for (i=0; i<is_n; i++) {
2038:       bb[is_idx[i]] = diag*xx[is_idx[i]];
2039:     }
2040:     VecRestoreArrayRead(x,&xx);
2041:     VecRestoreArray(b,&bb);
2042:   }

2044:   /* Make a copy of the IS and  sort it */
2045:   /* allocate memory for rows,sizes */
2046:   PetscMalloc2(is_n,&rows,2*is_n,&sizes);

2048:   /* copy IS values to rows, and sort them */
2049:   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2050:   PetscSortInt(is_n,rows);

2052:   if (baij->keepnonzeropattern) {
2053:     for (i=0; i<is_n; i++) sizes[i] = 1;
2054:     bs_max          = is_n;
2055:   } else {
2056:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2057:     A->nonzerostate++;
2058:   }

2060:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2061:     row = rows[j];
2062:     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2063:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2064:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2065:     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2066:       if (diag != (PetscScalar)0.0) {
2067:         if (baij->ilen[row/bs] > 0) {
2068:           baij->ilen[row/bs]       = 1;
2069:           baij->j[baij->i[row/bs]] = row/bs;

2071:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
2072:         }
2073:         /* Now insert all the diagonal values for this bs */
2074:         for (k=0; k<bs; k++) {
2075:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2076:         }
2077:       } else { /* (diag == 0.0) */
2078:         baij->ilen[row/bs] = 0;
2079:       } /* end (diag == 0.0) */
2080:     } else { /* (sizes[i] != bs) */
2081: #if defined(PETSC_USE_DEBUG)
2082:       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2083: #endif
2084:       for (k=0; k<count; k++) {
2085:         aa[0] =  zero;
2086:         aa   += bs;
2087:       }
2088:       if (diag != (PetscScalar)0.0) {
2089:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2090:       }
2091:     }
2092:   }

2094:   PetscFree2(rows,sizes);
2095:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2096:   return(0);
2097: }

2099: PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2100: {
2101:   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2102:   PetscErrorCode    ierr;
2103:   PetscInt          i,j,k,count;
2104:   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2105:   PetscScalar       zero = 0.0;
2106:   MatScalar         *aa;
2107:   const PetscScalar *xx;
2108:   PetscScalar       *bb;
2109:   PetscBool         *zeroed,vecs = PETSC_FALSE;

2112:   /* fix right hand side if needed */
2113:   if (x && b) {
2114:     VecGetArrayRead(x,&xx);
2115:     VecGetArray(b,&bb);
2116:     vecs = PETSC_TRUE;
2117:   }

2119:   /* zero the columns */
2120:   PetscCalloc1(A->rmap->n,&zeroed);
2121:   for (i=0; i<is_n; i++) {
2122:     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2123:     zeroed[is_idx[i]] = PETSC_TRUE;
2124:   }
2125:   for (i=0; i<A->rmap->N; i++) {
2126:     if (!zeroed[i]) {
2127:       row = i/bs;
2128:       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2129:         for (k=0; k<bs; k++) {
2130:           col = bs*baij->j[j] + k;
2131:           if (zeroed[col]) {
2132:             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2133:             if (vecs) bb[i] -= aa[0]*xx[col];
2134:             aa[0] = 0.0;
2135:           }
2136:         }
2137:       }
2138:     } else if (vecs) bb[i] = diag*xx[i];
2139:   }
2140:   PetscFree(zeroed);
2141:   if (vecs) {
2142:     VecRestoreArrayRead(x,&xx);
2143:     VecRestoreArray(b,&bb);
2144:   }

2146:   /* zero the rows */
2147:   for (i=0; i<is_n; i++) {
2148:     row   = is_idx[i];
2149:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2150:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2151:     for (k=0; k<count; k++) {
2152:       aa[0] =  zero;
2153:       aa   += bs;
2154:     }
2155:     if (diag != (PetscScalar)0.0) {
2156:       (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);
2157:     }
2158:   }
2159:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2160:   return(0);
2161: }

2163: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2164: {
2165:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2166:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2167:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2168:   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2170:   PetscInt       ridx,cidx,bs2=a->bs2;
2171:   PetscBool      roworiented=a->roworiented;
2172:   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;

2175:   for (k=0; k<m; k++) { /* loop over added rows */
2176:     row  = im[k];
2177:     brow = row/bs;
2178:     if (row < 0) continue;
2179: #if defined(PETSC_USE_DEBUG)
2180:     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2181: #endif
2182:     rp   = aj + ai[brow];
2183:     if (!A->structure_only) ap = aa + bs2*ai[brow];
2184:     rmax = imax[brow];
2185:     nrow = ailen[brow];
2186:     low  = 0;
2187:     high = nrow;
2188:     for (l=0; l<n; l++) { /* loop over added columns */
2189:       if (in[l] < 0) continue;
2190: #if defined(PETSC_USE_DEBUG)
2191:       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2192: #endif
2193:       col  = in[l]; bcol = col/bs;
2194:       ridx = row % bs; cidx = col % bs;
2195:       if (!A->structure_only) {
2196:         if (roworiented) {
2197:           value = v[l + k*n];
2198:         } else {
2199:           value = v[k + l*m];
2200:         }
2201:       }
2202:       if (col <= lastcol) low = 0; else high = nrow;
2203:       lastcol = col;
2204:       while (high-low > 7) {
2205:         t = (low+high)/2;
2206:         if (rp[t] > bcol) high = t;
2207:         else              low  = t;
2208:       }
2209:       for (i=low; i<high; i++) {
2210:         if (rp[i] > bcol) break;
2211:         if (rp[i] == bcol) {
2212:           bap = ap +  bs2*i + bs*cidx + ridx;
2213:           if (!A->structure_only) {
2214:             if (is == ADD_VALUES) *bap += value;
2215:             else                  *bap  = value;
2216:           }
2217:           goto noinsert1;
2218:         }
2219:       }
2220:       if (nonew == 1) goto noinsert1;
2221:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2222:       if (A->structure_only) {
2223:         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2224:       } else {
2225:         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2226:       }
2227:       N = nrow++ - 1; high++;
2228:       /* shift up all the later entries in this row */
2229:       for (ii=N; ii>=i; ii--) {
2230:         rp[ii+1] = rp[ii];
2231:         if (!A->structure_only) {
2232:           PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2233:         }
2234:       }
2235:       if (N>=i && !A->structure_only) {
2236:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2237:       }
2238:       rp[i]                      = bcol;
2239:       if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value;
2240:       a->nz++;
2241:       A->nonzerostate++;
2242: noinsert1:;
2243:       low = i;
2244:     }
2245:     ailen[brow] = nrow;
2246:   }
2247:   return(0);
2248: }

2250: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2251: {
2252:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2253:   Mat            outA;
2255:   PetscBool      row_identity,col_identity;

2258:   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2259:   ISIdentity(row,&row_identity);
2260:   ISIdentity(col,&col_identity);
2261:   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");

2263:   outA            = inA;
2264:   inA->factortype = MAT_FACTOR_LU;
2265:   PetscFree(inA->solvertype);
2266:   PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);

2268:   MatMarkDiagonal_SeqBAIJ(inA);

2270:   PetscObjectReference((PetscObject)row);
2271:   ISDestroy(&a->row);
2272:   a->row = row;
2273:   PetscObjectReference((PetscObject)col);
2274:   ISDestroy(&a->col);
2275:   a->col = col;

2277:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2278:   ISDestroy(&a->icol);
2279:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2280:   PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);

2282:   MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));
2283:   if (!a->solve_work) {
2284:     PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);
2285:     PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2286:   }
2287:   MatLUFactorNumeric(outA,inA,info);
2288:   return(0);
2289: }

2291: PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2292: {
2293:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2294:   PetscInt    i,nz,mbs;

2297:   nz  = baij->maxnz;
2298:   mbs = baij->mbs;
2299:   for (i=0; i<nz; i++) {
2300:     baij->j[i] = indices[i];
2301:   }
2302:   baij->nz = nz;
2303:   for (i=0; i<mbs; i++) {
2304:     baij->ilen[i] = baij->imax[i];
2305:   }
2306:   return(0);
2307: }

2309: /*@
2310:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2311:        in the matrix.

2313:   Input Parameters:
2314: +  mat - the SeqBAIJ matrix
2315: -  indices - the column indices

2317:   Level: advanced

2319:   Notes:
2320:     This can be called if you have precomputed the nonzero structure of the
2321:   matrix and want to provide it to the matrix object to improve the performance
2322:   of the MatSetValues() operation.

2324:     You MUST have set the correct numbers of nonzeros per row in the call to
2325:   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.

2327:     MUST be called before any calls to MatSetValues();

2329: @*/
2330: PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2331: {

2337:   PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2338:   return(0);
2339: }

2341: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2342: {
2343:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2345:   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2346:   PetscReal      atmp;
2347:   PetscScalar    *x,zero = 0.0;
2348:   MatScalar      *aa;
2349:   PetscInt       ncols,brow,krow,kcol;

2352:   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2353:   bs  = A->rmap->bs;
2354:   aa  = a->a;
2355:   ai  = a->i;
2356:   aj  = a->j;
2357:   mbs = a->mbs;

2359:   VecSet(v,zero);
2360:   VecGetArray(v,&x);
2361:   VecGetLocalSize(v,&n);
2362:   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2363:   for (i=0; i<mbs; i++) {
2364:     ncols = ai[1] - ai[0]; ai++;
2365:     brow  = bs*i;
2366:     for (j=0; j<ncols; j++) {
2367:       for (kcol=0; kcol<bs; kcol++) {
2368:         for (krow=0; krow<bs; krow++) {
2369:           atmp = PetscAbsScalar(*aa);aa++;
2370:           row  = brow + krow;   /* row index */
2371:           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2372:         }
2373:       }
2374:       aj++;
2375:     }
2376:   }
2377:   VecRestoreArray(v,&x);
2378:   return(0);
2379: }

2381: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2382: {

2386:   /* If the two matrices have the same copy implementation, use fast copy. */
2387:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2388:     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2389:     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2390:     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;

2392:     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2393:     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2394:     PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));
2395:     PetscObjectStateIncrease((PetscObject)B);
2396:   } else {
2397:     MatCopy_Basic(A,B,str);
2398:   }
2399:   return(0);
2400: }

2402: PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2403: {

2407:   MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);
2408:   return(0);
2409: }

2411: PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2412: {
2413:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;

2416:   *array = a->a;
2417:   return(0);
2418: }

2420: PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2421: {
2423:   return(0);
2424: }

2426: PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2427: {
2428:   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2429:   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2430:   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;

2434:   /* Set the number of nonzeros in the new matrix */
2435:   MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);
2436:   return(0);
2437: }

2439: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2440: {
2441:   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2443:   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2444:   PetscBLASInt   one=1;

2447:   if (str == SAME_NONZERO_PATTERN) {
2448:     PetscScalar  alpha = a;
2449:     PetscBLASInt bnz;
2450:     PetscBLASIntCast(x->nz*bs2,&bnz);
2451:     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2452:     PetscObjectStateIncrease((PetscObject)Y);
2453:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2454:     MatAXPY_Basic(Y,a,X,str);
2455:   } else {
2456:     Mat      B;
2457:     PetscInt *nnz;
2458:     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2459:     PetscMalloc1(Y->rmap->N,&nnz);
2460:     MatCreate(PetscObjectComm((PetscObject)Y),&B);
2461:     PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2462:     MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2463:     MatSetBlockSizesFromMats(B,Y,Y);
2464:     MatSetType(B,(MatType) ((PetscObject)Y)->type_name);
2465:     MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);
2466:     MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2467:     MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2468:     MatHeaderReplace(Y,&B);
2469:     PetscFree(nnz);
2470:   }
2471:   return(0);
2472: }

2474: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2475: {
2476:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2477:   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2478:   MatScalar   *aa = a->a;

2481:   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2482:   return(0);
2483: }

2485: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2486: {
2487:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2488:   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2489:   MatScalar   *aa = a->a;

2492:   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2493:   return(0);
2494: }

2496: /*
2497:     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2498: */
2499: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2500: {
2501:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2503:   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2504:   PetscInt       nz = a->i[m],row,*jj,mr,col;

2507:   *nn = n;
2508:   if (!ia) return(0);
2509:   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2510:   else {
2511:     PetscCalloc1(n+1,&collengths);
2512:     PetscMalloc1(n+1,&cia);
2513:     PetscMalloc1(nz+1,&cja);
2514:     jj   = a->j;
2515:     for (i=0; i<nz; i++) {
2516:       collengths[jj[i]]++;
2517:     }
2518:     cia[0] = oshift;
2519:     for (i=0; i<n; i++) {
2520:       cia[i+1] = cia[i] + collengths[i];
2521:     }
2522:     PetscMemzero(collengths,n*sizeof(PetscInt));
2523:     jj   = a->j;
2524:     for (row=0; row<m; row++) {
2525:       mr = a->i[row+1] - a->i[row];
2526:       for (i=0; i<mr; i++) {
2527:         col = *jj++;

2529:         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2530:       }
2531:     }
2532:     PetscFree(collengths);
2533:     *ia  = cia; *ja = cja;
2534:   }
2535:   return(0);
2536: }

2538: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2539: {

2543:   if (!ia) return(0);
2544:   PetscFree(*ia);
2545:   PetscFree(*ja);
2546:   return(0);
2547: }

2549: /*
2550:  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2551:  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2552:  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2553:  */
2554: PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2555: {
2556:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2558:   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2559:   PetscInt       nz = a->i[m],row,*jj,mr,col;
2560:   PetscInt       *cspidx;

2563:   *nn = n;
2564:   if (!ia) return(0);

2566:   PetscCalloc1(n+1,&collengths);
2567:   PetscMalloc1(n+1,&cia);
2568:   PetscMalloc1(nz+1,&cja);
2569:   PetscMalloc1(nz+1,&cspidx);
2570:   jj   = a->j;
2571:   for (i=0; i<nz; i++) {
2572:     collengths[jj[i]]++;
2573:   }
2574:   cia[0] = oshift;
2575:   for (i=0; i<n; i++) {
2576:     cia[i+1] = cia[i] + collengths[i];
2577:   }
2578:   PetscMemzero(collengths,n*sizeof(PetscInt));
2579:   jj   = a->j;
2580:   for (row=0; row<m; row++) {
2581:     mr = a->i[row+1] - a->i[row];
2582:     for (i=0; i<mr; i++) {
2583:       col = *jj++;
2584:       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2585:       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2586:     }
2587:   }
2588:   PetscFree(collengths);
2589:   *ia    = cia; *ja = cja;
2590:   *spidx = cspidx;
2591:   return(0);
2592: }

2594: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2595: {

2599:   MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);
2600:   PetscFree(*spidx);
2601:   return(0);
2602: }

2604: PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2605: {
2607:   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;

2610:   if (!Y->preallocated || !aij->nz) {
2611:     MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);
2612:   }
2613:   MatShift_Basic(Y,a);
2614:   return(0);
2615: }

2617: /* -------------------------------------------------------------------*/
2618: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2619:                                        MatGetRow_SeqBAIJ,
2620:                                        MatRestoreRow_SeqBAIJ,
2621:                                        MatMult_SeqBAIJ_N,
2622:                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2623:                                        MatMultTranspose_SeqBAIJ,
2624:                                        MatMultTransposeAdd_SeqBAIJ,
2625:                                        0,
2626:                                        0,
2627:                                        0,
2628:                                /* 10*/ 0,
2629:                                        MatLUFactor_SeqBAIJ,
2630:                                        0,
2631:                                        0,
2632:                                        MatTranspose_SeqBAIJ,
2633:                                /* 15*/ MatGetInfo_SeqBAIJ,
2634:                                        MatEqual_SeqBAIJ,
2635:                                        MatGetDiagonal_SeqBAIJ,
2636:                                        MatDiagonalScale_SeqBAIJ,
2637:                                        MatNorm_SeqBAIJ,
2638:                                /* 20*/ 0,
2639:                                        MatAssemblyEnd_SeqBAIJ,
2640:                                        MatSetOption_SeqBAIJ,
2641:                                        MatZeroEntries_SeqBAIJ,
2642:                                /* 24*/ MatZeroRows_SeqBAIJ,
2643:                                        0,
2644:                                        0,
2645:                                        0,
2646:                                        0,
2647:                                /* 29*/ MatSetUp_SeqBAIJ,
2648:                                        0,
2649:                                        0,
2650:                                        0,
2651:                                        0,
2652:                                /* 34*/ MatDuplicate_SeqBAIJ,
2653:                                        0,
2654:                                        0,
2655:                                        MatILUFactor_SeqBAIJ,
2656:                                        0,
2657:                                /* 39*/ MatAXPY_SeqBAIJ,
2658:                                        MatCreateSubMatrices_SeqBAIJ,
2659:                                        MatIncreaseOverlap_SeqBAIJ,
2660:                                        MatGetValues_SeqBAIJ,
2661:                                        MatCopy_SeqBAIJ,
2662:                                /* 44*/ 0,
2663:                                        MatScale_SeqBAIJ,
2664:                                        MatShift_SeqBAIJ,
2665:                                        0,
2666:                                        MatZeroRowsColumns_SeqBAIJ,
2667:                                /* 49*/ 0,
2668:                                        MatGetRowIJ_SeqBAIJ,
2669:                                        MatRestoreRowIJ_SeqBAIJ,
2670:                                        MatGetColumnIJ_SeqBAIJ,
2671:                                        MatRestoreColumnIJ_SeqBAIJ,
2672:                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2673:                                        0,
2674:                                        0,
2675:                                        0,
2676:                                        MatSetValuesBlocked_SeqBAIJ,
2677:                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2678:                                        MatDestroy_SeqBAIJ,
2679:                                        MatView_SeqBAIJ,
2680:                                        0,
2681:                                        0,
2682:                                /* 64*/ 0,
2683:                                        0,
2684:                                        0,
2685:                                        0,
2686:                                        0,
2687:                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2688:                                        0,
2689:                                        MatConvert_Basic,
2690:                                        0,
2691:                                        0,
2692:                                /* 74*/ 0,
2693:                                        MatFDColoringApply_BAIJ,
2694:                                        0,
2695:                                        0,
2696:                                        0,
2697:                                /* 79*/ 0,
2698:                                        0,
2699:                                        0,
2700:                                        0,
2701:                                        MatLoad_SeqBAIJ,
2702:                                /* 84*/ 0,
2703:                                        0,
2704:                                        0,
2705:                                        0,
2706:                                        0,
2707:                                /* 89*/ 0,
2708:                                        0,
2709:                                        0,
2710:                                        0,
2711:                                        0,
2712:                                /* 94*/ 0,
2713:                                        0,
2714:                                        0,
2715:                                        0,
2716:                                        0,
2717:                                /* 99*/ 0,
2718:                                        0,
2719:                                        0,
2720:                                        0,
2721:                                        0,
2722:                                /*104*/ 0,
2723:                                        MatRealPart_SeqBAIJ,
2724:                                        MatImaginaryPart_SeqBAIJ,
2725:                                        0,
2726:                                        0,
2727:                                /*109*/ 0,
2728:                                        0,
2729:                                        0,
2730:                                        0,
2731:                                        MatMissingDiagonal_SeqBAIJ,
2732:                                /*114*/ 0,
2733:                                        0,
2734:                                        0,
2735:                                        0,
2736:                                        0,
2737:                                /*119*/ 0,
2738:                                        0,
2739:                                        MatMultHermitianTranspose_SeqBAIJ,
2740:                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2741:                                        0,
2742:                                /*124*/ 0,
2743:                                        0,
2744:                                        MatInvertBlockDiagonal_SeqBAIJ,
2745:                                        0,
2746:                                        0,
2747:                                /*129*/ 0,
2748:                                        0,
2749:                                        0,
2750:                                        0,
2751:                                        0,
2752:                                /*134*/ 0,
2753:                                        0,
2754:                                        0,
2755:                                        0,
2756:                                        0,
2757:                                /*139*/ MatSetBlockSizes_Default,
2758:                                        0,
2759:                                        0,
2760:                                        MatFDColoringSetUp_SeqXAIJ,
2761:                                        0,
2762:                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2763:                                        MatDestroySubMatrices_SeqBAIJ
2764: };

2766: PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2767: {
2768:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2769:   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;

2773:   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");

2775:   /* allocate space for values if not already there */
2776:   if (!aij->saved_values) {
2777:     PetscMalloc1(nz+1,&aij->saved_values);
2778:     PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));
2779:   }

2781:   /* copy values over */
2782:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2783:   return(0);
2784: }

2786: PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2787: {
2788:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2790:   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;

2793:   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2794:   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");

2796:   /* copy values over */
2797:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2798:   return(0);
2799: }

2801: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2802: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);

2804: PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2805: {
2806:   Mat_SeqBAIJ    *b;
2808:   PetscInt       i,mbs,nbs,bs2;
2809:   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;

2812:   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2813:   if (nz == MAT_SKIP_ALLOCATION) {
2814:     skipallocation = PETSC_TRUE;
2815:     nz             = 0;
2816:   }

2818:   MatSetBlockSize(B,PetscAbs(bs));
2819:   PetscLayoutSetUp(B->rmap);
2820:   PetscLayoutSetUp(B->cmap);
2821:   PetscLayoutGetBlockSize(B->rmap,&bs);

2823:   B->preallocated = PETSC_TRUE;

2825:   mbs = B->rmap->n/bs;
2826:   nbs = B->cmap->n/bs;
2827:   bs2 = bs*bs;

2829:   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);

2831:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2832:   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2833:   if (nnz) {
2834:     for (i=0; i<mbs; i++) {
2835:       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2836:       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2837:     }
2838:   }

2840:   b    = (Mat_SeqBAIJ*)B->data;
2841:   PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2842:   PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);
2843:   PetscOptionsEnd();

2845:   if (!flg) {
2846:     switch (bs) {
2847:     case 1:
2848:       B->ops->mult    = MatMult_SeqBAIJ_1;
2849:       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2850:       break;
2851:     case 2:
2852:       B->ops->mult    = MatMult_SeqBAIJ_2;
2853:       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2854:       break;
2855:     case 3:
2856:       B->ops->mult    = MatMult_SeqBAIJ_3;
2857:       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2858:       break;
2859:     case 4:
2860:       B->ops->mult    = MatMult_SeqBAIJ_4;
2861:       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2862:       break;
2863:     case 5:
2864:       B->ops->mult    = MatMult_SeqBAIJ_5;
2865:       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2866:       break;
2867:     case 6:
2868:       B->ops->mult    = MatMult_SeqBAIJ_6;
2869:       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2870:       break;
2871:     case 7:
2872:       B->ops->mult    = MatMult_SeqBAIJ_7;
2873:       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2874:       break;
2875:     case 11:
2876:       B->ops->mult    = MatMult_SeqBAIJ_11;
2877:       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2878:       break;
2879:     case 15:
2880:       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2881:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2882:       break;
2883:     default:
2884:       B->ops->mult    = MatMult_SeqBAIJ_N;
2885:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2886:       break;
2887:     }
2888:   }
2889:   B->ops->sor = MatSOR_SeqBAIJ;
2890:   b->mbs = mbs;
2891:   b->nbs = nbs;
2892:   if (!skipallocation) {
2893:     if (!b->imax) {
2894:       PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);
2895:       PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));

2897:       b->free_imax_ilen = PETSC_TRUE;
2898:     }
2899:     /* b->ilen will count nonzeros in each block row so far. */
2900:     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2901:     if (!nnz) {
2902:       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2903:       else if (nz < 0) nz = 1;
2904:       for (i=0; i<mbs; i++) b->imax[i] = nz;
2905:       nz = nz*mbs;
2906:     } else {
2907:       nz = 0;
2908:       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2909:     }

2911:     /* allocate the matrix space */
2912:     MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2913:     if (B->structure_only) {
2914:       PetscMalloc1(nz,&b->j);
2915:       PetscMalloc1(B->rmap->N+1,&b->i);
2916:       PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));
2917:     } else {
2918:       PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);
2919:       PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2920:       PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2921:     }
2922:     PetscMemzero(b->j,nz*sizeof(PetscInt));

2924:     if (B->structure_only) {
2925:       b->singlemalloc = PETSC_FALSE;
2926:       b->free_a       = PETSC_FALSE;
2927:     } else {
2928:       b->singlemalloc = PETSC_TRUE;
2929:       b->free_a       = PETSC_TRUE;
2930:     }
2931:     b->free_ij = PETSC_TRUE;

2933:     b->i[0] = 0;
2934:     for (i=1; i<mbs+1; i++) {
2935:       b->i[i] = b->i[i-1] + b->imax[i-1];
2936:     }

2938:   } else {
2939:     b->free_a  = PETSC_FALSE;
2940:     b->free_ij = PETSC_FALSE;
2941:   }

2943:   b->bs2              = bs2;
2944:   b->mbs              = mbs;
2945:   b->nz               = 0;
2946:   b->maxnz            = nz;
2947:   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2948:   B->was_assembled    = PETSC_FALSE;
2949:   B->assembled        = PETSC_FALSE;
2950:   if (realalloc) {MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);}
2951:   return(0);
2952: }

2954: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2955: {
2956:   PetscInt       i,m,nz,nz_max=0,*nnz;
2957:   PetscScalar    *values=0;
2958:   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;

2962:   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2963:   PetscLayoutSetBlockSize(B->rmap,bs);
2964:   PetscLayoutSetBlockSize(B->cmap,bs);
2965:   PetscLayoutSetUp(B->rmap);
2966:   PetscLayoutSetUp(B->cmap);
2967:   PetscLayoutGetBlockSize(B->rmap,&bs);
2968:   m    = B->rmap->n/bs;

2970:   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2971:   PetscMalloc1(m+1, &nnz);
2972:   for (i=0; i<m; i++) {
2973:     nz = ii[i+1]- ii[i];
2974:     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2975:     nz_max = PetscMax(nz_max, nz);
2976:     nnz[i] = nz;
2977:   }
2978:   MatSeqBAIJSetPreallocation(B,bs,0,nnz);
2979:   PetscFree(nnz);

2981:   values = (PetscScalar*)V;
2982:   if (!values) {
2983:     PetscCalloc1(bs*bs*(nz_max+1),&values);
2984:   }
2985:   for (i=0; i<m; i++) {
2986:     PetscInt          ncols  = ii[i+1] - ii[i];
2987:     const PetscInt    *icols = jj + ii[i];
2988:     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2989:     if (!roworiented) {
2990:       MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
2991:     } else {
2992:       PetscInt j;
2993:       for (j=0; j<ncols; j++) {
2994:         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2995:         MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);
2996:       }
2997:     }
2998:   }
2999:   if (!V) { PetscFree(values); }
3000:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3001:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3002:   MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3003:   return(0);
3004: }

3006: /*MC
3007:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3008:    block sparse compressed row format.

3010:    Options Database Keys:
3011: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()

3013:   Level: beginner

3015: .seealso: MatCreateSeqBAIJ()
3016: M*/

3018: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);

3020: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3021: {
3023:   PetscMPIInt    size;
3024:   Mat_SeqBAIJ    *b;

3027:   MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
3028:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

3030:   PetscNewLog(B,&b);
3031:   B->data = (void*)b;
3032:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));

3034:   b->row          = 0;
3035:   b->col          = 0;
3036:   b->icol         = 0;
3037:   b->reallocs     = 0;
3038:   b->saved_values = 0;

3040:   b->roworiented        = PETSC_TRUE;
3041:   b->nonew              = 0;
3042:   b->diag               = 0;
3043:   B->spptr              = 0;
3044:   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3045:   b->keepnonzeropattern = PETSC_FALSE;

3047:   PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);
3048:   PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);
3049:   PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);
3050:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);
3051:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);
3052:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);
3053:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);
3054:   PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3055:   PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);
3056: #if defined(PETSC_HAVE_HYPRE)
3057:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);
3058: #endif
3059:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3060:   return(0);
3061: }

3063: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3064: {
3065:   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3067:   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;

3070:   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");

3072:   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3073:     c->imax           = a->imax;
3074:     c->ilen           = a->ilen;
3075:     c->free_imax_ilen = PETSC_FALSE;
3076:   } else {
3077:     PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);
3078:     PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));
3079:     for (i=0; i<mbs; i++) {
3080:       c->imax[i] = a->imax[i];
3081:       c->ilen[i] = a->ilen[i];
3082:     }
3083:     c->free_imax_ilen = PETSC_TRUE;
3084:   }

3086:   /* allocate the matrix space */
3087:   if (mallocmatspace) {
3088:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3089:       PetscCalloc1(bs2*nz,&c->a);
3090:       PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));

3092:       c->i            = a->i;
3093:       c->j            = a->j;
3094:       c->singlemalloc = PETSC_FALSE;
3095:       c->free_a       = PETSC_TRUE;
3096:       c->free_ij      = PETSC_FALSE;
3097:       c->parent       = A;
3098:       C->preallocated = PETSC_TRUE;
3099:       C->assembled    = PETSC_TRUE;

3101:       PetscObjectReference((PetscObject)A);
3102:       MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3103:       MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3104:     } else {
3105:       PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);
3106:       PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));

3108:       c->singlemalloc = PETSC_TRUE;
3109:       c->free_a       = PETSC_TRUE;
3110:       c->free_ij      = PETSC_TRUE;

3112:       PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
3113:       if (mbs > 0) {
3114:         PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
3115:         if (cpvalues == MAT_COPY_VALUES) {
3116:           PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
3117:         } else {
3118:           PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
3119:         }
3120:       }
3121:       C->preallocated = PETSC_TRUE;
3122:       C->assembled    = PETSC_TRUE;
3123:     }
3124:   }

3126:   c->roworiented = a->roworiented;
3127:   c->nonew       = a->nonew;

3129:   PetscLayoutReference(A->rmap,&C->rmap);
3130:   PetscLayoutReference(A->cmap,&C->cmap);

3132:   c->bs2         = a->bs2;
3133:   c->mbs         = a->mbs;
3134:   c->nbs         = a->nbs;

3136:   if (a->diag) {
3137:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3138:       c->diag      = a->diag;
3139:       c->free_diag = PETSC_FALSE;
3140:     } else {
3141:       PetscMalloc1(mbs+1,&c->diag);
3142:       PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));
3143:       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3144:       c->free_diag = PETSC_TRUE;
3145:     }
3146:   } else c->diag = 0;

3148:   c->nz         = a->nz;
3149:   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3150:   c->solve_work = NULL;
3151:   c->mult_work  = NULL;
3152:   c->sor_workt  = NULL;
3153:   c->sor_work   = NULL;

3155:   c->compressedrow.use   = a->compressedrow.use;
3156:   c->compressedrow.nrows = a->compressedrow.nrows;
3157:   if (a->compressedrow.use) {
3158:     i    = a->compressedrow.nrows;
3159:     PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);
3160:     PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));
3161:     PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
3162:     PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
3163:   } else {
3164:     c->compressedrow.use    = PETSC_FALSE;
3165:     c->compressedrow.i      = NULL;
3166:     c->compressedrow.rindex = NULL;
3167:   }
3168:   C->nonzerostate = A->nonzerostate;

3170:   PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3171:   return(0);
3172: }

3174: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3175: {

3179:   MatCreate(PetscObjectComm((PetscObject)A),B);
3180:   MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3181:   MatSetType(*B,MATSEQBAIJ);
3182:   MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3183:   return(0);
3184: }

3186: PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3187: {
3188:   Mat_SeqBAIJ    *a;
3190:   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3191:   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3192:   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3193:   PetscInt       *masked,nmask,tmp,bs2,ishift;
3194:   PetscMPIInt    size;
3195:   int            fd;
3196:   PetscScalar    *aa;
3197:   MPI_Comm       comm;

3200:   /* force binary viewer to load .info file if it has not yet done so */
3201:   PetscViewerSetUp(viewer);
3202:   PetscObjectGetComm((PetscObject)viewer,&comm);
3203:   PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");
3204:   PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
3205:   PetscOptionsEnd();
3206:   if (bs < 0) bs = 1;
3207:   bs2  = bs*bs;

3209:   MPI_Comm_size(comm,&size);
3210:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3211:   PetscViewerBinaryGetDescriptor(viewer,&fd);
3212:   PetscBinaryRead(fd,header,4,PETSC_INT);
3213:   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3214:   M = header[1]; N = header[2]; nz = header[3];

3216:   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3217:   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");

3219:   /*
3220:      This code adds extra rows to make sure the number of rows is
3221:     divisible by the blocksize
3222:   */
3223:   mbs        = M/bs;
3224:   extra_rows = bs - M + bs*(mbs);
3225:   if (extra_rows == bs) extra_rows = 0;
3226:   else mbs++;
3227:   if (extra_rows) {
3228:     PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3229:   }

3231:   /* Set global sizes if not already set */
3232:   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3233:     MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3234:   } else { /* Check if the matrix global sizes are correct */
3235:     MatGetSize(newmat,&rows,&cols);
3236:     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3237:       MatGetLocalSize(newmat,&rows,&cols);
3238:     }
3239:     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3240:   }

3242:   /* read in row lengths */
3243:   PetscMalloc1(M+extra_rows,&rowlengths);
3244:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
3245:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

3247:   /* read in column indices */
3248:   PetscMalloc1(nz+extra_rows,&jj);
3249:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
3250:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

3252:   /* loop over row lengths determining block row lengths */
3253:   PetscCalloc1(mbs,&browlengths);
3254:   PetscMalloc2(mbs,&mask,mbs,&masked);
3255:   PetscMemzero(mask,mbs*sizeof(PetscInt));
3256:   rowcount = 0;
3257:   nzcount  = 0;
3258:   for (i=0; i<mbs; i++) {
3259:     nmask = 0;
3260:     for (j=0; j<bs; j++) {
3261:       kmax = rowlengths[rowcount];
3262:       for (k=0; k<kmax; k++) {
3263:         tmp = jj[nzcount++]/bs;
3264:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3265:       }
3266:       rowcount++;
3267:     }
3268:     browlengths[i] += nmask;
3269:     /* zero out the mask elements we set */
3270:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3271:   }

3273:   /* Do preallocation  */
3274:   MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);
3275:   a    = (Mat_SeqBAIJ*)newmat->data;

3277:   /* set matrix "i" values */
3278:   a->i[0] = 0;
3279:   for (i=1; i<= mbs; i++) {
3280:     a->i[i]      = a->i[i-1] + browlengths[i-1];
3281:     a->ilen[i-1] = browlengths[i-1];
3282:   }
3283:   a->nz = 0;
3284:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

3286:   /* read in nonzero values */
3287:   PetscMalloc1(nz+extra_rows,&aa);
3288:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3289:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

3291:   /* set "a" and "j" values into matrix */
3292:   nzcount = 0; jcount = 0;
3293:   for (i=0; i<mbs; i++) {
3294:     nzcountb = nzcount;
3295:     nmask    = 0;
3296:     for (j=0; j<bs; j++) {
3297:       kmax = rowlengths[i*bs+j];
3298:       for (k=0; k<kmax; k++) {
3299:         tmp = jj[nzcount++]/bs;
3300:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3301:       }
3302:     }
3303:     /* sort the masked values */
3304:     PetscSortInt(nmask,masked);

3306:     /* set "j" values into matrix */
3307:     maskcount = 1;
3308:     for (j=0; j<nmask; j++) {
3309:       a->j[jcount++]  = masked[j];
3310:       mask[masked[j]] = maskcount++;
3311:     }
3312:     /* set "a" values into matrix */
3313:     ishift = bs2*a->i[i];
3314:     for (j=0; j<bs; j++) {
3315:       kmax = rowlengths[i*bs+j];
3316:       for (k=0; k<kmax; k++) {
3317:         tmp       = jj[nzcountb]/bs;
3318:         block     = mask[tmp] - 1;
3319:         point     = jj[nzcountb] - bs*tmp;
3320:         idx       = ishift + bs2*block + j + bs*point;
3321:         a->a[idx] = (MatScalar)aa[nzcountb++];
3322:       }
3323:     }
3324:     /* zero out the mask elements we set */
3325:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3326:   }
3327:   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

3329:   PetscFree(rowlengths);
3330:   PetscFree(browlengths);
3331:   PetscFree(aa);
3332:   PetscFree(jj);
3333:   PetscFree2(mask,masked);

3335:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
3336:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
3337:   return(0);
3338: }

3340: /*@C
3341:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3342:    compressed row) format.  For good matrix assembly performance the
3343:    user should preallocate the matrix storage by setting the parameter nz
3344:    (or the array nnz).  By setting these parameters accurately, performance
3345:    during matrix assembly can be increased by more than a factor of 50.

3347:    Collective on MPI_Comm

3349:    Input Parameters:
3350: +  comm - MPI communicator, set to PETSC_COMM_SELF
3351: .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3352:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3353: .  m - number of rows
3354: .  n - number of columns
3355: .  nz - number of nonzero blocks  per block row (same for all rows)
3356: -  nnz - array containing the number of nonzero blocks in the various block rows
3357:          (possibly different for each block row) or NULL

3359:    Output Parameter:
3360: .  A - the matrix

3362:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3363:    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3364:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

3366:    Options Database Keys:
3367: .   -mat_no_unroll - uses code that does not unroll the loops in the
3368:                      block calculations (much slower)
3369: .    -mat_block_size - size of the blocks to use

3371:    Level: intermediate

3373:    Notes:
3374:    The number of rows and columns must be divisible by blocksize.

3376:    If the nnz parameter is given then the nz parameter is ignored

3378:    A nonzero block is any block that as 1 or more nonzeros in it

3380:    The block AIJ format is fully compatible with standard Fortran 77
3381:    storage.  That is, the stored row and column indices can begin at
3382:    either one (as in Fortran) or zero.  See the users' manual for details.

3384:    Specify the preallocated storage with either nz or nnz (not both).
3385:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3386:    allocation.  See Users-Manual: ch_mat for details.
3387:    matrices.

3389: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3390: @*/
3391: PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3392: {

3396:   MatCreate(comm,A);
3397:   MatSetSizes(*A,m,n,m,n);
3398:   MatSetType(*A,MATSEQBAIJ);
3399:   MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);
3400:   return(0);
3401: }

3403: /*@C
3404:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3405:    per row in the matrix. For good matrix assembly performance the
3406:    user should preallocate the matrix storage by setting the parameter nz
3407:    (or the array nnz).  By setting these parameters accurately, performance
3408:    during matrix assembly can be increased by more than a factor of 50.

3410:    Collective on MPI_Comm

3412:    Input Parameters:
3413: +  B - the matrix
3414: .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3415:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3416: .  nz - number of block nonzeros per block row (same for all rows)
3417: -  nnz - array containing the number of block nonzeros in the various block rows
3418:          (possibly different for each block row) or NULL

3420:    Options Database Keys:
3421: .   -mat_no_unroll - uses code that does not unroll the loops in the
3422:                      block calculations (much slower)
3423: .   -mat_block_size - size of the blocks to use

3425:    Level: intermediate

3427:    Notes:
3428:    If the nnz parameter is given then the nz parameter is ignored

3430:    You can call MatGetInfo() to get information on how effective the preallocation was;
3431:    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3432:    You can also run with the option -info and look for messages with the string
3433:    malloc in them to see if additional memory allocation was needed.

3435:    The block AIJ format is fully compatible with standard Fortran 77
3436:    storage.  That is, the stored row and column indices can begin at
3437:    either one (as in Fortran) or zero.  See the users' manual for details.

3439:    Specify the preallocated storage with either nz or nnz (not both).
3440:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3441:    allocation.  See Users-Manual: ch_mat for details.

3443: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3444: @*/
3445: PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3446: {

3453:   PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3454:   return(0);
3455: }

3457: /*@C
3458:    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3459:    (the default sequential PETSc format).

3461:    Collective on MPI_Comm

3463:    Input Parameters:
3464: +  B - the matrix
3465: .  i - the indices into j for the start of each local row (starts with zero)
3466: .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3467: -  v - optional values in the matrix

3469:    Level: developer

3471:    Notes:
3472:    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3473:    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3474:    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3475:    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3476:    block column and the second index is over columns within a block.

3478: .keywords: matrix, aij, compressed row, sparse

3480: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3481: @*/
3482: PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3483: {

3490:   PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3491:   return(0);
3492: }


3495: /*@
3496:      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.

3498:      Collective on MPI_Comm

3500:    Input Parameters:
3501: +  comm - must be an MPI communicator of size 1
3502: .  bs - size of block
3503: .  m - number of rows
3504: .  n - number of columns
3505: .  i - row indices
3506: .  j - column indices
3507: -  a - matrix values

3509:    Output Parameter:
3510: .  mat - the matrix

3512:    Level: advanced

3514:    Notes:
3515:        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3516:     once the matrix is destroyed

3518:        You cannot set new nonzero locations into this matrix, that will generate an error.

3520:        The i and j indices are 0 based

3522:        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).

3524:       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3525:       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3526:       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3527:       with column-major ordering within blocks.

3529: .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()

3531: @*/
3532: PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3533: {
3535:   PetscInt       ii;
3536:   Mat_SeqBAIJ    *baij;

3539:   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3540:   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");

3542:   MatCreate(comm,mat);
3543:   MatSetSizes(*mat,m,n,m,n);
3544:   MatSetType(*mat,MATSEQBAIJ);
3545:   MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);
3546:   baij = (Mat_SeqBAIJ*)(*mat)->data;
3547:   PetscMalloc2(m,&baij->imax,m,&baij->ilen);
3548:   PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));

3550:   baij->i = i;
3551:   baij->j = j;
3552:   baij->a = a;

3554:   baij->singlemalloc = PETSC_FALSE;
3555:   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3556:   baij->free_a       = PETSC_FALSE;
3557:   baij->free_ij      = PETSC_FALSE;

3559:   for (ii=0; ii<m; ii++) {
3560:     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3561: #if defined(PETSC_USE_DEBUG)
3562:     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3563: #endif
3564:   }
3565: #if defined(PETSC_USE_DEBUG)
3566:   for (ii=0; ii<baij->i[m]; ii++) {
3567:     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3568:     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3569:   }
3570: #endif

3572:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3573:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3574:   return(0);
3575: }

3577: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3578: {
3580:   PetscMPIInt    size;

3583:   MPI_Comm_size(comm,&size);
3584:   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3585:     MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);
3586:   } else {
3587:     MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);
3588:   }
3589:   return(0);
3590: }