Actual source code: baijsolvnat11.c
petsc-3.14.6 2021-03-30
1: #include <../src/mat/impls/baij/seq/baij.h>
2: #include <petsc/private/kernels/blockinvert.h>
4: /* Block operations are done by accessing one column at at time */
5: /* Default MatSolve for block size 11 */
7: PetscErrorCode MatSolve_SeqBAIJ_11_NaturalOrdering(Mat A,Vec bb,Vec xx)
8: {
9: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
10: PetscErrorCode ierr;
11: const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*adiag=a->diag,*vi,bs=A->rmap->bs,bs2=a->bs2;
12: PetscInt i,k,nz,idx,idt,m;
13: const MatScalar *aa=a->a,*v;
14: PetscScalar s[11];
15: PetscScalar *x,xv;
16: const PetscScalar *b;
19: VecGetArrayRead(bb,&b);
20: VecGetArray(xx,&x);
22: /* forward solve the lower triangular */
23: for (i=0; i<n; i++) {
24: v = aa + bs2*ai[i];
25: vi = aj + ai[i];
26: nz = ai[i+1] - ai[i];
27: idt = bs*i;
28: x[idt] = b[idt]; x[1+idt] = b[1+idt]; x[2+idt] = b[2+idt]; x[3+idt] = b[3+idt]; x[4+idt] = b[4+idt];
29: x[5+idt] = b[5+idt]; x[6+idt] = b[6+idt]; x[7+idt] = b[7+idt]; x[8+idt] = b[8+idt]; x[9+idt] = b[9+idt];
30: x[10+idt] = b[10+idt];
31: for (m=0; m<nz; m++) {
32: idx = bs*vi[m];
33: for (k=0; k<11; k++) {
34: xv = x[k + idx];
35: x[idt] -= v[0]*xv;
36: x[1+idt] -= v[1]*xv;
37: x[2+idt] -= v[2]*xv;
38: x[3+idt] -= v[3]*xv;
39: x[4+idt] -= v[4]*xv;
40: x[5+idt] -= v[5]*xv;
41: x[6+idt] -= v[6]*xv;
42: x[7+idt] -= v[7]*xv;
43: x[8+idt] -= v[8]*xv;
44: x[9+idt] -= v[9]*xv;
45: x[10+idt] -= v[10]*xv;
46: v += 11;
47: }
48: }
49: }
50: /* backward solve the upper triangular */
51: for (i=n-1; i>=0; i--) {
52: v = aa + bs2*(adiag[i+1]+1);
53: vi = aj + adiag[i+1]+1;
54: nz = adiag[i] - adiag[i+1] - 1;
55: idt = bs*i;
56: s[0] = x[idt]; s[1] = x[1+idt]; s[2] = x[2+idt]; s[3] = x[3+idt]; s[4] = x[4+idt];
57: s[5] = x[5+idt]; s[6] = x[6+idt]; s[7] = x[7+idt]; s[8] = x[8+idt]; s[9] = x[9+idt];
58: s[10] = x[10+idt];
60: for (m=0; m<nz; m++) {
61: idx = bs*vi[m];
62: for (k=0; k<11; k++) {
63: xv = x[k + idx];
64: s[0] -= v[0]*xv;
65: s[1] -= v[1]*xv;
66: s[2] -= v[2]*xv;
67: s[3] -= v[3]*xv;
68: s[4] -= v[4]*xv;
69: s[5] -= v[5]*xv;
70: s[6] -= v[6]*xv;
71: s[7] -= v[7]*xv;
72: s[8] -= v[8]*xv;
73: s[9] -= v[9]*xv;
74: s[10] -= v[10]*xv;
75: v += 11;
76: }
77: }
78: PetscArrayzero(x+idt,bs);
79: for (k=0; k<11; k++) {
80: x[idt] += v[0]*s[k];
81: x[1+idt] += v[1]*s[k];
82: x[2+idt] += v[2]*s[k];
83: x[3+idt] += v[3]*s[k];
84: x[4+idt] += v[4]*s[k];
85: x[5+idt] += v[5]*s[k];
86: x[6+idt] += v[6]*s[k];
87: x[7+idt] += v[7]*s[k];
88: x[8+idt] += v[8]*s[k];
89: x[9+idt] += v[9]*s[k];
90: x[10+idt] += v[10]*s[k];
91: v += 11;
92: }
93: }
94: VecRestoreArrayRead(bb,&b);
95: VecRestoreArray(xx,&x);
96: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
97: return(0);
98: }