Actual source code: baijsolvnat3.c

petsc-3.14.6 2021-03-30
Report Typos and Errors
  1: #include <../src/mat/impls/baij/seq/baij.h>
  2: #include <petsc/private/kernels/blockinvert.h>

  4: /*
  5:       Special case where the matrix was ILU(0) factored in the natural
  6:    ordering. This eliminates the need for the column and row permutation.
  7: */
  8: PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
  9: {
 10:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
 11:   const PetscInt    n  =a->mbs,*ai=a->i,*aj=a->j;
 12:   PetscErrorCode    ierr;
 13:   const PetscInt    *diag = a->diag,*vi;
 14:   const MatScalar   *aa   =a->a,*v;
 15:   PetscScalar       *x,s1,s2,s3,x1,x2,x3;
 16:   const PetscScalar *b;
 17:   PetscInt          jdx,idt,idx,nz,i;

 20:   VecGetArrayRead(bb,&b);
 21:   VecGetArray(xx,&x);

 23:   /* forward solve the lower triangular */
 24:   idx  = 0;
 25:   x[0] = b[0]; x[1] = b[1]; x[2] = b[2];
 26:   for (i=1; i<n; i++) {
 27:     v    =  aa      + 9*ai[i];
 28:     vi   =  aj      + ai[i];
 29:     nz   =  diag[i] - ai[i];
 30:     idx +=  3;
 31:     s1   =  b[idx];s2 = b[1+idx];s3 = b[2+idx];
 32:     while (nz--) {
 33:       jdx = 3*(*vi++);
 34:       x1  = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];
 35:       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
 36:       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
 37:       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
 38:       v  += 9;
 39:     }
 40:     x[idx]   = s1;
 41:     x[1+idx] = s2;
 42:     x[2+idx] = s3;
 43:   }
 44:   /* backward solve the upper triangular */
 45:   for (i=n-1; i>=0; i--) {
 46:     v   = aa + 9*diag[i] + 9;
 47:     vi  = aj + diag[i] + 1;
 48:     nz  = ai[i+1] - diag[i] - 1;
 49:     idt = 3*i;
 50:     s1  = x[idt];  s2 = x[1+idt];
 51:     s3  = x[2+idt];
 52:     while (nz--) {
 53:       idx = 3*(*vi++);
 54:       x1  = x[idx];   x2 = x[1+idx];x3    = x[2+idx];
 55:       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
 56:       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
 57:       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
 58:       v  += 9;
 59:     }
 60:     v        = aa +  9*diag[i];
 61:     x[idt]   = v[0]*s1 + v[3]*s2 + v[6]*s3;
 62:     x[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3;
 63:     x[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3;
 64:   }

 66:   VecRestoreArrayRead(bb,&b);
 67:   VecRestoreArray(xx,&x);
 68:   PetscLogFlops(2.0*9*(a->nz) - 3.0*A->cmap->n);
 69:   return(0);
 70: }

 72: PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
 73: {
 74:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
 75:   const PetscInt    n  =a->mbs,*vi,*ai=a->i,*aj=a->j,*adiag=a->diag;
 76:   PetscErrorCode    ierr;
 77:   PetscInt          i,k,nz,idx,jdx,idt;
 78:   const PetscInt    bs = A->rmap->bs,bs2 = a->bs2;
 79:   const MatScalar   *aa=a->a,*v;
 80:   PetscScalar       *x;
 81:   const PetscScalar *b;
 82:   PetscScalar       s1,s2,s3,x1,x2,x3;

 85:   VecGetArrayRead(bb,&b);
 86:   VecGetArray(xx,&x);
 87:   /* forward solve the lower triangular */
 88:   idx  = 0;
 89:   x[0] = b[idx]; x[1] = b[1+idx];x[2] = b[2+idx];
 90:   for (i=1; i<n; i++) {
 91:     v   = aa + bs2*ai[i];
 92:     vi  = aj + ai[i];
 93:     nz  = ai[i+1] - ai[i];
 94:     idx = bs*i;
 95:     s1  = b[idx];s2 = b[1+idx];s3 = b[2+idx];
 96:     for (k=0; k<nz; k++) {
 97:       jdx = bs*vi[k];
 98:       x1  = x[jdx];x2 = x[1+jdx]; x3 =x[2+jdx];
 99:       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
100:       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
101:       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;

103:       v +=  bs2;
104:     }

106:     x[idx]   = s1;
107:     x[1+idx] = s2;
108:     x[2+idx] = s3;
109:   }

111:   /* backward solve the upper triangular */
112:   for (i=n-1; i>=0; i--) {
113:     v   = aa + bs2*(adiag[i+1]+1);
114:     vi  = aj + adiag[i+1]+1;
115:     nz  = adiag[i] - adiag[i+1]-1;
116:     idt = bs*i;
117:     s1  = x[idt];  s2 = x[1+idt];s3 = x[2+idt];

119:     for (k=0; k<nz; k++) {
120:       idx = bs*vi[k];
121:       x1  = x[idx];   x2 = x[1+idx]; x3 = x[2+idx];
122:       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
123:       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
124:       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;

126:       v +=  bs2;
127:     }
128:     /* x = inv_diagonal*x */
129:     x[idt]   = v[0]*s1 + v[3]*s2 + v[6]*s3;
130:     x[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3;
131:     x[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3;

133:   }

135:   VecRestoreArrayRead(bb,&b);
136:   VecRestoreArray(xx,&x);
137:   PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
138:   return(0);
139: }

141: PetscErrorCode MatForwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
142: {
143:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
144:   const PetscInt    n  =a->mbs,*vi,*ai=a->i,*aj=a->j;
145:   PetscErrorCode    ierr;
146:   PetscInt          i,k,nz,idx,jdx;
147:   const PetscInt    bs = A->rmap->bs,bs2 = a->bs2;
148:   const MatScalar   *aa=a->a,*v;
149:   PetscScalar       *x;
150:   const PetscScalar *b;
151:   PetscScalar       s1,s2,s3,x1,x2,x3;

154:   VecGetArrayRead(bb,&b);
155:   VecGetArray(xx,&x);
156:   /* forward solve the lower triangular */
157:   idx  = 0;
158:   x[0] = b[idx]; x[1] = b[1+idx];x[2] = b[2+idx];
159:   for (i=1; i<n; i++) {
160:     v   = aa + bs2*ai[i];
161:     vi  = aj + ai[i];
162:     nz  = ai[i+1] - ai[i];
163:     idx = bs*i;
164:     s1  = b[idx];s2 = b[1+idx];s3 = b[2+idx];
165:     for (k=0; k<nz; k++) {
166:       jdx = bs*vi[k];
167:       x1  = x[jdx];x2 = x[1+jdx]; x3 =x[2+jdx];
168:       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
169:       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
170:       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;

172:       v +=  bs2;
173:     }

175:     x[idx]   = s1;
176:     x[1+idx] = s2;
177:     x[2+idx] = s3;
178:   }

180:   VecRestoreArrayRead(bb,&b);
181:   VecRestoreArray(xx,&x);
182:   PetscLogFlops(1.0*bs2*(a->nz) - bs*A->cmap->n);
183:   return(0);
184: }


187: PetscErrorCode MatBackwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
188: {
189:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
190:   const PetscInt    n  =a->mbs,*vi,*aj=a->j,*adiag=a->diag;
191:   PetscErrorCode    ierr;
192:   PetscInt          i,k,nz,idx,idt;
193:   const PetscInt    bs = A->rmap->bs,bs2 = a->bs2;
194:   const MatScalar   *aa=a->a,*v;
195:   PetscScalar       *x;
196:   const PetscScalar *b;
197:   PetscScalar       s1,s2,s3,x1,x2,x3;

200:   VecGetArrayRead(bb,&b);
201:   VecGetArray(xx,&x);

203:   /* backward solve the upper triangular */
204:   for (i=n-1; i>=0; i--) {
205:     v   = aa + bs2*(adiag[i+1]+1);
206:     vi  = aj + adiag[i+1]+1;
207:     nz  = adiag[i] - adiag[i+1]-1;
208:     idt = bs*i;
209:     s1  = b[idt];  s2 = b[1+idt];s3 = b[2+idt];

211:     for (k=0; k<nz; k++) {
212:       idx = bs*vi[k];
213:       x1  = x[idx];   x2 = x[1+idx]; x3 = x[2+idx];
214:       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
215:       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
216:       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;

218:       v +=  bs2;
219:     }
220:     /* x = inv_diagonal*x */
221:     x[idt]   = v[0]*s1 + v[3]*s2 + v[6]*s3;
222:     x[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3;
223:     x[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3;

225:   }

227:   VecRestoreArrayRead(bb,&b);
228:   VecRestoreArray(xx,&x);
229:   PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
230:   return(0);
231: }