Actual source code: baijsolvnat2.c
petsc-3.12.5 2020-03-29
1: #include <../src/mat/impls/baij/seq/baij.h>
2: #include <petsc/private/kernels/blockinvert.h>
4: /*
5: Special case where the matrix was ILU(0) factored in the natural
6: ordering. This eliminates the need for the column and row permutation.
7: */
8: PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
9: {
10: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
11: const PetscInt n =a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
12: PetscErrorCode ierr;
13: const MatScalar *aa=a->a,*v;
14: PetscScalar *x,s1,s2,x1,x2;
15: const PetscScalar *b;
16: PetscInt jdx,idt,idx,nz,i;
19: VecGetArrayRead(bb,&b);
20: VecGetArray(xx,&x);
22: /* forward solve the lower triangular */
23: idx = 0;
24: x[0] = b[0]; x[1] = b[1];
25: for (i=1; i<n; i++) {
26: v = aa + 4*ai[i];
27: vi = aj + ai[i];
28: nz = diag[i] - ai[i];
29: idx += 2;
30: s1 = b[idx];s2 = b[1+idx];
31: while (nz--) {
32: jdx = 2*(*vi++);
33: x1 = x[jdx];x2 = x[1+jdx];
34: s1 -= v[0]*x1 + v[2]*x2;
35: s2 -= v[1]*x1 + v[3]*x2;
36: v += 4;
37: }
38: x[idx] = s1;
39: x[1+idx] = s2;
40: }
41: /* backward solve the upper triangular */
42: for (i=n-1; i>=0; i--) {
43: v = aa + 4*diag[i] + 4;
44: vi = aj + diag[i] + 1;
45: nz = ai[i+1] - diag[i] - 1;
46: idt = 2*i;
47: s1 = x[idt]; s2 = x[1+idt];
48: while (nz--) {
49: idx = 2*(*vi++);
50: x1 = x[idx]; x2 = x[1+idx];
51: s1 -= v[0]*x1 + v[2]*x2;
52: s2 -= v[1]*x1 + v[3]*x2;
53: v += 4;
54: }
55: v = aa + 4*diag[i];
56: x[idt] = v[0]*s1 + v[2]*s2;
57: x[1+idt] = v[1]*s1 + v[3]*s2;
58: }
60: VecRestoreArrayRead(bb,&b);
61: VecRestoreArray(xx,&x);
62: PetscLogFlops(2.0*4*(a->nz) - 2.0*A->cmap->n);
63: return(0);
64: }
66: PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
67: {
68: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
69: const PetscInt n = a->mbs,*vi,*ai=a->i,*aj=a->j,*adiag=a->diag;
70: PetscInt i,k,nz,idx,idt,jdx;
71: PetscErrorCode ierr;
72: const MatScalar *aa=a->a,*v;
73: PetscScalar *x,s1,s2,x1,x2;
74: const PetscScalar *b;
77: VecGetArrayRead(bb,&b);
78: VecGetArray(xx,&x);
79: /* forward solve the lower triangular */
80: idx = 0;
81: x[0] = b[idx]; x[1] = b[1+idx];
82: for (i=1; i<n; i++) {
83: v = aa + 4*ai[i];
84: vi = aj + ai[i];
85: nz = ai[i+1] - ai[i];
86: idx = 2*i;
87: s1 = b[idx];s2 = b[1+idx];
88: PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
89: PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
90: for (k=0; k<nz; k++) {
91: jdx = 2*vi[k];
92: x1 = x[jdx];x2 = x[1+jdx];
93: s1 -= v[0]*x1 + v[2]*x2;
94: s2 -= v[1]*x1 + v[3]*x2;
95: v += 4;
96: }
97: x[idx] = s1;
98: x[1+idx] = s2;
99: }
101: /* backward solve the upper triangular */
102: for (i=n-1; i>=0; i--) {
103: v = aa + 4*(adiag[i+1]+1);
104: vi = aj + adiag[i+1]+1;
105: nz = adiag[i] - adiag[i+1]-1;
106: idt = 2*i;
107: s1 = x[idt]; s2 = x[1+idt];
108: PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
109: PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
110: for (k=0; k<nz; k++) {
111: idx = 2*vi[k];
112: x1 = x[idx]; x2 = x[1+idx];
113: s1 -= v[0]*x1 + v[2]*x2;
114: s2 -= v[1]*x1 + v[3]*x2;
115: v += 4;
116: }
117: /* x = inv_diagonal*x */
118: x[idt] = v[0]*s1 + v[2]*s2;
119: x[1+idt] = v[1]*s1 + v[3]*s2;
120: }
122: VecRestoreArrayRead(bb,&b);
123: VecRestoreArray(xx,&x);
124: PetscLogFlops(2.0*4*(a->nz) - 2.0*A->cmap->n);
125: return(0);
126: }
128: PetscErrorCode MatForwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
129: {
130: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
131: const PetscInt n = a->mbs,*vi,*ai=a->i,*aj=a->j;
132: PetscInt i,k,nz,idx,jdx;
133: PetscErrorCode ierr;
134: const MatScalar *aa=a->a,*v;
135: PetscScalar *x,s1,s2,x1,x2;
136: const PetscScalar *b;
139: VecGetArrayRead(bb,&b);
140: VecGetArray(xx,&x);
141: /* forward solve the lower triangular */
142: idx = 0;
143: x[0] = b[idx]; x[1] = b[1+idx];
144: for (i=1; i<n; i++) {
145: v = aa + 4*ai[i];
146: vi = aj + ai[i];
147: nz = ai[i+1] - ai[i];
148: idx = 2*i;
149: s1 = b[idx];s2 = b[1+idx];
150: PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
151: PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
152: for (k=0; k<nz; k++) {
153: jdx = 2*vi[k];
154: x1 = x[jdx];x2 = x[1+jdx];
155: s1 -= v[0]*x1 + v[2]*x2;
156: s2 -= v[1]*x1 + v[3]*x2;
157: v += 4;
158: }
159: x[idx] = s1;
160: x[1+idx] = s2;
161: }
164: VecRestoreArrayRead(bb,&b);
165: VecRestoreArray(xx,&x);
166: PetscLogFlops(4.0*(a->nz) - A->cmap->n);
167: return(0);
168: }
170: PetscErrorCode MatBackwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
171: {
172: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
173: const PetscInt n = a->mbs,*vi,*aj=a->j,*adiag=a->diag;
174: PetscInt i,k,nz,idx,idt;
175: PetscErrorCode ierr;
176: const MatScalar *aa=a->a,*v;
177: PetscScalar *x,s1,s2,x1,x2;
178: const PetscScalar *b;
181: VecGetArrayRead(bb,&b);
182: VecGetArray(xx,&x);
184: /* backward solve the upper triangular */
185: for (i=n-1; i>=0; i--) {
186: v = aa + 4*(adiag[i+1]+1);
187: vi = aj + adiag[i+1]+1;
188: nz = adiag[i] - adiag[i+1]-1;
189: idt = 2*i;
190: s1 = b[idt]; s2 = b[1+idt];
191: PetscPrefetchBlock(vi+nz,nz,0,PETSC_PREFETCH_HINT_NTA);
192: PetscPrefetchBlock(v+4*nz,4*nz,0,PETSC_PREFETCH_HINT_NTA);
193: for (k=0; k<nz; k++) {
194: idx = 2*vi[k];
195: x1 = x[idx]; x2 = x[1+idx];
196: s1 -= v[0]*x1 + v[2]*x2;
197: s2 -= v[1]*x1 + v[3]*x2;
198: v += 4;
199: }
200: /* x = inv_diagonal*x */
201: x[idt] = v[0]*s1 + v[2]*s2;
202: x[1+idt] = v[1]*s1 + v[3]*s2;
203: }
205: VecRestoreArrayRead(bb,&b);
206: VecRestoreArray(xx,&x);
207: PetscLogFlops(4.0*a->nz - A->cmap->n);
208: return(0);
209: }