Actual source code: baijsolvtran3.c
petsc-3.11.4 2019-09-28
1: #include <../src/mat/impls/baij/seq/baij.h>
2: #include <petsc/private/kernels/blockinvert.h>
4: PetscErrorCode MatSolveTranspose_SeqBAIJ_3_inplace(Mat A,Vec bb,Vec xx)
5: {
6: Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data;
7: IS iscol=a->col,isrow=a->row;
8: PetscErrorCode ierr;
9: const PetscInt *r,*c,*rout,*cout;
10: const PetscInt *diag=a->diag,n=a->mbs,*vi,*ai=a->i,*aj=a->j;
11: PetscInt i,nz,idx,idt,ii,ic,ir,oidx;
12: const MatScalar *aa=a->a,*v;
13: PetscScalar s1,s2,s3,x1,x2,x3,*x,*t;
14: const PetscScalar *b;
17: VecGetArrayRead(bb,&b);
18: VecGetArray(xx,&x);
19: t = a->solve_work;
21: ISGetIndices(isrow,&rout); r = rout;
22: ISGetIndices(iscol,&cout); c = cout;
24: /* copy the b into temp work space according to permutation */
25: ii = 0;
26: for (i=0; i<n; i++) {
27: ic = 3*c[i];
28: t[ii] = b[ic];
29: t[ii+1] = b[ic+1];
30: t[ii+2] = b[ic+2];
31: ii += 3;
32: }
34: /* forward solve the U^T */
35: idx = 0;
36: for (i=0; i<n; i++) {
38: v = aa + 9*diag[i];
39: /* multiply by the inverse of the block diagonal */
40: x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx];
41: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3;
42: s2 = v[3]*x1 + v[4]*x2 + v[5]*x3;
43: s3 = v[6]*x1 + v[7]*x2 + v[8]*x3;
44: v += 9;
46: vi = aj + diag[i] + 1;
47: nz = ai[i+1] - diag[i] - 1;
48: while (nz--) {
49: oidx = 3*(*vi++);
50: t[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3;
51: t[oidx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3;
52: t[oidx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
53: v += 9;
54: }
55: t[idx] = s1;t[1+idx] = s2; t[2+idx] = s3;
56: idx += 3;
57: }
58: /* backward solve the L^T */
59: for (i=n-1; i>=0; i--) {
60: v = aa + 9*diag[i] - 9;
61: vi = aj + diag[i] - 1;
62: nz = diag[i] - ai[i];
63: idt = 3*i;
64: s1 = t[idt]; s2 = t[1+idt]; s3 = t[2+idt];
65: while (nz--) {
66: idx = 3*(*vi--);
67: t[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3;
68: t[idx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3;
69: t[idx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
70: v -= 9;
71: }
72: }
74: /* copy t into x according to permutation */
75: ii = 0;
76: for (i=0; i<n; i++) {
77: ir = 3*r[i];
78: x[ir] = t[ii];
79: x[ir+1] = t[ii+1];
80: x[ir+2] = t[ii+2];
81: ii += 3;
82: }
84: ISRestoreIndices(isrow,&rout);
85: ISRestoreIndices(iscol,&cout);
86: VecRestoreArrayRead(bb,&b);
87: VecRestoreArray(xx,&x);
88: PetscLogFlops(2.0*9*(a->nz) - 3.0*A->cmap->n);
89: return(0);
90: }
92: PetscErrorCode MatSolveTranspose_SeqBAIJ_3(Mat A,Vec bb,Vec xx)
93: {
94: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
95: PetscErrorCode ierr;
96: IS iscol=a->col,isrow=a->row;
97: const PetscInt n =a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
98: const PetscInt *r,*c,*rout,*cout;
99: PetscInt nz,idx,idt,j,i,oidx,ii,ic,ir;
100: const PetscInt bs =A->rmap->bs,bs2=a->bs2;
101: const MatScalar *aa=a->a,*v;
102: PetscScalar s1,s2,s3,x1,x2,x3,*x,*t;
103: const PetscScalar *b;
106: VecGetArrayRead(bb,&b);
107: VecGetArray(xx,&x);
108: t = a->solve_work;
110: ISGetIndices(isrow,&rout); r = rout;
111: ISGetIndices(iscol,&cout); c = cout;
113: /* copy b into temp work space according to permutation */
114: for (i=0; i<n; i++) {
115: ii = bs*i; ic = bs*c[i];
116: t[ii] = b[ic]; t[ii+1] = b[ic+1]; t[ii+2] = b[ic+2];
117: }
119: /* forward solve the U^T */
120: idx = 0;
121: for (i=0; i<n; i++) {
122: v = aa + bs2*diag[i];
123: /* multiply by the inverse of the block diagonal */
124: x1 = t[idx]; x2 = t[1+idx]; x3 = t[2+idx];
125: s1 = v[0]*x1 + v[1]*x2 + v[2]*x3;
126: s2 = v[3]*x1 + v[4]*x2 + v[5]*x3;
127: s3 = v[6]*x1 + v[7]*x2 + v[8]*x3;
128: v -= bs2;
130: vi = aj + diag[i] - 1;
131: nz = diag[i] - diag[i+1] - 1;
132: for (j=0; j>-nz; j--) {
133: oidx = bs*vi[j];
134: t[oidx] -= v[0]*s1 + v[1]*s2 + v[2]*s3;
135: t[oidx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3;
136: t[oidx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
137: v -= bs2;
138: }
139: t[idx] = s1;t[1+idx] = s2; t[2+idx] = s3;
140: idx += bs;
141: }
142: /* backward solve the L^T */
143: for (i=n-1; i>=0; i--) {
144: v = aa + bs2*ai[i];
145: vi = aj + ai[i];
146: nz = ai[i+1] - ai[i];
147: idt = bs*i;
148: s1 = t[idt]; s2 = t[1+idt]; s3 = t[2+idt];
149: for (j=0; j<nz; j++) {
150: idx = bs*vi[j];
151: t[idx] -= v[0]*s1 + v[1]*s2 + v[2]*s3;
152: t[idx+1] -= v[3]*s1 + v[4]*s2 + v[5]*s3;
153: t[idx+2] -= v[6]*s1 + v[7]*s2 + v[8]*s3;
154: v += bs2;
155: }
156: }
158: /* copy t into x according to permutation */
159: for (i=0; i<n; i++) {
160: ii = bs*i; ir = bs*r[i];
161: x[ir] = t[ii]; x[ir+1] = t[ii+1]; x[ir+2] = t[ii+2];
162: }
164: ISRestoreIndices(isrow,&rout);
165: ISRestoreIndices(iscol,&cout);
166: VecRestoreArrayRead(bb,&b);
167: VecRestoreArray(xx,&x);
168: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
169: return(0);
170: }