Actual source code: baijsolvnat14.c
petsc-3.13.6 2020-09-29
1: #include <../src/mat/impls/baij/seq/baij.h>
2: #include <petsc/private/kernels/blockinvert.h>
4: /* Block operations are done by accessing one column at at time */
5: /* Default MatSolve for block size 14 */
7: PetscErrorCode MatSolve_SeqBAIJ_14_NaturalOrdering(Mat A,Vec bb,Vec xx)
8: {
9: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
10: PetscErrorCode ierr;
11: const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*adiag=a->diag,*vi,bs=A->rmap->bs,bs2=a->bs2;
12: PetscInt i,k,nz,idx,idt,m;
13: const MatScalar *aa=a->a,*v;
14: PetscScalar s[14];
15: PetscScalar *x,xv;
16: const PetscScalar *b;
19: VecGetArrayRead(bb,&b);
20: VecGetArray(xx,&x);
22: /* forward solve the lower triangular */
23: for (i=0; i<n; i++) {
24: v = aa + bs2*ai[i];
25: vi = aj + ai[i];
26: nz = ai[i+1] - ai[i];
27: idt = bs*i;
28: x[idt] = b[idt]; x[1+idt] = b[1+idt]; x[2+idt] = b[2+idt]; x[3+idt] = b[3+idt]; x[4+idt] = b[4+idt];
29: x[5+idt] = b[5+idt]; x[6+idt] = b[6+idt]; x[7+idt] = b[7+idt]; x[8+idt] = b[8+idt]; x[9+idt] = b[9+idt];
30: x[10+idt] = b[10+idt]; x[11+idt] = b[11+idt]; x[12+idt] = b[12+idt]; x[13+idt] = b[13+idt];
31: for (m=0; m<nz; m++) {
32: idx = bs*vi[m];
33: for (k=0; k<bs; k++) {
34: xv = x[k + idx];
35: x[idt] -= v[0]*xv;
36: x[1+idt] -= v[1]*xv;
37: x[2+idt] -= v[2]*xv;
38: x[3+idt] -= v[3]*xv;
39: x[4+idt] -= v[4]*xv;
40: x[5+idt] -= v[5]*xv;
41: x[6+idt] -= v[6]*xv;
42: x[7+idt] -= v[7]*xv;
43: x[8+idt] -= v[8]*xv;
44: x[9+idt] -= v[9]*xv;
45: x[10+idt] -= v[10]*xv;
46: x[11+idt] -= v[11]*xv;
47: x[12+idt] -= v[12]*xv;
48: x[13+idt] -= v[13]*xv;
49: v += 14;
50: }
51: }
52: }
53: /* backward solve the upper triangular */
54: for (i=n-1; i>=0; i--) {
55: v = aa + bs2*(adiag[i+1]+1);
56: vi = aj + adiag[i+1]+1;
57: nz = adiag[i] - adiag[i+1] - 1;
58: idt = bs*i;
59: s[0] = x[idt]; s[1] = x[1+idt]; s[2] = x[2+idt]; s[3] = x[3+idt]; s[4] = x[4+idt];
60: s[5] = x[5+idt]; s[6] = x[6+idt]; s[7] = x[7+idt]; s[8] = x[8+idt]; s[9] = x[9+idt];
61: s[10] = x[10+idt]; s[11] = x[11+idt]; s[12] = x[12+idt]; s[13] = x[13+idt];
63: for (m=0; m<nz; m++) {
64: idx = bs*vi[m];
65: for (k=0; k<bs; k++) {
66: xv = x[k + idx];
67: s[0] -= v[0]*xv;
68: s[1] -= v[1]*xv;
69: s[2] -= v[2]*xv;
70: s[3] -= v[3]*xv;
71: s[4] -= v[4]*xv;
72: s[5] -= v[5]*xv;
73: s[6] -= v[6]*xv;
74: s[7] -= v[7]*xv;
75: s[8] -= v[8]*xv;
76: s[9] -= v[9]*xv;
77: s[10] -= v[10]*xv;
78: s[11] -= v[11]*xv;
79: s[12] -= v[12]*xv;
80: s[13] -= v[13]*xv;
81: v += 14;
82: }
83: }
84: PetscArrayzero(x+idt,bs);
85: for (k=0; k<bs; k++) {
86: x[idt] += v[0]*s[k];
87: x[1+idt] += v[1]*s[k];
88: x[2+idt] += v[2]*s[k];
89: x[3+idt] += v[3]*s[k];
90: x[4+idt] += v[4]*s[k];
91: x[5+idt] += v[5]*s[k];
92: x[6+idt] += v[6]*s[k];
93: x[7+idt] += v[7]*s[k];
94: x[8+idt] += v[8]*s[k];
95: x[9+idt] += v[9]*s[k];
96: x[10+idt] += v[10]*s[k];
97: x[11+idt] += v[11]*s[k];
98: x[12+idt] += v[12]*s[k];
99: x[13+idt] += v[13]*s[k];
100: v += 14;
101: }
102: }
103: VecRestoreArrayRead(bb,&b);
104: VecRestoreArray(xx,&x);
105: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
106: return(0);
107: }
109: /* Block operations are done by accessing one column at at time */
110: /* Default MatSolve for block size 13 */
112: PetscErrorCode MatSolve_SeqBAIJ_13_NaturalOrdering(Mat A,Vec bb,Vec xx)
113: {
114: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
115: PetscErrorCode ierr;
116: const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*adiag=a->diag,*vi,bs=A->rmap->bs,bs2=a->bs2;
117: PetscInt i,k,nz,idx,idt,m;
118: const MatScalar *aa=a->a,*v;
119: PetscScalar s[13];
120: PetscScalar *x,xv;
121: const PetscScalar *b;
124: VecGetArrayRead(bb,&b);
125: VecGetArray(xx,&x);
127: /* forward solve the lower triangular */
128: for (i=0; i<n; i++) {
129: v = aa + bs2*ai[i];
130: vi = aj + ai[i];
131: nz = ai[i+1] - ai[i];
132: idt = bs*i;
133: x[idt] = b[idt]; x[1+idt] = b[1+idt]; x[2+idt] = b[2+idt]; x[3+idt] = b[3+idt]; x[4+idt] = b[4+idt];
134: x[5+idt] = b[5+idt]; x[6+idt] = b[6+idt]; x[7+idt] = b[7+idt]; x[8+idt] = b[8+idt]; x[9+idt] = b[9+idt];
135: x[10+idt] = b[10+idt]; x[11+idt] = b[11+idt]; x[12+idt] = b[12+idt];
136: for (m=0; m<nz; m++) {
137: idx = bs*vi[m];
138: for (k=0; k<bs; k++) {
139: xv = x[k + idx];
140: x[idt] -= v[0]*xv;
141: x[1+idt] -= v[1]*xv;
142: x[2+idt] -= v[2]*xv;
143: x[3+idt] -= v[3]*xv;
144: x[4+idt] -= v[4]*xv;
145: x[5+idt] -= v[5]*xv;
146: x[6+idt] -= v[6]*xv;
147: x[7+idt] -= v[7]*xv;
148: x[8+idt] -= v[8]*xv;
149: x[9+idt] -= v[9]*xv;
150: x[10+idt] -= v[10]*xv;
151: x[11+idt] -= v[11]*xv;
152: x[12+idt] -= v[12]*xv;
153: v += 13;
154: }
155: }
156: }
157: /* backward solve the upper triangular */
158: for (i=n-1; i>=0; i--) {
159: v = aa + bs2*(adiag[i+1]+1);
160: vi = aj + adiag[i+1]+1;
161: nz = adiag[i] - adiag[i+1] - 1;
162: idt = bs*i;
163: s[0] = x[idt]; s[1] = x[1+idt]; s[2] = x[2+idt]; s[3] = x[3+idt]; s[4] = x[4+idt];
164: s[5] = x[5+idt]; s[6] = x[6+idt]; s[7] = x[7+idt]; s[8] = x[8+idt]; s[9] = x[9+idt];
165: s[10] = x[10+idt]; s[11] = x[11+idt]; s[12] = x[12+idt];
167: for (m=0; m<nz; m++) {
168: idx = bs*vi[m];
169: for (k=0; k<bs; k++) {
170: xv = x[k + idx];
171: s[0] -= v[0]*xv;
172: s[1] -= v[1]*xv;
173: s[2] -= v[2]*xv;
174: s[3] -= v[3]*xv;
175: s[4] -= v[4]*xv;
176: s[5] -= v[5]*xv;
177: s[6] -= v[6]*xv;
178: s[7] -= v[7]*xv;
179: s[8] -= v[8]*xv;
180: s[9] -= v[9]*xv;
181: s[10] -= v[10]*xv;
182: s[11] -= v[11]*xv;
183: s[12] -= v[12]*xv;
184: v += 13;
185: }
186: }
187: PetscArrayzero(x+idt,bs);
188: for (k=0; k<bs; k++) {
189: x[idt] += v[0]*s[k];
190: x[1+idt] += v[1]*s[k];
191: x[2+idt] += v[2]*s[k];
192: x[3+idt] += v[3]*s[k];
193: x[4+idt] += v[4]*s[k];
194: x[5+idt] += v[5]*s[k];
195: x[6+idt] += v[6]*s[k];
196: x[7+idt] += v[7]*s[k];
197: x[8+idt] += v[8]*s[k];
198: x[9+idt] += v[9]*s[k];
199: x[10+idt] += v[10]*s[k];
200: x[11+idt] += v[11]*s[k];
201: x[12+idt] += v[12]*s[k];
202: v += 13;
203: }
204: }
205: VecRestoreArrayRead(bb,&b);
206: VecRestoreArray(xx,&x);
207: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
208: return(0);
209: }
211: /* Block operations are done by accessing one column at at time */
212: /* Default MatSolve for block size 12 */
214: PetscErrorCode MatSolve_SeqBAIJ_12_NaturalOrdering(Mat A,Vec bb,Vec xx)
215: {
216: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
217: PetscErrorCode ierr;
218: const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*adiag=a->diag,*vi,bs=A->rmap->bs,bs2=a->bs2;
219: PetscInt i,k,nz,idx,idt,m;
220: const MatScalar *aa=a->a,*v;
221: PetscScalar s[12];
222: PetscScalar *x,xv;
223: const PetscScalar *b;
226: VecGetArrayRead(bb,&b);
227: VecGetArray(xx,&x);
229: /* forward solve the lower triangular */
230: for (i=0; i<n; i++) {
231: v = aa + bs2*ai[i];
232: vi = aj + ai[i];
233: nz = ai[i+1] - ai[i];
234: idt = bs*i;
235: x[idt] = b[idt]; x[1+idt] = b[1+idt]; x[2+idt] = b[2+idt]; x[3+idt] = b[3+idt]; x[4+idt] = b[4+idt];
236: x[5+idt] = b[5+idt]; x[6+idt] = b[6+idt]; x[7+idt] = b[7+idt]; x[8+idt] = b[8+idt]; x[9+idt] = b[9+idt];
237: x[10+idt] = b[10+idt]; x[11+idt] = b[11+idt];
238: for (m=0; m<nz; m++) {
239: idx = bs*vi[m];
240: for (k=0; k<bs; k++) {
241: xv = x[k + idx];
242: x[idt] -= v[0]*xv;
243: x[1+idt] -= v[1]*xv;
244: x[2+idt] -= v[2]*xv;
245: x[3+idt] -= v[3]*xv;
246: x[4+idt] -= v[4]*xv;
247: x[5+idt] -= v[5]*xv;
248: x[6+idt] -= v[6]*xv;
249: x[7+idt] -= v[7]*xv;
250: x[8+idt] -= v[8]*xv;
251: x[9+idt] -= v[9]*xv;
252: x[10+idt] -= v[10]*xv;
253: x[11+idt] -= v[11]*xv;
254: v += 12;
255: }
256: }
257: }
258: /* backward solve the upper triangular */
259: for (i=n-1; i>=0; i--) {
260: v = aa + bs2*(adiag[i+1]+1);
261: vi = aj + adiag[i+1]+1;
262: nz = adiag[i] - adiag[i+1] - 1;
263: idt = bs*i;
264: s[0] = x[idt]; s[1] = x[1+idt]; s[2] = x[2+idt]; s[3] = x[3+idt]; s[4] = x[4+idt];
265: s[5] = x[5+idt]; s[6] = x[6+idt]; s[7] = x[7+idt]; s[8] = x[8+idt]; s[9] = x[9+idt];
266: s[10] = x[10+idt]; s[11] = x[11+idt];
268: for (m=0; m<nz; m++) {
269: idx = bs*vi[m];
270: for (k=0; k<bs; k++) {
271: xv = x[k + idx];
272: s[0] -= v[0]*xv;
273: s[1] -= v[1]*xv;
274: s[2] -= v[2]*xv;
275: s[3] -= v[3]*xv;
276: s[4] -= v[4]*xv;
277: s[5] -= v[5]*xv;
278: s[6] -= v[6]*xv;
279: s[7] -= v[7]*xv;
280: s[8] -= v[8]*xv;
281: s[9] -= v[9]*xv;
282: s[10] -= v[10]*xv;
283: s[11] -= v[11]*xv;
284: v += 12;
285: }
286: }
287: PetscArrayzero(x+idt,bs);
288: for (k=0; k<bs; k++) {
289: x[idt] += v[0]*s[k];
290: x[1+idt] += v[1]*s[k];
291: x[2+idt] += v[2]*s[k];
292: x[3+idt] += v[3]*s[k];
293: x[4+idt] += v[4]*s[k];
294: x[5+idt] += v[5]*s[k];
295: x[6+idt] += v[6]*s[k];
296: x[7+idt] += v[7]*s[k];
297: x[8+idt] += v[8]*s[k];
298: x[9+idt] += v[9]*s[k];
299: x[10+idt] += v[10]*s[k];
300: x[11+idt] += v[11]*s[k];
301: v += 12;
302: }
303: }
304: VecRestoreArrayRead(bb,&b);
305: VecRestoreArray(xx,&x);
306: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
307: return(0);
308: }