Actual source code: fdaij.c
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <../src/mat/impls/baij/seq/baij.h>
3: #include <../src/mat/impls/sell/seq/sell.h>
4: #include <petsc/private/isimpl.h>
6: /*
7: This routine is shared by SeqAIJ and SeqBAIJ matrices,
8: since it operators only on the nonzero structure of the elements or blocks.
9: */
10: PetscErrorCode MatFDColoringCreate_SeqXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c)
11: {
12: PetscInt bs, nis = iscoloring->n, m = mat->rmap->n;
13: PetscBool isBAIJ, isSELL;
15: PetscFunctionBegin;
16: /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian */
17: PetscCall(MatGetBlockSize(mat, &bs));
18: PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQBAIJ, &isBAIJ));
19: PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSELL, &isSELL));
20: if (isBAIJ) {
21: c->brows = m;
22: c->bcols = 1;
23: } else { /* seqaij matrix */
24: /* bcols is chosen s.t. dy-array takes 50% of memory space as mat */
25: PetscReal mem;
26: PetscInt nz, brows, bcols;
27: if (isSELL) {
28: Mat_SeqSELL *spA = (Mat_SeqSELL *)mat->data;
29: nz = spA->nz;
30: } else {
31: Mat_SeqAIJ *spA = (Mat_SeqAIJ *)mat->data;
32: nz = spA->nz;
33: }
35: bs = 1; /* only bs=1 is supported for SeqAIJ matrix */
36: mem = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt);
37: bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar)));
38: if (!bcols) bcols = 1;
39: brows = 1000 / bcols;
40: if (bcols > nis) bcols = nis;
41: if (brows == 0 || brows > m) brows = m;
42: c->brows = brows;
43: c->bcols = bcols;
44: }
46: c->M = mat->rmap->N / bs; /* set total rows, columns and local rows */
47: c->N = mat->cmap->N / bs;
48: c->m = mat->rmap->N / bs;
49: c->rstart = 0;
50: c->ncolors = nis;
51: c->ctype = iscoloring->ctype;
52: PetscFunctionReturn(PETSC_SUCCESS);
53: }
55: /*
56: Reorder Jentry such that blocked brows*bols of entries from dense matrix are inserted into Jacobian for improved cache performance
57: Input Parameters:
58: + mat - the matrix containing the nonzero structure of the Jacobian
59: . color - the coloring context
60: - nz - number of local non-zeros in mat
61: */
62: PetscErrorCode MatFDColoringSetUpBlocked_AIJ_Private(Mat mat, MatFDColoring c, PetscInt nz)
63: {
64: PetscInt i, j, nrows, nbcols, brows = c->brows, bcols = c->bcols, mbs = c->m, nis = c->ncolors;
65: PetscInt *color_start, *row_start, *nrows_new, nz_new, row_end;
67: PetscFunctionBegin;
68: if (brows < 1 || brows > mbs) brows = mbs;
69: PetscCall(PetscMalloc2(bcols + 1, &color_start, bcols, &row_start));
70: PetscCall(PetscCalloc1(nis, &nrows_new));
71: PetscCall(PetscMalloc1(bcols * mat->rmap->n, &c->dy));
73: nz_new = 0;
74: nbcols = 0;
75: color_start[bcols] = 0;
77: if (c->htype[0] == 'd') { /* c->htype == 'ds', use MatEntry */
78: MatEntry *Jentry_new, *Jentry = c->matentry;
80: PetscCall(PetscMalloc1(nz, &Jentry_new));
81: for (i = 0; i < nis; i += bcols) { /* loop over colors */
82: if (i + bcols > nis) {
83: color_start[nis - i] = color_start[bcols];
84: bcols = nis - i;
85: }
87: color_start[0] = color_start[bcols];
88: for (j = 0; j < bcols; j++) {
89: color_start[j + 1] = c->nrows[i + j] + color_start[j];
90: row_start[j] = 0;
91: }
93: row_end = brows;
94: if (row_end > mbs) row_end = mbs;
96: while (row_end <= mbs) { /* loop over block rows */
97: for (j = 0; j < bcols; j++) { /* loop over block columns */
98: nrows = c->nrows[i + j];
99: nz = color_start[j];
100: while (row_start[j] < nrows) {
101: if (Jentry[nz].row >= row_end) {
102: color_start[j] = nz;
103: break;
104: } else { /* copy Jentry[nz] to Jentry_new[nz_new] */
105: Jentry_new[nz_new].row = Jentry[nz].row + j * mbs; /* index in dy-array */
106: Jentry_new[nz_new].col = Jentry[nz].col;
107: Jentry_new[nz_new].valaddr = Jentry[nz].valaddr;
108: nz_new++;
109: nz++;
110: row_start[j]++;
111: }
112: }
113: }
114: if (row_end == mbs) break;
115: row_end += brows;
116: if (row_end > mbs) row_end = mbs;
117: }
118: nrows_new[nbcols++] = nz_new;
119: }
120: PetscCall(PetscFree(Jentry));
121: c->matentry = Jentry_new;
122: } else { /* c->htype == 'wp', use MatEntry2 */
123: MatEntry2 *Jentry2_new, *Jentry2 = c->matentry2;
125: PetscCall(PetscMalloc1(nz, &Jentry2_new));
126: for (i = 0; i < nis; i += bcols) { /* loop over colors */
127: if (i + bcols > nis) {
128: color_start[nis - i] = color_start[bcols];
129: bcols = nis - i;
130: }
132: color_start[0] = color_start[bcols];
133: for (j = 0; j < bcols; j++) {
134: color_start[j + 1] = c->nrows[i + j] + color_start[j];
135: row_start[j] = 0;
136: }
138: row_end = brows;
139: if (row_end > mbs) row_end = mbs;
141: while (row_end <= mbs) { /* loop over block rows */
142: for (j = 0; j < bcols; j++) { /* loop over block columns */
143: nrows = c->nrows[i + j];
144: nz = color_start[j];
145: while (row_start[j] < nrows) {
146: if (Jentry2[nz].row >= row_end) {
147: color_start[j] = nz;
148: break;
149: } else { /* copy Jentry2[nz] to Jentry2_new[nz_new] */
150: Jentry2_new[nz_new].row = Jentry2[nz].row + j * mbs; /* index in dy-array */
151: Jentry2_new[nz_new].valaddr = Jentry2[nz].valaddr;
152: nz_new++;
153: nz++;
154: row_start[j]++;
155: }
156: }
157: }
158: if (row_end == mbs) break;
159: row_end += brows;
160: if (row_end > mbs) row_end = mbs;
161: }
162: nrows_new[nbcols++] = nz_new;
163: }
164: PetscCall(PetscFree(Jentry2));
165: c->matentry2 = Jentry2_new;
166: } /* ---------------------------------------------*/
168: PetscCall(PetscFree2(color_start, row_start));
170: for (i = nbcols - 1; i > 0; i--) nrows_new[i] -= nrows_new[i - 1];
171: PetscCall(PetscFree(c->nrows));
172: c->nrows = nrows_new;
173: PetscFunctionReturn(PETSC_SUCCESS);
174: }
176: PetscErrorCode MatFDColoringSetUp_SeqXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c)
177: {
178: PetscInt i, n, nrows, mbs = c->m, j, k, m, ncols, col, nis = iscoloring->n, *rowhit, bs, bs2, *spidx, nz, tmp;
179: const PetscInt *is, *row, *ci, *cj;
180: PetscBool isBAIJ, isSELL;
181: const PetscScalar *A_val;
182: PetscScalar **valaddrhit;
183: MatEntry *Jentry;
184: MatEntry2 *Jentry2;
186: PetscFunctionBegin;
187: PetscCall(ISColoringGetIS(iscoloring, PETSC_OWN_POINTER, PETSC_IGNORE, &c->isa));
189: PetscCall(MatGetBlockSize(mat, &bs));
190: PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQBAIJ, &isBAIJ));
191: PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSELL, &isSELL));
192: if (isBAIJ) {
193: Mat_SeqBAIJ *spA = (Mat_SeqBAIJ *)mat->data;
195: A_val = spA->a;
196: nz = spA->nz;
197: } else if (isSELL) {
198: Mat_SeqSELL *spA = (Mat_SeqSELL *)mat->data;
200: A_val = spA->val;
201: nz = spA->nz;
202: bs = 1; /* only bs=1 is supported for SeqSELL matrix */
203: } else {
204: Mat_SeqAIJ *spA = (Mat_SeqAIJ *)mat->data;
206: A_val = spA->a;
207: nz = spA->nz;
208: bs = 1; /* only bs=1 is supported for SeqAIJ matrix */
209: }
211: PetscCall(PetscMalloc2(nis, &c->ncolumns, nis, &c->columns));
212: PetscCall(PetscMalloc1(nis, &c->nrows)); /* nrows is freed separately from ncolumns and columns */
214: if (c->htype[0] == 'd') {
215: PetscCall(PetscMalloc1(nz, &Jentry));
216: c->matentry = Jentry;
217: } else if (c->htype[0] == 'w') {
218: PetscCall(PetscMalloc1(nz, &Jentry2));
219: c->matentry2 = Jentry2;
220: } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "htype is not supported");
222: if (isBAIJ) {
223: PetscCall(MatGetColumnIJ_SeqBAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
224: } else if (isSELL) {
225: PetscCall(MatGetColumnIJ_SeqSELL_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
226: } else {
227: PetscCall(MatGetColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
228: }
230: PetscCall(PetscCalloc1(c->m, &rowhit));
231: PetscCall(PetscMalloc1(c->m, &valaddrhit));
233: nz = 0;
234: for (i = 0; i < nis; i++) { /* loop over colors */
235: PetscCall(ISGetLocalSize(c->isa[i], &n));
236: PetscCall(ISGetIndices(c->isa[i], &is));
238: c->ncolumns[i] = n;
239: c->columns[i] = (PetscInt *)is;
240: /* note: we know that c->isa is going to be around as long at the c->columns values */
241: PetscCall(ISRestoreIndices(c->isa[i], &is));
243: /* fast, crude version requires O(N*N) work */
244: bs2 = bs * bs;
245: nrows = 0;
246: for (j = 0; j < n; j++) { /* loop over columns */
247: col = is[j];
248: tmp = ci[col];
249: row = cj + tmp;
250: m = ci[col + 1] - tmp;
251: nrows += m;
252: for (k = 0; k < m; k++) { /* loop over columns marking them in rowhit */
253: rowhit[*row] = col + 1;
254: valaddrhit[*row++] = (PetscScalar *)&A_val[bs2 * spidx[tmp + k]];
255: }
256: }
257: c->nrows[i] = nrows; /* total num of rows for this color */
259: if (c->htype[0] == 'd') {
260: for (j = 0; j < mbs; j++) { /* loop over rows */
261: if (rowhit[j]) {
262: Jentry[nz].row = j; /* local row index */
263: Jentry[nz].col = rowhit[j] - 1; /* local column index */
264: Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
265: nz++;
266: rowhit[j] = 0.0; /* zero rowhit for reuse */
267: }
268: }
269: } else { /* c->htype == 'wp' */
270: for (j = 0; j < mbs; j++) { /* loop over rows */
271: if (rowhit[j]) {
272: Jentry2[nz].row = j; /* local row index */
273: Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
274: nz++;
275: rowhit[j] = 0.0; /* zero rowhit for reuse */
276: }
277: }
278: }
279: }
281: if (c->bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */
282: PetscCall(MatFDColoringSetUpBlocked_AIJ_Private(mat, c, nz));
283: }
285: if (isBAIJ) {
286: PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
287: PetscCall(PetscMalloc1(bs * mat->rmap->n, &c->dy));
288: } else if (isSELL) {
289: PetscCall(MatRestoreColumnIJ_SeqSELL_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
290: } else {
291: PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
292: }
293: PetscCall(PetscFree(rowhit));
294: PetscCall(PetscFree(valaddrhit));
295: PetscCall(ISColoringRestoreIS(iscoloring, PETSC_OWN_POINTER, &c->isa));
297: PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->rmap->n, PETSC_DETERMINE, 0, NULL, &c->vscale));
298: PetscCall(PetscInfo(c, "ncolors %" PetscInt_FMT ", brows %" PetscInt_FMT " and bcols %" PetscInt_FMT " are used.\n", c->ncolors, c->brows, c->bcols));
299: PetscFunctionReturn(PETSC_SUCCESS);
300: }