Actual source code: mmsell.c
/*
   Support for the parallel SELL matrix vector multiply
*/
#include <../src/mat/impls/sell/mpi/mpisell.h>
#include <petsc/private/isimpl.h>

/*
   Takes the local part of an already assembled MPISELL matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPISELL(Mat A)
{
  Mat_MPISELL *sell  = (Mat_MPISELL *)A->data;
  Mat          B     = sell->B, Bnew;
  Mat_SeqSELL *Bsell = (Mat_SeqSELL *)B->data;
  PetscInt     i, j, totalslices, N = A->cmap->N, row;
  PetscBool    isnonzero;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecDestroy(&sell->lvec));
  PetscCall(VecScatterDestroy(&sell->Mvctx));
  if (sell->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&sell->colmap));
#else
    PetscCall(PetscFree(sell->colmap));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
  PetscCall(MatSetSizes(Bnew, B->rmap->n, N, B->rmap->n, N));
  PetscCall(MatSetBlockSizesFromMats(Bnew, A, A));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqSELLSetPreallocation(Bnew, 0, Bsell->rlen));
  if (Bsell->nonew >= 0) { /* Inherit insertion error options (if positive). */
    ((Mat_SeqSELL *)Bnew->data)->nonew = Bsell->nonew;
  }

  /*
    Ensure that B's nonzerostate is monotonically increasing.
    Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;

  totalslices = PetscCeilInt(B->rmap->n, Bsell->sliceheight);
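  /*
    Note on the loop below: within slice i the entries val[sliidx[i]..sliidx[i+1]-1] are stored
    slot by slot, sliceheight rows at a time, so entry j belongs to local row
    sliceheight*i + (j - sliidx[i]) % sliceheight (tracked here by the running index "row") and to
    column slot (j - sliidx[i]) / sliceheight. A slot holds a genuine nonzero, rather than padding,
    exactly when its slot index is below that row's length rlen[], which is what isnonzero tests.
  */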
  for (i = 0; i < totalslices; i++) { /* loop over slices */
    for (j = Bsell->sliidx[i], row = 0; j < Bsell->sliidx[i + 1]; j++, row = (row + 1) % Bsell->sliceheight) {
      isnonzero = (PetscBool)((j - Bsell->sliidx[i]) / Bsell->sliceheight < Bsell->rlen[Bsell->sliceheight * i + row]);
      if (isnonzero) { PetscCall(MatSetValue(Bnew, Bsell->sliceheight * i + row, sell->garray[Bsell->colidx[j]], Bsell->val[j], B->insertmode)); }
    }
  }

  PetscCall(PetscFree(sell->garray));
  PetscCall(MatDestroy(&B));

  sell->B          = Bnew;
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
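
/*
  Illustrative sketch (not code from this file): the disassembly above is what allows an application
  to introduce a brand-new off-diagonal nonzero into an already assembled MATMPISELL matrix. A
  hypothetical caller, with A assembled, grow a locally owned global row, and gcol a global column
  outside this process's diagonal block that was not previously stored, might do

     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
     PetscCall(MatSetValue(A, grow, gcol, 2.0, ADD_VALUES));
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); // may trigger MatDisAssemble_MPISELL() internally

  after which MatSetUpMultiply_MPISELL() below rebuilds the communication structures used by MatMult().
*/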

PetscErrorCode MatSetUpMultiply_MPISELL(Mat mat)
{
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
  Mat_SeqSELL *B    = (Mat_SeqSELL *)(sell->B->data);
  PetscInt     i, j, *bcolidx = B->colidx, ec = 0, *garray, totalslices;
  IS           from, to;
  Vec          gvec;
  PetscBool    isnonzero;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL;
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt N = mat->cmap->N, *indices;
#endif

  PetscFunctionBegin;
  totalslices = PetscCeilInt(sell->B->rmap->n, B->sliceheight);

  /* ec counts the number of columns that contain nonzeros */
#if defined(PETSC_USE_CTABLE)
  /* use a table */
  PetscCall(PetscHMapICreateWithSize(sell->B->rmap->n, &gid1_lid1));
  for (i = 0; i < totalslices; i++) { /* loop over slices */
    for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
      isnonzero = (PetscBool)((j - B->sliidx[i]) / B->sliceheight < B->rlen[i * B->sliceheight + j % B->sliceheight]);
      if (isnonzero) { /* check the mask bit */
        PetscInt data, gid1 = bcolidx[j] + 1;

        PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
        /* one based table */
        if (!data) PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
      }
    }
  }

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscHashIterBegin(gid1_lid1, tpos);
  while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
    PetscHashIterGetKey(gid1_lid1, tpos, gid);
    PetscHashIterGetVal(gid1_lid1, tpos, lid);
    PetscHashIterNext(gid1_lid1, tpos);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */
  PetscCall(PetscHMapIClear(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));

  /* compact out the extra columns in B */
  for (i = 0; i < totalslices; i++) { /* loop over slices */
    for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
      isnonzero = (PetscBool)((j - B->sliidx[i]) / B->sliceheight < B->rlen[i * B->sliceheight + j % B->sliceheight]);
      if (isnonzero) {
        PetscInt gid1 = bcolidx[j] + 1;
        PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
        lid--;
        bcolidx[j] = lid;
      }
    }
  }
  PetscCall(PetscLayoutDestroy(&sell->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap));
  PetscCall(PetscHMapIDestroy(&gid1_lid1));
#else
  /* Make an array as long as the number of columns */
  PetscCall(PetscCalloc1(N, &indices));
  /* mark those columns that are in sell->B */
  for (i = 0; i < totalslices; i++) { /* loop over slices */
    for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
      isnonzero = (PetscBool)((j - B->sliidx[i]) / B->sliceheight < B->rlen[i * B->sliceheight + j % B->sliceheight]);
      if (isnonzero) {
        if (!indices[bcolidx[j]]) ec++;
        indices[bcolidx[j]] = 1;
      }
    }
  }

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  ec = 0;
  for (i = 0; i < N; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < totalslices; i++) { /* loop over slices */
    for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
      isnonzero = (PetscBool)((j - B->sliidx[i]) / B->sliceheight < B->rlen[i * B->sliceheight + j % B->sliceheight]);
      if (isnonzero) bcolidx[j] = indices[bcolidx[j]];
    }
  }
  PetscCall(PetscLayoutDestroy(&sell->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap));
  PetscCall(PetscFree(indices));
#endif
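  /*
    At this point the off-diagonal block sell->B has been compacted: its column map now has exactly
    ec columns, and garray[lid] records the global column index corresponding to compacted local
    column lid (this is the array MatDisAssemble_MPISELL() uses to restore global column numbers).
  */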
  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec, &sell->lvec));
  /* create two temporary index sets used to build the scatter-gather */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from));
  PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to));

  /* create temporary global vector to generate scatter context */
  /* This does not allocate the array's memory so is efficient */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

  /* generate the scatter context */
  PetscCall(VecScatterCreate(gvec, from, sell->lvec, to, &sell->Mvctx));
  PetscCall(VecScatterViewFromOptions(sell->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  sell->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(PETSC_SUCCESS);
}
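
/*
  Illustrative sketch (not a routine in this file): the objects built above are what the parallel
  multiply consumes. Schematically, a MatMult for this format does

     PetscCall(VecScatterBegin(sell->Mvctx, xx, sell->lvec, INSERT_VALUES, SCATTER_FORWARD));
     PetscCall((*sell->A->ops->mult)(sell->A, xx, yy));                // diagonal block times the owned part of x
     PetscCall(VecScatterEnd(sell->Mvctx, xx, sell->lvec, INSERT_VALUES, SCATTER_FORWARD));
     PetscCall((*sell->B->ops->multadd)(sell->B, sell->lvec, yy, yy)); // compacted off-diagonal block times the ghost values

  overlapping the scatter of the needed off-process entries of x into sell->lvec with the local part
  of the product.
*/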

/* ugly stuff added for Glenn; someday we should fix this up */
static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec       auglydd = NULL, auglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */

static PetscErrorCode MatMPISELLDiagonalScaleLocalSetUp(Mat inA, Vec scale)
{
  Mat_MPISELL *ina = (Mat_MPISELL *)inA->data; /* access private part of matrix */
  PetscInt     i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices;
  PetscInt    *r_rmapd, *r_rmapo;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
  PetscCall(MatGetSize(ina->A, NULL, &n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n);
  PetscCall(PetscMalloc1(n + 1, &auglyrmapd));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) auglyrmapd[(r_rmapd[i] - 1) - cstart] = i;
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd));
  PetscCall(PetscCalloc1(inA->cmap->N + 1, &lindices));
  for (i = 0; i < ina->B->cmap->n; i++) lindices[garray[i]] = i + 1;
  no = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt + 1, &auglyrmapo));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) auglyrmapo[(r_rmapo[i] - 1)] = i;
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDiagonalScaleLocal_MPISELL(Mat A, Vec scale)
{
  Mat_MPISELL       *a = (Mat_MPISELL *)A->data; /* access private part of matrix */
  PetscInt           n, i;
  PetscScalar       *d, *o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!auglyrmapd) PetscCall(MatMPISELLDiagonalScaleLocalSetUp(A, scale));
  PetscCall(VecGetArrayRead(scale, &s));
  PetscCall(VecGetLocalSize(auglydd, &n));
  PetscCall(VecGetArray(auglydd, &d));
  for (i = 0; i < n; i++) { d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ }
  PetscCall(VecRestoreArray(auglydd, &d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A, NULL, auglydd));
  PetscCall(VecGetLocalSize(auglyoo, &n));
  PetscCall(VecGetArray(auglyoo, &o));
  for (i = 0; i < n; i++) { o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ }
  PetscCall(VecRestoreArrayRead(scale, &s));
  PetscCall(VecRestoreArray(auglyoo, &o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B, NULL, auglyoo));
  PetscFunctionReturn(PETSC_SUCCESS);
}
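
/*
  Illustrative usage sketch (assumptions, not code from this file): MatDiagonalScaleLocal_MPISELL()
  is normally reached through the public MatDiagonalScaleLocal() and relies on the matrix having a
  local-to-global mapping (inA->rmap->mapping above). A hypothetical caller would provide the column
  scaling in the local (ghosted) ordering of that mapping:

     Mat A;      // assembled MATMPISELL matrix on which MatSetLocalToGlobalMapping() was called
     Vec lscale; // sequential vector with one entry per local index of the mapping (owned + ghost)

     PetscCall(MatDiagonalScaleLocal(A, lscale));

  The routine above then copies the "diagonal" entries of the scaling into auglydd and the
  "off-diagonal" (ghost) entries into auglyoo via auglyrmapd/auglyrmapo, and applies
  MatDiagonalScale() to each block of the local matrix.
*/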