/*
   mmsbaij.c  (petsc-3.10.5, 2019-03-28)

   Support for the parallel SBAIJ matrix vector multiply
*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
7: PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
8: {
9: Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data;
10: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data);
12: PetscInt Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
13: PetscInt bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
14: IS from,to;
15: Vec gvec;
16: PetscMPIInt rank =sbaij->rank,lsize,size=sbaij->size;
17: PetscInt *owners=sbaij->rangebs,*ec_owner,k;
18: const PetscInt *sowners;
19: PetscScalar *ptr;
22: VecScatterDestroy(&sbaij->sMvctx);
24: /* For the first stab we make an array as long as the number of columns */
25: /* mark those columns that are in sbaij->B */
26: PetscCalloc1(Nbs,&indices);
27: for (i=0; i<mbs; i++) {
28: for (j=0; j<B->ilen[i]; j++) {
29: if (!indices[aj[B->i[i] + j]]) ec++;
30: indices[aj[B->i[i] + j]] = 1;
31: }
32: }
34: /* form arrays of columns we need */
35: PetscMalloc1(ec,&garray);
36: PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);
38: ec = 0;
39: for (j=0; j<size; j++) {
40: for (i=owners[j]; i<owners[j+1]; i++) {
41: if (indices[i]) {
42: garray[ec] = i;
43: ec_owner[ec] = j;
44: ec++;
45: }
46: }
47: }
49: /* make indices now point into garray */
50: for (i=0; i<ec; i++) indices[garray[i]] = i;
52: /* compact out the extra columns in B */
53: for (i=0; i<mbs; i++) {
54: for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
55: }
56: B->nbs = ec;
58: sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs;
60: PetscLayoutSetUp((sbaij->B->cmap));
61: PetscFree(indices);
63: /* create local vector that is used to scatter into */
64: VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);
66: /* create two temporary index sets for building scatter-gather */
67: PetscMalloc1(2*ec,&stmp);
68: ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);
69: for (i=0; i<ec; i++) stmp[i] = i;
70: ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);
72: /* generate the scatter context
73: -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */
74: VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);
75: VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);
76: VecDestroy(&gvec);
78: sbaij->garray = garray;
80: PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx);
81: PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec);
82: PetscLogObjectParent((PetscObject)mat,(PetscObject)from);
83: PetscLogObjectParent((PetscObject)mat,(PetscObject)to);
85: ISDestroy(&from);
86: ISDestroy(&to);
88: /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
89: lsize = (mbs + ec)*bs;
90: VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0);
91: VecDuplicate(sbaij->slvec0,&sbaij->slvec1);
92: VecGetSize(sbaij->slvec0,&vec_size);
94: VecGetOwnershipRanges(sbaij->slvec0,&sowners);
96: /* x index in the IS sfrom */
97: for (i=0; i<ec; i++) {
98: j = ec_owner[i];
99: sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
100: }
101: /* b index in the IS sfrom */
102: k = sowners[rank]/bs + mbs;
103: for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
104: ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);
106: /* x index in the IS sto */
107: k = sowners[rank]/bs + mbs;
108: for (i=0; i<ec; i++) stmp[i] = (k + i);
109: /* b index in the IS sto */
110: for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];
112: ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);
114: VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);
116: VecGetLocalSize(sbaij->slvec1,&nt);
117: VecGetArray(sbaij->slvec1,&ptr);
118: VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);
119: VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);
120: VecRestoreArray(sbaij->slvec1,&ptr);
122: VecGetArray(sbaij->slvec0,&ptr);
123: VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);
124: VecRestoreArray(sbaij->slvec0,&ptr);
126: PetscFree(stmp);
127: MPI_Barrier(PetscObjectComm((PetscObject)mat));
129: PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx);
130: PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0);
131: PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1);
132: PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b);
133: PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a);
134: PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b);
135: PetscLogObjectParent((PetscObject)mat,(PetscObject)from);
136: PetscLogObjectParent((PetscObject)mat,(PetscObject)to);
138: PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));
139: ISDestroy(&from);
140: ISDestroy(&to);
141: PetscFree2(sgarray,ec_owner);
142: return(0);
143: }
145: PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
146: {
147: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
148: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data);
150: PetscInt i,j,*aj = B->j,ec = 0,*garray;
151: PetscInt bs = mat->rmap->bs,*stmp;
152: IS from,to;
153: Vec gvec;
154: #if defined(PETSC_USE_CTABLE)
155: PetscTable gid1_lid1;
156: PetscTablePosition tpos;
157: PetscInt gid,lid;
158: #else
159: PetscInt Nbs = baij->Nbs,*indices;
160: #endif
163: #if defined(PETSC_USE_CTABLE)
164: /* use a table - Mark Adams */
165: PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);
166: for (i=0; i<B->mbs; i++) {
167: for (j=0; j<B->ilen[i]; j++) {
168: PetscInt data,gid1 = aj[B->i[i]+j] + 1;
169: PetscTableFind(gid1_lid1,gid1,&data);
170: if (!data) {
171: /* one based table */
172: PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);
173: }
174: }
175: }
176: /* form array of columns we need */
177: PetscMalloc1(ec,&garray);
178: PetscTableGetHeadPosition(gid1_lid1,&tpos);
179: while (tpos) {
180: PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);
181: gid--; lid--;
182: garray[lid] = gid;
183: }
184: PetscSortInt(ec,garray);
185: PetscTableRemoveAll(gid1_lid1);
186: for (i=0; i<ec; i++) {
187: PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);
188: }
189: /* compact out the extra columns in B */
190: for (i=0; i<B->mbs; i++) {
191: for (j=0; j<B->ilen[i]; j++) {
192: PetscInt gid1 = aj[B->i[i] + j] + 1;
193: PetscTableFind(gid1_lid1,gid1,&lid);
194: lid--;
195: aj[B->i[i]+j] = lid;
196: }
197: }
198: B->nbs = ec;
200: baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
202: PetscLayoutSetUp((baij->B->cmap));
203: PetscTableDestroy(&gid1_lid1);
204: #else
205: /* For the first stab we make an array as long as the number of columns */
206: /* mark those columns that are in baij->B */
207: PetscCalloc1(Nbs,&indices);
208: for (i=0; i<B->mbs; i++) {
209: for (j=0; j<B->ilen[i]; j++) {
210: if (!indices[aj[B->i[i] + j]]) ec++;
211: indices[aj[B->i[i] + j]] = 1;
212: }
213: }
215: /* form array of columns we need */
216: PetscMalloc1(ec,&garray);
217: ec = 0;
218: for (i=0; i<Nbs; i++) {
219: if (indices[i]) {
220: garray[ec++] = i;
221: }
222: }
224: /* make indices now point into garray */
225: for (i=0; i<ec; i++) indices[garray[i]] = i;
227: /* compact out the extra columns in B */
228: for (i=0; i<B->mbs; i++) {
229: for (j=0; j<B->ilen[i]; j++) {
230: aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
231: }
232: }
233: B->nbs = ec;
235: baij->B->cmap->n = ec*mat->rmap->bs;
237: PetscFree(indices);
238: #endif
240: /* create local vector that is used to scatter into */
241: VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);
243: /* create two temporary index sets for building scatter-gather */
244: ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);
246: PetscMalloc1(ec,&stmp);
247: for (i=0; i<ec; i++) stmp[i] = i;
248: ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);
250: /* create temporary global vector to generate scatter context */
251: VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);
252: VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);
253: VecDestroy(&gvec);
255: PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);
256: PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);
257: PetscLogObjectParent((PetscObject)mat,(PetscObject)from);
258: PetscLogObjectParent((PetscObject)mat,(PetscObject)to);
260: baij->garray = garray;
262: PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));
263: ISDestroy(&from);
264: ISDestroy(&to);
265: return(0);
266: }
/*
  Takes the local part of an already assembled MPISBAIJ matrix
  and disassembles it. This is to allow new nonzeros into the matrix
  that require more communication in the matrix vector multiply.
  Thus certain data-structures must be rebuilt.

  Kind of slow! But that's what application programmers get when
  they are sloppy.
*/
277: PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
278: {
279: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data;
280: Mat B = baij->B,Bnew;
281: Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
283: PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
284: PetscInt k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
285: MatScalar *a = Bbaij->a;
286: PetscScalar *atmp;
287: #if defined(PETSC_USE_REAL_MAT_SINGLE)
288: PetscInt l;
289: #endif
292: #if defined(PETSC_USE_REAL_MAT_SINGLE)
293: PetscMalloc1(A->rmap->bs,&atmp);
294: #endif
295: /* free stuff related to matrix-vec multiply */
296: VecGetSize(baij->lvec,&ec); /* needed for PetscLogObjectMemory below */
297: VecDestroy(&baij->lvec);
298: VecScatterDestroy(&baij->Mvctx);
300: VecDestroy(&baij->slvec0);
301: VecDestroy(&baij->slvec0b);
302: VecDestroy(&baij->slvec1);
303: VecDestroy(&baij->slvec1a);
304: VecDestroy(&baij->slvec1b);
306: if (baij->colmap) {
307: #if defined(PETSC_USE_CTABLE)
308: PetscTableDestroy(&baij->colmap);
309: #else
310: PetscFree(baij->colmap);
311: PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));
312: #endif
313: }
315: /* make sure that B is assembled so we can access its values */
316: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
317: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
319: /* invent new B and copy stuff over */
320: PetscMalloc1(mbs,&nz);
321: for (i=0; i<mbs; i++) {
322: nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
323: }
324: MatCreate(PETSC_COMM_SELF,&Bnew);
325: MatSetSizes(Bnew,m,n,m,n);
326: MatSetType(Bnew,((PetscObject)B)->type_name);
327: MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);
328: PetscFree(nz);
330: if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
331: ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew;
332: }
334: /*
335: Ensure that B's nonzerostate is monotonically increasing.
336: Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
337: */
338: Bnew->nonzerostate = B->nonzerostate;
339: PetscMalloc1(bs,&rvals);
340: for (i=0; i<mbs; i++) {
341: rvals[0] = bs*i;
342: for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
343: for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
344: col = garray[Bbaij->j[j]]*bs;
345: for (k=0; k<bs; k++) {
346: #if defined(PETSC_USE_REAL_MAT_SINGLE)
347: for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
348: #else
349: atmp = a+j*bs2 + k*bs;
350: #endif
351: MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);
352: col++;
353: }
354: }
355: }
356: #if defined(PETSC_USE_REAL_MAT_SINGLE)
357: PetscFree(atmp);
358: #endif
359: PetscFree(baij->garray);
361: baij->garray = 0;
363: PetscFree(rvals);
364: PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));
365: MatDestroy(&B);
366: PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);
368: baij->B = Bnew;
370: A->was_assembled = PETSC_FALSE;
371: return(0);
372: }