Actual source code: mpiaij.c
2: #include "src/mat/impls/aij/mpi/mpiaij.h"
3: #include "src/inline/spops.h"
5: /*
6: Local utility routine that creates a mapping from the global column
7: number to the local number in the off-diagonal part of the local
8: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
9: a slightly higher hash-table cost; without it, it is not scalable (each processor
10: has an order-N integer array) but it is fast to access.
11: */
14: PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15: {
16: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
18: PetscInt n = aij->B->n,i;
21: #if defined (PETSC_USE_CTABLE)
22: PetscTableCreate(n,&aij->colmap);
23: for (i=0; i<n; i++){
24: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
25: }
26: #else
27: PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);
28: PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
29: PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));
30: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31: #endif
32: return(0);
33: }
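/*
   Illustrative sketch, not part of the original file: how the colmap built above is
   consulted to translate a global column number into the local numbering of the
   off-diagonal block B. It mirrors the lookups done in MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below; the helper name is ours and error checking is
   omitted to match the listing.
*/
static PetscInt GlobalToLocalOffDiagCol_Sketch(Mat_MPIAIJ *aij,PetscInt gcol)
{
  PetscInt lcol;
#if defined (PETSC_USE_CTABLE)
  PetscTableFind(aij->colmap,gcol+1,&lcol);  /* table stores local index + 1; 0 means "not present" */
  lcol--;
#else
  lcol = aij->colmap[gcol] - 1;              /* dense array stores local index + 1; 0 means "not present" */
#endif
  return lcol;                               /* negative result: gcol has no entry in this process's B block */
}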
35: #define CHUNKSIZE 15
36: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37: { \
38: \
39: rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40: rmax = aimax[row]; nrow = ailen[row]; \
41: col1 = col - shift; \
42: \
43: low = 0; high = nrow; \
44: while (high-low > 5) { \
45: t = (low+high)/2; \
46: if (rp[t] > col) high = t; \
47: else low = t; \
48: } \
49: for (_i=low; _i<high; _i++) { \
50: if (rp[_i] > col1) break; \
51: if (rp[_i] == col1) { \
52: if (addv == ADD_VALUES) ap[_i] += value; \
53: else ap[_i] = value; \
54: goto a_noinsert; \
55: } \
56: } \
57: if (nonew == 1) goto a_noinsert; \
58: else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
59: if (nrow >= rmax) { \
60: /* there is no extra room in row, therefore enlarge */ \
61: PetscInt new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62: PetscScalar *new_a; \
63: \
64: if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
65: \
66: /* malloc new storage space */ \
67: len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
68: PetscMalloc(len,&new_a); \
69: new_j = (PetscInt*)(new_a + new_nz); \
70: new_i = new_j + new_nz; \
71: \
72: /* copy over old data into new slots */ \
73: for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74: for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75: PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt)); \
76: len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77: PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78: len*sizeof(PetscInt)); \
79: PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar)); \
80: PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81: len*sizeof(PetscScalar)); \
82: /* free up old matrix storage */ \
83: \
84: PetscFree(a->a); \
85: if (!a->singlemalloc) { \
86: PetscFree(a->i); \
87: PetscFree(a->j); \
88: } \
89: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
90: a->singlemalloc = PETSC_TRUE; \
91: \
92: rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93: rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
95: a->maxnz += CHUNKSIZE; \
96: a->reallocs++; \
97: } \
98: N = nrow++ - 1; a->nz++; \
99: /* shift up all the later entries in this row */ \
100: for (ii=N; ii>=_i; ii--) { \
101: rp[ii+1] = rp[ii]; \
102: ap[ii+1] = ap[ii]; \
103: } \
104: rp[_i] = col1; \
105: ap[_i] = value; \
106: a_noinsert: ; \
107: ailen[row] = nrow; \
108: }
110: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111: { \
112: \
113: rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114: rmax = bimax[row]; nrow = bilen[row]; \
115: col1 = col - shift; \
116: \
117: low = 0; high = nrow; \
118: while (high-low > 5) { \
119: t = (low+high)/2; \
120: if (rp[t] > col) high = t; \
121: else low = t; \
122: } \
123: for (_i=low; _i<high; _i++) { \
124: if (rp[_i] > col1) break; \
125: if (rp[_i] == col1) { \
126: if (addv == ADD_VALUES) ap[_i] += value; \
127: else ap[_i] = value; \
128: goto b_noinsert; \
129: } \
130: } \
131: if (nonew == 1) goto b_noinsert; \
132: else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
133: if (nrow >= rmax) { \
134: /* there is no extra room in row, therefore enlarge */ \
135: PetscInt new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136: PetscScalar *new_a; \
137: \
138: if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
139: \
140: /* malloc new storage space */ \
141: len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
142: PetscMalloc(len,&new_a); \
143: new_j = (PetscInt*)(new_a + new_nz); \
144: new_i = new_j + new_nz; \
145: \
146: /* copy over old data into new slots */ \
147: for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148: for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149: PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt)); \
150: len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151: PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152: len*sizeof(PetscInt)); \
153: PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar)); \
154: PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155: len*sizeof(PetscScalar)); \
156: /* free up old matrix storage */ \
157: \
158: PetscFree(b->a); \
159: if (!b->singlemalloc) { \
160: PetscFree(b->i); \
161: PetscFree(b->j); \
162: } \
163: ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
164: b->singlemalloc = PETSC_TRUE; \
165: \
166: rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167: rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168: PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
169: b->maxnz += CHUNKSIZE; \
170: b->reallocs++; \
171: } \
172: N = nrow++ - 1; b->nz++; \
173: /* shift up all the later entries in this row */ \
174: for (ii=N; ii>=_i; ii--) { \
175: rp[ii+1] = rp[ii]; \
176: ap[ii+1] = ap[ii]; \
177: } \
178: rp[_i] = col1; \
179: ap[_i] = value; \
180: b_noinsert: ; \
181: bilen[row] = nrow; \
182: }
186: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
187: {
188: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
189: PetscScalar value;
191: PetscInt i,j,rstart = aij->rstart,rend = aij->rend;
192: PetscInt cstart = aij->cstart,cend = aij->cend,row,col;
193: PetscTruth roworiented = aij->roworiented;
195: /* Some variables required by the macros below */
196: Mat A = aij->A;
197: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
198: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199: PetscScalar *aa = a->a;
200: PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201: Mat B = aij->B;
202: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
203: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204: PetscScalar *ba = b->a;
206: PetscInt *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207: PetscInt nonew = a->nonew,shift=0;
208: PetscScalar *ap;
211: for (i=0; i<m; i++) {
212: if (im[i] < 0) continue;
213: #if defined(PETSC_USE_BOPT_g)
214: if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
215: #endif
216: if (im[i] >= rstart && im[i] < rend) {
217: row = im[i] - rstart;
218: for (j=0; j<n; j++) {
219: if (in[j] >= cstart && in[j] < cend){
220: col = in[j] - cstart;
221: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222: if (ignorezeroentries && value == 0.0) continue;
223: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224: /* MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv); */
225: } else if (in[j] < 0) continue;
226: #if defined(PETSC_USE_BOPT_g)
227: else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
228: #endif
229: else {
230: if (mat->was_assembled) {
231: if (!aij->colmap) {
232: CreateColmap_MPIAIJ_Private(mat);
233: }
234: #if defined (PETSC_USE_CTABLE)
235: PetscTableFind(aij->colmap,in[j]+1,&col);
236: col--;
237: #else
238: col = aij->colmap[in[j]] - 1;
239: #endif
240: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241: DisAssemble_MPIAIJ(mat);
242: col = in[j];
243: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244: B = aij->B;
245: b = (Mat_SeqAIJ*)B->data;
246: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247: ba = b->a;
248: }
249: } else col = in[j];
250: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251: if (ignorezeroentries && value == 0.0) continue;
252: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253: /* MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv); */
254: }
255: }
256: } else {
257: if (!aij->donotstash) {
258: if (roworiented) {
259: if (ignorezeroentries && v[i*n] == 0.0) continue;
260: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
261: } else {
262: if (ignorezeroentries && v[i] == 0.0) continue;
263: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
264: }
265: }
266: }
267: }
268: return(0);
269: }
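/*
   Illustrative usage sketch, not part of the original file: the calling pattern the routine
   above implements. Entries for locally owned rows go straight into the diagonal (A) or
   off-diagonal (B) block; entries for rows owned elsewhere are stashed and shipped during
   assembly. The helper name is ours and error checking is omitted to match the listing.
*/
static PetscErrorCode MatSetValuesUsage_Sketch(Mat mat)
{
  PetscInt    row,rstart,rend;
  PetscScalar one = 1.0;

  MatGetOwnershipRange(mat,&rstart,&rend);
  for (row=rstart; row<rend; row++) {
    /* locally owned row: handled immediately by MatSetValues_MPIAIJ() */
    MatSetValues(mat,1,&row,1,&row,&one,INSERT_VALUES);
  }
  /* entries set for rows outside [rstart,rend) would be stashed here and
     communicated by the assembly routines below */
  MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
  return(0);
}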
273: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
274: {
275: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
277: PetscInt i,j,rstart = aij->rstart,rend = aij->rend;
278: PetscInt cstart = aij->cstart,cend = aij->cend,row,col;
281: for (i=0; i<m; i++) {
282: if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
283: if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
284: if (idxm[i] >= rstart && idxm[i] < rend) {
285: row = idxm[i] - rstart;
286: for (j=0; j<n; j++) {
287: if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
288: if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
289: if (idxn[j] >= cstart && idxn[j] < cend){
290: col = idxn[j] - cstart;
291: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
292: } else {
293: if (!aij->colmap) {
294: CreateColmap_MPIAIJ_Private(mat);
295: }
296: #if defined (PETSC_USE_CTABLE)
297: PetscTableFind(aij->colmap,idxn[j]+1,&col);
298: col --;
299: #else
300: col = aij->colmap[idxn[j]] - 1;
301: #endif
302: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303: else {
304: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
305: }
306: }
307: }
308: } else {
309: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310: }
311: }
312: return(0);
313: }
317: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318: {
319: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
321: PetscInt nstash,reallocs;
322: InsertMode addv;
325: if (aij->donotstash) {
326: return(0);
327: }
329: /* make sure all processors are either in INSERTMODE or ADDMODE */
330: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
331: if (addv == (ADD_VALUES|INSERT_VALUES)) {
332: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
333: }
334: mat->insertmode = addv; /* in case this processor had no cache */
336: MatStashScatterBegin_Private(&mat->stash,aij->rowners);
337: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
338: PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
339: return(0);
340: }
345: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
346: {
347: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
348: Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
350: PetscMPIInt n;
351: PetscInt i,j,rstart,ncols,flg;
352: PetscInt *row,*col,other_disassembled;
353: PetscScalar *val;
354: InsertMode addv = mat->insertmode;
357: if (!aij->donotstash) {
358: while (1) {
359: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
360: if (!flg) break;
362: for (i=0; i<n;) {
363: /* Now identify the consecutive vals belonging to the same row */
364: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
365: if (j < n) ncols = j-i;
366: else ncols = n-i;
367: /* Now assemble all these values with a single function call */
368: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
369: i = j;
370: }
371: }
372: MatStashScatterEnd_Private(&mat->stash);
373: }
374:
375: MatAssemblyBegin(aij->A,mode);
376: MatAssemblyEnd(aij->A,mode);
378: /* determine if any processor has disassembled; if so, we must
379: also disassemble ourselves, so that we may reassemble. */
380: /*
381: if nonzero structure of submatrix B cannot change then we know that
382: no processor disassembled thus we can skip this stuff
383: */
384: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
385: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
386: if (mat->was_assembled && !other_disassembled) {
387: DisAssemble_MPIAIJ(mat);
388: /* reaccess the b because aij->B was changed */
389: b = (Mat_SeqAIJ *)aij->B->data;
390: }
391: }
393: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394: MatSetUpMultiply_MPIAIJ(mat);
395: }
396: MatAssemblyBegin(aij->B,mode);
397: MatAssemblyEnd(aij->B,mode);
399: if (aij->rowvalues) {
400: PetscFree(aij->rowvalues);
401: aij->rowvalues = 0;
402: }
404: /* used by MatAXPY() */
405: a->xtoy = 0; b->xtoy = 0;
406: a->XtoY = 0; b->XtoY = 0;
408: return(0);
409: }
413: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
414: {
415: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
419: MatZeroEntries(l->A);
420: MatZeroEntries(l->B);
421: return(0);
422: }
426: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
427: {
428: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
430: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag;
431: PetscInt i,N,*rows,*owners = l->rowners;
432: PetscInt *nprocs,j,idx,nsends,row;
433: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
434: PetscInt *rvalues,count,base,slen,*source;
435: PetscInt *lens,*lrows,*values,rstart=l->rstart;
436: MPI_Comm comm = A->comm;
437: MPI_Request *send_waits,*recv_waits;
438: MPI_Status recv_status,*send_status;
439: IS istmp;
440: PetscTruth found;
443: ISGetLocalSize(is,&N);
444: ISGetIndices(is,&rows);
446: /* first count number of contributors to each processor */
447: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
448: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
449: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
450: for (i=0; i<N; i++) {
451: idx = rows[i];
452: found = PETSC_FALSE;
453: for (j=0; j<size; j++) {
454: if (idx >= owners[j] && idx < owners[j+1]) {
455: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
456: }
457: }
458: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
459: }
460: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
462: /* inform other processors of number of messages and max length*/
463: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
465: /* post receives: */
466: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
467: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
468: for (i=0; i<nrecvs; i++) {
469: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
470: }
472: /* do sends:
473: 1) starts[i] gives the starting index in svalues for stuff going to
474: the ith processor
475: */
476: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
477: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
478: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
479: starts[0] = 0;
480: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
481: for (i=0; i<N; i++) {
482: svalues[starts[owner[i]]++] = rows[i];
483: }
484: ISRestoreIndices(is,&rows);
486: starts[0] = 0;
487: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
488: count = 0;
489: for (i=0; i<size; i++) {
490: if (nprocs[2*i+1]) {
491: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
492: }
493: }
494: PetscFree(starts);
496: base = owners[rank];
498: /* wait on receives */
499: PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
500: source = lens + nrecvs;
501: count = nrecvs; slen = 0;
502: while (count) {
503: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
504: /* unpack receives into our local space */
505: MPI_Get_count(&recv_status,MPIU_INT,&n);
506: source[imdex] = recv_status.MPI_SOURCE;
507: lens[imdex] = n;
508: slen += n;
509: count--;
510: }
511: PetscFree(recv_waits);
512:
513: /* move the data into the send scatter */
514: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
515: count = 0;
516: for (i=0; i<nrecvs; i++) {
517: values = rvalues + i*nmax;
518: for (j=0; j<lens[i]; j++) {
519: lrows[count++] = values[j] - base;
520: }
521: }
522: PetscFree(rvalues);
523: PetscFree(lens);
524: PetscFree(owner);
525: PetscFree(nprocs);
526:
527: /* actually zap the local rows */
528: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
529: PetscLogObjectParent(A,istmp);
531: /*
532: Zero the required rows. If the "diagonal block" of the matrix
533: is square and the user wishes to set the diagonal, we use separate
534: code so that MatSetValues() is not called for each diagonal entry,
535: which would allocate new memory with many mallocs and slow things down.
537: Contributed by: Matthew Knepley
538: */
539: /* must zero l->B before l->A because the (diag) case below may put values into l->B */
540: MatZeroRows(l->B,istmp,0);
541: if (diag && (l->A->M == l->A->N)) {
542: MatZeroRows(l->A,istmp,diag);
543: } else if (diag) {
544: MatZeroRows(l->A,istmp,0);
545: if (((Mat_SeqAIJ*)l->A->data)->nonew) {
546: SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
547: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
548: }
549: for (i = 0; i < slen; i++) {
550: row = lrows[i] + rstart;
551: MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
552: }
553: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
554: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
555: } else {
556: MatZeroRows(l->A,istmp,0);
557: }
558: ISDestroy(istmp);
559: PetscFree(lrows);
561: /* wait on sends */
562: if (nsends) {
563: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
564: MPI_Waitall(nsends,send_waits,send_status);
565: PetscFree(send_status);
566: }
567: PetscFree(send_waits);
568: PetscFree(svalues);
570: return(0);
571: }
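/*
   Illustrative usage sketch, not part of the original file: building an index set of global
   row numbers and zeroing those rows with the routine above. Here each process zeros its
   first locally owned row and puts 1.0 on the diagonal; the row choice is arbitrary and the
   helper name is ours.
*/
static PetscErrorCode MatZeroRowsUsage_Sketch(Mat A)
{
  IS          is;
  PetscInt    rstart,rend;
  PetscScalar diag = 1.0;

  MatGetOwnershipRange(A,&rstart,&rend);
  ISCreateGeneral(PETSC_COMM_SELF,1,&rstart,&is);  /* one global row index per process */
  MatZeroRows(A,is,&diag);                         /* pass 0 instead of &diag to leave the rows entirely zero */
  ISDestroy(is);
  return(0);
}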
575: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
576: {
577: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
579: PetscInt nt;
582: VecGetLocalSize(xx,&nt);
583: if (nt != A->n) {
584: SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
585: }
586: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
587: (*a->A->ops->mult)(a->A,xx,yy);
588: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
589: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
590: return(0);
591: }
595: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
596: {
597: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
601: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
602: (*a->A->ops->multadd)(a->A,xx,yy,zz);
603: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
604: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
605: return(0);
606: }
610: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
611: {
612: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
616: /* do nondiagonal part */
617: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
618: /* send it on its way */
619: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
620: /* do local part */
621: (*a->A->ops->multtranspose)(a->A,xx,yy);
622: /* receive remote parts: note this assumes the values are not actually */
623: /* inserted in yy until the next line, which is true for my implementation */
624: /* but may not always be true. */
625: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
626: return(0);
627: }
632: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
633: {
634: MPI_Comm comm;
635: Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
636: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
637: IS Me,Notme;
639: PetscInt M,N,first,last,*notme,i;
640: PetscMPIInt size;
644: /* Easy test: symmetric diagonal block */
645: Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
646: MatIsTranspose(Adia,Bdia,tol,f);
647: if (!*f) return(0);
648: PetscObjectGetComm((PetscObject)Amat,&comm);
649: MPI_Comm_size(comm,&size);
650: if (size == 1) return(0);
652: /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
653: MatGetSize(Amat,&M,&N);
654: MatGetOwnershipRange(Amat,&first,&last);
655: PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);
656: for (i=0; i<first; i++) notme[i] = i;
657: for (i=last; i<M; i++) notme[i-last+first] = i;
658: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);
659: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
660: MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
661: Aoff = Aoffs[0];
662: MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
663: Boff = Boffs[0];
664: MatIsTranspose(Aoff,Boff,tol,f);
665: MatDestroyMatrices(1,&Aoffs);
666: MatDestroyMatrices(1,&Boffs);
667: ISDestroy(Me);
668: ISDestroy(Notme);
670: return(0);
671: }
676: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
677: {
678: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
682: /* do nondiagonal part */
683: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
684: /* send it on its way */
685: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
686: /* do local part */
687: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
688: /* receive remote parts: note this assumes the values are not actually */
689: /* inserted in zz until the next line, which is true for my implementation */
690: /* but may not always be true. */
691: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
692: return(0);
693: }
695: /*
696: This only works correctly for square matrices where the subblock A->A is the
697: diagonal block
698: */
701: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
702: {
704: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
707: if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
708: if (a->rstart != a->cstart || a->rend != a->cend) {
709: SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
710: }
711: MatGetDiagonal(a->A,v);
712: return(0);
713: }
717: PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
718: {
719: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
723: MatScale(aa,a->A);
724: MatScale(aa,a->B);
725: return(0);
726: }
730: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
731: {
732: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
736: #if defined(PETSC_USE_LOG)
737: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
738: #endif
739: MatStashDestroy_Private(&mat->stash);
740: PetscFree(aij->rowners);
741: MatDestroy(aij->A);
742: MatDestroy(aij->B);
743: #if defined (PETSC_USE_CTABLE)
744: if (aij->colmap) {PetscTableDelete(aij->colmap);}
745: #else
746: if (aij->colmap) {PetscFree(aij->colmap);}
747: #endif
748: if (aij->garray) {PetscFree(aij->garray);}
749: if (aij->lvec) {VecDestroy(aij->lvec);}
750: if (aij->Mvctx) {VecScatterDestroy(aij->Mvctx);}
751: if (aij->rowvalues) {PetscFree(aij->rowvalues);}
752: PetscFree(aij);
754: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
755: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
756: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
757: PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);
758: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);
759: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);
760: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);
761: return(0);
762: }
766: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
767: {
768: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
769: Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data;
770: Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data;
771: PetscErrorCode ierr;
772: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
773: int fd;
774: PetscInt nz,header[4],*row_lengths,*range,rlen,i;
775: PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
776: PetscScalar *column_values;
779: MPI_Comm_rank(mat->comm,&rank);
780: MPI_Comm_size(mat->comm,&size);
781: nz = A->nz + B->nz;
782: if (!rank) {
783: header[0] = MAT_FILE_COOKIE;
784: header[1] = mat->M;
785: header[2] = mat->N;
786: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);
787: PetscViewerBinaryGetDescriptor(viewer,&fd);
788: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
789: /* get largest number of rows any processor has */
790: rlen = mat->m;
791: PetscMapGetGlobalRange(mat->rmap,&range);
792: for (i=1; i<size; i++) {
793: rlen = PetscMax(rlen,range[i+1] - range[i]);
794: }
795: } else {
796: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);
797: rlen = mat->m;
798: }
800: /* load up the local row counts */
801: PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);
802: for (i=0; i<mat->m; i++) {
803: row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
804: }
806: /* store the row lengths to the file */
807: if (!rank) {
808: MPI_Status status;
809: PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);
810: for (i=1; i<size; i++) {
811: rlen = range[i+1] - range[i];
812: MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);
813: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);
814: }
815: } else {
816: MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);
817: }
818: PetscFree(row_lengths);
820: /* load up the local column indices */
821: nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
822: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);
823: PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);
824: cnt = 0;
825: for (i=0; i<mat->m; i++) {
826: for (j=B->i[i]; j<B->i[i+1]; j++) {
827: if ( (col = garray[B->j[j]]) > cstart) break;
828: column_indices[cnt++] = col;
829: }
830: for (k=A->i[i]; k<A->i[i+1]; k++) {
831: column_indices[cnt++] = A->j[k] + cstart;
832: }
833: for (; j<B->i[i+1]; j++) {
834: column_indices[cnt++] = garray[B->j[j]];
835: }
836: }
837: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
839: /* store the column indices to the file */
840: if (!rank) {
841: MPI_Status status;
842: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
843: for (i=1; i<size; i++) {
844: MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);
845: if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
846: MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);
847: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);
848: }
849: } else {
850: MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);
851: MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);
852: }
853: PetscFree(column_indices);
855: /* load up the local column values */
856: PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
857: cnt = 0;
858: for (i=0; i<mat->m; i++) {
859: for (j=B->i[i]; j<B->i[i+1]; j++) {
860: if ( garray[B->j[j]] > cstart) break;
861: column_values[cnt++] = B->a[j];
862: }
863: for (k=A->i[i]; k<A->i[i+1]; k++) {
864: column_values[cnt++] = A->a[k];
865: }
866: for (; j<B->i[i+1]; j++) {
867: column_values[cnt++] = B->a[j];
868: }
869: }
870: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
872: /* store the column values to the file */
873: if (!rank) {
874: MPI_Status status;
875: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
876: for (i=1; i<size; i++) {
877: MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);
878: if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
879: MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);
880: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);
881: }
882: } else {
883: MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);
884: MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);
885: }
886: PetscFree(column_values);
887: return(0);
888: }
892: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
893: {
894: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
895: PetscErrorCode ierr;
896: PetscMPIInt rank = aij->rank,size = aij->size;
897: PetscTruth isdraw,iascii,flg,isbinary;
898: PetscViewer sviewer;
899: PetscViewerFormat format;
902: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
903: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
904: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
905: if (iascii) {
906: PetscViewerGetFormat(viewer,&format);
907: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
908: MatInfo info;
909: MPI_Comm_rank(mat->comm,&rank);
910: MatGetInfo(mat,MAT_LOCAL,&info);
911: PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);
912: if (flg) {
913: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
914: rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
915: } else {
916: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
917: rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
918: }
919: MatGetInfo(aij->A,MAT_LOCAL,&info);
920: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
921: MatGetInfo(aij->B,MAT_LOCAL,&info);
922: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
923: PetscViewerFlush(viewer);
924: VecScatterView(aij->Mvctx,viewer);
925: return(0);
926: } else if (format == PETSC_VIEWER_ASCII_INFO) {
927: return(0);
928: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
929: return(0);
930: }
931: } else if (isbinary) {
932: if (size == 1) {
933: PetscObjectSetName((PetscObject)aij->A,mat->name);
934: MatView(aij->A,viewer);
935: } else {
936: MatView_MPIAIJ_Binary(mat,viewer);
937: }
938: return(0);
939: } else if (isdraw) {
940: PetscDraw draw;
941: PetscTruth isnull;
942: PetscViewerDrawGetDraw(viewer,0,&draw);
943: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
944: }
946: if (size == 1) {
947: PetscObjectSetName((PetscObject)aij->A,mat->name);
948: MatView(aij->A,viewer);
949: } else {
950: /* assemble the entire matrix onto first processor. */
951: Mat A;
952: Mat_SeqAIJ *Aloc;
953: PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
954: PetscScalar *a;
956: if (!rank) {
957: MatCreate(mat->comm,M,N,M,N,&A);
958: } else {
959: MatCreate(mat->comm,0,0,M,N,&A);
960: }
961: /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
962: MatSetType(A,MATMPIAIJ);
963: MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);
964: PetscLogObjectParent(mat,A);
966: /* copy over the A part */
967: Aloc = (Mat_SeqAIJ*)aij->A->data;
968: m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
969: row = aij->rstart;
970: for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
971: for (i=0; i<m; i++) {
972: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
973: row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
974: }
975: aj = Aloc->j;
976: for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
978: /* copy over the B part */
979: Aloc = (Mat_SeqAIJ*)aij->B->data;
980: m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
981: row = aij->rstart;
982: PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);
983: ct = cols;
984: for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
985: for (i=0; i<m; i++) {
986: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
987: row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
988: }
989: PetscFree(ct);
990: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
991: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
992: /*
993: Everyone must make the MatView() call since the graphics waits are
994: synchronized across all processors that share the PetscDraw object
995: */
995: */
996: PetscViewerGetSingleton(viewer,&sviewer);
997: if (!rank) {
998: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);
999: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
1000: }
1001: PetscViewerRestoreSingleton(viewer,&sviewer);
1002: MatDestroy(A);
1003: }
1004: return(0);
1005: }
1009: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1010: {
1012: PetscTruth iascii,isdraw,issocket,isbinary;
1013:
1015: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1016: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1017: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1018: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
1019: if (iascii || isdraw || isbinary || issocket) {
1020: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1021: } else {
1022: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1023: }
1024: return(0);
1025: }
1031: PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1032: {
1033: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1035: Vec bb1;
1036: PetscScalar mone=-1.0;
1039: if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1041: VecDuplicate(bb,&bb1);
1043: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1044: if (flag & SOR_ZERO_INITIAL_GUESS) {
1045: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
1046: its--;
1047: }
1048:
1049: while (its--) {
1050: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1051: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1053: /* update rhs: bb1 = bb - B*x */
1054: VecScale(&mone,mat->lvec);
1055: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1057: /* local sweep */
1058: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1059:
1060: }
1061: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1062: if (flag & SOR_ZERO_INITIAL_GUESS) {
1063: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1064: its--;
1065: }
1066: while (its--) {
1067: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1068: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1070: /* update rhs: bb1 = bb - B*x */
1071: VecScale(&mone,mat->lvec);
1072: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1074: /* local sweep */
1075: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1076:
1077: }
1078: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1079: if (flag & SOR_ZERO_INITIAL_GUESS) {
1080: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1081: its--;
1082: }
1083: while (its--) {
1084: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1085: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1087: /* update rhs: bb1 = bb - B*x */
1088: VecScale(&mone,mat->lvec);
1089: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1091: /* local sweep */
1092: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1093:
1094: }
1095: } else {
1096: SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1097: }
1099: VecDestroy(bb1);
1100: return(0);
1101: }
1105: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1106: {
1107: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1108: Mat A = mat->A,B = mat->B;
1110: PetscReal isend[5],irecv[5];
1113: info->block_size = 1.0;
1114: MatGetInfo(A,MAT_LOCAL,info);
1115: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1116: isend[3] = info->memory; isend[4] = info->mallocs;
1117: MatGetInfo(B,MAT_LOCAL,info);
1118: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1119: isend[3] += info->memory; isend[4] += info->mallocs;
1120: if (flag == MAT_LOCAL) {
1121: info->nz_used = isend[0];
1122: info->nz_allocated = isend[1];
1123: info->nz_unneeded = isend[2];
1124: info->memory = isend[3];
1125: info->mallocs = isend[4];
1126: } else if (flag == MAT_GLOBAL_MAX) {
1127: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
1128: info->nz_used = irecv[0];
1129: info->nz_allocated = irecv[1];
1130: info->nz_unneeded = irecv[2];
1131: info->memory = irecv[3];
1132: info->mallocs = irecv[4];
1133: } else if (flag == MAT_GLOBAL_SUM) {
1134: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
1135: info->nz_used = irecv[0];
1136: info->nz_allocated = irecv[1];
1137: info->nz_unneeded = irecv[2];
1138: info->memory = irecv[3];
1139: info->mallocs = irecv[4];
1140: }
1141: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1142: info->fill_ratio_needed = 0;
1143: info->factor_mallocs = 0;
1144: info->rows_global = (double)matin->M;
1145: info->columns_global = (double)matin->N;
1146: info->rows_local = (double)matin->m;
1147: info->columns_local = (double)matin->N;
1149: return(0);
1150: }
1154: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1155: {
1156: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1160: switch (op) {
1161: case MAT_NO_NEW_NONZERO_LOCATIONS:
1162: case MAT_YES_NEW_NONZERO_LOCATIONS:
1163: case MAT_COLUMNS_UNSORTED:
1164: case MAT_COLUMNS_SORTED:
1165: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1166: case MAT_KEEP_ZEROED_ROWS:
1167: case MAT_NEW_NONZERO_LOCATION_ERR:
1168: case MAT_USE_INODES:
1169: case MAT_DO_NOT_USE_INODES:
1170: case MAT_IGNORE_ZERO_ENTRIES:
1171: MatSetOption(a->A,op);
1172: MatSetOption(a->B,op);
1173: break;
1174: case MAT_ROW_ORIENTED:
1175: a->roworiented = PETSC_TRUE;
1176: MatSetOption(a->A,op);
1177: MatSetOption(a->B,op);
1178: break;
1179: case MAT_ROWS_SORTED:
1180: case MAT_ROWS_UNSORTED:
1181: case MAT_YES_NEW_DIAGONALS:
1182: PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1183: break;
1184: case MAT_COLUMN_ORIENTED:
1185: a->roworiented = PETSC_FALSE;
1186: MatSetOption(a->A,op);
1187: MatSetOption(a->B,op);
1188: break;
1189: case MAT_IGNORE_OFF_PROC_ENTRIES:
1190: a->donotstash = PETSC_TRUE;
1191: break;
1192: case MAT_NO_NEW_DIAGONALS:
1193: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1194: case MAT_SYMMETRIC:
1195: case MAT_STRUCTURALLY_SYMMETRIC:
1196: case MAT_HERMITIAN:
1197: case MAT_SYMMETRY_ETERNAL:
1198: MatSetOption(a->A,op);
1199: break;
1200: case MAT_NOT_SYMMETRIC:
1201: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1202: case MAT_NOT_HERMITIAN:
1203: case MAT_NOT_SYMMETRY_ETERNAL:
1204: break;
1205: default:
1206: SETERRQ(PETSC_ERR_SUP,"unknown option");
1207: }
1208: return(0);
1209: }
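/*
   Illustrative usage sketch, not part of the original file: a few of the options handled by
   the routine above. MAT_IGNORE_OFF_PROC_ENTRIES only sets a flag at this (MPI) level,
   while options such as MAT_NO_NEW_NONZERO_LOCATIONS are forwarded to both sequential
   blocks. The helper name is ours.
*/
static PetscErrorCode MatSetOptionUsage_Sketch(Mat A)
{
  MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES);   /* do not stash entries destined for other processes */
  MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);  /* forwarded to both a->A and a->B */
  MatSetOption(A,MAT_ROW_ORIENTED);              /* MatSetValues() input is interpreted row-wise */
  return(0);
}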
1213: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1214: {
1215: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1216: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1218: PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1219: PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1220: PetscInt *cmap,*idx_p;
1223: if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1224: mat->getrowactive = PETSC_TRUE;
1226: if (!mat->rowvalues && (idx || v)) {
1227: /*
1228: allocate enough space to hold information from the longest row.
1229: */
1230: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1231: PetscInt max = 1,tmp;
1232: for (i=0; i<matin->m; i++) {
1233: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1234: if (max < tmp) { max = tmp; }
1235: }
1236: PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);
1237: mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1238: }
1240: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1241: lrow = row - rstart;
1243: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1244: if (!v) {pvA = 0; pvB = 0;}
1245: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1246: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1247: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1248: nztot = nzA + nzB;
1250: cmap = mat->garray;
1251: if (v || idx) {
1252: if (nztot) {
1253: /* Sort by increasing column numbers, assuming A and B already sorted */
1254: PetscInt imark = -1;
1255: if (v) {
1256: *v = v_p = mat->rowvalues;
1257: for (i=0; i<nzB; i++) {
1258: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1259: else break;
1260: }
1261: imark = i;
1262: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1263: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1264: }
1265: if (idx) {
1266: *idx = idx_p = mat->rowindices;
1267: if (imark > -1) {
1268: for (i=0; i<imark; i++) {
1269: idx_p[i] = cmap[cworkB[i]];
1270: }
1271: } else {
1272: for (i=0; i<nzB; i++) {
1273: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1274: else break;
1275: }
1276: imark = i;
1277: }
1278: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1279: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1280: }
1281: } else {
1282: if (idx) *idx = 0;
1283: if (v) *v = 0;
1284: }
1285: }
1286: *nz = nztot;
1287: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1288: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1289: return(0);
1290: }
1294: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1295: {
1296: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1299: if (aij->getrowactive == PETSC_FALSE) {
1300: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1301: }
1302: aij->getrowactive = PETSC_FALSE;
1303: return(0);
1304: }
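/*
   Illustrative usage sketch, not part of the original file: extracting locally owned rows
   with the routines above. MatGetRow_MPIAIJ() merges the A and B blocks into one row sorted
   by global column index; every MatGetRow() must be paired with a MatRestoreRow(). The
   helper name is ours and error checking is omitted to match the listing.
*/
static PetscErrorCode MatGetRowUsage_Sketch(Mat mat)
{
  PetscInt    row,rstart,rend,nz,*cols;
  PetscScalar *vals;

  MatGetOwnershipRange(mat,&rstart,&rend);
  for (row=rstart; row<rend; row++) {
    MatGetRow(mat,row,&nz,&cols,&vals);
    /* ... inspect the nz entries (cols[], vals[]) of this row here ... */
    MatRestoreRow(mat,row,&nz,&cols,&vals);
  }
  return(0);
}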
1308: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1309: {
1310: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1311: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1313: PetscInt i,j,cstart = aij->cstart;
1314: PetscReal sum = 0.0;
1315: PetscScalar *v;
1318: if (aij->size == 1) {
1319: MatNorm(aij->A,type,norm);
1320: } else {
1321: if (type == NORM_FROBENIUS) {
1322: v = amat->a;
1323: for (i=0; i<amat->nz; i++) {
1324: #if defined(PETSC_USE_COMPLEX)
1325: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1326: #else
1327: sum += (*v)*(*v); v++;
1328: #endif
1329: }
1330: v = bmat->a;
1331: for (i=0; i<bmat->nz; i++) {
1332: #if defined(PETSC_USE_COMPLEX)
1333: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1334: #else
1335: sum += (*v)*(*v); v++;
1336: #endif
1337: }
1338: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1339: *norm = sqrt(*norm);
1340: } else if (type == NORM_1) { /* max column norm */
1341: PetscReal *tmp,*tmp2;
1342: PetscInt *jj,*garray = aij->garray;
1343: PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);
1344: PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);
1345: PetscMemzero(tmp,mat->N*sizeof(PetscReal));
1346: *norm = 0.0;
1347: v = amat->a; jj = amat->j;
1348: for (j=0; j<amat->nz; j++) {
1349: tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
1350: }
1351: v = bmat->a; jj = bmat->j;
1352: for (j=0; j<bmat->nz; j++) {
1353: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1354: }
1355: MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1356: for (j=0; j<mat->N; j++) {
1357: if (tmp2[j] > *norm) *norm = tmp2[j];
1358: }
1359: PetscFree(tmp);
1360: PetscFree(tmp2);
1361: } else if (type == NORM_INFINITY) { /* max row norm */
1362: PetscReal ntemp = 0.0;
1363: for (j=0; j<aij->A->m; j++) {
1364: v = amat->a + amat->i[j];
1365: sum = 0.0;
1366: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1367: sum += PetscAbsScalar(*v); v++;
1368: }
1369: v = bmat->a + bmat->i[j];
1370: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1371: sum += PetscAbsScalar(*v); v++;
1372: }
1373: if (sum > ntemp) ntemp = sum;
1374: }
1375: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1376: } else {
1377: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1378: }
1379: }
1380: return(0);
1381: }
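/*
   Illustrative usage sketch, not part of the original file: the three norms supported by the
   routine above. The two norm (NORM_2) is rejected with PETSC_ERR_SUP when more than one
   process is used. The helper name is ours.
*/
static PetscErrorCode MatNormUsage_Sketch(Mat mat)
{
  PetscReal nrm;

  MatNorm(mat,NORM_FROBENIUS,&nrm);  /* square root of the global sum of squares */
  MatNorm(mat,NORM_1,&nrm);          /* maximum column sum */
  MatNorm(mat,NORM_INFINITY,&nrm);   /* maximum row sum */
  return(0);
}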
1385: PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1386: {
1387: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1388: Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data;
1390: PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1391: Mat B;
1392: PetscScalar *array;
1395: if (!matout && M != N) {
1396: SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1397: }
1399: MatCreate(A->comm,A->n,A->m,N,M,&B);
1400: MatSetType(B,A->type_name);
1401: MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);
1403: /* copy over the A part */
1404: Aloc = (Mat_SeqAIJ*)a->A->data;
1405: m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1406: row = a->rstart;
1407: for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1408: for (i=0; i<m; i++) {
1409: MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);
1410: row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1411: }
1412: aj = Aloc->j;
1413: for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1415: /* copy over the B part */
1416: Aloc = (Mat_SeqAIJ*)a->B->data;
1417: m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1418: row = a->rstart;
1419: PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);
1420: ct = cols;
1421: for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1422: for (i=0; i<m; i++) {
1423: MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);
1424: row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1425: }
1426: PetscFree(ct);
1427: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1428: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1429: if (matout) {
1430: *matout = B;
1431: } else {
1432: MatHeaderCopy(A,B);
1433: }
1434: return(0);
1435: }
1439: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1440: {
1441: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1442: Mat a = aij->A,b = aij->B;
1444: PetscInt s1,s2,s3;
1447: MatGetLocalSize(mat,&s2,&s3);
1448: if (rr) {
1449: VecGetLocalSize(rr,&s1);
1450: if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1451: /* Overlap communication with computation. */
1452: VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1453: }
1454: if (ll) {
1455: VecGetLocalSize(ll,&s1);
1456: if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1457: (*b->ops->diagonalscale)(b,ll,0);
1458: }
1459: /* scale the diagonal block */
1460: (*a->ops->diagonalscale)(a,ll,rr);
1462: if (rr) {
1463: /* Do a scatter end and then right scale the off-diagonal block */
1464: VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1465: (*b->ops->diagonalscale)(b,0,aij->lvec);
1466: }
1467:
1468: return(0);
1469: }
1474: PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1475: {
1476: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1480: if (!a->rank) {
1481: MatPrintHelp_SeqAIJ(a->A);
1482: }
1483: return(0);
1484: }
1488: PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1489: {
1490: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1494: MatSetBlockSize(a->A,bs);
1495: MatSetBlockSize(a->B,bs);
1496: return(0);
1497: }
1500: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1501: {
1502: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1506: MatSetUnfactored(a->A);
1507: return(0);
1508: }
1512: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1513: {
1514: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1515: Mat a,b,c,d;
1516: PetscTruth flg;
1520: a = matA->A; b = matA->B;
1521: c = matB->A; d = matB->B;
1523: MatEqual(a,c,&flg);
1524: if (flg == PETSC_TRUE) {
1525: MatEqual(b,d,&flg);
1526: }
1527: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1528: return(0);
1529: }
1533: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1534: {
1536: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1537: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1540: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1541: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1542: /* Because of the column compression in the off-processor part of the matrix a->B,
1543: the number of columns in a->B and b->B may be different; hence we cannot call
1544: MatCopy() directly on the two parts. If need be, a copy more efficient than
1545: MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1546: then copying the submatrices */
1547: MatCopy_Basic(A,B,str);
1548: } else {
1549: MatCopy(a->A,b->A,str);
1550: MatCopy(a->B,b->B,str);
1551: }
1552: return(0);
1553: }
1557: PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1558: {
1562: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1563: return(0);
1564: }
1566: #include "petscblaslapack.h"
1569: PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1570: {
1572: PetscInt i;
1573: Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1574: PetscBLASInt bnz,one=1;
1575: Mat_SeqAIJ *x,*y;
1578: if (str == SAME_NONZERO_PATTERN) {
1579: x = (Mat_SeqAIJ *)xx->A->data;
1580: y = (Mat_SeqAIJ *)yy->A->data;
1581: bnz = (PetscBLASInt)x->nz;
1582: BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1583: x = (Mat_SeqAIJ *)xx->B->data;
1584: y = (Mat_SeqAIJ *)yy->B->data;
1585: bnz = (PetscBLASInt)x->nz;
1586: BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1587: } else if (str == SUBSET_NONZERO_PATTERN) {
1588: MatAXPY_SeqAIJ(a,xx->A,yy->A,str);
1590: x = (Mat_SeqAIJ *)xx->B->data;
1591: y = (Mat_SeqAIJ *)yy->B->data;
1592: if (y->xtoy && y->XtoY != xx->B) {
1593: PetscFree(y->xtoy);
1594: MatDestroy(y->XtoY);
1595: }
1596: if (!y->xtoy) { /* get xtoy */
1597: MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
1598: y->XtoY = xx->B;
1599: }
1600: for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1601: } else {
1602: MatAXPY_Basic(a,X,Y,str);
1603: }
1604: return(0);
1605: }
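/*
   Illustrative usage sketch, not part of the original file: computing Y <- a*X + Y with the
   routine above. In this version of PETSc the scalar is passed by pointer and the matrices
   in the order (X, Y). The helper name is ours.
*/
static PetscErrorCode MatAXPYUsage_Sketch(Mat X,Mat Y)
{
  PetscScalar a = 2.0;

  MatAXPY(&a,X,Y,SAME_NONZERO_PATTERN);      /* fastest path: identical nonzero patterns */
  MatAXPY(&a,X,Y,DIFFERENT_NONZERO_PATTERN); /* general case: falls back to MatAXPY_Basic() */
  return(0);
}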
1607: /* -------------------------------------------------------------------*/
1608: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1609: MatGetRow_MPIAIJ,
1610: MatRestoreRow_MPIAIJ,
1611: MatMult_MPIAIJ,
1612: /* 4*/ MatMultAdd_MPIAIJ,
1613: MatMultTranspose_MPIAIJ,
1614: MatMultTransposeAdd_MPIAIJ,
1615: 0,
1616: 0,
1617: 0,
1618: /*10*/ 0,
1619: 0,
1620: 0,
1621: MatRelax_MPIAIJ,
1622: MatTranspose_MPIAIJ,
1623: /*15*/ MatGetInfo_MPIAIJ,
1624: MatEqual_MPIAIJ,
1625: MatGetDiagonal_MPIAIJ,
1626: MatDiagonalScale_MPIAIJ,
1627: MatNorm_MPIAIJ,
1628: /*20*/ MatAssemblyBegin_MPIAIJ,
1629: MatAssemblyEnd_MPIAIJ,
1630: 0,
1631: MatSetOption_MPIAIJ,
1632: MatZeroEntries_MPIAIJ,
1633: /*25*/ MatZeroRows_MPIAIJ,
1634: 0,
1635: 0,
1636: 0,
1637: 0,
1638: /*30*/ MatSetUpPreallocation_MPIAIJ,
1639: 0,
1640: 0,
1641: 0,
1642: 0,
1643: /*35*/ MatDuplicate_MPIAIJ,
1644: 0,
1645: 0,
1646: 0,
1647: 0,
1648: /*40*/ MatAXPY_MPIAIJ,
1649: MatGetSubMatrices_MPIAIJ,
1650: MatIncreaseOverlap_MPIAIJ,
1651: MatGetValues_MPIAIJ,
1652: MatCopy_MPIAIJ,
1653: /*45*/ MatPrintHelp_MPIAIJ,
1654: MatScale_MPIAIJ,
1655: 0,
1656: 0,
1657: 0,
1658: /*50*/ MatSetBlockSize_MPIAIJ,
1659: 0,
1660: 0,
1661: 0,
1662: 0,
1663: /*55*/ MatFDColoringCreate_MPIAIJ,
1664: 0,
1665: MatSetUnfactored_MPIAIJ,
1666: 0,
1667: 0,
1668: /*60*/ MatGetSubMatrix_MPIAIJ,
1669: MatDestroy_MPIAIJ,
1670: MatView_MPIAIJ,
1671: MatGetPetscMaps_Petsc,
1672: 0,
1673: /*65*/ 0,
1674: 0,
1675: 0,
1676: 0,
1677: 0,
1678: /*70*/ 0,
1679: 0,
1680: MatSetColoring_MPIAIJ,
1681: MatSetValuesAdic_MPIAIJ,
1682: MatSetValuesAdifor_MPIAIJ,
1683: /*75*/ 0,
1684: 0,
1685: 0,
1686: 0,
1687: 0,
1688: /*80*/ 0,
1689: 0,
1690: 0,
1691: 0,
1692: /*84*/ MatLoad_MPIAIJ,
1693: 0,
1694: 0,
1695: 0,
1696: 0,
1697: 0,
1698: /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1699: MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1700: MatMatMultNumeric_MPIAIJ_MPIAIJ,
1701: MatPtAP_MPIAIJ_MPIAIJ,
1702: MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1703: /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1704: 0,
1705: 0,
1706: 0};
1708: /* ----------------------------------------------------------------------------------------*/
1713: PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1714: {
1715: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1719: MatStoreValues(aij->A);
1720: MatStoreValues(aij->B);
1721: return(0);
1722: }
1728: PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1729: {
1730: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1734: MatRetrieveValues(aij->A);
1735: MatRetrieveValues(aij->B);
1736: return(0);
1737: }
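/*
   Illustrative usage sketch, not part of the original file: the save/restore pattern the two
   routines above support, typically used when a matrix with a fixed nonzero structure is
   refilled many times after MAT_NO_NEW_NONZERO_LOCATIONS has been set. The helper name is
   ours.
*/
static PetscErrorCode MatStoreRetrieveUsage_Sketch(Mat mat)
{
  MatStoreValues(mat);     /* snapshot the current numerical values of the A and B blocks */
  /* ... overwrite the matrix values, e.g. by an in-place factorization ... */
  MatRetrieveValues(mat);  /* restore the saved values into the same nonzero pattern */
  return(0);
}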
1740: #include "petscpc.h"
1744: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1745: {
1746: Mat_MPIAIJ *b;
1748: PetscInt i;
1751: B->preallocated = PETSC_TRUE;
1752: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1753: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1754: if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1755: if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1756: if (d_nnz) {
1757: for (i=0; i<B->m; i++) {
1758: if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1759: }
1760: }
1761: if (o_nnz) {
1762: for (i=0; i<B->m; i++) {
1763: if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1764: }
1765: }
1766: b = (Mat_MPIAIJ*)B->data;
1767: MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
1768: MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
1770: return(0);
1771: }
1774: /*MC
1775: MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1777: This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1778: and MATMPIAIJ otherwise. As a result, for single process communicators,
1779: MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
1780: for communicators controlling multiple processes. It is recommended that you call both of
1781: the above preallocation routines for simplicity.
1783: Options Database Keys:
1784: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1786: Level: beginner
1788: .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1789: M*/
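/*
   Illustrative usage sketch, not part of the original file: creating an AIJ matrix as the
   manual page above recommends, calling both preallocation routines so the same code works
   with one process (MATSEQAIJ) and with several (MATMPIAIJ). The sizes and nonzero counts
   are placeholders and the helper name is ours.
*/
static PetscErrorCode MatCreateAIJUsage_Sketch(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,Mat *A)
{
  MatCreate(comm,m,n,M,N,A);
  MatSetType(*A,MATAIJ);
  MatSeqAIJSetPreallocation(*A,5,PETSC_NULL);              /* used when comm has one process */
  MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL); /* used when comm has several processes */
  return(0);
}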
1794: PetscErrorCode MatCreate_AIJ(Mat A)
1795: {
1797: PetscMPIInt size;
1800: PetscObjectChangeTypeName((PetscObject)A,MATAIJ);
1801: MPI_Comm_size(A->comm,&size);
1802: if (size == 1) {
1803: MatSetType(A,MATSEQAIJ);
1804: } else {
1805: MatSetType(A,MATMPIAIJ);
1806: }
1807: return(0);
1808: }
1813: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1814: {
1815: Mat mat;
1816: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1820: *newmat = 0;
1821: MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);
1822: MatSetType(mat,matin->type_name);
1823: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
1824: a = (Mat_MPIAIJ*)mat->data;
1825:
1826: mat->factor = matin->factor;
1827: mat->bs = matin->bs;
1828: mat->assembled = PETSC_TRUE;
1829: mat->insertmode = NOT_SET_VALUES;
1830: mat->preallocated = PETSC_TRUE;
1832: a->rstart = oldmat->rstart;
1833: a->rend = oldmat->rend;
1834: a->cstart = oldmat->cstart;
1835: a->cend = oldmat->cend;
1836: a->size = oldmat->size;
1837: a->rank = oldmat->rank;
1838: a->donotstash = oldmat->donotstash;
1839: a->roworiented = oldmat->roworiented;
1840: a->rowindices = 0;
1841: a->rowvalues = 0;
1842: a->getrowactive = PETSC_FALSE;
1844: PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));
1845: MatStashCreate_Private(matin->comm,1,&mat->stash);
1846: if (oldmat->colmap) {
1847: #if defined (PETSC_USE_CTABLE)
1848: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
1849: #else
1850: PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);
1851: PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));
1852: PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));
1853: #endif
1854: } else a->colmap = 0;
1855: if (oldmat->garray) {
1856: PetscInt len;
1857: len = oldmat->B->n;
1858: PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);
1859: PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1860: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt)); }
1861: } else a->garray = 0;
1862:
1863: VecDuplicate(oldmat->lvec,&a->lvec);
1864: PetscLogObjectParent(mat,a->lvec);
1865: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
1866: PetscLogObjectParent(mat,a->Mvctx);
1867: MatDestroy(a->A);
1868: MatDuplicate(oldmat->A,cpvalues,&a->A);
1869: PetscLogObjectParent(mat,a->A);
1870: MatDestroy(a->B);
1871: MatDuplicate(oldmat->B,cpvalues,&a->B);
1872: PetscLogObjectParent(mat,a->B);
1873: PetscFListDuplicate(matin->qlist,&mat->qlist);
1874: *newmat = mat;
1875: return(0);
1876: }
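/*
   A minimal usage sketch (illustrative only, not part of the original source): duplicating
   an assembled matrix with MatDuplicate(), whose MPIAIJ implementation is given above.
   A is assumed to be an existing, assembled Mat.
*/
static PetscErrorCode ExampleDuplicate(Mat A)
{
  Mat B;
  MatDuplicate(A,MAT_COPY_VALUES,&B);   /* new matrix with the same layout, nonzero structure and values */
  /* ... use B ... */
  MatDestroy(B);
  return(0);
}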
1878: #include petscsys.h
1882: PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1883: {
1884: Mat A;
1885: PetscScalar *vals,*svals;
1886: MPI_Comm comm = ((PetscObject)viewer)->comm;
1887: MPI_Status status;
1889: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1890: PetscInt i,nz,j,rstart,rend;
1891: PetscInt header[4],*rowlengths = 0,M,N,m,*cols;
1892: PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1893: PetscInt cend,cstart,n,*rowners;
1894: int fd;
1897: MPI_Comm_size(comm,&size);
1898: MPI_Comm_rank(comm,&rank);
1899: if (!rank) {
1900: PetscViewerBinaryGetDescriptor(viewer,&fd);
1901: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1902: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1903: }
1905: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1906: M = header[1]; N = header[2];
1907: /* determine ownership of all rows */
1908: m = M/size + ((M % size) > rank);
1909: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
1910: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
1911: rowners[0] = 0;
1912: for (i=2; i<=size; i++) {
1913: rowners[i] += rowners[i-1];
1914: }
1915: rstart = rowners[rank];
1916: rend = rowners[rank+1];
1918: /* distribute row lengths to all processors */
1919: PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);
1920: if (!rank) {
1921: PetscBinaryRead(fd,ourlens,m,PETSC_INT);
1922: PetscMalloc(m*sizeof(PetscInt),&rowlengths);
1923: PetscMalloc(size*sizeof(PetscInt),&procsnz);
1924: PetscMemzero(procsnz,size*sizeof(PetscInt));
1925: for (j=0; j<m; j++) {
1926: procsnz[0] += ourlens[j];
1927: }
1928: for (i=1; i<size; i++) {
1929: PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);
1930: /* calculate the number of nonzeros on each processor */
1931: for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1932: procsnz[i] += rowlengths[j];
1933: }
1934: MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
1935: }
1936: PetscFree(rowlengths);
1937: } else {
1938: MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);
1939: }
1941: if (!rank) {
1942: /* determine max buffer needed and allocate it */
1943: maxnz = 0;
1944: for (i=0; i<size; i++) {
1945: maxnz = PetscMax(maxnz,procsnz[i]);
1946: }
1947: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
1949: /* read in my part of the matrix column indices */
1950: nz = procsnz[0];
1951: PetscMalloc(nz*sizeof(PetscInt),&mycols);
1952: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1954: /* read in everyone else's and ship it off */
1955: for (i=1; i<size; i++) {
1956: nz = procsnz[i];
1957: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1958: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1959: }
1960: PetscFree(cols);
1961: } else {
1962: /* determine buffer space needed for message */
1963: nz = 0;
1964: for (i=0; i<m; i++) {
1965: nz += ourlens[i];
1966: }
1967: PetscMalloc(nz*sizeof(PetscInt),&mycols);
1969: /* receive message of column indices*/
1970: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1971: MPI_Get_count(&status,MPIU_INT,&maxnz);
1972: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1973: }
1975: /* determine column ownership if matrix is not square */
1976: if (N != M) {
1977: n = N/size + ((N % size) > rank);
1978: MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);
1979: cstart = cend - n;
1980: } else {
1981: cstart = rstart;
1982: cend = rend;
1983: n = cend - cstart;
1984: }
1986: /* loop over local rows, determining number of off diagonal entries */
1987: PetscMemzero(offlens,m*sizeof(PetscInt));
1988: jj = 0;
1989: for (i=0; i<m; i++) {
1990: for (j=0; j<ourlens[i]; j++) {
1991: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1992: jj++;
1993: }
1994: }
1996: /* create our matrix */
1997: for (i=0; i<m; i++) {
1998: ourlens[i] -= offlens[i];
1999: }
2000: MatCreate(comm,m,n,M,N,&A);
2001: MatSetType(A,type);
2002: MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);
2004: MatSetOption(A,MAT_COLUMNS_SORTED);
2005: for (i=0; i<m; i++) {
2006: ourlens[i] += offlens[i];
2007: }
2009: if (!rank) {
2010: PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);
2012: /* read in my part of the matrix numerical values */
2013: nz = procsnz[0];
2014: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2015:
2016: /* insert into matrix */
2017: jj = rstart;
2018: smycols = mycols;
2019: svals = vals;
2020: for (i=0; i<m; i++) {
2021: MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2022: smycols += ourlens[i];
2023: svals += ourlens[i];
2024: jj++;
2025: }
2027: /* read in other processors and ship out */
2028: for (i=1; i<size; i++) {
2029: nz = procsnz[i];
2030: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2031: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
2032: }
2033: PetscFree(procsnz);
2034: } else {
2035: /* receive numeric values */
2036: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
2038: /* receive message of values*/
2039: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
2040: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2041: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2043: /* insert into matrix */
2044: jj = rstart;
2045: smycols = mycols;
2046: svals = vals;
2047: for (i=0; i<m; i++) {
2048: MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2049: smycols += ourlens[i];
2050: svals += ourlens[i];
2051: jj++;
2052: }
2053: }
2054: PetscFree2(ourlens,offlens);
2055: PetscFree(vals);
2056: PetscFree(mycols);
2057: PetscFree(rowners);
2059: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2060: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2061: *newmat = A;
2062: return(0);
2063: }
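/*
   A minimal usage sketch (illustrative only, not part of the original source): loading a
   parallel AIJ matrix from a PETSc binary file. The file name and the PETSC_FILE_RDONLY
   mode are assumptions; the MatLoad() calling sequence is assumed to match
   MatLoad_MPIAIJ() above.
*/
static PetscErrorCode ExampleLoadAIJ(MPI_Comm comm,Mat *A)
{
  PetscViewer viewer;
  PetscViewerBinaryOpen(comm,"matrix.dat",PETSC_FILE_RDONLY,&viewer);
  MatLoad(viewer,MATMPIAIJ,A);          /* reads header, row lengths, column indices and values */
  PetscViewerDestroy(viewer);
  return(0);
}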
2067: /*
2068: Not great since it makes two copies of the submatrix: first a local SeqAIJ copy,
2069: and then the end result obtained by concatenating the local matrices.
2070: Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2071: */
2072: PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2073: {
2075: PetscMPIInt rank,size;
2076: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j;
2077: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2078: Mat *local,M,Mreuse;
2079: PetscScalar *vwork,*aa;
2080: MPI_Comm comm = mat->comm;
2081: Mat_SeqAIJ *aij;
2085: MPI_Comm_rank(comm,&rank);
2086: MPI_Comm_size(comm,&size);
2088: if (call == MAT_REUSE_MATRIX) {
2089: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
2090: if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2091: local = &Mreuse;
2092: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
2093: } else {
2094: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
2095: Mreuse = *local;
2096: PetscFree(local);
2097: }
2099: /*
2100: m - number of local rows
2101: n - number of columns (same on all processors)
2102: rstart - first row in new global matrix generated
2103: */
2104: MatGetSize(Mreuse,&m,&n);
2105: if (call == MAT_INITIAL_MATRIX) {
2106: aij = (Mat_SeqAIJ*)(Mreuse)->data;
2107: ii = aij->i;
2108: jj = aij->j;
2110: /*
2111: Determine the number of non-zeros in the diagonal and off-diagonal
2112: portions of the matrix in order to do correct preallocation
2113: */
2115: /* first get start and end of "diagonal" columns */
2116: if (csize == PETSC_DECIDE) {
2117: ISGetSize(isrow,&mglobal);
2118: if (mglobal == n) { /* square matrix */
2119: nlocal = m;
2120: } else {
2121: nlocal = n/size + ((n % size) > rank);
2122: }
2123: } else {
2124: nlocal = csize;
2125: }
2126: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
2127: rstart = rend - nlocal;
2128: if (rank == size - 1 && rend != n) {
2129: SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2130: }
2132: /* next, compute all the lengths */
2133: PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);
2134: olens = dlens + m;
2135: for (i=0; i<m; i++) {
2136: jend = ii[i+1] - ii[i];
2137: olen = 0;
2138: dlen = 0;
2139: for (j=0; j<jend; j++) {
2140: if (*jj < rstart || *jj >= rend) olen++;
2141: else dlen++;
2142: jj++;
2143: }
2144: olens[i] = olen;
2145: dlens[i] = dlen;
2146: }
2147: MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);
2148: MatSetType(M,mat->type_name);
2149: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
2150: PetscFree(dlens);
2151: } else {
2152: PetscInt ml,nl;
2154: M = *newmat;
2155: MatGetLocalSize(M,&ml,&nl);
2156: if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2157: MatZeroEntries(M);
2158: /*
2159: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2160: rather than the slower MatSetValues().
2161: */
2162: M->was_assembled = PETSC_TRUE;
2163: M->assembled = PETSC_FALSE;
2164: }
2165: MatGetOwnershipRange(M,&rstart,&rend);
2166: aij = (Mat_SeqAIJ*)(Mreuse)->data;
2167: ii = aij->i;
2168: jj = aij->j;
2169: aa = aij->a;
2170: for (i=0; i<m; i++) {
2171: row = rstart + i;
2172: nz = ii[i+1] - ii[i];
2173: cwork = jj; jj += nz;
2174: vwork = aa; aa += nz;
2175: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
2176: }
2178: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
2179: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
2180: *newmat = M;
2182: /* save submatrix used in processor for next request */
2183: if (call == MAT_INITIAL_MATRIX) {
2184: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
2185: PetscObjectDereference((PetscObject)Mreuse);
2186: }
2188: return(0);
2189: }
2194: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2195: {
2196: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2197: PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2198: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2199: const PetscInt *JJ;
2200: PetscScalar *values;
2204: #if defined(PETSC_OPT_g)
2205: if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2206: #endif
2207: PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);
2208: o_nnz = d_nnz + m;
2210: for (i=0; i<m; i++) {
2211: nnz = I[i+1]- I[i];
2212: JJ = J + I[i];
2213: nnz_max = PetscMax(nnz_max,nnz);
2214: #if defined(PETSC_OPT_g)
2215: if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2216: #endif
2217: for (j=0; j<nnz; j++) {
2218: if (*JJ >= cstart) break;
2219: JJ++;
2220: }
2221: d = 0;
2222: for (; j<nnz; j++) {
2223: if (*JJ++ >= cend) break;
2224: d++;
2225: }
2226: d_nnz[i] = d;
2227: o_nnz[i] = nnz - d;
2228: }
2229: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
2230: PetscFree(d_nnz);
2232: if (v) values = (PetscScalar*)v;
2233: else {
2234: PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);
2235: PetscMemzero(values,nnz_max*sizeof(PetscScalar));
2236: }
2238: MatSetOption(B,MAT_COLUMNS_SORTED);
2239: for (i=0; i<m; i++) {
2240: ii = i + rstart;
2241: nnz = I[i+1]- I[i];
2242: MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);
2243: }
2244: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2245: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2246: MatSetOption(B,MAT_COLUMNS_UNSORTED);
2248: if (!v) {
2249: PetscFree(values);
2250: }
2251: return(0);
2252: }
2257: /*@C
2258: MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2259: (the default parallel PETSc format).
2261: Collective on MPI_Comm
2263: Input Parameters:
2264: + A - the matrix
2265: . i - the indices into j for the start of each local row (starts with zero)
2266: . j - the column indices for each local row (starts with zero); these must be sorted within each row
2267: - v - optional values in the matrix
2269: Level: developer
2271: .keywords: matrix, aij, compressed row, sparse, parallel
2273: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2274: @*/
2275: PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2276: {
2277: PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2280: PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);
2281: if (f) {
2282: (*f)(B,i,j,v);
2283: }
2284: return(0);
2285: }
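/*
   A minimal usage sketch (illustrative only, not part of the original source): preallocating
   and filling an MPIAIJ matrix B from local CSR arrays. The array names are hypothetical.
*/
static PetscErrorCode ExamplePreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  /* i[] has (local rows)+1 entries with i[0] = 0; j[] holds the global column indices,
     sorted within each row, and v[] the matching values (v may be PETSC_NULL to set the
     nonzero structure only) */
  MatMPIAIJSetPreallocationCSR(B,i,j,v);   /* preallocates, inserts the values and assembles B */
  return(0);
}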
2289: /*@C
2290: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2291: (the default parallel PETSc format). For good matrix assembly performance
2292: the user should preallocate the matrix storage by setting the parameters
2293: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2294: performance can be increased by more than a factor of 50.
2296: Collective on MPI_Comm
2298: Input Parameters:
2299: + A - the matrix
2300: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2301: (same value is used for all local rows)
2302: . d_nnz - array containing the number of nonzeros in the various rows of the
2303: DIAGONAL portion of the local submatrix (possibly different for each row)
2304: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2305: The size of this array is equal to the number of local rows, i.e., 'm'.
2306: You must leave room for the diagonal entry even if it is zero.
2307: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2308: submatrix (same value is used for all local rows).
2309: - o_nnz - array containing the number of nonzeros in the various rows of the
2310: OFF-DIAGONAL portion of the local submatrix (possibly different for
2311: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2312: structure. The size of this array is equal to the number
2313: of local rows, i.e., 'm'.
2315: If the *_nnz parameter is given then the *_nz parameter is ignored
2317: The AIJ format (also called the Yale sparse matrix format or
2318: compressed row storage (CSR)), is fully compatible with standard Fortran 77
2319: storage. The stored row and column indices begin with zero. See the users manual for details.
2321: The parallel matrix is partitioned such that the first m0 rows belong to
2322: process 0, the next m1 rows belong to process 1, the next m2 rows belong
2323: to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2325: The DIAGONAL portion of the local submatrix of a processor can be defined
2326: as the submatrix which is obtained by extracting the part corresponding
2327: to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2328: first row that belongs to the processor, and r2 is the last row belonging
2329: to this processor. This is a square mxm matrix. The remaining portion
2330: of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2332: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2334: Example usage:
2335:
2336: Consider the following 8x8 matrix with 34 non-zero values that is
2337: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2338: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2339: as follows:
2341: .vb
2342: 1 2 0 | 0 3 0 | 0 4
2343: Proc0 0 5 6 | 7 0 0 | 8 0
2344: 9 0 10 | 11 0 0 | 12 0
2345: -------------------------------------
2346: 13 0 14 | 15 16 17 | 0 0
2347: Proc1 0 18 0 | 19 20 21 | 0 0
2348: 0 0 0 | 22 23 0 | 24 0
2349: -------------------------------------
2350: Proc2 25 26 27 | 0 0 28 | 29 0
2351: 30 0 0 | 31 32 33 | 0 34
2352: .ve
2354: This can be represented as a collection of submatrices as:
2356: .vb
2357: A B C
2358: D E F
2359: G H I
2360: .ve
2362: The submatrices A,B,C are owned by proc0, D,E,F are
2363: owned by proc1, and G,H,I are owned by proc2.
2365: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2366: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2367: The 'M','N' parameters are 8,8, and have the same values on all procs.
2369: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2370: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2371: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2372: Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2373: part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2374: matrix and [DF] as another SeqAIJ matrix.
2376: When d_nz, o_nz parameters are specified, d_nz storage elements are
2377: allocated for every row of the local diagonal submatrix, and o_nz
2378: storage locations are allocated for every row of the OFF-DIAGONAL submat.
2379: One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2380: local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2381: In this case, the values of d_nz,o_nz are:
2382: .vb
2383: proc0 : dnz = 2, o_nz = 2
2384: proc1 : dnz = 3, o_nz = 2
2385: proc2 : dnz = 1, o_nz = 4
2386: .ve
2387: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2388: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2389: for proc2, i.e., we are using 12+15+10=37 storage locations to store
2390: 34 values.
2392: When the d_nnz, o_nnz parameters are specified, the storage is specified
2393: for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2394: In the above case the values for d_nnz,o_nnz are:
2395: .vb
2396: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2397: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2398: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2399: .ve
2400: Here the space allocated is the sum of all the above values, i.e., 34, and
2401: hence the preallocation is exact.
2403: Level: intermediate
2405: .keywords: matrix, aij, compressed row, sparse, parallel
2407: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2408: MPIAIJ
2409: @*/
2410: PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2411: {
2412: PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2415: PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);
2416: if (f) {
2417: (*f)(B,d_nz,d_nnz,o_nz,o_nnz);
2418: }
2419: return(0);
2420: }
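/*
   A minimal usage sketch (illustrative only, not part of the original source): the
   preallocation call that proc0 of the 8x8 example above would make, using the per-row
   counts d_nnz = [2,2,2] and o_nnz = [2,2,2]. A is assumed to already have 3 local rows
   on this process.
*/
static PetscErrorCode ExamplePreallocateProc0(Mat A)
{
  PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
  MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);  /* d_nz,o_nz are ignored since the arrays are given */
  return(0);
}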
2424: /*@C
2425: MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2426: (the default parallel PETSc format). For good matrix assembly performance
2427: the user should preallocate the matrix storage by setting the parameters
2428: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2429: performance can be increased by more than a factor of 50.
2431: Collective on MPI_Comm
2433: Input Parameters:
2434: + comm - MPI communicator
2435: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2436: This value should be the same as the local size used in creating the
2437: y vector for the matrix-vector product y = Ax.
2438: . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2439: This value should be the same as the local size used in creating the
2440: x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
2441: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2442: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2443: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2444: (same value is used for all local rows)
2445: . d_nnz - array containing the number of nonzeros in the various rows of the
2446: DIAGONAL portion of the local submatrix (possibly different for each row)
2447: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2448: The size of this array is equal to the number of local rows, i.e., 'm'.
2449: You must leave room for the diagonal entry even if it is zero.
2450: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2451: submatrix (same value is used for all local rows).
2452: - o_nnz - array containing the number of nonzeros in the various rows of the
2453: OFF-DIAGONAL portion of the local submatrix (possibly different for
2454: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2455: structure. The size of this array is equal to the number
2456: of local rows, i.e., 'm'.
2458: Output Parameter:
2459: . A - the matrix
2461: Notes:
2462: If the *_nnz parameter is given then the *_nz parameter is ignored
2464: m,n,M,N parameters specify the size of the matrix, and its partitioning across
2465: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2466: storage requirements for this matrix.
2468: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2469: processor then it must be used on all processors that share the object for
2470: that argument.
2472: The user MUST specify either the local or global matrix dimensions
2473: (possibly both).
2475: The parallel matrix is partitioned such that the first m0 rows belong to
2476: process 0, the next m1 rows belong to process 1, the next m2 rows belong
2477: to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2479: The DIAGONAL portion of the local submatrix of a processor can be defined
2480: as the submatrix which is obtained by extracting the part corresponding
2481: to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2482: first row that belongs to the processor, and r2 is the last row belonging
2483: to this processor. This is a square mxm matrix. The remaining portion
2484: of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2486: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2488: When calling this routine with a single process communicator, a matrix of
2489: type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
2490: type of communicator, use the construction mechanism:
2491: MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2493: By default, this format uses inodes (identical nodes) when possible.
2494: We search for consecutive rows with the same nonzero structure, thereby
2495: reusing matrix information to achieve increased efficiency.
2497: Options Database Keys:
2498: + -mat_aij_no_inode - Do not use inodes
2499: . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2500: - -mat_aij_oneindex - Internally use indexing starting at 1
2501: rather than 0. Note that when calling MatSetValues(),
2502: the user still MUST index entries starting at 0!
2505: Example usage:
2506:
2507: Consider the following 8x8 matrix with 34 non-zero values that is
2508: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2509: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2510: as follows:
2512: .vb
2513: 1 2 0 | 0 3 0 | 0 4
2514: Proc0 0 5 6 | 7 0 0 | 8 0
2515: 9 0 10 | 11 0 0 | 12 0
2516: -------------------------------------
2517: 13 0 14 | 15 16 17 | 0 0
2518: Proc1 0 18 0 | 19 20 21 | 0 0
2519: 0 0 0 | 22 23 0 | 24 0
2520: -------------------------------------
2521: Proc2 25 26 27 | 0 0 28 | 29 0
2522: 30 0 0 | 31 32 33 | 0 34
2523: .ve
2525: This can be represented as a collection of submatrices as:
2527: .vb
2528: A B C
2529: D E F
2530: G H I
2531: .ve
2533: The submatrices A,B,C are owned by proc0, D,E,F are
2534: owned by proc1, and G,H,I are owned by proc2.
2536: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2537: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2538: The 'M','N' parameters are 8,8, and have the same values on all procs.
2540: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2541: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2542: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2543: Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2544: part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2545: matrix and [DF] as another SeqAIJ matrix.
2547: When d_nz, o_nz parameters are specified, d_nz storage elements are
2548: allocated for every row of the local diagonal submatrix, and o_nz
2549: storage locations are allocated for every row of the OFF-DIAGONAL submat.
2550: One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2551: local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2552: In this case, the values of d_nz,o_nz are:
2553: .vb
2554: proc0 : dnz = 2, o_nz = 2
2555: proc1 : dnz = 3, o_nz = 2
2556: proc2 : dnz = 1, o_nz = 4
2557: .ve
2558: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2559: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2560: for proc2, i.e., we are using 12+15+10=37 storage locations to store
2561: 34 values.
2563: When the d_nnz, o_nnz parameters are specified, the storage is specified
2564: for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2565: In the above case the values for d_nnz,o_nnz are:
2566: .vb
2567: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2568: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2569: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2570: .ve
2571: Here the space allocated is the sum of all the above values, i.e., 34, and
2572: hence the preallocation is exact.
2574: Level: intermediate
2576: .keywords: matrix, aij, compressed row, sparse, parallel
2578: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2579: MPIAIJ
2580: @*/
2581: PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2582: {
2584: PetscMPIInt size;
2587: MatCreate(comm,m,n,M,N,A);
2588: MPI_Comm_size(comm,&size);
2589: if (size > 1) {
2590: MatSetType(*A,MATMPIAIJ);
2591: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
2592: } else {
2593: MatSetType(*A,MATSEQAIJ);
2594: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
2595: }
2596: return(0);
2597: }
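/*
   A minimal usage sketch (illustrative only, not part of the original source): creating the
   8x8 example matrix from the discussion above with global sizes only and a rough constant
   preallocation (at most 3 diagonal and 4 off-diagonal nonzeros per row).
*/
static PetscErrorCode ExampleCreateMPIAIJ(MPI_Comm comm,Mat *A)
{
  /* let PETSc determine the local sizes from the global 8x8 dimensions */
  MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,8,8,3,PETSC_NULL,4,PETSC_NULL,A);
  return(0);
}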
2601: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2602: {
2603: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2606: *Ad = a->A;
2607: *Ao = a->B;
2608: *colmap = a->garray;
2609: return(0);
2610: }
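/*
   A minimal usage sketch (illustrative only, not part of the original source): accessing the
   two sequential blocks of an MPIAIJ matrix. colmap[i] gives the GLOBAL column corresponding
   to local column i of the off-diagonal block.
*/
static PetscErrorCode ExampleGetSeqAIJ(Mat A)
{
  Mat      Ad,Ao;
  PetscInt *colmap,md,nd,mo,no;

  MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
  MatGetSize(Ad,&md,&nd);     /* diagonal block: local rows by local columns */
  MatGetSize(Ao,&mo,&no);     /* off-diagonal block: local rows by number of nonzero ghost columns */
  return(0);
}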
2614: PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2615: {
2617: PetscInt i;
2618: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2621: if (coloring->ctype == IS_COLORING_LOCAL) {
2622: ISColoringValue *allcolors,*colors;
2623: ISColoring ocoloring;
2625: /* set coloring for diagonal portion */
2626: MatSetColoring_SeqAIJ(a->A,coloring);
2628: /* set coloring for off-diagonal portion */
2629: ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
2630: PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);
2631: for (i=0; i<a->B->n; i++) {
2632: colors[i] = allcolors[a->garray[i]];
2633: }
2634: PetscFree(allcolors);
2635: ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2636: MatSetColoring_SeqAIJ(a->B,ocoloring);
2637: ISColoringDestroy(ocoloring);
2638: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2639: ISColoringValue *colors;
2640: PetscInt *larray;
2641: ISColoring ocoloring;
2643: /* set coloring for diagonal portion */
2644: PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);
2645: for (i=0; i<a->A->n; i++) {
2646: larray[i] = i + a->cstart;
2647: }
2648: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);
2649: PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);
2650: for (i=0; i<a->A->n; i++) {
2651: colors[i] = coloring->colors[larray[i]];
2652: }
2653: PetscFree(larray);
2654: ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);
2655: MatSetColoring_SeqAIJ(a->A,ocoloring);
2656: ISColoringDestroy(ocoloring);
2658: /* set coloring for off-diagonal portion */
2659: PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);
2660: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);
2661: PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);
2662: for (i=0; i<a->B->n; i++) {
2663: colors[i] = coloring->colors[larray[i]];
2664: }
2665: PetscFree(larray);
2666: ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2667: MatSetColoring_SeqAIJ(a->B,ocoloring);
2668: ISColoringDestroy(ocoloring);
2669: } else {
2670: SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2671: }
2673: return(0);
2674: }
2678: PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2679: {
2680: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2684: MatSetValuesAdic_SeqAIJ(a->A,advalues);
2685: MatSetValuesAdic_SeqAIJ(a->B,advalues);
2686: return(0);
2687: }
2691: PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2692: {
2693: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2697: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
2698: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
2699: return(0);
2700: }
2704: /*@C
2705: MatMerge - Creates a single large PETSc matrix by concatenating sequential
2706: matrices from each processor
2708: Collective on MPI_Comm
2710: Input Parameters:
2711: + comm - the communicator the parallel matrix will live on
2712: . inmat - the input sequential matrix (one per processor)
2713: . n - number of local columns (or PETSC_DECIDE)
2714: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2716: Output Parameter:
2717: . outmat - the parallel matrix generated
2719: Level: advanced
2721: Notes: The number of columns of the matrix in EACH processor MUST be the same.
2723: @*/
2724: PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2725: {
2727: PetscInt m,N,i,rstart,nnz,I,*dnz,*onz;
2728: PetscInt *indx;
2729: PetscScalar *values;
2730: PetscMap columnmap,rowmap;
2733: MatGetSize(inmat,&m,&N);
2734: /*
2735: PetscMPIInt rank;
2736: MPI_Comm_rank(comm,&rank);
2737: PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2738: */
2739: if (scall == MAT_INITIAL_MATRIX){
2740: /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2741: if (n == PETSC_DECIDE){
2742: PetscMapCreate(comm,&columnmap);
2743: PetscMapSetSize(columnmap,N);
2744: PetscMapSetType(columnmap,MAP_MPI);
2745: PetscMapGetLocalSize(columnmap,&n);
2746: PetscMapDestroy(columnmap);
2747: }
2749: PetscMapCreate(comm,&rowmap);
2750: PetscMapSetLocalSize(rowmap,m);
2751: PetscMapSetType(rowmap,MAP_MPI);
2752: PetscMapGetLocalRange(rowmap,&rstart,0);
2753: PetscMapDestroy(rowmap);
2755: MatPreallocateInitialize(comm,m,n,dnz,onz);
2756: for (i=0;i<m;i++) {
2757: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
2758: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
2759: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
2760: }
2761: /* This routine will ONLY return an MPIAIJ type matrix */
2762: MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);
2763: MatSetType(*outmat,MATMPIAIJ);
2764: MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
2765: MatPreallocateFinalize(dnz,onz);
2766:
2767: } else if (scall == MAT_REUSE_MATRIX){
2768: MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);
2769: } else {
2770: SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2771: }
2773: for (i=0;i<m;i++) {
2774: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
2775: I = i + rstart;
2776: MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);
2777: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
2778: }
2779: MatDestroy(inmat);
2780: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
2781: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
2783: return(0);
2784: }
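/*
   A minimal usage sketch (illustrative only, not part of the original source): gluing the
   per-process SeqAIJ matrices (all with the same number of columns) into one parallel MPIAIJ
   matrix. Note that MatMerge() destroys the input matrix, as the routine above shows.
*/
static PetscErrorCode ExampleMerge(MPI_Comm comm,Mat localmat,Mat *globalmat)
{
  MatMerge(comm,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,globalmat);  /* localmat is destroyed here */
  return(0);
}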
2788: PetscErrorCode MatFileSplit(Mat A,char *outfile)
2789: {
2790: PetscErrorCode ierr;
2791: PetscMPIInt rank;
2792: PetscInt m,N,i,rstart,nnz;
2793: size_t len;
2794: const PetscInt *indx;
2795: PetscViewer out;
2796: char *name;
2797: Mat B;
2798: const PetscScalar *values;
2801: MatGetLocalSize(A,&m,0);
2802: MatGetSize(A,0,&N);
2803: /* Should this be the type of the diagonal block of A? */
2804: MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);
2805: MatSetType(B,MATSEQAIJ);
2806: MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
2807: MatGetOwnershipRange(A,&rstart,0);
2808: for (i=0;i<m;i++) {
2809: MatGetRow(A,i+rstart,&nnz,&indx,&values);
2810: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
2811: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
2812: }
2813: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2814: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2816: MPI_Comm_rank(A->comm,&rank);
2817: PetscStrlen(outfile,&len);
2818: PetscMalloc((len+5)*sizeof(char),&name);
2819: sprintf(name,"%s.%d",outfile,rank);
2820: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);
2821: PetscFree(name);
2822: MatView(B,out);
2823: PetscViewerDestroy(out);
2824: MatDestroy(B);
2825: return(0);
2826: }
2828: EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2831: PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2832: {
2833: PetscErrorCode ierr;
2834: Mat_Merge_SeqsToMPI *merge;
2835: PetscObjectContainer container;
2838: PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);
2839: if (container) {
2840: PetscObjectContainerGetPointer(container,(void **)&merge);
2841: PetscFree(merge->id_r);
2842: PetscFree(merge->len_s);
2843: PetscFree(merge->len_r);
2844: PetscFree(merge->bi);
2845: PetscFree(merge->bj);
2846: PetscFree(merge->buf_ri);
2847: PetscFree(merge->buf_rj);
2848: PetscMapDestroy(merge->rowmap);
2849: if (merge->coi){PetscFree(merge->coi);}
2850: if (merge->coj){PetscFree(merge->coj);}
2851: if (merge->owners_co){PetscFree(merge->owners_co);}
2852:
2853: PetscObjectContainerDestroy(container);
2854: PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);
2855: }
2856: PetscFree(merge);
2858: MatDestroy_MPIAIJ(A);
2859: return(0);
2860: }
2862: #include src/mat/utils/freespace.h
2863: #include petscbt.h
2866: /*@C
2867: MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
2868: matrices from each processor
2870: Collective on MPI_Comm
2872: Input Parameters:
2873: + comm - the communicator the parallel matrix will live on
2874: . seqmat - the input sequential matrix (one per processor)
2875: . m - number of local rows (or PETSC_DECIDE)
2876: . n - number of local columns (or PETSC_DECIDE)
2877: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2879: Output Parameter:
2880: . mpimat - the parallel matrix generated
2882: Level: advanced
2884: Notes:
2885: The dimensions of the sequential matrix in each processor MUST be the same.
2886: The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
2887: destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2888: @*/
2889: static PetscEvent logkey_seqstompinum = 0;
2890: PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2891: {
2892: PetscErrorCode ierr;
2893: MPI_Comm comm=mpimat->comm;
2894: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
2895: PetscMPIInt size,rank,taga,*len_s;
2896: PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2897: PetscInt proc,m;
2898: PetscInt **buf_ri,**buf_rj;
2899: PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2900: PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
2901: MPI_Request *s_waits,*r_waits;
2902: MPI_Status *status;
2903: MatScalar *aa=a->a,**abuf_r,*ba_i;
2904: Mat_Merge_SeqsToMPI *merge;
2905: PetscObjectContainer container;
2906:
2908: if (!logkey_seqstompinum) {
2909: PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
2910: }
2911: PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);
2913: MPI_Comm_size(comm,&size);
2914: MPI_Comm_rank(comm,&rank);
2916: PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);
2917: if (container) {
2918: PetscObjectContainerGetPointer(container,(void **)&merge);
2919: }
2920: bi = merge->bi;
2921: bj = merge->bj;
2922: buf_ri = merge->buf_ri;
2923: buf_rj = merge->buf_rj;
2925: PetscMalloc(size*sizeof(MPI_Status),&status);
2926: PetscMapGetGlobalRange(merge->rowmap,&owners);
2927: len_s = merge->len_s;
2929: /* send and recv matrix values */
2930: /*-----------------------------*/
2931: PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);
2932: PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);
2934: PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);
2935: for (proc=0,k=0; proc<size; proc++){
2936: if (!len_s[proc]) continue;
2937: i = owners[proc];
2938: MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
2939: k++;
2940: }
2942: MPI_Waitall(merge->nrecv,r_waits,status);
2943: MPI_Waitall(merge->nsend,s_waits,status);
2944: PetscFree(status);
2946: PetscFree(s_waits);
2947: PetscFree(r_waits);
2949: /* insert mat values of mpimat */
2950: /*----------------------------*/
2951: PetscMalloc(N*sizeof(MatScalar),&ba_i);
2952: PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);
2953: nextrow = buf_ri_k + merge->nrecv;
2954: nextai = nextrow + merge->nrecv;
2956: for (k=0; k<merge->nrecv; k++){
2957: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
2958: nrows = *(buf_ri_k[k]);
2959: nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */
2960: nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
2961: }
2963: /* set values of ba */
2964: PetscMapGetLocalSize(merge->rowmap,&m);
2965: for (i=0; i<m; i++) {
2966: arow = owners[rank] + i;
2967: bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
2968: bnzi = bi[i+1] - bi[i];
2969: PetscMemzero(ba_i,bnzi*sizeof(MatScalar));
2971: /* add local non-zero vals of this proc's seqmat into ba */
2972: anzi = ai[arow+1] - ai[arow];
2973: aj = a->j + ai[arow];
2974: aa = a->a + ai[arow];
2975: nextaj = 0;
2976: for (j=0; nextaj<anzi; j++){
2977: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2978: ba_i[j] += aa[nextaj++];
2979: }
2980: }
2982: /* add received vals into ba */
2983: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2984: /* i-th row */
2985: if (i == *nextrow[k]) {
2986: anzi = *(nextai[k]+1) - *nextai[k];
2987: aj = buf_rj[k] + *(nextai[k]);
2988: aa = abuf_r[k] + *(nextai[k]);
2989: nextaj = 0;
2990: for (j=0; nextaj<anzi; j++){
2991: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2992: ba_i[j] += aa[nextaj++];
2993: }
2994: }
2995: nextrow[k]++; nextai[k]++;
2996: }
2997: }
2998: MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
2999: }
3000: MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
3001: MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);
3003: PetscFree(abuf_r);
3004: PetscFree(ba_i);
3005: PetscFree(buf_ri_k);
3006: PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);
3007: return(0);
3008: }
3009: static PetscEvent logkey_seqstompisym = 0;
3010: PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3011: {
3012: PetscErrorCode ierr;
3013: Mat B_mpi;
3014: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
3015: PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3016: PetscInt **buf_rj,**buf_ri,**buf_ri_k;
3017: PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3018: PetscInt len,proc,*dnz,*onz;
3019: PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3020: PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3021: MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
3022: MPI_Status *status;
3023: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
3024: PetscBT lnkbt;
3025: Mat_Merge_SeqsToMPI *merge;
3026: PetscObjectContainer container;
3029: if (!logkey_seqstompisym) {
3030: PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3031: }
3032: PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);
3034: MPI_Comm_size(comm,&size);
3035: MPI_Comm_rank(comm,&rank);
3036:
3037: PetscNew(Mat_Merge_SeqsToMPI,&merge);
3038: PetscMalloc(size*sizeof(MPI_Status),&status);
3040: /* determine row ownership */
3041: /*---------------------------------------------------------*/
3042: PetscMapCreate(comm,&merge->rowmap);
3043: if (m == PETSC_DECIDE) {
3044: PetscMapSetSize(merge->rowmap,M);
3045: } else {
3046: PetscMapSetLocalSize(merge->rowmap,m);
3047: }
3048: PetscMapSetType(merge->rowmap,MAP_MPI);
3049: PetscMalloc(size*sizeof(PetscMPIInt),&len_si);
3050: PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);
3051:
3052: if (m == PETSC_DECIDE) {PetscMapGetLocalSize(merge->rowmap,&m); }
3053: PetscMapGetGlobalRange(merge->rowmap,&owners);
3055: /* determine the number of messages to send, their lengths */
3056: /*---------------------------------------------------------*/
3057: len_s = merge->len_s;
3059: len = 0; /* length of buf_si[] */
3060: merge->nsend = 0;
3061: for (proc=0; proc<size; proc++){
3062: len_si[proc] = 0;
3063: if (proc == rank){
3064: len_s[proc] = 0;
3065: } else {
3066: len_si[proc] = owners[proc+1] - owners[proc] + 1;
3067: len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3068: }
3069: if (len_s[proc]) {
3070: merge->nsend++;
3071: nrows = 0;
3072: for (i=owners[proc]; i<owners[proc+1]; i++){
3073: if (ai[i+1] > ai[i]) nrows++;
3074: }
3075: len_si[proc] = 2*(nrows+1);
3076: len += len_si[proc];
3077: }
3078: }
3080: /* determine the number and length of messages to receive for ij-structure */
3081: /*-------------------------------------------------------------------------*/
3082: PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);
3083: PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);
3085: /* post the Irecv of j-structure */
3086: /*-------------------------------*/
3087: PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);
3088: PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);
3090: /* post the Isend of j-structure */
3091: /*--------------------------------*/
3092: PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);
3093: sj_waits = si_waits + merge->nsend;
3095: for (proc=0, k=0; proc<size; proc++){
3096: if (!len_s[proc]) continue;
3097: i = owners[proc];
3098: MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
3099: k++;
3100: }
3102: /* receives and sends of j-structure are complete */
3103: /*------------------------------------------------*/
3104: MPI_Waitall(merge->nrecv,rj_waits,status);
3105: MPI_Waitall(merge->nsend,sj_waits,status);
3106:
3107: /* send and recv i-structure */
3108: /*---------------------------*/
3109: PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);
3110: PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);
3111:
3112: PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);
3113: buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
3114: for (proc=0,k=0; proc<size; proc++){
3115: if (!len_s[proc]) continue;
3116: /* form outgoing message for i-structure:
3117: buf_si[0]: nrows to be sent
3118: [1:nrows]: row index (global)
3119: [nrows+1:2*nrows+1]: i-structure index
3120: */
3121: /*-------------------------------------------*/
3122: nrows = len_si[proc]/2 - 1;
3123: buf_si_i = buf_si + nrows+1;
3124: buf_si[0] = nrows;
3125: buf_si_i[0] = 0;
3126: nrows = 0;
3127: for (i=owners[proc]; i<owners[proc+1]; i++){
3128: anzi = ai[i+1] - ai[i];
3129: if (anzi) {
3130: buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3131: buf_si[nrows+1] = i-owners[proc]; /* local row index */
3132: nrows++;
3133: }
3134: }
3135: MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
3136: k++;
3137: buf_si += len_si[proc];
3138: }
3140: MPI_Waitall(merge->nrecv,ri_waits,status);
3141: MPI_Waitall(merge->nsend,si_waits,status);
3143: PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);
3144: for (i=0; i<merge->nrecv; i++){
3145: PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);
3146: }
3148: PetscFree(len_si);
3149: PetscFree(len_ri);
3150: PetscFree(rj_waits);
3151: PetscFree(si_waits);
3152: PetscFree(ri_waits);
3153: PetscFree(buf_s);
3154: PetscFree(status);
3156: /* compute a local seq matrix in each processor */
3157: /*----------------------------------------------*/
3158: /* allocate bi array and free space for accumulating nonzero column info */
3159: PetscMalloc((m+1)*sizeof(PetscInt),&bi);
3160: bi[0] = 0;
3162: /* create and initialize a linked list */
3163: nlnk = N+1;
3164: PetscLLCreate(N,N,nlnk,lnk,lnkbt);
3165:
3166: /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3167: len = 0;
3168: len = ai[owners[rank+1]] - ai[owners[rank]];
3169: GetMoreSpace((PetscInt)(2*len+1),&free_space);
3170: current_space = free_space;
3172: /* determine symbolic info for each local row */
3173: PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);
3174: nextrow = buf_ri_k + merge->nrecv;
3175: nextai = nextrow + merge->nrecv;
3176: for (k=0; k<merge->nrecv; k++){
3177: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3178: nrows = *buf_ri_k[k];
3179: nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
3180: nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3181: }
3183: MatPreallocateInitialize(comm,m,n,dnz,onz);
3184: len = 0;
3185: for (i=0;i<m;i++) {
3186: bnzi = 0;
3187: /* add local non-zero cols of this proc's seqmat into lnk */
3188: arow = owners[rank] + i;
3189: anzi = ai[arow+1] - ai[arow];
3190: aj = a->j + ai[arow];
3191: PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);
3192: bnzi += nlnk;
3193: /* add received col data into lnk */
3194: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3195: if (i == *nextrow[k]) { /* i-th row */
3196: anzi = *(nextai[k]+1) - *nextai[k];
3197: aj = buf_rj[k] + *nextai[k];
3198: PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);
3199: bnzi += nlnk;
3200: nextrow[k]++; nextai[k]++;
3201: }
3202: }
3203: if (len < bnzi) len = bnzi; /* =max(bnzi) */
3205: /* if free space is not available, make more free space */
3206: if (current_space->local_remaining<bnzi) {
3207: GetMoreSpace(current_space->total_array_size,&current_space);
3208: nspacedouble++;
3209: }
3210: /* copy data into free space, then initialize lnk */
3211: PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
3212: MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);
3214: current_space->array += bnzi;
3215: current_space->local_used += bnzi;
3216: current_space->local_remaining -= bnzi;
3217:
3218: bi[i+1] = bi[i] + bnzi;
3219: }
3220:
3221: PetscFree(buf_ri_k);
3223: PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);
3224: MakeSpaceContiguous(&free_space,bj);
3225: PetscLLDestroy(lnk,lnkbt);
3227: /* create symbolic parallel matrix B_mpi */
3228: /*---------------------------------------*/
3229: if (n==PETSC_DECIDE) {
3230: MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);
3231: } else {
3232: MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);
3233: }
3234: MatSetType(B_mpi,MATMPIAIJ);
3235: MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
3236: MatPreallocateFinalize(dnz,onz);
3238: /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3239: B_mpi->assembled = PETSC_FALSE;
3240: B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
3241: merge->bi = bi;
3242: merge->bj = bj;
3243: merge->buf_ri = buf_ri;
3244: merge->buf_rj = buf_rj;
3245: merge->coi = PETSC_NULL;
3246: merge->coj = PETSC_NULL;
3247: merge->owners_co = PETSC_NULL;
3249: /* attach the supporting struct to B_mpi for reuse */
3250: PetscObjectContainerCreate(PETSC_COMM_SELF,&container);
3251: PetscObjectContainerSetPointer(container,merge);
3252: PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
3253: *mpimat = B_mpi;
3254: PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);
3255: return(0);
3256: }
3258: static PetscEvent logkey_seqstompi = 0;
3259: PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3260: {
3261: PetscErrorCode ierr;
3264: if (!logkey_seqstompi) {
3265: PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3266: }
3267: PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);
3268: if (scall == MAT_INITIAL_MATRIX){
3269: MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);
3270: }
3271: MatMerge_SeqsToMPINumeric(seqmat,*mpimat);
3272: PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);
3273: return(0);
3274: }
3275: static PetscEvent logkey_getlocalmat = 0;
3278: /*@C
3279: MatGetLocalMat - Creates a SeqAIJ matrix by taking all of A's local rows
3281: Not Collective
3283: Input Parameters:
3284: + A - the matrix
3285: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3287: Output Parameter:
3288: . A_loc - the local sequential matrix generated
3290: Level: developer
3292: @*/
3293: PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3294: {
3295: PetscErrorCode ierr;
3296: Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
3297: Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3298: PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3299: PetscScalar *aa=a->a,*ba=b->a,*ca;
3300: PetscInt am=A->m,i,j,k,cstart=mpimat->cstart;
3301: PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
3304: if (!logkey_getlocalmat) {
3305: PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3306: }
3307: PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);
3308: if (scall == MAT_INITIAL_MATRIX){
3309: PetscMalloc((1+am)*sizeof(PetscInt),&ci);
3310: ci[0] = 0;
3311: for (i=0; i<am; i++){
3312: ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3313: }
3314: PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);
3315: PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);
3316: k = 0;
3317: for (i=0; i<am; i++) {
3318: ncols_o = bi[i+1] - bi[i];
3319: ncols_d = ai[i+1] - ai[i];
3320: /* off-diagonal portion of A */
3321: for (jo=0; jo<ncols_o; jo++) {
3322: col = cmap[*bj];
3323: if (col >= cstart) break;
3324: cj[k] = col; bj++;
3325: ca[k++] = *ba++;
3326: }
3327: /* diagonal portion of A */
3328: for (j=0; j<ncols_d; j++) {
3329: cj[k] = cstart + *aj++;
3330: ca[k++] = *aa++;
3331: }
3332: /* off-diagonal portion of A */
3333: for (j=jo; j<ncols_o; j++) {
3334: cj[k] = cmap[*bj++];
3335: ca[k++] = *ba++;
3336: }
3337: }
3338: /* put together the new matrix */
3339: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);
3340: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3341: /* Since these are PETSc arrays, change flags to free them as necessary. */
3342: mat = (Mat_SeqAIJ*)(*A_loc)->data;
3343: mat->freedata = PETSC_TRUE;
3344: mat->nonew = 0;
3345: } else if (scall == MAT_REUSE_MATRIX){
3346: mat=(Mat_SeqAIJ*)(*A_loc)->data;
3347: ci = mat->i; cj = mat->j; ca = mat->a;
3348: for (i=0; i<am; i++) {
3349: /* off-diagonal portion of A */
3350: ncols_o = bi[i+1] - bi[i];
3351: for (jo=0; jo<ncols_o; jo++) {
3352: col = cmap[*bj];
3353: if (col >= cstart) break;
3354: *ca++ = *ba++; bj++;
3355: }
3356: /* diagonal portion of A */
3357: ncols_d = ai[i+1] - ai[i];
3358: for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3359: /* off-diagonal portion of A */
3360: for (j=jo; j<ncols_o; j++) {
3361: *ca++ = *ba++; bj++;
3362: }
3363: }
3364: } else {
3365: SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3366: }
3368: PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);
3369: return(0);
3370: }
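/*
   A minimal usage sketch (illustrative only, not part of the original source): obtaining each
   process's local rows of an MPIAIJ matrix as one SeqAIJ matrix with global column indices,
   and refreshing the values later without rebuilding the structure.
*/
static PetscErrorCode ExampleGetLocalMat(Mat A)
{
  Mat A_loc;
  MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);   /* builds the merged local matrix */
  /* ... A changes numerically but keeps its nonzero pattern ... */
  MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);     /* reuses the structure, refreshes the values */
  MatDestroy(A_loc);
  return(0);
}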
3372: static PetscEvent logkey_getlocalmatcondensed = 0;
3375: /*@C
3376: MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all of A's local rows and NON-ZERO columns
3378: Not Collective
3380: Input Parameters:
3381: + A - the matrix
3382: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3383: - row, col - index sets of rows and columns to extract (or PETSC_NULL)
3385: Output Parameter:
3386: . A_loc - the local sequential matrix generated
3388: Level: developer
3390: @*/
3391: PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3392: {
3393: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
3394: PetscErrorCode ierr;
3395: PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3396: IS isrowa,iscola;
3397: Mat *aloc;
3400: if (!logkey_getlocalmatcondensed) {
3401: PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3402: }
3403: PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);
3404: if (!row){
3405: start = a->rstart; end = a->rend;
3406: ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
3407: } else {
3408: isrowa = *row;
3409: }
3410: if (!col){
3411: start = a->cstart;
3412: cmap = a->garray;
3413: nzA = a->A->n;
3414: nzB = a->B->n;
3415: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
3416: ncols = 0;
3417: for (i=0; i<nzB; i++) {
3418: if (cmap[i] < start) idx[ncols++] = cmap[i];
3419: else break;
3420: }
3421: imark = i;
3422: for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3423: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3424: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);
3425: PetscFree(idx);
3426: } else {
3427: iscola = *col;
3428: }
3429: if (scall != MAT_INITIAL_MATRIX){
3430: PetscMalloc(sizeof(Mat),&aloc);
3431: aloc[0] = *A_loc;
3432: }
3433: MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);
3434: *A_loc = aloc[0];
3435: PetscFree(aloc);
3436: if (!row){
3437: ISDestroy(isrowa);
3438: }
3439: if (!col){
3440: ISDestroy(iscola);
3441: }
3442: PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);
3443: return(0);
3444: }
3446: static PetscEvent logkey_GetBrowsOfAcols = 0;
3449: /*@C
3450: MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
3452: Collective on Mat
3454: Input Parameters:
3455: + A,B - the matrices in mpiaij format
3456: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3457: - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3459: Output Parameter:
3460: + rowb, colb - index sets of rows and columns of B to extract
3461: . brstart - row index of B_seq from which next B->m rows are taken from B's local rows
3462: - B_seq - the sequential matrix generated
3464: Level: developer
3466: @*/
3467: PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3468: {
3469: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3470: PetscErrorCode ierr;
3471: PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3472: IS isrowb,iscolb;
3473: Mat *bseq;
3474:
3476: if (a->cstart != b->rstart || a->cend != b->rend){
3477: SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3478: }
3479: if (!logkey_GetBrowsOfAcols) {
3480: PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3481: }
3482: PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);
3483:
3484: if (scall == MAT_INITIAL_MATRIX){
3485: start = a->cstart;
3486: cmap = a->garray;
3487: nzA = a->A->n;
3488: nzB = a->B->n;
3489: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
3490: ncols = 0;
3491: for (i=0; i<nzB; i++) { /* row < local row index */
3492: if (cmap[i] < start) idx[ncols++] = cmap[i];
3493: else break;
3494: }
3495: imark = i;
3496: for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
3497: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3498: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);
3499: PetscFree(idx);
3500: *brstart = imark;
3501: ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);
3502: } else {
3503: if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3504: isrowb = *rowb; iscolb = *colb;
3505: PetscMalloc(sizeof(Mat),&bseq);
3506: bseq[0] = *B_seq;
3507: }
3508: MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);
3509: *B_seq = bseq[0];
3510: PetscFree(bseq);
3511: if (!rowb){
3512: ISDestroy(isrowb);
3513: } else {
3514: *rowb = isrowb;
3515: }
3516: if (!colb){
3517: ISDestroy(iscolb);
3518: } else {
3519: *colb = iscolb;
3520: }
3521: PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);
3522: return(0);
3523: }
3525: static PetscEvent logkey_GetBrowsOfAocols = 0;
3528: /*@C
3529: MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3530: of the OFF-DIAGONAL portion of the local part of A
3532: Collective on Mat
3534: Input Parameters:
3535: + A,B - the matrices in mpiaij format
3536: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3537: . startsj - starting points in B's sending and receiving j-arrays, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
3538: - bufa_ptr - buffer for sending matrix values, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
3540: Output Parameter:
3541: . B_oth - the sequential matrix generated
3543: Level: developer
3545: @*/
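/*
   Editor's note: an illustrative call sequence (editorial addition, not part of the
   original source) for MatGetBrowsOfAoCols(). The first call creates B_oth together
   with the communication offsets (startsj) and the value buffer (bufa), which are
   handed back so a later MAT_REUSE_MATRIX call can refill B_oth's values without
   re-communicating the sparsity pattern. A and B are assumed to be assembled MPIAIJ
   matrices with compatible local layouts.

     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     Mat         B_oth    = PETSC_NULL;

     MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);
     ... use B_oth, e.g. for the off-process contribution to A*B ...
     MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);

     PetscFree(startsj); PetscFree(bufa); MatDestroy(B_oth);
*/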
3546: PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3547: {
3548: VecScatter_MPI_General *gen_to,*gen_from;
3549: PetscErrorCode ierr;
3550: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3551: Mat_SeqAIJ *b_oth;
3552: VecScatter ctx=a->Mvctx;
3553: MPI_Comm comm=ctx->comm;
3554: PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank;
3555: PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3556: PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA;
3557: PetscInt i,k,l,nrecvs,nsends,nrows,*rrow,*srow,*rstarts,*rstartsj,*sstarts,*sstartsj,len;
3558: MPI_Request *rwaits,*swaits;
3559: MPI_Status *sstatus,rstatus;
3560: PetscInt *cols;
3561: PetscScalar *vals;
3562: PetscMPIInt j;
3563:
3565: if (a->cstart != b->rstart || a->cend != b->rend){
3566: SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3567: }
3568: if (!logkey_GetBrowsOfAocols) {
3569: PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3570: }
3571: PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);
3572: MPI_Comm_rank(comm,&rank);
3574: gen_to = (VecScatter_MPI_General*)ctx->todata;
3575: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3576: rvalues = gen_from->values; /* holds the lengths of the rows to be received */
3577: svalues = gen_to->values; /* holds the lengths of the rows to be sent */
3578: nrecvs = gen_from->n;
3579: nsends = gen_to->n;
3580: rwaits = gen_from->requests;
3581: swaits = gen_to->requests;
3582: rrow = gen_from->indices; /* local row index to be received */
3583: srow = gen_to->indices; /* local row index to be sent */
3584: rstarts = gen_from->starts;
3585: sstarts = gen_to->starts;
3586: rprocs = gen_from->procs;
3587: sprocs = gen_to->procs;
3588: sstatus = gen_to->sstatus;
3590: if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3591: if (scall == MAT_INITIAL_MATRIX){
3592: /* i-array */
3593: /*---------*/
3594: /* post receives */
3595: for (i=0; i<nrecvs; i++){
3596: rowlen = (PetscInt*)rvalues + rstarts[i];
3597: nrows = rstarts[i+1]-rstarts[i];
3598: MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
3599: }
3601: /* pack the outgoing message */
3602: PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);
3603: rstartsj = sstartsj + nsends +1;
3604: sstartsj[0] = 0; rstartsj[0] = 0;
3605: len = 0; /* total length of j or a array to be sent */
3606: k = 0;
3607: for (i=0; i<nsends; i++){
3608: rowlen = (PetscInt*)svalues + sstarts[i];
3609: nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3610: for (j=0; j<nrows; j++) {
3611: row = srow[k] + b->rowners[rank]; /* global row idx */
3612: MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL); /* rowlength */
3613: len += rowlen[j];
3614: MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);
3615: k++;
3616: }
3617: MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);
3618: sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3619: }
3620: /* recvs and sends of i-array are completed */
3621: i = nrecvs;
3622: while (i--) {
3623: MPI_Waitany(nrecvs,rwaits,&j,&rstatus);
3624: }
3625: if (nsends) {
3626: MPI_Waitall(nsends,swaits,sstatus);
3627: }
3628: /* allocate buffers for sending j and a arrays */
3629: PetscMalloc((len+1)*sizeof(PetscInt),&bufj);
3630: PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);
3632: /* create i-array of B_oth */
3633: PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);
3634: b_othi[0] = 0;
3635: len = 0; /* total length of j or a array to be received */
3636: k = 0;
3637: for (i=0; i<nrecvs; i++){
3638: rowlen = (PetscInt*)rvalues + rstarts[i];
3639: nrows = rstarts[i+1]-rstarts[i];
3640: for (j=0; j<nrows; j++) {
3641: b_othi[k+1] = b_othi[k] + rowlen[j];
3642: len += rowlen[j]; k++;
3643: }
3644: rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3645: }
3647: /* allocate space for j and a arrays of B_oth */
3648: PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);
3649: PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);
3651: /* j-array */
3652: /*---------*/
3653: /* post receives of j-array */
3654: for (i=0; i<nrecvs; i++){
3655: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3656: MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
3657: }
3658: k = 0;
3659: for (i=0; i<nsends; i++){
3660: nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3661: bufJ = bufj+sstartsj[i];
3662: for (j=0; j<nrows; j++) {
3663: row = srow[k++] + b->rowners[rank]; /* global row idx */
3664: MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);
3665: for (l=0; l<ncols; l++){
3666: *bufJ++ = cols[l];
3667: }
3668: MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);
3669: }
3670: MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);
3671: }
3673: /* recvs and sends of j-array are completed */
3674: i = nrecvs;
3675: while (i--) {
3676: MPI_Waitany(nrecvs,rwaits,&j,&rstatus);
3677: }
3678: if (nsends) {
3679: MPI_Waitall(nsends,swaits,sstatus);
3680: }
3681: } else if (scall == MAT_REUSE_MATRIX){
3682: sstartsj = *startsj;
3683: rstartsj = sstartsj + nsends +1;
3684: bufa = *bufa_ptr;
3685: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
3686: b_otha = b_oth->a;
3687: } else {
3688: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Unsupported MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
3689: }
3691: /* a-array */
3692: /*---------*/
3693: /* post receives of a-array */
3694: for (i=0; i<nrecvs; i++){
3695: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3696: MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
3697: }
3698: k = 0;
3699: for (i=0; i<nsends; i++){
3700: nrows = sstarts[i+1]-sstarts[i];
3701: bufA = bufa+sstartsj[i];
3702: for (j=0; j<nrows; j++) {
3703: row = srow[k++] + b->rowners[rank]; /* global row idx */
3704: MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);
3705: for (l=0; l<ncols; l++){
3706: *bufA++ = vals[l];
3707: }
3708: MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);
3710: }
3711: MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
3712: }
3713: /* recvs and sends of a-array are completed */
3714: i = nrecvs;
3715: while (i--) {
3716: MPI_Waitany(nrecvs,rwaits,&j,&rstatus);
3717: }
3718: if (nsends) {
3719: MPI_Waitall(nsends,swaits,sstatus);
3720: }
3721:
3722: if (scall == MAT_INITIAL_MATRIX){
3723: /* put together the new matrix */
3724: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);
3726: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3727: /* Since these are PETSc arrays, change flags to free them as necessary. */
3728: b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3729: b_oth->freedata = PETSC_TRUE;
3730: b_oth->nonew = 0;
3732: PetscFree(bufj);
3733: if (!startsj || !bufa_ptr){
3734: PetscFree(sstartsj);
3735: PetscFree(bufa);
3736: } else {
3737: *startsj = sstartsj;
3738: *bufa_ptr = bufa;
3739: }
3740: }
3741: PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);
3742:
3743: return(0);
3744: }
3746: /*MC
3747: MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3749: Options Database Keys:
3750: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3752: Level: beginner
3754: .seealso: MatCreateMPIAIJ
3755: M*/
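/*
   Editor's note: a minimal creation sketch (editorial addition, not part of the
   original source) showing the two usual ways to obtain an MPIAIJ matrix; the global
   sizes M and N and the preallocation counts are placeholders.

     Mat A;

     Select the type at run time (this honors -mat_type mpiaij):
       MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);
       MatSetFromOptions(A);
       MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);

     or create it directly:
       MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
*/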
3760: PetscErrorCode MatCreate_MPIAIJ(Mat B)
3761: {
3762: Mat_MPIAIJ *b;
3764: PetscInt i;
3765: PetscMPIInt size;
3768: MPI_Comm_size(B->comm,&size);
3770: PetscNew(Mat_MPIAIJ,&b);
3771: B->data = (void*)b;
3772: PetscMemzero(b,sizeof(Mat_MPIAIJ));
3773: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
3774: B->factor = 0;
3775: B->assembled = PETSC_FALSE;
3776: B->mapping = 0;
3778: B->insertmode = NOT_SET_VALUES;
3779: b->size = size;
3780: MPI_Comm_rank(B->comm,&b->rank);
3782: PetscSplitOwnership(B->comm,&B->m,&B->M);
3783: PetscSplitOwnership(B->comm,&B->n,&B->N);
3785: /* the information in the maps duplicates the information computed below, eventually
3786: we should remove the duplicate information that is not contained in the maps */
3787: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
3788: PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);
3790: /* build local table of row and column ownerships */
3791: PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);
3792: PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
3793: b->cowners = b->rowners + b->size + 2;
3794: MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);
3795: b->rowners[0] = 0;
3796: for (i=2; i<=b->size; i++) {
3797: b->rowners[i] += b->rowners[i-1];
3798: }
3799: b->rstart = b->rowners[b->rank];
3800: b->rend = b->rowners[b->rank+1];
3801: MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);
3802: b->cowners[0] = 0;
3803: for (i=2; i<=b->size; i++) {
3804: b->cowners[i] += b->cowners[i-1];
3805: }
3806: b->cstart = b->cowners[b->rank];
3807: b->cend = b->cowners[b->rank+1];
3809: /* build cache for off array entries formed */
3810: MatStashCreate_Private(B->comm,1,&B->stash);
3811: b->donotstash = PETSC_FALSE;
3812: b->colmap = 0;
3813: b->garray = 0;
3814: b->roworiented = PETSC_TRUE;
3816: /* stuff used for matrix vector multiply */
3817: b->lvec = PETSC_NULL;
3818: b->Mvctx = PETSC_NULL;
3820: /* stuff for MatGetRow() */
3821: b->rowindices = 0;
3822: b->rowvalues = 0;
3823: b->getrowactive = PETSC_FALSE;
3825: /* Explicitly create 2 MATSEQAIJ matrices. */
3826: MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);
3827: MatSetType(b->A,MATSEQAIJ);
3828: PetscLogObjectParent(B,b->A);
3829: MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);
3830: MatSetType(b->B,MATSEQAIJ);
3831: PetscLogObjectParent(B,b->B);
3833: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3834: "MatStoreValues_MPIAIJ",
3835: MatStoreValues_MPIAIJ);
3836: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3837: "MatRetrieveValues_MPIAIJ",
3838: MatRetrieveValues_MPIAIJ);
3839: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3840: "MatGetDiagonalBlock_MPIAIJ",
3841: MatGetDiagonalBlock_MPIAIJ);
3842: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3843: "MatIsTranspose_MPIAIJ",
3844: MatIsTranspose_MPIAIJ);
3845: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3846: "MatMPIAIJSetPreallocation_MPIAIJ",
3847: MatMPIAIJSetPreallocation_MPIAIJ);
3848: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3849: "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3850: MatMPIAIJSetPreallocationCSR_MPIAIJ);
3851: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3852: "MatDiagonalScaleLocal_MPIAIJ",
3853: MatDiagonalScaleLocal_MPIAIJ);
3854: return(0);
3855: }